[Statistics][Foreign Exchange]水曜日のドル円は上昇する、あるいは7月のドル円は下落する

Table of Contents

使用データ

引き続き時系列データの解析を行う。便利なので、ドル円の始値、高値などのデータを再度用いる。

一気に解析・図示

大した内容ではないので、一気にプロットまで。

import pandas as pd
import numpy as np
import scipy
from scipy import stats as st

from matplotlib import pylab as plt
import seaborn as sns
sns.set()
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd

import datetime
import time

# Load the daily USD/JPY bars and parse the date column into datetimes.
USDJPY_DAY = pd.read_csv("/Users/Documents/USDJPY_DAY.csv")
USDJPY_DAY["date"] = pd.to_datetime(USDJPY_DAY["date"])

# pandas element access is slow, so convert every column to a numpy array.
date = np.array(USDJPY_DAY["date"])
opening = np.array(USDJPY_DAY["opening"])  # opening price
high = np.array(USDJPY_DAY["high"])  # high price
low = np.array(USDJPY_DAY["low"])  # low price
closing = np.array(USDJPY_DAY["closing"])  # closing price

# English weekday name of every row.  `date` holds numpy datetime64 values
# (not strings), so strptime cannot be applied to it; day_name() also returns
# English names regardless of the process locale, which keeps the names
# aligned with the dictionary keys below.
weekday_names = USDJPY_DAY["date"].dt.day_name().to_numpy()

# Number of days on which the close was at or above the open, per weekday.
dict_week = {"Monday": 0, "Tuesday": 0, "Wednesday": 0, "Thursday": 0, "Friday": 0}
# Total number of days, per weekday.
dict_all = {"Monday": 0, "Tuesday": 0, "Wednesday": 0, "Thursday": 0, "Friday": 0}

# Fill dict_week and dict_all in a single pass.  A row that falls on a
# Saturday or Sunday raises KeyError instead of being dropped silently.
for weekday, open_price, close_price in zip(weekday_names, opening, closing):
    dict_all[weekday] += 1
    # NOTE: an unchanged day (close == open) is counted as an increase.
    if close_price >= open_price:
        dict_week[weekday] += 1

# Convert the counts into ratios (days up / total days) in weekday order.
ratio_of_increase = [a / b for a, b in zip(dict_week.values(), dict_all.values())]

# Plot; run the statements below together so they draw on the same figure.
plt.bar(list(dict_week), ratio_of_increase, color="skyblue", alpha=0.7)
plt.title("USDJPY_DAY")
plt.xlabel("week")
plt.ylabel("increase")
plt.savefig("/Users/Documents/week.png", format="png", dpi=300)

ここ数年の円安傾向を反映しているのか、金曜日以外では値上がりしている。金曜日に値下がりが多いのはポジションを解消するためであろうか。

\(\chi\)二乗検定

上昇する確率はいずれの曜日も同じ（おおよそ0.5）になると考えられるので、曜日と上昇・下落が独立であるかを\(\chi\)二乗検定で調べてみる（テーブルが\(2\times 5\)であり\(2\times 2\)ではないので、\(2\times 2\)表を前提とする通常のFisherの正確確率検定はそのままでは行えない）。

# Observed number of "up" days, one entry per weekday
# (presumably in Monday-to-Friday order -- confirm against the counts above).
observed_up = [413, 417, 419, 404, 391]

# "Down" days are whatever remains after removing the "up" days from the
# total number of days recorded for the same weekday.
observed_down = np.subtract([793, 795, 795, 795, 793], observed_up).tolist()

# Stack both rows into a 2x5 contingency table (row 0: up, row 1: down).
observed = [observed_up, observed_down]

# Chi-square test on the contingency table.
st.chi2_contingency(observed)

(2.532379386488967,
 0.6388472993439482,
 4,
 array([[408.18232183, 409.21178544, 409.21178544, 409.21178544,
         408.18232183],
        [384.81767817, 385.78821456, 385.78821456, 385.78821456,
         384.81767817]]))

\(p = 0.639\cdots\)となり、有意差があるとは言えない。

月によって上昇・下落に差があるか

手順は曜日の場合とほとんど同じなので、月によって上昇や下落に差があるのかもみてみよう。引き続き日足のデータを用いるが、月足に変換する。

# Reload the daily bars and index them by date so they can be resampled.
USDJPY_DAY = pd.read_csv("/Users/Documents/USDJPY_DAY.csv")
USDJPY_DAY["date"] = pd.to_datetime(USDJPY_DAY["date"])
USDJPY_DAY.set_index("date", inplace=True)

# Resample the daily bars into monthly bars ("MS" = month-start bins): first
# open, highest high, lowest low and last close of each month.  The
# aggregations are given by name because passing the builtins max/min to
# .agg() is deprecated in recent pandas.
# NOTE: from here on USDJPY_DAY holds MONTHLY rows despite its name.
USDJPY_DAY = USDJPY_DAY.resample("MS").agg(
    {"opening": "first", "high": "max", "low": "min", "closing": "last"}
)
# A month without any daily row becomes an all-NaN bar; drop it so that it is
# not counted as a "non-positive" month below.
USDJPY_DAY = USDJPY_DAY.dropna(how="all")

# pandas element access is slow, so convert every column to a numpy array.
opening = np.array(USDJPY_DAY["opening"])  # opening price
high = np.array(USDJPY_DAY["high"])  # high price
low = np.array(USDJPY_DAY["low"])  # low price
closing = np.array(USDJPY_DAY["closing"])  # closing price

# True for months whose close is strictly above the open.
USDJPY_DAY["positive"] = USDJPY_DAY["closing"] - USDJPY_DAY["opening"] > 0
# Share of positive months for each calendar month (1-12).
monthly_positive_ratio = USDJPY_DAY.groupby(USDJPY_DAY.index.month)["positive"].mean()

monthly_positive_ratioを用いてグラフの描画を行い、あわせて月ごとの上昇回数を集計してカイ二乗検定を行う。

# Chi-square test across calendar months.
# Group the monthly rows by calendar month (1-12) once and reuse the grouping.
by_month = USDJPY_DAY.groupby(USDJPY_DAY.index.month)

# Number of positive months for each calendar month.
positive_counts = by_month['positive'].sum()

# Number of remaining (non-positive) months for each calendar month.
negative_counts = by_month.size() - positive_counts

# Contingency table with one row per calendar month and two columns
# (positive, non-positive), i.e. 12x2 when all twelve months are present.
observed = pd.concat([positive_counts, negative_counts], axis=1)

# Run the chi-square test on the table.
st.chi2_contingency(observed)

(8.411507032102229,
 0.6760359835065202,
 11,
 array([[7.54054054, 7.45945946],
        [7.54054054, 7.45945946],
        [8.04324324, 7.95675676],
        [8.04324324, 7.95675676],
        [8.04324324, 7.95675676],
        [8.04324324, 7.95675676],
        [8.04324324, 7.95675676],
        [7.54054054, 7.45945946],
        [7.54054054, 7.45945946],
        [7.54054054, 7.45945946],
        [7.54054054, 7.45945946],
        [7.54054054, 7.45945946]]))

\(p = 0.676\cdots\)となり、有意差はない。ちなみに、（等分散性などは考慮せず）ANOVAも一応やってみたが、結果は同じである。グラフはこちら。

# Bar chart of the share of positive months per calendar month.
month_numbers = range(1, 13)
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

plt.figure(figsize=(10, 6))
plt.bar(
    monthly_positive_ratio.index,
    monthly_positive_ratio.values,
    color="skyblue",
    alpha=0.7,
)
plt.title('Monthly Positive Ratio of Closing - Opening')
plt.xlabel('Month')
plt.ylabel('Positive Ratio')
# Show month abbreviations instead of the numeric month index on the x axis.
plt.xticks(month_numbers, month_labels)
plt.show()

グラフではどう見ても7月は下落しているように見えるが、7月とそれ以外の月を比べて検定してみると、意外にも有意な差はない。

# July versus the other months.
# Number of Julys that closed above their open.
july_positive_count = USDJPY_DAY[USDJPY_DAY.index.month == 7]['positive'].sum()

# Number of July rows.  The rows are monthly bars here, so this counts Julys
# (one per year), not days.
july_days = len(USDJPY_DAY[USDJPY_DAY.index.month == 7])

# Share of positive months among all non-July months; used as the success
# probability under the null hypothesis.
other_months_positive_ratio = USDJPY_DAY[USDJPY_DAY.index.month != 7]['positive'].mean()

# Two-sided binomial test: is July's positive count compatible with the rate
# of the other months?  scipy.stats.binom_test is deprecated and removed in
# recent SciPy, so prefer binomtest and fall back only on old installations.
if hasattr(st, "binomtest"):
    p = st.binomtest(
        int(july_positive_count), n=july_days, p=other_months_positive_ratio
    ).pvalue
else:
    p = st.binom_test(july_positive_count, n=july_days, p=other_months_positive_ratio)

print(f"P-value = {p}")
P-value = 0.13208599522026054

様々な確率分布から、データが最もフィットするものを選ぶ方法
 単位根検定とは？Pythonでの実装方法と結果の解釈について
 変動値の従う分布について。正規分布と正規性の検定手法