# 协整套利的实现

## 协整关系检验函数 coint

import numpy as np
import pandas as pd
import statsmodels.api as sm
import seaborn as sns

def find_cointegrated_pairs(dataframe, significance=0.05):
    """Run a cointegration test on every pair of columns in ``dataframe``.

    Args:
        dataframe: DataFrame whose columns are the series to compare
            pairwise (one column per instrument).
        significance: p-value threshold; a pair is reported only when its
            p-value is strictly below this value. Defaults to 0.05.

    Returns:
        A ``(pvalue_matrix, pairs)`` tuple.

        ``pvalue_matrix`` is an ``(n, n)`` array initialised to 1, where
        ``n`` is the number of columns. Only entries ``[i, j]`` with
        ``i < j`` are overwritten with the test's p-value; the diagonal and
        the lower triangle stay at 1.

        ``pairs`` is a list of ``(name_i, name_j, pvalue)`` tuples for the
        pairs whose p-value is below ``significance``.
    """
    # Number of columns, i.e. number of series to compare.
    n = dataframe.shape[1]
    pvalue_matrix = np.ones((n, n))
    keys = dataframe.keys()
    pairs = []
    # Visit each unordered pair exactly once (j > i).
    for i in range(n):
        for j in range(i + 1, n):
            stock1 = dataframe[keys[i]]
            stock2 = dataframe[keys[j]]
            # The second element of coint()'s result is used as the p-value.
            result = sm.tsa.stattools.coint(stock1, stock2)
            pvalue = result[1]
            pvalue_matrix[i, j] = pvalue
            if pvalue < significance:
                pairs.append((keys[i], keys[j], pvalue))
    return pvalue_matrix, pairs

# Universe: the first 20 entries returned by D.instruments().
instruments = D.instruments()[:20]
# Sample window passed to D.history_data.
start_date = '2015-01-01'
end_date = '2017-02-18'
# Request the 'close' field for the selected instruments over the window.
prices_temp = D.history_data(
    instruments,
    start_date,
    end_date,
    fields=['close'],
)
# Pivot to one row per date and one column per instrument.
prices_df = pd.pivot_table(
    prices_temp,
    values='close',
    index=['date'],
    columns=['instrument'],
)
pvalues, pairs = find_cointegrated_pairs(prices_df)
# Optional: draw a heat map of the test results and print the pairs whose
# p-value is below the threshold.
#sns.heatmap(1-pvalues, xticklabels=instruments, yticklabels=instruments, cmap='RdYlGn_r', mask = (pvalues == 1))
#print(pairs)

# Tabulate the reported pairs: one row per (Name1, Name2, pvalue) tuple.
df = pd.DataFrame(
    pairs,
    index=range(len(pairs)),
    columns=['Name1', 'Name2', 'pvalue'],
)
# A smaller p-value is stronger evidence from the pairwise test, so ascending
# order lists the strongest candidates first.
# NOTE: the sorted frame is not assigned; it is only shown when this is the
# last expression of an interactive cell.
df.sort_values(by='pvalue')

# Plot the two price series on one set of axes and regress y on x.
x = prices_df["000012.SZA"]
y = prices_df["000017.SZA"]
# NOTE: `plt` is bound to the object returned by Series.plot() (the axes the
# series was drawn on), not to the matplotlib.pyplot module.
plt = x.plot()
plt.plot(y)
# The regressor matrix was never defined in the original (NameError on `X`).
# Build it from x with an intercept column.
# NOTE(review): assumes an intercept was intended — confirm.
X = sm.add_constant(x)
result = sm.OLS(y, X).fit()
print(result.summary())
plt.legend(["000012.SZA", "000017.SZA"], loc='best')

def zscore(series):
    """Standardise ``series``: subtract its mean, divide by its standard deviation.

    Args:
        series: Numeric data exposing ``.mean()`` and supporting element-wise
            arithmetic (e.g. a pandas Series or a NumPy array).

    Returns:
        An object of the same kind as ``series`` holding
        ``(series - series.mean()) / np.std(series)``.
    """
    return (series - series.mean()) / np.std(series)

# Standardised spread between the two series.
# NOTE(review): 0.2048 is a hard-coded coefficient, presumably the slope from
# the OLS fit of y on x — confirm against the regression summary.
XZ = zscore(0.2048 * x - y)
# NOTE: `plt` is bound to the object returned by Series.plot() (the axes the
# series was drawn on), not to the matplotlib.pyplot module.
plt = XZ.plot()
# The legend below names four artists ("z-score", "mean", "+1", "-1"), but the
# original drew only three, so the labels were shifted onto the wrong lines.
# Draw the mean line so every label matches its artist.
plt.axhline(XZ.mean(), color="black")
plt.axhline(1.0, color="red", linestyle="--")
plt.axhline(-1.0, color="green", linestyle="--")
plt.legend(["z-score", "mean", "+1", "-1"])