\chapter{Appendix}
\section{Python Code}
Programming environment: Python 3.8, Jupyter Notebook \\
Operating system: Windows 10 Home (Chinese edition) \\
Computer model: ASUS TUF Gaming FA506IU \\
Processor: AMD Ryzen 7 4800H with Radeon Graphics, 2.90\,GHz \\
Memory (RAM): 16.0\,GB
\subsection{Marginal Distributions of Returns}
\begin{lstlisting}[language=Python]
import numpy as np
import pandas as pd
import scipy.stats as st
from scipy.stats import t
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
sh = pd.read_csv('上证指数历史数据(2).csv')
sz = pd.read_csv('深证成份指数历史数据(2).csv')
hs = pd.read_csv('香港恒生指数历史数据.csv')
def get_pct(data):
    data.index = data['日期']
    data_pct = data['涨跌幅']
    return data_pct
sh_pct = get_pct(sh)
sz_pct = get_pct(sz)
hs_pct = get_pct(hs)
# Marginal densities
def show_edge(data, choose):
    mu = data.mean()
    sigma = data.std()
    num_bins = 100
    x = mu + sigma * np.random.randn(1000000)  # sample from the fitted normal
    data.hist(bins=300, figsize=(16,8), density=True, range=(-0.1,0.1),
              color='b', alpha=0.6, label='empirical density')
    n, bins, patches = plt.hist(x, num_bins, density=True, alpha=0)
    # fitted normal curve
    y = st.norm.pdf(bins, mu, sigma)
    plt.plot(bins, y, 'r', alpha=0.8, linewidth=2.5, label='normal distribution')
    # fitted t distribution with 10 degrees of freedom
    t_dimen = 10
    z = np.linspace(t.ppf(0.01, t_dimen, loc=mu, scale=sigma),
                    t.ppf(0.99, t_dimen, loc=mu, scale=sigma), 1000)
    plt.plot(z, t.pdf(z, t_dimen, loc=mu, scale=sigma), alpha=1.0,
             linewidth=2.5, c='gray', label='t distribution')
    # Gaussian kernel density estimate
    scipy_kde = st.gaussian_kde(data)
    X_plot = np.linspace(-0.1, 0.1, 1000)
    dens = scipy_kde.evaluate(X_plot)
    plt.plot(X_plot, dens, c='g', linewidth=2.5, label='kernel density')
    font = {'size': 15}  # font size for axis labels and title
    plt.xlabel('Expectation', font)
    plt.ylabel('Probability', font)
    plt.title('histogram of normal distribution: $\mu =$' +
              str(mu)[:6] + ', $\sigma =$' + str(sigma)[:6], font)
    axes = plt.gca()
    axes.set_xlim([-0.075, 0.075])
    plt.legend(loc=0, fontsize=15)  # show the legend
    plt.grid(True)
    plt.savefig(str(choose) + '.png')
    plt.show()
show_edge(sh_pct, choose='sh')
show_edge(sz_pct, choose='sz')
show_edge(hs_pct, choose='hs')
time_period = ['2015年12月9日', '2016年12月9日', '2017年12月8日',
               '2018年12月7日', '2019年12月9日', '2020年12月9日']
def get_period_pct(time_period):
    # Mean and standard deviation of each index over each one-year window
    n = len(time_period)
    S = []
    for i in range(n-1):
        period = slice(time_period[i], time_period[i+1])
        sh_mean, sh_std = sh_pct[period].mean(), sh_pct[period].std()
        sz_mean, sz_std = sz_pct[period].mean(), sz_pct[period].std()
        hs_mean, hs_std = hs_pct[period].mean(), hs_pct[period].std()
        S.append([sh_mean, sh_std, sz_mean, sz_std, hs_mean, hs_std])
    return pd.DataFrame(S)
get_period_pct(time_period)
\end{lstlisting}
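As a quick sanity check on the fitted marginals, a Kolmogorov--Smirnov test can compare the empirical returns with the fitted normal and $t$ distributions. The following is a minimal sketch and not part of the original analysis; it assumes the \texttt{sh\_pct} series from the listing above, and the 10 degrees of freedom mirror the hard-coded \texttt{t\_dimen}.
\begin{lstlisting}[language=Python]
import numpy as np
import scipy.stats as st

def ks_check(data, df=10):
    # KS test of returns against the fitted normal and t marginals
    data = np.asarray(data.dropna(), dtype=float)
    mu, sigma = data.mean(), data.std()
    ks_norm = st.kstest(data, 'norm', args=(mu, sigma))
    ks_t = st.kstest(data, 't', args=(df, mu, sigma))
    return ks_norm, ks_t

# Usage: print(ks_check(sh_pct))
\end{lstlisting}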
\subsection{Correlation Coefficients and Contour Plots}
\begin{lstlisting}[language=Python]
def merge(datalist):
    n = len(datalist)
    result = pd.DataFrame(datalist[0]).copy()
    result.columns = [0]
    for i in range(1, n):
        result[i] = datalist[i]
    result = result.dropna()
    return result
a = [sh_pct, sz_pct, hs_pct]
data = merge(a)
data.corr(method='pearson', min_periods=1)
data.corr(method='kendall', min_periods=1)
data.corr(method='spearman', min_periods=1)
# Scatter plots
def show_mult(data, a, b, choose):
    plt.figure(figsize=(16,8))
    font = {'size': 15}  # font size for axis labels and title
    plt.scatter(data.values[:,a], data.values[:,b], marker='+')
    plt.xlabel('SZSE Component Index', font)
    plt.ylabel('Hang Seng Index', font)
    plt.title('scatter figure', font)
    plt.grid(True)
    plt.savefig(str(choose) + '.png')
    plt.show()
# Change the column arguments to obtain scatter plots for other index pairs; likewise below.
show_mult(data, 1, 2, choose='scatter3')
# Contour plots
def norm_d(x1, x2, mu, Sigma):
    # Bivariate normal pdf with mean vector mu and covariance matrix Sigma
    mu1, mu2 = mu[0], mu[1]
    s1, s2 = Sigma[0,0]**0.5, Sigma[1,1]**0.5
    rho = Sigma[0,1]/s1/s2
    num = 1/(2*np.pi*s1*s2*np.sqrt(1-rho**2))
    A = ((x1-mu1)**2)/(s1**2)
    B = 2*rho*((x1-mu1)*(x2-mu2))/(s1*s2)
    C = ((x2-mu2)**2)/(s2**2)
    D = -1/(2*(1-rho**2))*(A-B+C)
    pdf = num*np.exp(D)
    return pdf
def show_contour(data, a, b, choose):
    data1 = data[[a, b]]
    plt.style.use('seaborn-whitegrid')
    x1 = np.arange(-0.1, 0.1, 0.001)
    y1 = np.arange(-0.1, 0.1, 0.001)
    x1, y1 = np.meshgrid(x1, y1)
    plt.figure(figsize=(16,8))
    mu = data1.mean().values
    Sigma = data1.cov().values  # norm_d expects the covariance matrix itself, not its square root
    z = norm_d(x1, y1, mu, Sigma)
    plt.contourf(x1, y1, z, 10)
    plt.scatter(data.values[:,a], data.values[:,b], marker='x')
    plt.grid(True)
    font = {'size': 15}
    plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
    plt.xlabel('SSE Composite Index', font)
    plt.ylabel('SZSE Component Index', font)
    plt.title('contour figure', font)
    plt.savefig(str(choose) + '.png')
    plt.show()
show_contour(data, 0, 1, choose='contour1')
\end{lstlisting}
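The hand-written density \texttt{norm\_d} can be cross-checked against \texttt{scipy.stats.multivariate\_normal}, which accepts the mean vector and covariance matrix directly. A minimal sketch reusing the merged \texttt{data} frame from above; \texttt{check\_norm\_d} is a hypothetical helper, not part of the original code.
\begin{lstlisting}[language=Python]
from scipy.stats import multivariate_normal

def check_norm_d(data, a=0, b=1, point=(0.0, 0.0)):
    # Compare norm_d with scipy's bivariate normal pdf at one point
    sub = data[[a, b]]
    mu = sub.mean().values
    Sigma = sub.cov().values  # covariance matrix, as norm_d expects
    ours = norm_d(point[0], point[1], mu, Sigma)
    ref = multivariate_normal(mean=mu, cov=Sigma).pdf(point)
    return ours, ref  # should agree up to floating-point error

# Usage: print(check_norm_d(data))
\end{lstlisting}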
\subsection{VaR Calculation}
\begin{lstlisting}[language=Python]
import scipy.stats as st
def get_VaR(data, choose):
    if choose == 0:
        # range, skewness, kurtosis and empirical quantiles
        return (np.ptp(data), st.skew(data), st.kurtosis(data),
                np.percentile(data, [5, 10, 90, 95]))
    if choose == 1:
        # quantiles of the fitted normal distribution, by simulation
        mu = data.mean()
        sigma = data.std()
        x = mu + sigma * np.random.randn(1000000)
        return np.percentile(x, [5, 10, 90, 95])
    if choose == 2:
        # quantiles of the fitted t distribution with 10 degrees of freedom
        mu = data.mean()
        sigma = data.std()
        X = st.t(df=10, loc=mu, scale=sigma)
        return X.ppf(0.05), X.ppf(0.1), X.ppf(0.9), X.ppf(0.95)
for i in range(3):
    print(get_VaR(sh_pct, i))
    print(get_VaR(sz_pct, i))
    print(get_VaR(hs_pct, i))
def MCMC_kde(data, choose):
    scipy_kde = st.gaussian_kde(data)  # kernel density estimate of the returns
    X_plot = np.linspace(-0.12, 0.12, 1201)
    dens = scipy_kde.evaluate(X_plot)
    X = []
    x = 0
    # Metropolis sampler with a uniform random-walk proposal on the KDE grid
    for i in range(100000):
        z1 = np.random.randint(-100, 100)/5000
        y = x + z1
        if abs(y) > 0.12:  # reject proposals outside the KDE grid
            X.append(x)
            continue
        pro = dens[int((y+0.12)*5000)]/dens[int((x+0.12)*5000)]
        z2 = np.random.uniform()
        if pro > z2:
            X.append(y)
            x = y
        else:
            X.append(x)
    data_MCMC = pd.DataFrame(X[50000:100000])  # discard the first half as burn-in
    data_MCMC.hist(bins=300, figsize=(16,8), density=True,
                   range=(-0.1,0.1), color='b', alpha=0.6, label='MCMC sample')
    plt.plot(X_plot, dens, c='r', linewidth=2.5, label='kernel density')
    font = {'size': 15}
    plt.xlabel('Expectation', font)
    plt.ylabel('Probability', font)
    plt.title('MCMC sampling', font)
    axes = plt.gca()
    axes.set_xlim([-0.075, 0.075])
    plt.legend(loc=0, fontsize=15)  # show the legend
    plt.grid(True)
    plt.savefig(str(choose) + 'MCMC.png')
    plt.show()
    print(np.percentile(data_MCMC, [5, 10, 90, 95]))
MCMC_kde(sh_pct, choose='sh_pct')
MCMC_kde(sz_pct, choose='sz_pct')
MCMC_kde(hs_pct, choose='hs_pct')
\end{lstlisting}
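To judge whether a VaR estimate is consistent with the observed exceedance frequency, Kupiec's proportion-of-failures (POF) likelihood-ratio test is a standard follow-up; the statistic is asymptotically $\chi^2(1)$ under the null. This sketch is an addition, not part of the original analysis; \texttt{kupiec\_pof} is a hypothetical helper and \texttt{alpha} denotes the nominal tail probability of the VaR being tested.
\begin{lstlisting}[language=Python]
import numpy as np
import scipy.stats as st

def kupiec_pof(returns, var, alpha):
    # Kupiec POF test; assumes 0 < number of exceedances < n
    returns = np.asarray(returns, dtype=float)
    n = len(returns)
    x = int((returns < var).sum())  # number of VaR exceedances
    phat = x / n
    lr = -2 * ((n - x) * np.log(1 - alpha) + x * np.log(alpha)
               - (n - x) * np.log(1 - phat) - x * np.log(phat))
    return lr, st.chi2.sf(lr, df=1)  # (LR statistic, p-value)

# Usage: r = sh_pct.dropna()
#        print(kupiec_pof(r, np.percentile(r, 5), 0.05))
\end{lstlisting}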
\subsection{Dependence Calculation}
\begin{lstlisting}[language=Python]
def rank(df):
    df = df.rank(method='min', axis=0)
    df = df.T
    return df
def pct_to_rank(data0):
    data = data0.copy()
    col = data.columns
    n = len(col)
    for i in range(n):
        data[col[i]] = rank(data[col[i]])
    return data
data_rank = pct_to_rank(data)
def depends(data, a, percent, b):
    # Empirical tail-dependence measure at rank level percent
    df = data[[a, b]].copy()
    n = len(df.index)
    k = 0
    for i in range(n):
        if df[a].iloc[i] >= n*percent and df[b].iloc[i] >= n*percent and percent >= 0.5:
            k = k + 1
        if df[a].iloc[i] <= n*percent and df[b].iloc[i] <= n*percent and percent < 0.5:
            k = k + 1
    # Upper tail: condition on exceeding the level; lower tail: on falling below it
    denom = n*(1 - percent) if percent >= 0.5 else n*percent
    return k/denom if denom > 0 else float('nan')
def show_depends(data, a, b, start, end, n):
    l = []
    step = (end - start)/n
    for i in range(n + 1):
        percent = start + i*step
        l.append(depends(data, a, percent, b))
    plt.figure(figsize=(16,8))
    plt.plot(l)
    plt.grid(True)
    plt.show()
show_depends(data_rank, 0, 1, 0.9, 1, 10)
# Note: the level must stay above 0, so the lower range starts at 0.01
show_depends(data_rank, 0, 1, 0.01, 0.11, 10)
\end{lstlisting}
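The loop in \texttt{depends} can be cross-checked with a vectorised version based on pandas percentage ranks, using the standard estimators $\hat\lambda_L(q)=\hat P(U\le q, V\le q)/q$ for the lower tail and $\hat\lambda_U(q)=\hat P(U>q, V>q)/(1-q)$ for the upper tail. A minimal sketch under that convention; \texttt{tail\_dep} is a hypothetical helper and assumes the merged \texttt{data} frame from above.
\begin{lstlisting}[language=Python]
import numpy as np

def tail_dep(data, a, b, q):
    # Empirical conditional tail probability at quantile level q
    u = data[a].rank(pct=True).values  # pseudo-observations in (0, 1]
    v = data[b].rank(pct=True).values
    if q >= 0.5:  # upper tail
        return np.mean((u > q) & (v > q)) / (1 - q)
    return np.mean((u <= q) & (v <= q)) / q  # lower tail

# Usage: print(tail_dep(data, 0, 1, 0.95), tail_dep(data, 0, 1, 0.05))
\end{lstlisting}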
\subsection{MCMC Simulation}
\begin{lstlisting}[language=Python]
import sympy
from sympy import diff, symbols
import numpy as np
import pandas as pd
import math
x = symbols('x', real=True)
y = symbols('y', real=True)
z = symbols('z', real=True)
e = sympy.exp(1)
t = symbols('t', real=True)
def fun_diff3(function, x, y, z):
    # Mixed third partial derivative: turns a 3-dim copula cdf into its density
    function1 = diff(function, x, 1)
    function2 = diff(function1, y, 1)
    function3 = diff(function2, z, 1)
    return function3
def pdf(sth_pdf, x0, y0, z0, t0):
    # Evaluate the symbolic density on the unit cube, zero outside
    if 0 < x0 < 1 and 0 < y0 < 1 and 0 < z0 < 1:
        return float(sth_pdf.subs({x: x0, y: y0, z: z0, t: t0, e: math.e}))
    else:
        return 0
def get_MCMC_values(func_pdf, t0, itn):
    # Metropolis sampler with a uniform random-walk proposal
    L = []
    l = np.array([0.5, 0.5, 0.5])
    for i in range(itn):
        z1 = (np.random.random() - 0.5)/5
        z2 = (np.random.random() - 0.5)/5
        z3 = (np.random.random() - 0.5)/5
        u = np.random.random()
        z4 = pdf(func_pdf, l[0], l[1], l[2], t0)
        l_new = l + np.array([z1, z2, z3])
        z5 = pdf(func_pdf, l_new[0], l_new[1], l_new[2], t0)
        if (z5/z4) > u:
            L.append(l_new)
            l = l_new
        else:
            L.append(l)
    return pd.DataFrame(L)
def get_VaR(L, percent):
    # Empirical quantiles of the summed components (redefines the earlier get_VaR)
    S = []
    n = len(L)
    for i in range(n):
        S.append(L.iloc[i].sum())
    S = np.array(S)
    return np.percentile(S, percent)
gumbel_cdf = e**(-((-sympy.log(x))**t + (-sympy.log(y))**t
                   + (-sympy.log(z))**t)**(1/t))
gumbel_pdf = fun_diff3(gumbel_cdf, x, y, z)
gumbel_values = get_MCMC_values(gumbel_pdf, t0=1.788, itn=2000)
print(get_VaR(gumbel_values, [5, 10, 90, 95]))
clayton_cdf = (x**(-t) + y**(-t) + z**(-t) - 2)**(-1/t)
clayton_pdf = fun_diff3(clayton_cdf, x, y, z)
clayton_values = get_MCMC_values(clayton_pdf, t0=1.136, itn=2000)
print(get_VaR(clayton_values, [5, 10, 90, 95]))
\end{lstlisting}
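Evaluating the symbolic density with \texttt{subs} inside the Metropolis loop is slow. \texttt{sympy.lambdify} compiles the expression into a plain numeric function once, leaving the sampler logic unchanged. A minimal sketch of this substitution, where \texttt{gumbel\_pdf\_num} and \texttt{pdf\_fast} are hypothetical names for a drop-in replacement of \texttt{pdf}:
\begin{lstlisting}[language=Python]
from sympy import lambdify

# Compile the symbolic copula density once (numpy backend)
gumbel_pdf_num = lambdify((x, y, z, t), gumbel_pdf, modules='numpy')

def pdf_fast(x0, y0, z0, t0):
    # Same support check as pdf(), but without a symbolic subs() per call
    if 0 < x0 < 1 and 0 < y0 < 1 and 0 < z0 < 1:
        return float(gumbel_pdf_num(x0, y0, z0, t0))
    return 0.0
\end{lstlisting}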
\section{R Code}
\subsection{Copula Fitting}
\begin{lstlisting}[language=R]
library(parallel)
library(zoo)
library(rugarch)
library(xts)
library(ADGofTest)
library(qqtest)
library(copula)
library(qrmdata)
library(qrmtools)
library(dplyr)
datax<-read.csv(file="1.csv",sep=",",header=TRUE)
x<-datax$SZI
y<-datax$SH
o<-datax$HSI
plot(x,y)
plot(x,o)
plot(y,o)
data<-data.frame(x,y,o)
S<-data
### 2 Fitting marginal ARMA(1,1)-GARCH(1,1) models with standardized t residuals
## Build negative log-returns
X <- -returns(S) # -log-returns
Y <- returns(S)
## Basic plot
plot.zoo(X, main = "Negative log-returns")
plot.zoo(Y, main = "log-returns")
## Fit marginal time series
uspec <- rep(list(ugarchspec(distribution.model = "std")), ncol(X))
fit.ARMA.GARCH <- fit_ARMA_GARCH(X, ugarchspec.list = uspec)
stopifnot(sapply(fit.ARMA.GARCH$error, is.null)) # NULL = no error
if(FALSE)
    fit.ARMA.GARCH$warning
## => the warning comes from finding initial values and can be ignored here
fits <- fit.ARMA.GARCH$fit # fitted models
Z <- as.matrix(do.call(merge, lapply(fits, residuals,
    standardize = TRUE))) # extract the standardized residuals
colnames(Z) <- colnames(S)
(nu.mar <- vapply(fits, function(x) x@fit$coef[["shape"]],
    NA_real_)) # vector of estimated degrees of freedom
n <- nrow(X) # sample size
d <- ncol(X) # dimension
### Fitting copulas ##########################################
## Compute pseudo-observations from the standardized t residuals
U <- pobs(Z)
pairs2(U, cex = 0.4, col = adjustcolor("black", alpha.f = 0.5))
## Compare various bivariate copulas
fit.nc <- fitCopula(normalCopula(dim = d), data = U)
fit.tc <- fitCopula(tCopula(dim = d), data = U) # the degrees of freedom are estimated, too
fit.cl <- fitCopula(claytonCopula(dim = d), data = U)
fit.gc <- fitCopula(gumbelCopula(dim = d), data = U)
## Comparing the likelihoods
sort(c(nc = fit.nc@loglik, tc = fit.tc@loglik,
       cl = fit.cl@loglik, gc = fit.gc@loglik),
     decreasing = TRUE)
### 3 Fitting copulas #########################################
## Fitting a Gumbel copula
fit.gc <- fitCopula(gumbelCopula(dim = d),
data = U, method = "mpl")
fit.gc@estimate # estimated copula parameter
gc <- fit.gc@copula # fitted copula
## Compute matrices of pairwise Kendall's tau and upper tail-dependence coefficients
p2P(tau(gc), d = d)
p2P(lambda(gc)["upper"], d = d)
## Fitting a Clayton copula
fit.cl <- fitCopula(claytonCopula(dim = d),
data = U)
fit.cl@estimate # estimated copula parameter
cl <- fit.cl@copula # fitted copula
## Compute matrices of pairwise Kendall's tau and lower tail-dependence coefficients
p2P(tau(cl), d = d)
p2P(lambda(cl)["lower"], d = d)
## Fitting a Normal copula
fit.nc <- fitCopula(normalCopula(dim = d),
data = U)
fit.nc@estimate # estimated copula parameter
nc <- fit.nc@copula # fitted copula
## Compute matrices of pairwise Kendall's tau and tail-dependence coefficients
p2P(tau(nc), d = d)
p2P(lambda(nc), d = d)
## Fitting a t copula
fit.tc <- fitCopula(tCopula(dim = d, dispstr = "un"),
data = U, method = "itau.mpl")
(nu <- tail(fit.tc@estimate, n = 1))
# estimated degrees of freedom nu
(P <- p2P(head(fit.tc@estimate, n = -1)))
# estimated correlation matrix
tc <- fit.tc@copula # fitted copula
## Compute matrices of pairwise Kendall's tau and upper tail-dependence coefficients
p2P(tau(tc))
p2P(lambda(tc)[(choose(d,2)+1):(d*(d-1))])
### 4 Goodness-of-fit ##########################################
## We use the parametric bootstrap here, based on the maximum pseudo-likelihood estimator.
N <- 200
## Check the Gumbel copula
gof.gc <- gofCopula(gc, x = U, N = N) # a warning here also reflects the poor fit
stopifnot(gof.gc$p.value < 0.05) # p < 0.05 => the Gumbel copula is rejected
## Check the Clayton copula
gof.cl <- gofCopula(cl, x = U, N = N) # a warning here also reflects the poor fit
stopifnot(gof.cl$p.value < 0.05) # p < 0.05 => the Clayton copula is rejected
## Check the normal copula
gof.nc <- gofCopula(nc, x = U, N = N) # a warning here also reflects the poor fit
stopifnot(gof.nc$p.value < 0.05) # p < 0.05 => the normal copula is rejected
## Checking the t copula differs from the three cases above, since the t copula
## has two kinds of parameters (a correlation matrix and the degrees of freedom).
U.Rsnbl_tc <- cCopula(U, copula = tc)
pairs2(U.Rsnbl_tc, cex = 0.4, col = adjustcolor("black",
alpha.f = 0.5),main ="t copula") # looks ok
## normal copula also checked
U.Rsnbl_nc <- cCopula(U, copula = nc)
pairs2(U.Rsnbl_nc, cex = 0.4, col = adjustcolor("black",
alpha.f = 0.5),main ="normal copula") # looks not good
## clayton copula also checked
U.Rsnbl_cl <- cCopula(U, copula = cl)
pairs2(U.Rsnbl_cl, cex = 0.4, col = adjustcolor("black",
alpha.f = 0.5),main="clayton copula") # looks not good
## Gumbel copula also checked
U.Rsnbl_gc <- cCopula(U, copula = gc)
pairs2(U.Rsnbl_gc, cex = 0.4, col = adjustcolor("black",
alpha.f = 0.5),main ="gumbel copula") # looks ok
## Map it to a K_d distribution ("kay") and do a Q-Q plot
U.Rsnbl.K <- sqrt(rowMeans(qnorm(U.Rsnbl_tc)^2))
# map to a kay distribution
pK <- function(q, d) pchisq(d*q*q, df = d)
# distribution function of a K_d distribution
AD <- ad.test(U.Rsnbl.K, distr.fun = pK, d = d)
# Anderson-Darling test for the t copula
stopifnot(AD$p.value >= 0.05) # unlike the fits above, the t copula is not rejected
## A (sophisticated) Q-Q plot. Attention: clear the plotting device first,
## since plots were already drawn above.
qqtest(U.Rsnbl.K, dist = "kay", df = d, nreps = 1000, pch = 1,
col = adjustcolor("black", alpha.f = 0.5), main = "good",
xlab = substitute(K[dof]~"quantiles", list(dof = d)))
# => looks ok
\end{lstlisting}
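For readers working in Python rather than R, the one-parameter Archimedean fits above can be approximated by inverting Kendall's tau: for the Gumbel copula $\tau = 1 - 1/\theta$, so $\theta = 1/(1-\tau)$, and for the Clayton copula $\tau = \theta/(\theta+2)$, so $\theta = 2\tau/(1-\tau)$. This moment-based fit is a rougher alternative to the maximum pseudo-likelihood used above. A minimal sketch; \texttt{fit\_archimedean\_tau} is a hypothetical helper taking two margins of pseudo-observations.
\begin{lstlisting}[language=Python]
import numpy as np
from scipy.stats import kendalltau

def fit_archimedean_tau(u, v):
    # Method-of-moments fit via Kendall's tau inversion
    tau, _ = kendalltau(u, v)
    theta_gumbel = 1.0 / (1.0 - tau)         # requires 0 <= tau < 1
    theta_clayton = 2.0 * tau / (1.0 - tau)  # requires 0 < tau < 1
    return tau, theta_gumbel, theta_clayton

# Usage with pseudo-observations (ranks scaled to (0, 1)), assuming pandas as pd:
# u = pd.Series(x).rank(pct=True); v = pd.Series(y).rank(pct=True)
# print(fit_archimedean_tau(u, v))
\end{lstlisting}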
\subsection{Monte Carlo for the t Copula}
\begin{lstlisting}[language=R]
## t copula Monte Carlo
library(mvtnorm)
## Sigma and v are presumably the correlation matrix P and degrees of
## freedom nu of the t copula fitted in the previous listing
tcsample <- rmvt(2000, sigma = Sigma, df = v,
                 type = c("shifted", "Kshirsagar")[2])
## Min-max scaling of each margin to [0,1]
tcsample[,1] <- (tcsample[,1]-min(tcsample[,1]))/(max(tcsample[,1])-min(tcsample[,1]))
tcsample[,2] <- (tcsample[,2]-min(tcsample[,2]))/(max(tcsample[,2])-min(tcsample[,2]))
tcsample[,3] <- (tcsample[,3]-min(tcsample[,3]))/(max(tcsample[,3])-min(tcsample[,3]))
## Quantiles of the summed components
tcsample1 <- apply(tcsample, 1, sum)
quantile(tcsample1, probs = seq(0, 1, 0.01), type = 7)
\end{lstlisting}
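The min-max scaling above maps each margin to $[0,1]$ but is not the copula's probability-integral transform; the standard construction instead applies the univariate $t$ CDF to each margin of a multivariate $t$ sample. A minimal Python sketch under that convention, where \texttt{P} and \texttt{nu} stand for the fitted correlation matrix and degrees of freedom (named after the R objects above):
\begin{lstlisting}[language=Python]
import numpy as np
import scipy.stats as st

def rtcopula(n, P, nu, seed=0):
    # Sample a t copula: multivariate t draws, then t CDF margin by margin
    rng = np.random.default_rng(seed)
    d = P.shape[0]
    z = rng.multivariate_normal(np.zeros(d), P, size=n)
    w = nu / rng.chisquare(nu, size=n)
    x = z * np.sqrt(w)[:, None]   # multivariate t with correlation matrix P
    return st.t.cdf(x, df=nu)     # uniform margins with t-copula dependence

# Usage: U = rtcopula(2000, P, nu)
#        print(np.percentile(U.sum(axis=1), [5, 10, 90, 95]))
\end{lstlisting}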
% vim:ts=4:sw=4