import time
import math
import numpy as np
import pandas as pd
import datetime
import scipy as sc
import matplotlib.pyplot as plt
#from pandas_datareader import data as pdr
from IPython.display import display, Latex
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from fredapi import Fred
fred_key = 'ffc8bae82c04b73a42221721a114855a'
fred = Fred(api_key=fred_key)
stock_price = fred.get_series('SP500')
Denote by $S_t$ the price of a financial asset and $X_t=ln\ S_t$ the logarithm of it. Given a tiem scale $\Delta$, the log price, return, as scale $\Delta$ is defined as:
log_price = np.log( stock_price/stock_price.shift(1) ).dropna()
log_price.plot()
plt.title('Daily Log Price')
plot_acf(log_price)
plt.show()
/Users/mie/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arraylike.py:358: RuntimeWarning: invalid value encountered in log result = getattr(ufunc, method)(*inputs, **kwargs)
return_squared = np.square( log_price )
return_squared.plot()
plt.title('Squared Daily Return')
plot_acf(return_squared)
plt.show()
Trading_Days = 40
volatility = log_price.rolling(window=Trading_Days).std()*np.sqrt(252)
volatility = volatility.dropna()
volatility.plot()
#plot_acf(volatility)
#plot_pacf(volatility)
plt.show()
We aim to capture the movement of volatility, thus we include Ornstein-Uhlenbeck process here to mimic the flow of volatility.
Ornstein-Uhlenbeck is a stochastic process that is a stationary Gauss-Markov Process.
$$ dX_t= \kappa X_t \ dt +\sigma \ dW_t $$Sometimes, an additional drift term is added, which is known as the Vasicek model:
$$ dX_t = \kappa (\theta - X_t)dt +\sigma \ dW_t $$We could apply the MLE estimation to find the parameter of the above random process.
Recall our Vasicek Form Ornstein-Uhlenbeck process is like the following:
$$ dX_t = \kappa (\theta - X_t)dt +\sigma \ dW_t $$Multiply both sides by $e^{kt}$, then we get,
$$ e^{kt}dX_t = \kappa e^{kt} \theta \ dt - \kappa e^{kt} X_t \ dt +\sigma e^{kt}\ dW_t $$We know that $ d( e^{kt} X_t )=e^{kt}dX_t + k e^{kt}X_t dt $, and substitute it inside. Then, we get,
$$ d(e^{kt}X_t)=e^{kt}\theta \ dt + e^{kt}\sigma \ dW_t $$Take an integral from [0,T],
$$ \int_0^T d(e^{kt}X_t)= \int_0^T e^{kt}\theta \ dt + \int_0^T e^{kt}\sigma \ dW_t $$$$ X_T = X_0 e^{-kT} +\theta (1-e^{-kT}) + \int_0^T e^{-k(T-t)}\sigma \ dW_t $$$\int_0^T e^{-k(T-t)}\sigma \ dW_t \sim N(0, \sigma^2\int_0^T e^{-2k(T-t)}dt)$
We then find $\mathbb{E}(X_T)$ and $Var(X_T)$.
$ \mathbb{E}(X_T) =\mathbb{E}\bigg( X_0 e^{-kT} +\theta (1-e^{-kT}) + \int_0^T e^{-k(T-t)}\sigma \ dW_t \bigg)$
$ \mathbb{E}(X_T)= X_0 e^{-kT} +\theta (1-e^{-kT}) $
$Var(X_T) = \mathbb{E}\bigg( \big( X_T - \mathbb{E}(X_T) \big)^2 \bigg)$
$Var(X_T)= \mathbb{E}\bigg( \big( \int_0^T e^{-k(T-t)}\sigma \ dW_t \big)^2 \bigg) $
By Ito's Isometry: $I(t)=\int_0^t \Delta(s)dW_s$, then
$Var[I(t)]=\mathbb{E}[(I^2(t))]=\int_0^t \Delta^2(s)ds$
Then,
$ Var(X_T)= \int_0^T e^{-2k(T-t)}\sigma^2 \ dt $
$ Var(X_T)= \frac{\sigma^2}{2k}\big( 1-e^{-2kT} \big) $
Therefore, we finally get,
$ X_T \sim N\bigg(X_0 e^{-kT} +\theta (1-e^{-kT}), \frac{\sigma^2}{2k}\big( 1-e^{-2kT} \big) \bigg)$
$X_t \sim N(\cdot,\cdot)$
,where $\mathbb{E}(X_{t+\delta t})=X_0 e^{-k\ \delta t} +\theta (1-e^{-k\ \delta t})$, and $Var(X_{t+\delta t})= \frac{\sigma^2}{2k}(1-e^{-2k\ \delta t})$.
$$ f_{\theta}(x_{t+\delta t}|\theta)=\frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\mu)^2}{2\sigma^2}} $$We replace $\mu = \mathbb{E}(X_{t+\delta t})$ and $\sigma^2 = Var(X_{t+\delta t})$ insider.
Code realisation is as the following.
# calculate mu
def mu(x, dt, k, theta):
ekt = np.exp( -k* dt ) # e^{-k dt}
return x*ekt + theta*(1-ekt)
# calcuate sigma
def std( dt, k, sigma ):
e2kt=np.exp( -2*k*dt ) # e^{-2k dt}
return np.sqrt( sigma**2/(2*k)*(1-e2kt) )
# calculate the log-likelihood function
def log_likelihood_OU(theta_hat, x):
k = theta_hat[0]
theta = theta_hat[1]
sigma = theta_hat[2]
x_dt = x[1:]
x_t = x[:-1]
dt = 1/252
mu_OU = mu(x_t, dt, k, theta)
sigma_OU = std(dt, k, sigma)
log_likelihood_func = np.sum( np.log( sc.stats.norm.pdf(x_dt, loc = mu_OU, scale = sigma_OU) ) )
return - log_likelihood_func
# contraint, let k > 0
def k_positive(theta_hat):
k = np.abs(theta_hat[0])
return k
# contraint, let sigma > 0
def sigma_positive(theta_hat):
sigma = np.abs(theta_hat[2])
return sigma
vol = np.array(volatility)
# constraint equations are defined as dictionaries
cons_set = [ { 'type':'ineq', 'fun': k_positive },
{ 'type':'ineq', 'fun': sigma_positive } ]
# initial guess of parameter theta
# theta =[ k , theta , sigma ]
theta0 = [1,3,1]
# apply minimizer to find the MLE paramether
opt = sc.optimize.minimize( fun = log_likelihood_OU, x0 = theta0, args = (vol, ), constraints = cons_set)
# round our parameters
kappa = round(opt.x[0], 3)
theta = round(opt.x[1], 3)
sigma = round(opt.x[2], 3)
vol0 = vol[-1]
for_kappa_hat = '$\hat{\kappa} = '+str(kappa)+'$'
for_theta_hat = '$\hat{ \theta } = '+str(theta)+'$'
for_sigma_hat = '$\hat{\sigma} = '+str(sigma)+'$'
print('The MLE for data is:')
display(Latex(for_kappa_hat))
display(Latex(for_theta_hat))
display(Latex(for_sigma_hat))
print('Last Volatility', round(vol0,3))
The MLE for data is:
/var/folders/90/hk9jz15n56zckxcy3wlt2twh0000gn/T/ipykernel_41709/1550404899.py:25: RuntimeWarning: divide by zero encountered in log log_likelihood_func = np.sum( np.log( sc.stats.norm.pdf(x_dt, loc = mu_OU, scale = sigma_OU) ) ) /var/folders/90/hk9jz15n56zckxcy3wlt2twh0000gn/T/ipykernel_41709/1550404899.py:25: RuntimeWarning: invalid value encountered in log log_likelihood_func = np.sum( np.log( sc.stats.norm.pdf(x_dt, loc = mu_OU, scale = sigma_OU) ) ) /var/folders/90/hk9jz15n56zckxcy3wlt2twh0000gn/T/ipykernel_41709/2563713482.py:13: RuntimeWarning: invalid value encountered in double_scalars return sigma * np.sqrt((1 - e2kt ) / ( 2 * kappa))
Last Volatility 0.289
The ODE is,
$$ dX_t = \kappa (\theta - X_t) dt + \sigma \ dW_t$$The Continuous-time Stochastic Process is,
$$ X_t = X_0 e^{-\kappa t}+\theta (1-e^{-\kappa t}) + \sigma \int_0^t e^{-\kappa (t-s)}dW_s $$# Define Parameters
Time = 3
M = 10000
Z = np.random.normal(size=(M))
def mu(x, dt, kappa, theta):
ekt = np.exp(- kappa * dt)
return x * ekt + theta * (1 - ekt)
def std(dt, kappa, sigma):
e2kt = np.exp( - 2 * kappa * dt )
return sigma * np.sqrt((1 - e2kt ) / ( 2 * kappa))
drift_OU = mu(vol0, Time, kappa, theta)
diffusion_OU = std( Time, kappa, theta )
vol_OU = drift_OU + diffusion_OU * Z
plt.hist(vol_OU)
plt.title('Ornsterin-Uhlenbeck Continuous Distribution @ Time')
plt.xlabel('Volatility')
plt.show()
Euler-Maryuama Discretisation: This is an approximation of Variance,
$$ \Delta x_{t+1} = \kappa (\theta - x_t)\Delta t + \sigma \sqrt{ \Delta t }\epsilon $$Parameters are assined as the above optimisation.
days = 1
years = 2
dt = days/252
M = 1000
N = int(years/dt)
Recursive Function
vol_OU = np.full( shape=(N,M), fill_value = vol0 )
Z = np.random.normal(size = (N,M))
def OU_recursive(t, vol_OU):
# Return the final state
if t == N:
return vol_OU
# Thread the state through the recursive call
else:
drift_OU = kappa*(theta - vol_OU[t-1])*dt
diffusion_OU = sigma*np.sqrt(dt)
vol_OU[t] = vol_OU[t-1] + drift_OU + diffusion_OU*Z[t]
return OU_recursive(t + 1, vol_OU)
start_time = time.time()
vol_OU = OU_recursive(0, vol_OU)
print('Execution time', time.time() - start_time)
vol_OU = np.concatenate( (np.full(shape=(1, M), fill_value=vol0), vol_OU ) )
plt.plot(vol_OU)
plt.title('Ornstein-Uhlenbeck Euler Discretization')
plt.ylabel('Volatility')
plt.show()
Execution time 0.007803916931152344
Python Loop
vol_OU = np.full(shape=(N, M), fill_value=vol0)
Z = np.random.normal(size=(N, M))
start_time = time.time()
for t in range(1,N):
drift_OU = kappa*(theta - vol_OU[t-1])*dt
diffusion_OU = sigma*np.sqrt(dt)
vol_OU[t] = vol_OU[t-1] + drift_OU + diffusion_OU*Z[t]
print('Execution time', time.time() - start_time)
vol_OU = np.concatenate( (np.full(shape=(1, M), fill_value=vol0), vol_OU ) )
plt.plot(vol_OU)
plt.title('Ornstein-Uhlenbeck Euler Discretization')
plt.ylabel('Volatility')
plt.show()
Execution time 0.005856037139892578