In [91]:
import time
import math
import numpy as np
import pandas as pd
import datetime
import scipy as sc
import matplotlib.pyplot as plt
#from pandas_datareader import data as pdr
from IPython.display import display, Latex
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
In [2]:
from fredapi import Fred
fred_key = 'ffc8bae82c04b73a42221721a114855a'
fred = Fred(api_key=fred_key)
stock_price = fred.get_series('SP500')

Denote by $S_t$ the price of a financial asset and $X_t=ln\ S_t$ the logarithm of it. Given a tiem scale $\Delta$, the log price, return, as scale $\Delta$ is defined as:

$$ r_t = X_{t+\Delta} - X_t =ln (S_t + \Delta S_t) $$
In [3]:
log_price = np.log( stock_price/stock_price.shift(1) ).dropna()
log_price.plot()
plt.title('Daily Log Price')
plot_acf(log_price)
plt.show()
/Users/mie/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arraylike.py:358: RuntimeWarning: invalid value encountered in log
  result = getattr(ufunc, method)(*inputs, **kwargs)
In [4]:
return_squared = np.square( log_price )
return_squared.plot()
plt.title('Squared Daily Return')
plot_acf(return_squared)
plt.show()
$$Volatility = \sqrt{\frac{ Var(r_i) }{252}}=\frac{s.d. (r_i)}{\sqrt{252}} $$
In [93]:
Trading_Days = 40
volatility = log_price.rolling(window=Trading_Days).std()*np.sqrt(252)
volatility = volatility.dropna()
volatility.plot()
#plot_acf(volatility)
#plot_pacf(volatility)
plt.show()

We aim to capture the movement of volatility, thus we include Ornstein-Uhlenbeck process here to mimic the flow of volatility.

Ornstein-Uhlenbeck¶

Ornstein-Uhlenbeck is a stochastic process that is a stationary Gauss-Markov Process.

$$ dX_t= \kappa X_t \ dt +\sigma \ dW_t $$

Sometimes, an additional drift term is added, which is known as the Vasicek model:

$$ dX_t = \kappa (\theta - X_t)dt +\sigma \ dW_t $$

We could apply the MLE estimation to find the parameter of the above random process.

So what’s the distribution function of Ornstein-Uhlenbeck process?¶

Recall our Vasicek Form Ornstein-Uhlenbeck process is like the following:

$$ dX_t = \kappa (\theta - X_t)dt +\sigma \ dW_t $$

Multiply both sides by $e^{kt}$, then we get,

$$ e^{kt}dX_t = \kappa e^{kt} \theta \ dt - \kappa e^{kt} X_t \ dt +\sigma e^{kt}\ dW_t $$

We know that $ d( e^{kt} X_t )=e^{kt}dX_t + k e^{kt}X_t dt $, and substitute it inside. Then, we get,

$$ d(e^{kt}X_t)=e^{kt}\theta \ dt + e^{kt}\sigma \ dW_t $$

Take an integral from [0,T],

$$ \int_0^T d(e^{kt}X_t)= \int_0^T e^{kt}\theta \ dt + \int_0^T e^{kt}\sigma \ dW_t $$$$ X_T = X_0 e^{-kT} +\theta (1-e^{-kT}) + \int_0^T e^{-k(T-t)}\sigma \ dW_t $$

$\int_0^T e^{-k(T-t)}\sigma \ dW_t \sim N(0, \sigma^2\int_0^T e^{-2k(T-t)}dt)$

We then find $\mathbb{E}(X_T)$ and $Var(X_T)$.

$ \mathbb{E}(X_T) =\mathbb{E}\bigg( X_0 e^{-kT} +\theta (1-e^{-kT}) + \int_0^T e^{-k(T-t)}\sigma \ dW_t \bigg)$

$ \mathbb{E}(X_T)= X_0 e^{-kT} +\theta (1-e^{-kT}) $

$Var(X_T) = \mathbb{E}\bigg( \big( X_T - \mathbb{E}(X_T) \big)^2 \bigg)$

$Var(X_T)= \mathbb{E}\bigg( \big( \int_0^T e^{-k(T-t)}\sigma \ dW_t \big)^2 \bigg) $

By Ito's Isometry: $I(t)=\int_0^t \Delta(s)dW_s$, then

$Var[I(t)]=\mathbb{E}[(I^2(t))]=\int_0^t \Delta^2(s)ds$

Then,

$ Var(X_T)= \int_0^T e^{-2k(T-t)}\sigma^2 \ dt $

$ Var(X_T)= \frac{\sigma^2}{2k}\big( 1-e^{-2kT} \big) $

Therefore, we finally get,

$ X_T \sim N\bigg(X_0 e^{-kT} +\theta (1-e^{-kT}), \frac{\sigma^2}{2k}\big( 1-e^{-2kT} \big) \bigg)$

MLE of Ornsterin-Uhlenbeck Process¶

$X_t \sim N(\cdot,\cdot)$

,where $\mathbb{E}(X_{t+\delta t})=X_0 e^{-k\ \delta t} +\theta (1-e^{-k\ \delta t})$, and $Var(X_{t+\delta t})= \frac{\sigma^2}{2k}(1-e^{-2k\ \delta t})$.

$$ f_{\theta}(x_{t+\delta t}|\theta)=\frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\mu)^2}{2\sigma^2}} $$

We replace $\mu = \mathbb{E}(X_{t+\delta t})$ and $\sigma^2 = Var(X_{t+\delta t})$ insider.

Code realisation is as the following.

$$\mathbb{E}(X_{t+\delta t})=X_0 e^{-k\ \delta t} +\theta (1-e^{-k\ \delta t})$$$$Var(X_{t+\delta t})= \frac{\sigma^2}{2k}(1-e^{-2k\ \delta t})$$
In [79]:
# calculate mu
def mu(x, dt, k, theta):
    ekt = np.exp( -k* dt ) # e^{-k dt}
    return x*ekt + theta*(1-ekt)

# calcuate sigma
def std( dt, k, sigma ):
    e2kt=np.exp( -2*k*dt ) # e^{-2k dt}
    return np.sqrt(  sigma**2/(2*k)*(1-e2kt)  )

# calculate the log-likelihood function
def log_likelihood_OU(theta_hat, x):
    k = theta_hat[0]
    theta = theta_hat[1]
    sigma = theta_hat[2]
    
    x_dt = x[1:]
    x_t = x[:-1]
    
    dt = 1/252
    
    mu_OU = mu(x_t, dt, k, theta)
    sigma_OU = std(dt, k, sigma)
    
    log_likelihood_func = np.sum( np.log( sc.stats.norm.pdf(x_dt, loc = mu_OU, scale = sigma_OU) ) )
    
    return - log_likelihood_func

# contraint, let k > 0
def k_positive(theta_hat):
    k = np.abs(theta_hat[0])
    return k

# contraint, let sigma > 0
def sigma_positive(theta_hat):
    sigma = np.abs(theta_hat[2])
    return sigma
In [86]:
vol = np.array(volatility)

# constraint equations are defined as dictionaries
cons_set = [ { 'type':'ineq', 'fun': k_positive },
            { 'type':'ineq', 'fun': sigma_positive } ]

# initial guess of parameter theta
# theta =[ k , theta , sigma ]
theta0 = [1,3,1]

# apply minimizer to find the MLE paramether
opt = sc.optimize.minimize( fun = log_likelihood_OU, x0 = theta0, args = (vol, ), constraints = cons_set)

# round our parameters
kappa = round(opt.x[0], 3)
theta = round(opt.x[1], 3)
sigma = round(opt.x[2], 3)
vol0 = vol[-1]

for_kappa_hat = '$\hat{\kappa} = '+str(kappa)+'$'
for_theta_hat = '$\hat{ \theta } = '+str(theta)+'$'
for_sigma_hat = '$\hat{\sigma} = '+str(sigma)+'$'
print('The MLE for data is:')
display(Latex(for_kappa_hat))
display(Latex(for_theta_hat))
display(Latex(for_sigma_hat))
print('Last Volatility', round(vol0,3))
The MLE for data is:
/var/folders/90/hk9jz15n56zckxcy3wlt2twh0000gn/T/ipykernel_41709/1550404899.py:25: RuntimeWarning: divide by zero encountered in log
  log_likelihood_func = np.sum( np.log( sc.stats.norm.pdf(x_dt, loc = mu_OU, scale = sigma_OU) ) )
/var/folders/90/hk9jz15n56zckxcy3wlt2twh0000gn/T/ipykernel_41709/1550404899.py:25: RuntimeWarning: invalid value encountered in log
  log_likelihood_func = np.sum( np.log( sc.stats.norm.pdf(x_dt, loc = mu_OU, scale = sigma_OU) ) )
/var/folders/90/hk9jz15n56zckxcy3wlt2twh0000gn/T/ipykernel_41709/2563713482.py:13: RuntimeWarning: invalid value encountered in double_scalars
  return sigma * np.sqrt((1 - e2kt ) / ( 2 * kappa))
$\hat{\kappa} = 0.0$
$\hat{ heta } = -0.198$
$\hat{\sigma} = 0.11$
Last Volatility 0.289

Simulating Ornstein-Uhlenbeck Process:¶

The ODE is,

$$ dX_t = \kappa (\theta - X_t) dt + \sigma \ dW_t$$

The Continuous-time Stochastic Process is,

$$ X_t = X_0 e^{-\kappa t}+\theta (1-e^{-\kappa t}) + \sigma \int_0^t e^{-\kappa (t-s)}dW_s $$
In [81]:
# Define Parameters
Time = 3
M = 10000

Z = np.random.normal(size=(M))

def mu(x, dt, kappa, theta):
    ekt = np.exp(- kappa * dt)
    return x * ekt + theta * (1 - ekt)

def std(dt, kappa, sigma):
    e2kt = np.exp( - 2 * kappa * dt )
    return sigma * np.sqrt((1 - e2kt ) / ( 2 * kappa))
                         
drift_OU = mu(vol0, Time, kappa, theta)
diffusion_OU = std( Time, kappa, theta )
vol_OU = drift_OU + diffusion_OU * Z

plt.hist(vol_OU)
plt.title('Ornsterin-Uhlenbeck Continuous Distribution @ Time')
plt.xlabel('Volatility')
plt.show()

Discrete SDE¶

Euler-Maryuama Discretisation: This is an approximation of Variance,

$$ \Delta x_{t+1} = \kappa (\theta - x_t)\Delta t + \sigma \sqrt{ \Delta t }\epsilon $$

Parameters are assined as the above optimisation.

In [87]:
days = 1
years = 2

dt = days/252

M = 1000
N = int(years/dt)

Recursive Function

In [88]:
vol_OU = np.full( shape=(N,M), fill_value = vol0 )
Z = np.random.normal(size = (N,M))

def OU_recursive(t, vol_OU):
    # Return the final state
    if t == N:
        return vol_OU
    
    # Thread the state through the recursive call
    else:
        drift_OU = kappa*(theta - vol_OU[t-1])*dt
        diffusion_OU = sigma*np.sqrt(dt)
        vol_OU[t] = vol_OU[t-1] + drift_OU + diffusion_OU*Z[t]
        return OU_recursive(t + 1, vol_OU)
    
    
start_time = time.time() 
vol_OU = OU_recursive(0, vol_OU)
print('Execution time', time.time() - start_time)
vol_OU = np.concatenate( (np.full(shape=(1, M), fill_value=vol0), vol_OU ) )
plt.plot(vol_OU)
plt.title('Ornstein-Uhlenbeck Euler Discretization')
plt.ylabel('Volatility')
plt.show()
Execution time 0.007803916931152344

Python Loop

In [89]:
vol_OU = np.full(shape=(N, M), fill_value=vol0)
Z = np.random.normal(size=(N, M))
start_time = time.time()
for t in range(1,N):
    drift_OU = kappa*(theta - vol_OU[t-1])*dt
    diffusion_OU = sigma*np.sqrt(dt)
    vol_OU[t] = vol_OU[t-1] + drift_OU + diffusion_OU*Z[t]
print('Execution time', time.time() - start_time)
vol_OU = np.concatenate( (np.full(shape=(1, M), fill_value=vol0), vol_OU ) )
plt.plot(vol_OU)
plt.title('Ornstein-Uhlenbeck Euler Discretization')
plt.ylabel('Volatility')
plt.show()
Execution time 0.005856037139892578