Answer To: UNIVERSITY OF TORONTO Rotman School of Management RSM316 PROBLEM SET #3 Raymond Kan The file...
Sathishkumar answered on Oct 22 2021
solutions/forecast.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Import the modules\n",
"import pandas as pd\n",
"import numpy as np\n",
"from datetime import datetime\n",
"from tqdm import tqdm_notebook as tqdm\n",
"import statsmodels.api as sm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Import Data\n",
"df_monthly = pd.read_excel('PredictorData2020.xlsx',sheet_name=\"Monthly\")\n",
"# Parse the dates properly\n",
"time = [str(d) for d in df_monthly.yyyymm]\n",
"df_monthly.index = pd.to_datetime(time,format=\"%Y%m\")"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [],
"source": [
"# Variable construction\n",
"df_monthly['ExRet'] = df_monthly['CRSP_SPvw']-df_monthly['Rfree']\n",
"df_monthly['DP'] = np.log(df_monthly['D12'])-np.log(df_monthly['Index'])\n",
"df_monthly['DY'] = np.log(df_monthly['D12'])-np.log(df_monthly['Index'].shift())\n",
"df_monthly['EP'] = np.log(df_monthly['E12'])-np.log(df_monthly['Index'])\n",
"df_monthly['DE'] = np.log(df_monthly['D12'])-np.log(df_monthly['E12'])\n",
"df_monthly['tms'] = df_monthly['lty']-df_monthly['tbl']\n",
"df_monthly['dfr'] = df_monthly['corpr']-df_monthly['ltr']\n",
"df_monthly['dfy'] = df_monthly['BAA']-df_monthly['AAA']\n",
"\n",
"# infl needs to be lagged one more month\n",
"df_monthly['infl'] = df_monthly['infl'].shift().copy()\n",
"\n",
"# Construction of dependent and independent variables\n",
"dep_var = 'ExRet'\n",
"indep_vars = ['DE','svar','dfr','lty','ltr','infl','tms','tbl','dfy','DP','DY','EP','b/m','ntis']\n",
"\n",
"# Use the data from 1926/12 to 2020/12\n",
"subperiod = df_monthly.index>='1926-12-01'\n",
"df = df_monthly[subperiod]\n",
"M = 240 # Initial length of estimation window\n",
"gam = 3 # risk aversion coefficient\n",
"\n",
"# Create the benchmark using historical average\n",
"Hist_Mean = np.asarray(df[dep_var].expanding().mean().shift())\n",
"Hist_Variance = np.asarray(df[dep_var].expanding().var().shift())\n",
"\n",
"# Benchmark SSE (Historical Average)\n",
"OOS_SSE_Hist = np.sum((df[dep_var][M+1:]-Hist_Mean[M+1:])**2)\n",
"\n",
"# Benchmark Certainty Equivalence\n",
"w0 = ((1/gam)*(Hist_Mean/Hist_Variance)).clip(None,1.5);\n",
"r0 = df[dep_var]*w0\n",
"CE_Hist = np.mean(r0[M+1:])-gam/2*np.var(r0[M+1:],ddof=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### The following codes demonstrate how to compute OOS $R^2$ and CEV for one predictive regression (using DY)"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
"Y = np.asarray(df[dep_var])\n",
"X = np.asarray(df['DY'])\n",
"Y_Hat = np.full(len(Y), np.nan)\n",
"X = sm.add_constant(X)\n",
"# Note that we start the index at M+1 because the first element of predicted return is at t=M+2.\n",
"for i in range(M+1,len(Y)):\n",
" Y1 = Y[1:i]\n",
" X1 = X[0:i-1,:] \n",
" reg = sm.OLS(Y1, X1, missing='drop').fit()\n",
" Y_Hat[i] = reg.predict(X[i-1,:]) # The predicted value is based on the observation before"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [],
"source": [
"OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)\n",
"OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist\n",
"w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);\n",
"r1 = Y*w1\n",
"CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### In-sample $R^2$ and out-of-sample $R^2$"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"IS R^2 = 0.402\n",
"OOS R^2 = -0.942\n"
]
}
],
"source": [
"reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()\n",
"IS_R2 = reg1.rsquared\n",
"print(\"IS R^2 = %6.3f\"%(100.0*IS_R2))\n",
"print(\"OOS R^2 = %6.3f\"%(100.0*OOS_R2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### $\\Delta CEV$"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Difference in Certainty Equivalence = 0.1519\n"
]
}
],
"source": [
"print('Difference in Certainty Equivalence = %7.4f'%(100*(CE-CE_Hist)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Recompute the out-of-sample R2 and ∆CEV in part (1) by forcing the predicted market risk premium to be nonnegative."
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"IS R^2 = 0.402\n",
"OOS R^2 = -0.942\n",
"Difference in Certainty Equivalence = -0.0046\n"
]
}
],
"source": [
"# Import the modules\n",
"import pandas as pd\n",
"import numpy as np\n",
"from datetime import datetime\n",
"from tqdm import tqdm_notebook as tqdm\n",
"import statsmodels.api as sm\n",
"\n",
"# Import Data\n",
"df_monthly = pd.read_excel('PredictorData2020.xlsx',sheet_name=\"Monthly\")\n",
"# Parse the dates properly\n",
"time = [str(d) for d in df_monthly.yyyymm]\n",
"df_monthly.index = pd.to_datetime(time,format=\"%Y%m\")\n",
"\n",
"# Variable construction\n",
"df_monthly['ExRet'] = df_monthly['CRSP_SPvw']-df_monthly['Rfree']\n",
"df_monthly['DP'] = np.log(df_monthly['D12'])-np.log(df_monthly['Index'])\n",
"df_monthly['DY'] = np.log(df_monthly['D12'])-np.log(df_monthly['Index'].shift())\n",
"df_monthly['EP'] = np.log(df_monthly['E12'])-np.log(df_monthly['Index'])\n",
"df_monthly['DE'] = np.log(df_monthly['D12'])-np.log(df_monthly['E12'])\n",
"df_monthly['tms'] = df_monthly['lty']-df_monthly['tbl']\n",
"df_monthly['dfr'] = df_monthly['corpr']-df_monthly['ltr']\n",
"df_monthly['dfy'] = df_monthly['BAA']-df_monthly['AAA']\n",
"\n",
"# infl needs to be lagged one more month\n",
"df_monthly['infl'] = df_monthly['infl'].shift().copy()\n",
"\n",
"# Construction of dependent and independent variables\n",
"dep_var = 'ExRet'\n",
"indep_vars = ['svar','dfr','lty','ltr','infl','tbl','dfy','DP','DY','EP','b/m','ntis']\n",
"\n",
"# Use the data from 1926/12 to 2020/12\n",
"subperiod = df_monthly.index>='1926-12-01'\n",
"df = df_monthly[subperiod]\n",
"M = 240 # Initial length of estimation window\n",
"gam = -3 # risk aversion coefficient\n",
"\n",
"# Create the benchmark using historical average\n",
"Hist_Mean = np.asarray(df[dep_var].expanding().mean().shift())\n",
"Hist_Variance = np.asarray(df[dep_var].expanding().var().shift())\n",
"\n",
"# Benchmark SSE (Historical Average)\n",
"OOS_SSE_Hist = np.sum((df[dep_var][M+1:]-Hist_Mean[M+1:])**2)\n",
"\n",
"# Benchmark Certainty Equivalence\n",
"w0 = ((1/gam)*(Hist_Mean/Hist_Variance)).clip(None,1.5);\n",
"r0 = df[dep_var]*w0\n",
"CE_Hist = np.mean(r0[M+1:])-gam/2*np.var(r0[M+1:],ddof=1)\n",
"Y = np.asarray(df[dep_var])\n",
"X = np.asarray(df['DY'])\n",
"Y_Hat = np.full(len(Y), np.nan)\n",
"X = sm.add_constant(X)\n",
"# Note that we start the index at M+1 because the first element of predicted return is at t=M+2.\n",
"for i in range(M+1,len(Y)):\n",
" Y1 = Y[1:i]\n",
" X1 = X[0:i-1,:] \n",
" reg = sm.OLS(Y1, X1, missing='drop').fit()\n",
" Y_Hat[i] = reg.predict(X[i-1,:]) # The predicted value is based on the observation before\n",
"\n",
"OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)\n",
"OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist\n",
"w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);\n",
"r1 = Y*w1\n",
"CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)\n",
"reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()\n",
"IS_R2 = reg1.rsquared\n",
"print(\"IS R^2 = %6.3f\"%(100.0*IS_R2))\n",
"print(\"OOS R^2 = %6.3f\"%(100.0*OOS_R2))\n",
"print('Difference in Certainty Equivalence = %7.4f'%(100*(CE-CE_Hist)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#median of various forecasts "
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"IS R^2 = 0.402\n",
"OOS R^2 = -0.162\n",
"Difference in Certainty Equivalence = 0.6681\n"
]
}
],
"source": [
"# Import the modules\n",
"import pandas as pd\n",
"import numpy as np\n",
"from datetime import datetime\n",
"from tqdm import tqdm_notebook as tqdm\n",
"import statsmodels.api as sm\n",
"\n",
"# Import Data\n",
"df_monthly = pd.read_excel('PredictorData2020.xlsx',sheet_name=\"Monthly\")\n",
"# Parse the dates properly\n",
"time = [str(d) for d in df_monthly.yyyymm]\n",
"df_monthly.index = pd.to_datetime(time,format=\"%Y%m\")\n",
"\n",
"# Variable construction\n",
"df_monthly['ExRet'] = df_monthly['CRSP_SPvw']-df_monthly['Rfree']\n",
"df_monthly['DP'] = np.log(df_monthly['D12'])-np.log(df_monthly['Index'])\n",
"df_monthly['DY'] = np.log(df_monthly['D12'])-np.log(df_monthly['Index'].shift())\n",
"df_monthly['EP'] = np.log(df_monthly['E12'])-np.log(df_monthly['Index'])\n",
"df_monthly['DE'] = np.log(df_monthly['D12'])-np.log(df_monthly['E12'])\n",
"df_monthly['tms'] = df_monthly['lty']-df_monthly['tbl']\n",
"df_monthly['dfr'] = df_monthly['corpr']-df_monthly['ltr']\n",
"df_monthly['dfy'] = df_monthly['BAA']-df_monthly['AAA']\n",
"\n",
"# infl needs to be lagged one more month\n",
"df_monthly['infl'] = df_monthly['infl'].shift().copy()\n",
"\n",
"# Construction of dependent and independent variables\n",
"dep_var = 'ExRet'\n",
"indep_vars = ['svar','dfr','lty','ltr','infl','tbl','dfy','DP','DY','EP','b/m','ntis']\n",
"\n",
"# Use the data from 1926/12 to 2020/12\n",
"subperiod = df_monthly.index>='1926-12-01'\n",
"df = df_monthly[subperiod]\n",
"M = 240 # Initial length of estimation window\n",
"gam = -3 # risk aversion coefficient\n",
"\n",
"# Create the benchmark using historical average\n",
"Hist_median = np.asarray(df[dep_var].expanding().median().shift())\n",
"Hist_Variance = np.asarray(df[dep_var].expanding().var().shift())\n",
"\n",
"# Benchmark SSE (Historical Average)\n",
"OOS_SSE_Hist = np.sum((df[dep_var][M+1:]-Hist_median[M+1:])**2)\n",
"\n",
"# Benchmark Certainty Equivalence\n",
"w0 = ((1/gam)*(Hist_median/Hist_Variance)).clip(None,1.5);\n",
"r0 = df[dep_var]*w0\n",
"CE_Hist = np.median(r0[M+1:])-gam/2*np.var(r0[M+1:],ddof=1)\n",
"Y = np.asarray(df[dep_var])\n",
"X = np.asarray(df['DY'])\n",
"Y_Hat = np.full(len(Y), np.nan)\n",
"X = sm.add_constant(X)\n",
"# Note that we start the index at M+1 because the first element of predicted return is at t=M+2.\n",
"for i in range(M+1,len(Y)):\n",
" Y1 = Y[1:i]\n",
" X1 = X[0:i-1,:] \n",
" reg = sm.OLS(Y1, X1, missing='drop').fit()\n",
" Y_Hat[i] = reg.predict(X[i-1,:]) # The predicted value is based on the observation before\n",
"\n",
"OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)\n",
"OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist\n",
"w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);\n",
"r1 = Y*w1\n",
"CE = np.median(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)\n",
"reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()\n",
"IS_R2 = reg1.rsquared\n",
"print(\"IS R^2 = %6.3f\"%(100.0*IS_R2))\n",
"print(\"OOS R^2 = %6.3f\"%(100.0*OOS_R2))\n",
"print('Difference in Certainty Equivalence = %7.4f'%(100*(CE-CE_Hist)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor":...