Answer To: Project 1: Stock price reaction to analyst recommendations (Due 11:59pm on Friday, November 6)...
Sandeep Kumar answered on Nov 12 2021
""" project1.py
Complete the sections below marked with ''
"""
import os
import datetime as dt
import numpy as np
import pandas as pd
# SRCDIR is the folder containing all the data:
# - `<tic>_prc.dat`
# - `<tic>_rec.csv`
# - `ff_daily.csv`
#
SRCDIR = './data/'
# TICKERS is the location of the TICKERS.txt file
TICKERS = './TICKERS.txt'
# FF_CSV is the location of the ff_daily.csv file. It must point at the file
# itself (not only at its folder); the file lives inside SRCDIR (see above).
FF_CSV = os.path.join(SRCDIR, 'ff_daily.csv')
# ----------------------------------------------------------------------------
# Source-file layout (values come from the README.txt file)
# ----------------------------------------------------------------------------
# SRC_COLS: list of column names, in the same order as the columns are
# specified in the README.txt file.
SRC_COLS = [
    'Adj Close',
    'Date',
    'Volume',
    'High',
    'Open',
]
# SRC_COL_DTYPES: dict mapping each column name to its dtype, as specified
# in the README.txt file. Keys are spelled out explicitly and must stay in
# sync with SRC_COLS.
SRC_COL_DTYPES = {
    'Adj Close': 'float64',
    'Date': 'datetime64',
    'Volume': 'int64',
    'High': 'float64',
    'Open': 'float64',
}
# SRC_COL_WIDTHS: dict mapping each column name to the width of that field
# in the DAT file, as defined in the README.txt file. Keys must stay in sync
# with SRC_COLS.
SRC_COL_WIDTHS = {
    'Adj Close': 10,
    'Date': 10,
    'Volume': 16,
    'High': 9,
    'Open': 18,
}
# ----------------------------------------------------------------------------
# Function get_tics
# ----------------------------------------------------------------------------
def get_tics(pth):
    """ Reads a file with tickers (one per line) and returns a list
    of formatted tickers (see the notes below).

    Parameters
    ----------
    pth : str
        Location of the TICKERS.txt file

    Returns
    -------
    list
        List where each element represents a ticker (formatted as below),
        in the order the tickers appear in the file.

    Notes
    -----
    - The tickers returned conform with the following rules:
        - All characters are in lower case
        - Leading and trailing whitespace is removed
        - The list contains no empty/blank tickers
    """
    with open(pth, 'r') as fobj:
        cleaned = (line.strip().lower() for line in fobj)
        # Blank (or whitespace-only) lines become '' after stripping and
        # must not be reported as tickers.
        return [tic for tic in cleaned if tic]
# ----------------------------------------------------------------------------
# Function dat_to_df
# ----------------------------------------------------------------------------
def dat_to_df(pth,
              src_cols,
              src_col_dtypes,
              src_col_widths,
              ):
    """ This function creates a dataframe with the contents of a DAT file
    containing stock price information for a given ticker.

    Parameters
    ----------
    pth : str
        Location of the DAT file containing price information (i.e. some
        `<tic>_prc.dat`)
    src_cols : list
        List containing the column names in the order they appear in each
        source DAT file. The order of columns must match the order specified
        in the README.txt file
    src_col_dtypes : dict
        A dictionary mapping each column name in `src_cols` to its data type,
        as it appears in the `README.txt` file.
    src_col_widths : dict
        A dictionary mapping each column name in `src_cols` to its column
        width as it appears in the `README.txt` file.

    Returns
    -------
    df
        A Pandas dataframe containing the stock price information from the DAT
        file in `pth`. This dataframe meets the following criteria:
        - df.index: DatetimeIndex with dates, matching the dates contained in
          the DAT file.
        - df.columns: each column label is a column in `src_cols`, with the
          exception of 'Date'. The order of the column labels matches the
          order in `src_cols`.
        - Each series inside this dataframe has the data type given for that
          column in `src_col_dtypes`.

    Raises
    ------
    KeyError
        If a column in `src_cols` is missing from `src_col_widths` or
        `src_col_dtypes`.
    """
    # Field widths must be listed in the same order as the columns appear
    # in the file.
    widths = [src_col_widths[col] for col in src_cols]
    # NOTE(review): assumes the DAT file is fixed-width with no header row
    # (column names are supplied through `src_cols`) -- confirm against the
    # README.txt file.
    raw = pd.read_fwf(pth, widths=widths, names=src_cols, header=None)
    # Dates are parsed with `pd.to_datetime` rather than `astype`, because a
    # unit-less 'datetime64' dtype string is rejected by newer pandas versions.
    raw['Date'] = pd.to_datetime(raw['Date'])
    df = raw.set_index('Date')
    # Enforce the documented dtype on every remaining (non-date) column.
    value_dtypes = {col: src_col_dtypes[col] for col in df.columns}
    return df.astype(value_dtypes)
# ----------------------------------------------------------------------------
# Function mk_prc_df
# ----------------------------------------------------------------------------
def mk_prc_df(
        tickers,
        srcdir,
        src_cols,
        src_col_dtypes,
        src_col_widths,
        ):
    """ This function creates a dataframe with the adjusted closing prices
    of every ticker in `tickers`, read from the `<tic>_prc.dat` files located
    in `srcdir`.

    Parameters
    ----------
    tickers : list
        List of tickers in the order they appear in the TICKERS.txt file
    srcdir : str
        Directory containing the source files:
        - <tic>_prc.dat for each <tic> in TICKERS.txt
        - <tic>_rec.csv for each <tic> in TICKERS.txt
    src_cols : list
        List containing the column names in the order they appear in each
        source DAT file. The order of columns must match the order specified
        in the README.txt file
    src_col_dtypes : dict
        A dictionary mapping each column name in `src_cols` to its data type,
        as it appears in the `README.txt` file.
    src_col_widths : dict
        A dictionary mapping each column name in `src_cols` to its column
        width as it appears in the `README.txt` file.

    Returns
    -------
    df
        A Pandas dataframe containing the adjusted closing price for each
        stock in `tickers`. This dataframe matches the following criteria:
        - df.index: DatetimeIndex with dates (sorted in ascending order).
        - df.columns: one column per ticker, labelled with the ticker code,
          in the same order as `tickers`.
        - The data inside each column (i.e. series) contains the prices from
          the 'Adj Close' column of that ticker's DAT file. Dates for which
          a ticker has no price take a NaN value.
        An empty dataframe is returned when `tickers` is empty.

    Notes
    -----
    - This function calls the `dat_to_df` function for each ticker
    - The output of this function is a dataframe that looks like this (the
      contents of the df below are for illustration purposes only and will
      **not** necessarily represent the actual contents of the dataframe you
      create):

                          aapl  ...        tsla
        Date
        1980-12-12    0.101261  ...         NaN
        ...                ...  ...         ...
        2020-10-05  116.500000  ...  425.679993
        2020-10-06  113.160004  ...  413.980011
        2020-10-07  115.080002  ...  425.299988
        2020-10-08  114.970001  ...  425.920013
        2020-10-09  116.970001  ...  434.000000
    """
    # `pd.concat` refuses an empty list, so handle "no tickers" explicitly.
    if not tickers:
        return pd.DataFrame()
    prices = [
        dat_to_df(
            os.path.join(srcdir, f'{tic}_prc.dat'),
            src_cols,
            src_col_dtypes,
            src_col_widths,
        )['Adj Close']
        for tic in tickers
    ]
    # Outer-join on the dates so that every valid price is kept; tickers
    # without a price on a given date get NaN. `keys` labels each column
    # with its ticker, preserving the order of `tickers`.
    prc_df = pd.concat(prices, axis=1, keys=tickers)
    return prc_df.sort_index()
# ----------------------------------------------------------------------------
# Function mk_aret_df
# ----------------------------------------------------------------------------
def mk_aret_df(prc_df):
""" Creates a dataframe with abnormal returns given the price information
contained in the `prc_df`
Parameters
----------
prc_df : dataframe
Dataframe produced by the function `mk_prc_df` above
Returns
-------
dataframe
Dataframe with abnormal returns for each ticker. Abnormal returns are
computed by subtracting the market return from that stock's returns.
- df.index: DatetimeIndex with dates, matching the dates contained in
the DAT file. The labels in the index must be datetime objects.
- df.columns: each column label will be a column in `src_cols`, with the
exception of 'Date'. The order of the column labels in this index
must match the order specified in the README.txt file
- Each series inside this dataframe contains the abnormal return for
each ticker in TICKERS.txt.
Notes
-----
The output of this function is a dataframe that looks like this (the
contents of the df below are for illustration purposes only and will
**not** necessarily represent the actual contents of the dataframe you
create):
aapl tsla
Date
1980-12-12 NaN NaN
1980-12-15 -0.052684 NaN
1980-12-16 -0.079905 NaN
1980-12-17 0.010143 NaN
1980-12-18 0.025475 NaN
... ... ...
2020-08-25 -0.011804 0.000938
... ... ...
"""
# --------------------------------------------------------
# Create returns
# ret_df must be similar to this:
#
# aapl tsla
# Date
# 1980-12-12 NaN NaN
# 1980-12-15 -0.052174 NaN
# 1980-12-16 -0.073395 NaN
# 1980-12-17 0.024753 NaN
# 1980-12-18 0.028985 NaN
# ... ... ...
# 2020-10-12 0.063521 0.019124
# ... ... ...
# --------------------------------------------------------
ret_df = ''
# --------------------------------------------------------
# Load FF mkt rets (do not change this part)
# --------------------------------------------------------
...