This is what I have currently, but it is not working.
Code
import numpy as np
import pandas as pd
import scipy
from scipy.stats import t
# Set up the regression model class
class RegressionModel(object):
    """Ordinary least squares regression with per-coefficient inference.

    Parameters
    ----------
    x : DataFrame-like
        Regressors, one column per variable. Anything accepted by
        ``pd.DataFrame`` works; the input is copied, never modified.
    y : array-like
        Dependent variable, one value per row of ``x``. Rows of ``x`` and
        ``y`` are matched by position.
    create_intercept : bool
        When true, a constant column named ``'intercept'`` is appended to
        the design matrix.
    regression_type : str, default "ols"
        Stored on the instance; only OLS (``ols_regression``) is implemented
        in this class.
    """

    def __init__(self, x, y, create_intercept, regression_type="ols"):
        # Keep a private copy so adding the intercept never writes into the
        # caller's frame (and never trips SettingWithCopyWarning when the
        # caller passed a slice of a larger DataFrame).
        self.x = pd.DataFrame(x).copy()
        self.y = y
        if create_intercept:
            self.x['intercept'] = 1.0
        self.create_intercept = create_intercept
        self.regression_type = regression_type
        self.results = None

    def ols_regression(self):
        """Fit the model by OLS and store/return per-variable statistics.

        Returns
        -------
        dict
            Maps each column name of the design matrix to a dict with keys
            ``'Variable name'``, ``'coefficient value'``, ``'standard error'``,
            ``'t-statistic'`` and ``'p-value'`` (two-sided).

        Raises
        ------
        ValueError
            If ``y`` does not have one value per row of ``x``, or if there
            are not more observations than regressors.
        numpy.linalg.LinAlgError
            If ``X'X`` is singular (perfectly collinear regressors).
        """
        # Do the linear algebra on plain float arrays: this avoids pandas
        # index alignment surprises and label-vs-position indexing on the
        # coefficient vector.
        design = self.x.to_numpy(dtype=float)
        response = np.asarray(self.y, dtype=float).reshape(-1)

        n, k = design.shape
        if response.shape[0] != n:
            raise ValueError(
                "y has %d values but x has %d rows" % (response.shape[0], n)
            )
        dof = n - k
        if dof <= 0:
            raise ValueError(
                "need more observations than regressors (n=%d, k=%d)" % (n, k)
            )

        # (X'X)^-1 is needed for both the coefficients and their covariance.
        xtx_inv = np.linalg.inv(design.T @ design)
        beta = xtx_inv @ (design.T @ response)

        # Unbiased residual variance estimate: e'e / (n - k).
        resid = response - design @ beta
        sigma2 = (resid @ resid) / dof

        std_err = np.sqrt(np.diag(sigma2 * xtx_inv))
        t_stat = beta / std_err
        # Two-sided p-value; taking |t| keeps negative coefficients from
        # getting p-values close to 1.
        p_val = 2 * t.sf(np.abs(t_stat), dof)

        self.results = {
            name: {
                'Variable name': name,
                'coefficient value': coef,
                'standard error': se,
                't-statistic': ts,
                'p-value': pv,
            }
            for name, coef, se, ts, pv in zip(
                self.x.columns, beta, std_err, t_stat, p_val
            )
        }
        return self.results

    def summary(self):
        """Return the fitted statistics as a DataFrame, one row per variable.

        Runs ``ols_regression`` first if the model has not been fitted yet.
        """
        if self.results is None:
            self.ols_regression()
        return pd.DataFrame(data=self.results).T
# Load the assignment data set (network access required).
data = pd.read_csv("https://github.com/dustywhite7/Econ8320/raw/master/AssignmentData/assignment8Data.csv")
# Take an explicit copy of the regressor columns: a plain column selection is
# a slice of `data`, and any column the model adds to its design matrix would
# otherwise trigger pandas' SettingWithCopyWarning.
x = data[['sex','age','educ','white']].copy()
y = data['incwage']
# Fit by OLS with an added constant term.
reg = RegressionModel(x, y, create_intercept=True)
output = reg.ols_regression()
# NOTE(review): as a bare expression this only displays in an interactive
# session / notebook cell; wrap in print() if this runs as a plain script.
reg.summary()