### Important: see the HW 7 README for details.
# QUESTION 0 --- call in the data.
# CG Q0a # Read the data file ames2009.csv into R
########## and name the object ames. Use strings = T.
# Load the Ames housing data; character columns are converted to factors.
# `stringsAsFactors = TRUE` is the fully spelled-out form of `strings = T`.
# NOTE(review): the reader function's name was lost when this file was
# extracted; read.csv() is assumed from the .csv extension -- confirm.
ames <- read.csv("ames2009.csv", stringsAsFactors = TRUE)
# CG Q0b # Use str() on the ames data frame to inspect.
str(ames)
# QUESTION 1 --- Confidence interval for the mean.
# CG Q1a # Use the mean() function to compute the
########## average sales price for a home in the dataset.
########## Name the object xbar and print xbar in one line of code.
# Sample mean of the sale price column (the point estimate for Q1).
xbar <- mean(ames$SalePrice)
print(xbar)
# CG Q1b # Use the sd(), sqrt(), and nrow() functions
########## to compute the standard error for the average
########## sales price for a home in the dataset.
########## Name the object se and print se in one line of code.
# Standard error of the mean: sample standard deviation divided by sqrt(n),
# where n is the number of rows in ames.
se <- sd(ames$SalePrice) / sqrt(nrow(ames))
print(se)
# CG Q1c # Use xbar and se to compute a 95% CI for the average
########## sales price of a home in Ames, Iowa. Use 1.96 for the critical value.
# 95% confidence interval endpoints: xbar -/+ 1.96 standard errors.
# NOTE(review): the arithmetic operators were lost when this file was
# extracted; minus for `lower` and plus for `upper` are assumed.
lower <- xbar - 1.96 * se
upper <- xbar + 1.96 * se
# QUESTION 2 --- Uncertainty quantification for a regression coefficient
########## Use the code below to regress log sales price
########## onto all other variables except Neighborhood
# Regress log sale price on every column of ames except Neighborhood.
# NOTE(review): the fitting function's name was lost when this file was
# extracted; glm() (default gaussian family) is assumed -- confirm against
# the HW 7 README. lm() would give the same coefficient table.
fit <- glm(log(SalePrice) ~ . - Neighborhood, data = ames)
########## Use the code below to store and print the statistics
########## for the central air coefficient.
# Row of the coefficient table for the Central.AirY term; the outer
# parentheses print the value as it is assigned.
# NOTE(review): summary() is assumed as the lost function name.
(bstats <- summary(fit)$coefficients["Central.AirY", ])
# CG Q2a # Use the code below to print the p-value for the central air coefficient.
bstats["Pr(>|t|)"]
########## Based on the p-value, is this predictor significant?
########## Use paste("Y") or paste("N") to indicate your answer.
paste("Y")
# CG Q2b # Use info from bstats and a 1.96 critical value in a
########## single line of code to compute a 95% CI for
########## the effect of central air on log sales price.
# QUESTION 3 --- UQ for regression prediction
# CG Q3a # Create an object called nd that is the 1st row
########## in the ames data frame.
# First row of ames, used as the new observation to predict for.
nd <- ames[1, ]
# CG Q3b # Use the predict() function to make a prediction
########## of log sales price and get the standard errors
########## for the first home in the ames data frame.
########## Name this object pred.
# Refit the log-price regression (same formula and data as in Question 2).
# NOTE(review): the fitting function's name was lost when this file was
# extracted; glm() is assumed -- confirm against the HW 7 README.
fit <- glm(log(SalePrice) ~ . - Neighborhood, data = ames)
# se.fit = TRUE makes predict() return the standard error alongside the fit.
pred <- predict(fit, newdata = nd, se.fit = TRUE)
# NOTE(review): the right-hand side of this assignment was lost entirely;
# pred$se.fit (the standard error of the prediction) is assumed.
se_pred <- pred$se.fit
print(se_pred)
# CG Q3c # Use pred and a 1.96 critical value to
########## compute a 95% CI for the predicted log sales price
########## for the first home in the ames data frame.
# CG Q3d # Wrap the line of code from Q3c in the exp() function
########## to get a 95% CI for the predicted sales price.
# QUESTION 4 --- Bootstrap
# CG Q4a # Run the following code to bootstrap the predicted price
########## of the first home in the ames data frame.
# Bootstrap statistic: refit the log-price regression on a resampled set of
# rows and return the predicted sale price (back on the dollar scale) for
# the rows in `xpred`.
#
# data  : data frame with a SalePrice column (Neighborhood is excluded from
#         the predictors by the formula).
# obs   : row indices selecting the resample from `data`.
# xpred : data frame of the observation(s) to predict for.
#
# Returns exp() of the predicted log sale price.
# NOTE(review): the fitting function's name was lost when this file was
# extracted; glm() is assumed -- confirm against the HW 7 README.
getPrice <- function(data, obs, xpred) {
  fit <- glm(log(SalePrice) ~ . - Neighborhood, data = data[obs, ])
  exp(predict(fit, newdata = xpred))
}
library(parallel)
library(boot)
# Fix the RNG seed before resampling.
set.seed(1)
# Bootstrap getPrice() over the rows of ames with 2000 resamples, predicting
# for the first home each time. The outer parentheses print the result.
# NOTE(review): the function name was lost when this file was extracted;
# boot() is assumed from library(boot) and the argument list. The bare 2000
# in the original is passed here as R (the number of replicates).
(priceBoot <- boot(
  ames,
  getPrice,
  R = 2000,
  xpred = ames[1, ],
  parallel = "snow",
  ncpus = detectCores()
))
# CG Q4b # Use the quantile() function to get a 95% CI
########## for the predicted sales price for the first home.
# Percentile 95% interval: the 2.5% and 97.5% quantiles of the bootstrap
# replicates stored in priceBoot$t.
CI_boot <- quantile(priceBoot$t, c(0.025, 0.975))
print(CI_boot)
# CG Q4c # Use the quantile() function to get a bias corrected 95% CI
########## for the predicted sales price for the first home.