{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Read the Medical Record Dataset"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read the dataset"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"data=pd.read_csv('D:\\\\New\\\\DoctorContacts.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"data_df=pd.DataFrame(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Format numbers to be displayed with two decimal positions"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"data_df['lc']=round(data_df['lc'],2)\n",
"data_df['lpi']=round(data_df['lpi'],2)\n",
"data_df['fmde']=round(data_df['fmde'],2)\n",
"data_df['ndisease']=round(data_df['ndisease'],2)\n",
"data_df['linc']=round(data_df['linc'],2)\n",
"data_df['lfam']=round(data_df['lfam'],2)\n",
"data_df['educdec']=round(data_df['educdec'],2)\n",
"data_df['age']=round(data_df['age'],2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"View first 5 rows in the Medical Dataset"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
"\n",
"\n",
" | \n",
"Unnamed: 0 | \n",
"mdu | \n",
"lc | \n",
"idp | \n",
"lpi | \n",
"fmde | \n",
"physlim | \n",
"ndisease | \n",
"health | \n",
"linc | \n",
"lfam | \n",
"educdec | \n",
"age | \n",
"sex | \n",
"child | \n",
"black | \n",
"
\n",
"\n",
"\n",
"\n",
"0 | \n",
"1 | \n",
"0 | \n",
"0.0 | \n",
"True | \n",
"6.91 | \n",
"0.0 | \n",
"False | \n",
"13.73 | \n",
"good | \n",
"9.53 | \n",
"1.39 | \n",
"12.0 | \n",
"42.88 | \n",
"male | \n",
"False | \n",
"True | \n",
"
\n",
"\n",
"1 | \n",
"2 | \n",
"2 | \n",
"0.0 | \n",
"True | \n",
"6.91 | \n",
"0.0 | \n",
"False | \n",
"13.73 | \n",
"good | \n",
"9.53 | \n",
"1.39 | \n",
"12.0 | \n",
"43.88 | \n",
"male | \n",
"False | \n",
"True | \n",
"
\n",
"\n",
"2 | \n",
"3 | \n",
"0 | \n",
"0.0 | \n",
"True | \n",
"6.91 | \n",
"0.0 | \n",
"False | \n",
"13.73 | \n",
"good | \n",
"9.53 | \n",
"1.39 | \n",
"12.0 | \n",
"44.88 | \n",
"male | \n",
"False | \n",
"True | \n",
"
\n",
"\n",
"3 | \n",
"4 | \n",
"0 | \n",
"0.0 | \n",
"True | \n",
"6.91 | \n",
"0.0 | \n",
"False | \n",
"13.73 | \n",
"good | \n",
"9.53 | \n",
"1.39 | \n",
"12.0 | \n",
"45.88 | \n",
"male | \n",
"False | \n",
"True | \n",
"
\n",
"\n",
"4 | \n",
"5 | \n",
"0 | \n",
"0.0 | \n",
"True | \n",
"6.91 | \n",
"0.0 | \n",
"False | \n",
"13.73 | \n",
"good | \n",
"9.53 | \n",
"1.39 | \n",
"12.0 | \n",
"46.88 | \n",
"male | \n",
"False | \n",
"True | \n",
"
\n",
"\n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 mdu lc idp lpi fmde physlim ndisease health linc \\\n",
"0 1 0 0.0 True 6.91 0.0 False 13.73 good 9.53 \n",
"1 2 2 0.0 True 6.91 0.0 False 13.73 good 9.53 \n",
"2 3 0 0.0 True 6.91 0.0 False 13.73 good 9.53 \n",
"3 4 0 0.0 True 6.91 0.0 False 13.73 good 9.53 \n",
"4 5 0 0.0 True 6.91 0.0 False 13.73 good 9.53 \n",
"\n",
" lfam educdec age sex child black \n",
"0 1.39 12.0 42.88 male False True \n",
"1 1.39 12.0 43.88 male False True \n",
"2 1.39 12.0 44.88 male False True \n",
"3 1.39 12.0 45.88 male False True \n",
"4 1.39 12.0 46.88 male False True "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Customize the Column Names so that the first column’s label becomes ‘id"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
"\n",
"\n",
" | \n",
"id | \n",
"mdu | \n",
"lc | \n",
"idp | \n",
"lpi | \n",
"fmde | \n",
"physlim | \n",
"ndisease | \n",
"health | \n",
"linc | \n",
"lfam | \n",
"educdec | \n",
"age | \n",
"sex | \n",
"child | \n",
"black | \n",
"
\n",
"\n",
"\n",
"\n",
"0 | \n",
"1 | \n",
"0 | \n",
"0.0 | \n",
"True | \n",
"6.91 | \n",
"0.0 | \n",
"False | \n",
"13.73 | \n",
"good | \n",
"9.53 | \n",
"1.39 | \n",
"12.0 | \n",
"42.88 | \n",
"male | \n",
"False | \n",
"True | \n",
"
\n",
"\n",
"1 | \n",
"2 | \n",
"2 | \n",
"0.0 | \n",
"True | \n",
"6.91 | \n",
"0.0 | \n",
"False | \n",
"13.73 | \n",
"good | \n",
"9.53 | \n",
"1.39 | \n",
"12.0 | \n",
"43.88 | \n",
"male | \n",
"False | \n",
"True | \n",
"
\n",
"\n",
"2 | \n",
"3 | \n",
"0 | \n",
"0.0 | \n",
"True | \n",
"6.91 | \n",
"0.0 | \n",
"False | \n",
"13.73 | \n",
"good | \n",
"9.53 | \n",
"1.39 | \n",
"12.0 | \n",
"44.88 | \n",
"male | \n",
"False | \n",
"True | \n",
"
\n",
"\n",
"3 | \n",
"4 | \n",
"0 | \n",
"0.0 | \n",
"True | \n",
"6.91 | \n",
"0.0 | \n",
"False | \n",
"13.73 | \n",
"good | \n",
"9.53 | \n",
"1.39 | \n",
"12.0 | \n",
"45.88 | \n",
"male | \n",
"False | \n",
"True | \n",
"
\n",
"\n",
"4 | \n",
"5 | \n",
"0 | \n",
"0.0 | \n",
"True | \n",
"6.91 | \n",
"0.0 | \n",
"False | \n",
"13.73 | \n",
"good | \n",
"9.53 | \n",
"1.39 | \n",
"12.0 | \n",
"46.88 | \n",
"male | \n",
"False | \n",
"True | \n",
"
\n",
"\n",
"
\n",
"
"
],
"text/plain": [
" id mdu lc idp lpi fmde physlim ndisease health linc lfam \\\n",
"0 1 0 0.0 True 6.91 0.0 False 13.73 good 9.53 1.39 \n",
"1 2 2 0.0 True 6.91 0.0 False 13.73 good 9.53 1.39 \n",
"2 3 0 0.0 True 6.91 0.0 False 13.73 good 9.53 1.39 \n",
"3 4 0 0.0 True 6.91 0.0 False 13.73 good 9.53 1.39 \n",
"4 5 0 0.0 True 6.91 0.0 False 13.73 good 9.53 1.39 \n",
"\n",
" educdec age sex child black \n",
"0 12.0 42.88 male False True \n",
"1 12.0 43.88 male False True \n",
"2 12.0 44.88 male False True \n",
"3 12.0 45.88 male False True \n",
"4 12.0 46.88 male False True "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_df.rename(columns={'Unnamed: 0':'id'}).head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create a Function Analyzing the Association between 2 Variables"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating the function"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import statsmodels.api as sm\n",
"\n",
"#Creating a function for countinuous variables only\n",
"def medical_stats(var1,var2):\n",
" \n",
" var1 = sm.add_constant(var1)\n",
" model1 = sm.OLS(var2,var1)\n",
" fitted1 = model1.fit()\n",
" p_values = fitted1.summary2().tables[1]['P>|t|']\n",
" \n",
" parameters=fitted1.params\n",
" \n",
" slope = parameters[1]\n",
" intercept = parameters[0]\n",
" \n",
" if p_values.values[0] > 0.05: #for 95% confidence\n",
" print(\"The association is not significant\")\n",
" elif p_values.values[0] < 0.05:\n",
" if slope...