89946/machine_learning.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np\n",
"import collections\n",
"from collections import Counter\n",
"\n",
"import sklearn\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"from sklearn.preprocessing import OrdinalEncoder\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.pipeline import Pipeline\n",
"\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.svm import SVC\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.ensemble import GradientBoostingClassifier\n",
"from sklearn.ensemble import AdaBoostClassifier\n",
"from sklearn.linear_model import SGDClassifier\n",
"\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.metrics import classification_report"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 3,
"source": [
"# Load the dataset; the path is resolved against the current working directory.\n",
"DATA_PATH = 'obesitylevels.csv'\n",
"df = pd.read_csv(DATA_PATH)"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 4,
"source": [
"df"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Gender Age Height Weight family_history_with_overweight \\\n",
"0 Female 21.000000 1.620000 64.000000 yes \n",
"1 Female 21.000000 1.520000 56.000000 yes \n",
"2 Male 23.000000 1.800000 77.000000 yes \n",
"3 Male 27.000000 1.800000 87.000000 no \n",
"4 Male 22.000000 1.780000 89.800000 no \n",
"... ... ... ... ... ... \n",
"2106 Female 20.976842 1.710730 131.408528 yes \n",
"2107 Female 21.982942 1.748584 133.742943 yes \n",
"2108 Female 22.524036 1.752206 133.689352 yes \n",
"2109 Female 24.361936 1.739450 133.346641 yes \n",
"2110 Female 23.664709 1.738836 133.472641 yes \n",
"\n",
" FAVC FCVC NCP CAEC SMOKE CH2O SCC FAF TUE \\\n",
"0 no 2.0 3.0 Sometimes no 2.000000 no 0.000000 1.000000 \n",
"1 no 3.0 3.0 Sometimes yes 3.000000 yes 3.000000 0.000000 \n",
"2 no 2.0 3.0 Sometimes no 2.000000 no 2.000000 1.000000 \n",
"3 no 3.0 3.0 Sometimes no 2.000000 no 2.000000 0.000000 \n",
"4 no 2.0 1.0 Sometimes no 2.000000 no 0.000000 0.000000 \n",
"... ... ... ... ... ... ... ... ... ... \n",
"2106 yes 3.0 3.0 Sometimes no 1.728139 no 1.676269 0.906247 \n",
"2107 yes 3.0 3.0 Sometimes no 2.005130 no 1.341390 0.599270 \n",
"2108 yes 3.0 3.0 Sometimes no 2.054193 no 1.414209 0.646288 \n",
"2109 yes 3.0 3.0 Sometimes no 2.852339 no 1.139107 0.586035 \n",
"2110 yes 3.0 3.0 Sometimes no 2.863513 no 1.026452 0.714137 \n",
"\n",
" CALC MTRANS NObeyesdad \n",
"0 no Public_Transportation Normal_Weight \n",
"1 Sometimes Public_Transportation Normal_Weight \n",
"2 Frequently Public_Transportation Normal_Weight \n",
"3 Frequently Walking Overweight_Level_I \n",
"4 Sometimes Public_Transportation Overweight_Level_II \n",
"... ... ... ... \n",
"2106 Sometimes Public_Transportation Obesity_Type_III \n",
"2107 Sometimes Public_Transportation Obesity_Type_III \n",
"2108 Sometimes Public_Transportation Obesity_Type_III \n",
"2109 Sometimes Public_Transportation Obesity_Type_III \n",
"2110 Sometimes Public_Transportation Obesity_Type_III \n",
"\n",
"[2111 rows x 17 columns]"
],
"text/html": [
"\n",
"\n",
"
\n",
"\n",
"\n",
" | \n",
"Gender | \n",
"Age | \n",
"Height | \n",
"Weight | \n",
"family_history_with_overweight | \n",
"FAVC | \n",
"FCVC | \n",
"NCP | \n",
"CAEC | \n",
"SMOKE | \n",
"CH2O | \n",
"SCC | \n",
"FAF | \n",
"TUE | \n",
"CALC | \n",
"MTRANS | \n",
"NObeyesdad | \n",
"
\n",
"\n",
"\n",
"\n",
"0 | \n",
"Female | \n",
"21.000000 | \n",
"1.620000 | \n",
"64.000000 | \n",
"yes | \n",
"no | \n",
"2.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.000000 | \n",
"no | \n",
"0.000000 | \n",
"1.000000 | \n",
"no | \n",
"Public_Transportation | \n",
"Normal_Weight | \n",
"
\n",
"\n",
"1 | \n",
"Female | \n",
"21.000000 | \n",
"1.520000 | \n",
"56.000000 | \n",
"yes | \n",
"no | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"yes | \n",
"3.000000 | \n",
"yes | \n",
"3.000000 | \n",
"0.000000 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Normal_Weight | \n",
"
\n",
"\n",
"2 | \n",
"Male | \n",
"23.000000 | \n",
"1.800000 | \n",
"77.000000 | \n",
"yes | \n",
"no | \n",
"2.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.000000 | \n",
"no | \n",
"2.000000 | \n",
"1.000000 | \n",
"Frequently | \n",
"Public_Transportation | \n",
"Normal_Weight | \n",
"
\n",
"\n",
"3 | \n",
"Male | \n",
"27.000000 | \n",
"1.800000 | \n",
"87.000000 | \n",
"no | \n",
"no | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.000000 | \n",
"no | \n",
"2.000000 | \n",
"0.000000 | \n",
"Frequently | \n",
"Walking | \n",
"Overweight_Level_I | \n",
"
\n",
"\n",
"4 | \n",
"Male | \n",
"22.000000 | \n",
"1.780000 | \n",
"89.800000 | \n",
"no | \n",
"no | \n",
"2.0 | \n",
"1.0 | \n",
"Sometimes | \n",
"no | \n",
"2.000000 | \n",
"no | \n",
"0.000000 | \n",
"0.000000 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Overweight_Level_II | \n",
"
\n",
"\n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"
\n",
"\n",
"2106 | \n",
"Female | \n",
"20.976842 | \n",
"1.710730 | \n",
"131.408528 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"1.728139 | \n",
"no | \n",
"1.676269 | \n",
"0.906247 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Obesity_Type_III | \n",
"
\n",
"\n",
"2107 | \n",
"Female | \n",
"21.982942 | \n",
"1.748584 | \n",
"133.742943 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.005130 | \n",
"no | \n",
"1.341390 | \n",
"0.599270 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Obesity_Type_III | \n",
"
\n",
"\n",
"2108 | \n",
"Female | \n",
"22.524036 | \n",
"1.752206 | \n",
"133.689352 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.054193 | \n",
"no | \n",
"1.414209 | \n",
"0.646288 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Obesity_Type_III | \n",
"
\n",
"\n",
"2109 | \n",
"Female | \n",
"24.361936 | \n",
"1.739450 | \n",
"133.346641 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.852339 | \n",
"no | \n",
"1.139107 | \n",
"0.586035 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Obesity_Type_III | \n",
"
\n",
"\n",
"2110 | \n",
"Female | \n",
"23.664709 | \n",
"1.738836 | \n",
"133.472641 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.863513 | \n",
"no | \n",
"1.026452 | \n",
"0.714137 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Obesity_Type_III | \n",
"
\n",
"\n",
"
\n",
"
2111 rows × 17 columns
\n",
"
"
]
},
"metadata": {},
"execution_count": 4
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 5,
"source": [
"df.shape"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(2111, 17)"
]
},
"metadata": {},
"execution_count": 5
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 6,
"source": [
"df.info()"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"RangeIndex: 2111 entries, 0 to 2110\n",
"Data columns (total 17 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Gender 2111 non-null object \n",
" 1 Age 2111 non-null float64\n",
" 2 Height 2111 non-null float64\n",
" 3 Weight 2111 non-null float64\n",
" 4 family_history_with_overweight 2111 non-null object \n",
" 5 FAVC 2111 non-null object \n",
" 6 FCVC 2111 non-null float64\n",
" 7 NCP 2111 non-null float64\n",
" 8 CAEC 2111 non-null object \n",
" 9 SMOKE 2111 non-null object \n",
" 10 CH2O 2111 non-null float64\n",
" 11 SCC 2111 non-null object \n",
" 12 FAF 2111 non-null float64\n",
" 13 TUE 2111 non-null float64\n",
" 14 CALC 2111 non-null object \n",
" 15 MTRANS 2111 non-null object \n",
" 16 NObeyesdad 2111 non-null object \n",
"dtypes: float64(8), object(9)\n",
"memory usage: 280.5+ KB\n"
]
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 7,
"source": [
"df.describe()"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Age Height Weight FCVC NCP \\\n",
"count 2111.000000 2111.000000 2111.000000 2111.000000 2111.000000 \n",
"mean 24.312600 1.701677 86.586058 2.419043 2.685628 \n",
"std 6.345968 0.093305 26.191172 0.533927 0.778039 \n",
"min 14.000000 1.450000 39.000000 1.000000 1.000000 \n",
"25% 19.947192 1.630000 65.473343 2.000000 2.658738 \n",
"50% 22.777890 1.700499 83.000000 2.385502 3.000000 \n",
"75% 26.000000 1.768464 107.430682 3.000000 3.000000 \n",
"max 61.000000 1.980000 173.000000 3.000000 4.000000 \n",
"\n",
" CH2O FAF TUE \n",
"count 2111.000000 2111.000000 2111.000000 \n",
"mean 2.008011 1.010298 0.657866 \n",
"std 0.612953 0.850592 0.608927 \n",
"min 1.000000 0.000000 0.000000 \n",
"25% 1.584812 0.124505 0.000000 \n",
"50% 2.000000 1.000000 0.625350 \n",
"75% 2.477420 1.666678 1.000000 \n",
"max 3.000000 3.000000 2.000000 "
],
"text/html": [
"\n",
"\n",
"
\n",
"\n",
"\n",
" | \n",
"Age | \n",
"Height | \n",
"Weight | \n",
"FCVC | \n",
"NCP | \n",
"CH2O | \n",
"FAF | \n",
"TUE | \n",
"
\n",
"\n",
"\n",
"\n",
"count | \n",
"2111.000000 | \n",
"2111.000000 | \n",
"2111.000000 | \n",
"2111.000000 | \n",
"2111.000000 | \n",
"2111.000000 | \n",
"2111.000000 | \n",
"2111.000000 | \n",
"
\n",
"\n",
"mean | \n",
"24.312600 | \n",
"1.701677 | \n",
"86.586058 | \n",
"2.419043 | \n",
"2.685628 | \n",
"2.008011 | \n",
"1.010298 | \n",
"0.657866 | \n",
"
\n",
"\n",
"std | \n",
"6.345968 | \n",
"0.093305 | \n",
"26.191172 | \n",
"0.533927 | \n",
"0.778039 | \n",
"0.612953 | \n",
"0.850592 | \n",
"0.608927 | \n",
"
\n",
"\n",
"min | \n",
"14.000000 | \n",
"1.450000 | \n",
"39.000000 | \n",
"1.000000 | \n",
"1.000000 | \n",
"1.000000 | \n",
"0.000000 | \n",
"0.000000 | \n",
"
\n",
"\n",
"25% | \n",
"19.947192 | \n",
"1.630000 | \n",
"65.473343 | \n",
"2.000000 | \n",
"2.658738 | \n",
"1.584812 | \n",
"0.124505 | \n",
"0.000000 | \n",
"
\n",
"\n",
"50% | \n",
"22.777890 | \n",
"1.700499 | \n",
"83.000000 | \n",
"2.385502 | \n",
"3.000000 | \n",
"2.000000 | \n",
"1.000000 | \n",
"0.625350 | \n",
"
\n",
"\n",
"75% | \n",
"26.000000 | \n",
"1.768464 | \n",
"107.430682 | \n",
"3.000000 | \n",
"3.000000 | \n",
"2.477420 | \n",
"1.666678 | \n",
"1.000000 | \n",
"
\n",
"\n",
"max | \n",
"61.000000 | \n",
"1.980000 | \n",
"173.000000 | \n",
"3.000000 | \n",
"4.000000 | \n",
"3.000000 | \n",
"3.000000 | \n",
"2.000000 | \n",
"
\n",
"\n",
"
\n",
"
"
]
},
"metadata": {},
"execution_count": 7
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 8,
"source": [
"df.columns"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Index(['Gender', 'Age', 'Height', 'Weight', 'family_history_with_overweight',\n",
" 'FAVC', 'FCVC', 'NCP', 'CAEC', 'SMOKE', 'CH2O', 'SCC', 'FAF', 'TUE',\n",
" 'CALC', 'MTRANS', 'NObeyesdad'],\n",
" dtype='object')"
]
},
"metadata": {},
"execution_count": 8
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 9,
"source": [
"# Give the abbreviated columns descriptive labels.\n",
"# Renaming by name rather than by position keeps each label attached to the right\n",
"# column even if the CSV column order changes, and re-running the cell is a no-op.\n",
"# 'Gender', 'Age', 'Height' and 'Weight' already have readable names and are kept.\n",
"df = df.rename(columns={\n",
"    'family_history_with_overweight': 'Family History with Overweight',\n",
"    'FAVC': 'Frequent consumption of high caloric food',\n",
"    'FCVC': 'Frequency of consumption of vegetables',\n",
"    'NCP': 'Number of main meals',\n",
"    'CAEC': 'Consumption of food between meals',\n",
"    'SMOKE': 'Smoke',\n",
"    'CH2O': 'Consumption of water daily',\n",
"    'SCC': 'Calories consumption monitoring',\n",
"    'FAF': 'Physical activity frequency',\n",
"    'TUE': 'Time using technology devices',\n",
"    'CALC': 'Consumption of alcohol',\n",
"    'MTRANS': 'Transportation used',\n",
"    'NObeyesdad': 'Obesity',\n",
"})\n",
"\n",
"df\n"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Gender Age Height Weight Family History with Overweight \\\n",
"0 Female 21.000000 1.620000 64.000000 yes \n",
"1 Female 21.000000 1.520000 56.000000 yes \n",
"2 Male 23.000000 1.800000 77.000000 yes \n",
"3 Male 27.000000 1.800000 87.000000 no \n",
"4 Male 22.000000 1.780000 89.800000 no \n",
"... ... ... ... ... ... \n",
"2106 Female 20.976842 1.710730 131.408528 yes \n",
"2107 Female 21.982942 1.748584 133.742943 yes \n",
"2108 Female 22.524036 1.752206 133.689352 yes \n",
"2109 Female 24.361936 1.739450 133.346641 yes \n",
"2110 Female 23.664709 1.738836 133.472641 yes \n",
"\n",
" Frequent consumption of high caloric food \\\n",
"0 no \n",
"1 no \n",
"2 no \n",
"3 no \n",
"4 no \n",
"... ... \n",
"2106 yes \n",
"2107 yes \n",
"2108 yes \n",
"2109 yes \n",
"2110 yes \n",
"\n",
" Frequency of consumption of vegetables Number of main meals \\\n",
"0 2.0 3.0 \n",
"1 3.0 3.0 \n",
"2 2.0 3.0 \n",
"3 3.0 3.0 \n",
"4 2.0 1.0 \n",
"... ... ... \n",
"2106 3.0 3.0 \n",
"2107 3.0 3.0 \n",
"2108 3.0 3.0 \n",
"2109 3.0 3.0 \n",
"2110 3.0 3.0 \n",
"\n",
" Consumption of food between meals Smoke Consumption of water daily \\\n",
"0 Sometimes no 2.000000 \n",
"1 Sometimes yes 3.000000 \n",
"2 Sometimes no 2.000000 \n",
"3 Sometimes no 2.000000 \n",
"4 Sometimes no 2.000000 \n",
"... ... ... ... \n",
"2106 Sometimes no 1.728139 \n",
"2107 Sometimes no 2.005130 \n",
"2108 Sometimes no 2.054193 \n",
"2109 Sometimes no 2.852339 \n",
"2110 Sometimes no 2.863513 \n",
"\n",
" Calories consumption monitoring Physical activity frequency \\\n",
"0 no 0.000000 \n",
"1 yes 3.000000 \n",
"2 no 2.000000 \n",
"3 no 2.000000 \n",
"4 no 0.000000 \n",
"... ... ... \n",
"2106 no 1.676269 \n",
"2107 no 1.341390 \n",
"2108 no 1.414209 \n",
"2109 no 1.139107 \n",
"2110 no 1.026452 \n",
"\n",
" Time using technology devices Consumption of alcohol \\\n",
"0 1.000000 no \n",
"1 0.000000 Sometimes \n",
"2 1.000000 Frequently \n",
"3 0.000000 Frequently \n",
"4 0.000000 Sometimes \n",
"... ... ... \n",
"2106 0.906247 Sometimes \n",
"2107 0.599270 Sometimes \n",
"2108 0.646288 Sometimes \n",
"2109 0.586035 Sometimes \n",
"2110 0.714137 Sometimes \n",
"\n",
" Transportation used Obesity \n",
"0 Public_Transportation Normal_Weight \n",
"1 Public_Transportation Normal_Weight \n",
"2 Public_Transportation Normal_Weight \n",
"3 Walking Overweight_Level_I \n",
"4 Public_Transportation Overweight_Level_II \n",
"... ... ... \n",
"2106 Public_Transportation Obesity_Type_III \n",
"2107 Public_Transportation Obesity_Type_III \n",
"2108 Public_Transportation Obesity_Type_III \n",
"2109 Public_Transportation Obesity_Type_III \n",
"2110 Public_Transportation Obesity_Type_III \n",
"\n",
"[2111 rows x 17 columns]"
],
"text/html": [
"
\n",
"\n",
"
\n",
"\n",
"\n",
" | \n",
"Gender | \n",
"Age | \n",
"Height | \n",
"Weight | \n",
"Family History with Overweight | \n",
"Frequent consumption of high caloric food | \n",
"Frequency of consumption of vegetables | \n",
"Number of main meals | \n",
"Consumption of food between meals | \n",
"Smoke | \n",
"Consumption of water daily | \n",
"Calories consumption monitoring | \n",
"Physical activity frequency | \n",
"Time using technology devices | \n",
"Consumption of alcohol | \n",
"Transportation used | \n",
"Obesity | \n",
"
\n",
"\n",
"\n",
"\n",
"0 | \n",
"Female | \n",
"21.000000 | \n",
"1.620000 | \n",
"64.000000 | \n",
"yes | \n",
"no | \n",
"2.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.000000 | \n",
"no | \n",
"0.000000 | \n",
"1.000000 | \n",
"no | \n",
"Public_Transportation | \n",
"Normal_Weight | \n",
"
\n",
"\n",
"1 | \n",
"Female | \n",
"21.000000 | \n",
"1.520000 | \n",
"56.000000 | \n",
"yes | \n",
"no | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"yes | \n",
"3.000000 | \n",
"yes | \n",
"3.000000 | \n",
"0.000000 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Normal_Weight | \n",
"
\n",
"\n",
"2 | \n",
"Male | \n",
"23.000000 | \n",
"1.800000 | \n",
"77.000000 | \n",
"yes | \n",
"no | \n",
"2.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.000000 | \n",
"no | \n",
"2.000000 | \n",
"1.000000 | \n",
"Frequently | \n",
"Public_Transportation | \n",
"Normal_Weight | \n",
"
\n",
"\n",
"3 | \n",
"Male | \n",
"27.000000 | \n",
"1.800000 | \n",
"87.000000 | \n",
"no | \n",
"no | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.000000 | \n",
"no | \n",
"2.000000 | \n",
"0.000000 | \n",
"Frequently | \n",
"Walking | \n",
"Overweight_Level_I | \n",
"
\n",
"\n",
"4 | \n",
"Male | \n",
"22.000000 | \n",
"1.780000 | \n",
"89.800000 | \n",
"no | \n",
"no | \n",
"2.0 | \n",
"1.0 | \n",
"Sometimes | \n",
"no | \n",
"2.000000 | \n",
"no | \n",
"0.000000 | \n",
"0.000000 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Overweight_Level_II | \n",
"
\n",
"\n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"
\n",
"\n",
"2106 | \n",
"Female | \n",
"20.976842 | \n",
"1.710730 | \n",
"131.408528 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"1.728139 | \n",
"no | \n",
"1.676269 | \n",
"0.906247 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Obesity_Type_III | \n",
"
\n",
"\n",
"2107 | \n",
"Female | \n",
"21.982942 | \n",
"1.748584 | \n",
"133.742943 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.005130 | \n",
"no | \n",
"1.341390 | \n",
"0.599270 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Obesity_Type_III | \n",
"
\n",
"\n",
"2108 | \n",
"Female | \n",
"22.524036 | \n",
"1.752206 | \n",
"133.689352 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.054193 | \n",
"no | \n",
"1.414209 | \n",
"0.646288 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Obesity_Type_III | \n",
"
\n",
"\n",
"2109 | \n",
"Female | \n",
"24.361936 | \n",
"1.739450 | \n",
"133.346641 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.852339 | \n",
"no | \n",
"1.139107 | \n",
"0.586035 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Obesity_Type_III | \n",
"
\n",
"\n",
"2110 | \n",
"Female | \n",
"23.664709 | \n",
"1.738836 | \n",
"133.472641 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.863513 | \n",
"no | \n",
"1.026452 | \n",
"0.714137 | \n",
"Sometimes | \n",
"Public_Transportation | \n",
"Obesity_Type_III | \n",
"
\n",
"\n",
"
\n",
"
2111 rows × 17 columns
\n",
"
"
]
},
"metadata": {},
"execution_count": 9
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 10,
"source": [
"# Replace the underscores in the two label columns with spaces. The vectorised\n",
"# string accessor does a literal (non-regex) replacement and tolerates missing values.\n",
"for label_column in ['Obesity', 'Transportation used']:\n",
"    df[label_column] = df[label_column].str.replace('_', ' ', regex=False)\n",
"\n",
"# Scale Height by 100 (presumably metres -> centimetres, e.g. 1.62 -> 162.0).\n",
"# The guard keeps the cell idempotent: raw heights are all below 2, scaled ones\n",
"# are above 100, so a second run must not multiply the column by 100 again.\n",
"if df['Height'].max() < 3:\n",
"    df['Height'] = df['Height'] * 100\n",
"\n",
"# Keep one decimal place for the three continuous measurements.\n",
"df[['Height', 'Weight', 'Age']] = df[['Height', 'Weight', 'Age']].round(1)\n",
"df"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Gender Age Height Weight Family History with Overweight \\\n",
"0 Female 21.0 162.0 64.0 yes \n",
"1 Female 21.0 152.0 56.0 yes \n",
"2 Male 23.0 180.0 77.0 yes \n",
"3 Male 27.0 180.0 87.0 no \n",
"4 Male 22.0 178.0 89.8 no \n",
"... ... ... ... ... ... \n",
"2106 Female 21.0 171.1 131.4 yes \n",
"2107 Female 22.0 174.9 133.7 yes \n",
"2108 Female 22.5 175.2 133.7 yes \n",
"2109 Female 24.4 173.9 133.3 yes \n",
"2110 Female 23.7 173.9 133.5 yes \n",
"\n",
" Frequent consumption of high caloric food \\\n",
"0 no \n",
"1 no \n",
"2 no \n",
"3 no \n",
"4 no \n",
"... ... \n",
"2106 yes \n",
"2107 yes \n",
"2108 yes \n",
"2109 yes \n",
"2110 yes \n",
"\n",
" Frequency of consumption of vegetables Number of main meals \\\n",
"0 2.0 3.0 \n",
"1 3.0 3.0 \n",
"2 2.0 3.0 \n",
"3 3.0 3.0 \n",
"4 2.0 1.0 \n",
"... ... ... \n",
"2106 3.0 3.0 \n",
"2107 3.0 3.0 \n",
"2108 3.0 3.0 \n",
"2109 3.0 3.0 \n",
"2110 3.0 3.0 \n",
"\n",
" Consumption of food between meals Smoke Consumption of water daily \\\n",
"0 Sometimes no 2.000000 \n",
"1 Sometimes yes 3.000000 \n",
"2 Sometimes no 2.000000 \n",
"3 Sometimes no 2.000000 \n",
"4 Sometimes no 2.000000 \n",
"... ... ... ... \n",
"2106 Sometimes no 1.728139 \n",
"2107 Sometimes no 2.005130 \n",
"2108 Sometimes no 2.054193 \n",
"2109 Sometimes no 2.852339 \n",
"2110 Sometimes no 2.863513 \n",
"\n",
" Calories consumption monitoring Physical activity frequency \\\n",
"0 no 0.000000 \n",
"1 yes 3.000000 \n",
"2 no 2.000000 \n",
"3 no 2.000000 \n",
"4 no 0.000000 \n",
"... ... ... \n",
"2106 no 1.676269 \n",
"2107 no 1.341390 \n",
"2108 no 1.414209 \n",
"2109 no 1.139107 \n",
"2110 no 1.026452 \n",
"\n",
" Time using technology devices Consumption of alcohol \\\n",
"0 1.000000 no \n",
"1 0.000000 Sometimes \n",
"2 1.000000 Frequently \n",
"3 0.000000 Frequently \n",
"4 0.000000 Sometimes \n",
"... ... ... \n",
"2106 0.906247 Sometimes \n",
"2107 0.599270 Sometimes \n",
"2108 0.646288 Sometimes \n",
"2109 0.586035 Sometimes \n",
"2110 0.714137 Sometimes \n",
"\n",
" Transportation used Obesity \n",
"0 Public Transportation Normal Weight \n",
"1 Public Transportation Normal Weight \n",
"2 Public Transportation Normal Weight \n",
"3 Walking Overweight Level I \n",
"4 Public Transportation Overweight Level II \n",
"... ... ... \n",
"2106 Public Transportation Obesity Type III \n",
"2107 Public Transportation Obesity Type III \n",
"2108 Public Transportation Obesity Type III \n",
"2109 Public Transportation Obesity Type III \n",
"2110 Public Transportation Obesity Type III \n",
"\n",
"[2111 rows x 17 columns]"
],
"text/html": [
"
\n",
"\n",
"
\n",
"\n",
"\n",
" | \n",
"Gender | \n",
"Age | \n",
"Height | \n",
"Weight | \n",
"Family History with Overweight | \n",
"Frequent consumption of high caloric food | \n",
"Frequency of consumption of vegetables | \n",
"Number of main meals | \n",
"Consumption of food between meals | \n",
"Smoke | \n",
"Consumption of water daily | \n",
"Calories consumption monitoring | \n",
"Physical activity frequency | \n",
"Time using technology devices | \n",
"Consumption of alcohol | \n",
"Transportation used | \n",
"Obesity | \n",
"
\n",
"\n",
"\n",
"\n",
"0 | \n",
"Female | \n",
"21.0 | \n",
"162.0 | \n",
"64.0 | \n",
"yes | \n",
"no | \n",
"2.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.000000 | \n",
"no | \n",
"0.000000 | \n",
"1.000000 | \n",
"no | \n",
"Public Transportation | \n",
"Normal Weight | \n",
"
\n",
"\n",
"1 | \n",
"Female | \n",
"21.0 | \n",
"152.0 | \n",
"56.0 | \n",
"yes | \n",
"no | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"yes | \n",
"3.000000 | \n",
"yes | \n",
"3.000000 | \n",
"0.000000 | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Normal Weight | \n",
"
\n",
"\n",
"2 | \n",
"Male | \n",
"23.0 | \n",
"180.0 | \n",
"77.0 | \n",
"yes | \n",
"no | \n",
"2.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.000000 | \n",
"no | \n",
"2.000000 | \n",
"1.000000 | \n",
"Frequently | \n",
"Public Transportation | \n",
"Normal Weight | \n",
"
\n",
"\n",
"3 | \n",
"Male | \n",
"27.0 | \n",
"180.0 | \n",
"87.0 | \n",
"no | \n",
"no | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.000000 | \n",
"no | \n",
"2.000000 | \n",
"0.000000 | \n",
"Frequently | \n",
"Walking | \n",
"Overweight Level I | \n",
"
\n",
"\n",
"4 | \n",
"Male | \n",
"22.0 | \n",
"178.0 | \n",
"89.8 | \n",
"no | \n",
"no | \n",
"2.0 | \n",
"1.0 | \n",
"Sometimes | \n",
"no | \n",
"2.000000 | \n",
"no | \n",
"0.000000 | \n",
"0.000000 | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Overweight Level II | \n",
"
\n",
"\n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"
\n",
"\n",
"2106 | \n",
"Female | \n",
"21.0 | \n",
"171.1 | \n",
"131.4 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"1.728139 | \n",
"no | \n",
"1.676269 | \n",
"0.906247 | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Obesity Type III | \n",
"
\n",
"\n",
"2107 | \n",
"Female | \n",
"22.0 | \n",
"174.9 | \n",
"133.7 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.005130 | \n",
"no | \n",
"1.341390 | \n",
"0.599270 | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Obesity Type III | \n",
"
\n",
"\n",
"2108 | \n",
"Female | \n",
"22.5 | \n",
"175.2 | \n",
"133.7 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.054193 | \n",
"no | \n",
"1.414209 | \n",
"0.646288 | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Obesity Type III | \n",
"
\n",
"\n",
"2109 | \n",
"Female | \n",
"24.4 | \n",
"173.9 | \n",
"133.3 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.852339 | \n",
"no | \n",
"1.139107 | \n",
"0.586035 | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Obesity Type III | \n",
"
\n",
"\n",
"2110 | \n",
"Female | \n",
"23.7 | \n",
"173.9 | \n",
"133.5 | \n",
"yes | \n",
"yes | \n",
"3.0 | \n",
"3.0 | \n",
"Sometimes | \n",
"no | \n",
"2.863513 | \n",
"no | \n",
"1.026452 | \n",
"0.714137 | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Obesity Type III | \n",
"
\n",
"\n",
"
\n",
"
2111 rows × 17 columns
\n",
"
"
]
},
"metadata": {},
"execution_count": 10
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 11,
"source": [
"# Print the observed minimum and maximum of each numerically coded column.\n",
"coded_columns = [\n",
"    'Frequency of consumption of vegetables',\n",
"    'Number of main meals',\n",
"    'Consumption of water daily',\n",
"    'Physical activity frequency',\n",
"    'Time using technology devices',\n",
"]\n",
"for column in coded_columns:\n",
"    values = df[column].to_numpy()\n",
"    print(column, ':', 'min:', values.min(), 'max:', values.max())\n"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Frequency of consumption of vegetables : min: 1.0 max: 3.0\n",
"Number of main meals : min: 1.0 max: 4.0\n",
"Consumption of water daily : min: 1.0 max: 3.0\n",
"Physical activity frequency : min: 0.0 max: 3.0\n",
"Time using technology devices : min: 0.0 max: 2.0\n"
]
}
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"## Exploratory Data Analysis"
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 12,
"source": [
"# Round the fractional values in the coded columns to whole-number codes.\n",
"# Series.round() is vectorised (unlike apply(round)) and rounds halves to even just\n",
"# as the built-in round() does; the explicit cast pins the resulting dtype to int64.\n",
"coded_columns = [\n",
"    'Frequency of consumption of vegetables',\n",
"    'Number of main meals',\n",
"    'Consumption of water daily',\n",
"    'Physical activity frequency',\n",
"    'Time using technology devices',\n",
"]\n",
"for column in coded_columns:\n",
"    df[column] = df[column].round().astype('int64')\n",
"    # Show the resulting range, dtype and distinct codes as a sanity check.\n",
"    print(column, ':', 'min:', df[column].min(), 'max:', df[column].max(), df[column].dtype)\n",
"    print(df[column].unique())"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Frequency of consumption of vegetables : min: 1 max: 3 int64\n",
"[2 3 1]\n",
"Number of main meals : min: 1 max: 4 int64\n",
"[3 1 4 2]\n",
"Consumption of water daily : min: 1 max: 3 int64\n",
"[2 3 1]\n",
"Physical activity frequency : min: 0 max: 3 int64\n",
"[0 3 2 1]\n",
"Time using technology devices : min: 0 max: 2 int64\n",
"[1 0 2]\n"
]
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 13,
"source": [
"df1 = df.copy()"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 14,
"source": [
"mapping0 = {1:'Never', 2:'Sometimes', 3:'Always'}\n",
"mapping1 = {1: '1', 2:'2' , 3: '3', 4: '3+'}\n",
"mapping2 = {1: 'Less than a liter', 2:'Between 1 and 2 L', 3:'More than 2 L'}\n",
"mapping3 = {0: 'I do not have', 1: '1 or 2 days', 2: '2 or 4 days', 3: '4 or 5 days'}\n",
"mapping4 = {0: '0–2 hours', 1: '3–5 hours', 2: 'More than 5 hours'}"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 15,
"source": [
"# Swap the integer codes for their text labels, one lookup table per column.\n",
"column_labels = {\n",
"    'Frequency of consumption of vegetables': mapping0,\n",
"    'Number of main meals': mapping1,\n",
"    'Consumption of water daily': mapping2,\n",
"    'Physical activity frequency': mapping3,\n",
"    'Time using technology devices': mapping4,\n",
"}\n",
"for column, labels in column_labels.items():\n",
"    df[column] = df[column].replace(labels)"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 16,
"source": [
"df"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Gender Age Height Weight Family History with Overweight \\\n",
"0 Female 21.0 162.0 64.0 yes \n",
"1 Female 21.0 152.0 56.0 yes \n",
"2 Male 23.0 180.0 77.0 yes \n",
"3 Male 27.0 180.0 87.0 no \n",
"4 Male 22.0 178.0 89.8 no \n",
"... ... ... ... ... ... \n",
"2106 Female 21.0 171.1 131.4 yes \n",
"2107 Female 22.0 174.9 133.7 yes \n",
"2108 Female 22.5 175.2 133.7 yes \n",
"2109 Female 24.4 173.9 133.3 yes \n",
"2110 Female 23.7 173.9 133.5 yes \n",
"\n",
" Frequent consumption of high caloric food \\\n",
"0 no \n",
"1 no \n",
"2 no \n",
"3 no \n",
"4 no \n",
"... ... \n",
"2106 yes \n",
"2107 yes \n",
"2108 yes \n",
"2109 yes \n",
"2110 yes \n",
"\n",
" Frequency of consumption of vegetables Number of main meals \\\n",
"0 Sometimes 3 \n",
"1 Always 3 \n",
"2 Sometimes 3 \n",
"3 Always 3 \n",
"4 Sometimes 1 \n",
"... ... ... \n",
"2106 Always 3 \n",
"2107 Always 3 \n",
"2108 Always 3 \n",
"2109 Always 3 \n",
"2110 Always 3 \n",
"\n",
" Consumption of food between meals Smoke Consumption of water daily \\\n",
"0 Sometimes no Between 1 and 2 L \n",
"1 Sometimes yes More than 2 L \n",
"2 Sometimes no Between 1 and 2 L \n",
"3 Sometimes no Between 1 and 2 L \n",
"4 Sometimes no Between 1 and 2 L \n",
"... ... ... ... \n",
"2106 Sometimes no Between 1 and 2 L \n",
"2107 Sometimes no Between 1 and 2 L \n",
"2108 Sometimes no Between 1 and 2 L \n",
"2109 Sometimes no More than 2 L \n",
"2110 Sometimes no More than 2 L \n",
"\n",
" Calories consumption monitoring Physical activity frequency \\\n",
"0 no I do not have \n",
"1 yes 4 or 5 days \n",
"2 no 2 or 4 days \n",
"3 no 2 or 4 days \n",
"4 no I do not have \n",
"... ... ... \n",
"2106 no 2 or 4 days \n",
"2107 no 1 or 2 days \n",
"2108 no 1 or 2 days \n",
"2109 no 1 or 2 days \n",
"2110 no 1 or 2 days \n",
"\n",
" Time using technology devices Consumption of alcohol \\\n",
"0 3–5 hours no \n",
"1 0–2 hours Sometimes \n",
"2 3–5 hours Frequently \n",
"3 0–2 hours Frequently \n",
"4 0–2 hours Sometimes \n",
"... ... ... \n",
"2106 3–5 hours Sometimes \n",
"2107 3–5 hours Sometimes \n",
"2108 3–5 hours Sometimes \n",
"2109 3–5 hours Sometimes \n",
"2110 3–5 hours Sometimes \n",
"\n",
" Transportation used Obesity \n",
"0 Public Transportation Normal Weight \n",
"1 Public Transportation Normal Weight \n",
"2 Public Transportation Normal Weight \n",
"3 Walking Overweight Level I \n",
"4 Public Transportation Overweight Level II \n",
"... ... ... \n",
"2106 Public Transportation Obesity Type III \n",
"2107 Public Transportation Obesity Type III \n",
"2108 Public Transportation Obesity Type III \n",
"2109 Public Transportation Obesity Type III \n",
"2110 Public Transportation Obesity Type III \n",
"\n",
"[2111 rows x 17 columns]"
],
"text/html": [
"
\n",
"\n",
"
\n",
"\n",
"\n",
" | \n",
"Gender | \n",
"Age | \n",
"Height | \n",
"Weight | \n",
"Family History with Overweight | \n",
"Frequent consumption of high caloric food | \n",
"Frequency of consumption of vegetables | \n",
"Number of main meals | \n",
"Consumption of food between meals | \n",
"Smoke | \n",
"Consumption of water daily | \n",
"Calories consumption monitoring | \n",
"Physical activity frequency | \n",
"Time using technology devices | \n",
"Consumption of alcohol | \n",
"Transportation used | \n",
"Obesity | \n",
"
\n",
"\n",
"\n",
"\n",
"0 | \n",
"Female | \n",
"21.0 | \n",
"162.0 | \n",
"64.0 | \n",
"yes | \n",
"no | \n",
"Sometimes | \n",
"3 | \n",
"Sometimes | \n",
"no | \n",
"Between 1 and 2 L | \n",
"no | \n",
"I do not have | \n",
"3–5 hours | \n",
"no | \n",
"Public Transportation | \n",
"Normal Weight | \n",
"
\n",
"\n",
"1 | \n",
"Female | \n",
"21.0 | \n",
"152.0 | \n",
"56.0 | \n",
"yes | \n",
"no | \n",
"Always | \n",
"3 | \n",
"Sometimes | \n",
"yes | \n",
"More than 2 L | \n",
"yes | \n",
"4 or 5 days | \n",
"0–2 hours | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Normal Weight | \n",
"
\n",
"\n",
"2 | \n",
"Male | \n",
"23.0 | \n",
"180.0 | \n",
"77.0 | \n",
"yes | \n",
"no | \n",
"Sometimes | \n",
"3 | \n",
"Sometimes | \n",
"no | \n",
"Between 1 and 2 L | \n",
"no | \n",
"2 or 4 days | \n",
"3–5 hours | \n",
"Frequently | \n",
"Public Transportation | \n",
"Normal Weight | \n",
"
\n",
"\n",
"3 | \n",
"Male | \n",
"27.0 | \n",
"180.0 | \n",
"87.0 | \n",
"no | \n",
"no | \n",
"Always | \n",
"3 | \n",
"Sometimes | \n",
"no | \n",
"Between 1 and 2 L | \n",
"no | \n",
"2 or 4 days | \n",
"0–2 hours | \n",
"Frequently | \n",
"Walking | \n",
"Overweight Level I | \n",
"
\n",
"\n",
"4 | \n",
"Male | \n",
"22.0 | \n",
"178.0 | \n",
"89.8 | \n",
"no | \n",
"no | \n",
"Sometimes | \n",
"1 | \n",
"Sometimes | \n",
"no | \n",
"Between 1 and 2 L | \n",
"no | \n",
"I do not have | \n",
"0–2 hours | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Overweight Level II | \n",
"
\n",
"\n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"
\n",
"\n",
"2106 | \n",
"Female | \n",
"21.0 | \n",
"171.1 | \n",
"131.4 | \n",
"yes | \n",
"yes | \n",
"Always | \n",
"3 | \n",
"Sometimes | \n",
"no | \n",
"Between 1 and 2 L | \n",
"no | \n",
"2 or 4 days | \n",
"3–5 hours | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Obesity Type III | \n",
"
\n",
"\n",
"2107 | \n",
"Female | \n",
"22.0 | \n",
"174.9 | \n",
"133.7 | \n",
"yes | \n",
"yes | \n",
"Always | \n",
"3 | \n",
"Sometimes | \n",
"no | \n",
"Between 1 and 2 L | \n",
"no | \n",
"1 or 2 days | \n",
"3–5 hours | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Obesity Type III | \n",
"
\n",
"\n",
"2108 | \n",
"Female | \n",
"22.5 | \n",
"175.2 | \n",
"133.7 | \n",
"yes | \n",
"yes | \n",
"Always | \n",
"3 | \n",
"Sometimes | \n",
"no | \n",
"Between 1 and 2 L | \n",
"no | \n",
"1 or 2 days | \n",
"3–5 hours | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Obesity Type III | \n",
"
\n",
"\n",
"2109 | \n",
"Female | \n",
"24.4 | \n",
"173.9 | \n",
"133.3 | \n",
"yes | \n",
"yes | \n",
"Always | \n",
"3 | \n",
"Sometimes | \n",
"no | \n",
"More than 2 L | \n",
"no | \n",
"1 or 2 days | \n",
"3–5 hours | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Obesity Type III | \n",
"
\n",
"\n",
"2110 | \n",
"Female | \n",
"23.7 | \n",
"173.9 | \n",
"133.5 | \n",
"yes | \n",
"yes | \n",
"Always | \n",
"3 | \n",
"Sometimes | \n",
"no | \n",
"More than 2 L | \n",
"no | \n",
"1 or 2 days | \n",
"3–5 hours | \n",
"Sometimes | \n",
"Public Transportation | \n",
"Obesity Type III | \n",
"
\n",
"\n",
"
\n",
"
2111 rows × 17 columns
\n",
"
"
]
},
"metadata": {},
"execution_count": 16
}
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"### Age, Height and Weight"
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"According to the box plots below, the heights of males and females are similarly distributed, although males are generally taller than females. Both males and females share a similar average weight, but females have a much larger range of weight (as well as BMI) than males. This is further illustrated by the steeper line plot of weight against height for females than for males."
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 18,
"source": [
"# Side-by-side box plots of Height and Weight, split by Gender.\n",
"sns.set()\n",
"# Explicit figure/axes API: each box plot is bound to its own panel via `ax=`\n",
"# instead of relying on pyplot's implicit current-axes state.\n",
"fig, (ax_height, ax_weight) = plt.subplots(1, 2, figsize=(20, 10))\n",
"sns.boxplot(x='Gender', y='Height', data=df, ax=ax_height)\n",
"ax_height.set_title('Height by Gender')\n",
"sns.boxplot(x='Gender', y='Weight', data=df, ax=ax_weight)\n",
"# Trailing semicolon keeps the last expression's repr out of the cell output.\n",
"ax_weight.set_title('Weight by Gender');"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"
"
]
},
"metadata": {},
"execution_count": 18
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"