{
"cells": [
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" State Station Month Min °C \\\n",
"0 TAS Hobart (Ellerslie Road) {station 094029} May 8.5 \n",
"1 TAS Hobart (Ellerslie Road) {station 094029} December 8.5 \n",
"2 TAS Hobart (Ellerslie Road) {station 094029} December 7.7 \n",
"3 TAS Hobart (Ellerslie Road) {station 094029} December 8.5 \n",
"4 TAS Devonport Airport {station 091126} May 6.6 \n",
"... ... ... ... ... \n",
"9934 TAS Devonport Airport {station 091126} July 5.4 \n",
"9935 ACT Canberra Airport {station 070351} July 2.5 \n",
"9936 TAS Hobart (Ellerslie Road) {station 094029} July 2.4 \n",
"9937 TAS Devonport Airport {station 091126} July 3.4 \n",
"9938 TAS Hobart (Ellerslie Road) {station 094029} July 4.1 \n",
"\n",
" Max °C Max Wind gust Dir Max wind gust Spd - km/h Temp °C- 9:00AM \\\n",
"0 15.0 NNW 57 10.1 \n",
"1 18.4 NW 56 12.8 \n",
"2 15.8 WNW 76 12.1 \n",
"3 16.6 WSW 69 12.2 \n",
"4 14.5 WSW 56 8.9 \n",
"... ... ... ... ... \n",
"9934 14.6 ENE 30 9.7 \n",
"9935 13.3 E 35 9.0 \n",
"9936 12.5 NNW 35 3.0 \n",
"9937 14.9 SSE 28 5.4 \n",
"9938 12.7 NNW 28 4.5 \n",
"\n",
" Dir - 9:00 AM - km/h MSLP- hPa - 9:00AM Temp °C- 3:00PM \\\n",
"0 NNW 989.1 14.6 \n",
"1 NNW 992.0 17.3 \n",
"2 WNW 987.4 14.1 \n",
"3 SE 984.7 12.2 \n",
"4 SSE 990.0 13.8 \n",
"... ... ... ... \n",
"9934 SSE 1040.3 13.1 \n",
"9935 ESE 1038.1 12.2 \n",
"9936 NNW 1041.0 11.5 \n",
"9937 SSE 1040.8 14.6 \n",
"9938 NNW 1041.1 12.3 \n",
"\n",
" Dir - 3:00 PM - km/h Spd - 3:00PM - km/h MSLP- hPa - 3:00PM Rain(Y/N) \n",
"0 NW 7 984.5 Y \n",
"1 WSW 20 986.2 Y \n",
"2 NW 28 987.2 Y \n",
"3 SW 35 987.2 Y \n",
"4 NW 11 987.3 Y \n",
"... ... ... ... ... \n",
"9934 NE 19 1036.7 N \n",
"9935 SE 19 1036.7 N \n",
"9936 NE 7 1037.1 N \n",
"9937 ENE 7 1039.8 N \n",
"9938 NNE 6 1040.1 N \n",
"\n",
"[9939 rows x 15 columns]\n"
]
}
],
"source": [
"# Third-party libraries imported for the notebook.\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn import preprocessing\n",
"\n",
"# Read the rainfall workbook (bare filename, so it is resolved against the\n",
"# current working directory) and echo the resulting frame.\n",
"df = pd.read_excel(r'australia-rainfall.xlsx')\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"State 9939\n",
"Station 9939\n",
"Month 9939\n",
"Min °C 9939\n",
"Max °C 9939\n",
"Max Wind gust Dir 9939\n",
"Max wind gust Spd - km/h 9939\n",
"Temp °C- 9:00AM 9939\n",
"Dir - 9:00 AM - km/h 9939\n",
"MSLP- hPa - 9:00AM 9939\n",
"Temp °C- 3:00PM 9939\n",
"Dir - 3:00 PM - km/h 9939\n",
"Spd - 3:00PM - km/h 9939\n",
"MSLP- hPa - 3:00PM 9939\n",
"Rain(Y/N) 9939\n",
"dtype: int64"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# How many samples does the dataset hold?\n",
"# count() tallies the non-null entries of every column (axis=0 is its default),\n",
"# so each figure equals the row count only where a column has no missing values.\n",
"df.count()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['State', 'Station', 'Month', 'Min °C', 'Max °C', 'Max Wind gust Dir',\n",
" 'Max wind gust Spd - km/h', 'Temp °C- 9:00AM', 'Dir - 9:00 AM - km/h',\n",
" 'MSLP- hPa - 9:00AM', 'Temp °C- 3:00PM', 'Dir - 3:00 PM - km/h',\n",
" 'Spd - 3:00PM - km/h', 'MSLP- hPa - 3:00PM', 'Rain(Y/N)'],\n",
" dtype='object')"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Which features does the dataset contain? The column labels name them.\n",
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['State', 'Station', 'Month', 'Min °C', 'Max °C', 'Max Wind gust Dir',\n",
" 'Max wind gust Spd - km/h', 'Temp °C- 9:00AM', 'Dir - 9:00 AM - km/h',\n",
" 'MSLP- hPa - 9:00AM', 'Temp °C- 3:00PM', 'Dir - 3:00 PM - km/h',\n",
" 'Spd - 3:00PM - km/h', 'MSLP- hPa - 3:00PM', 'Rain(Y/N)'],\n",
" dtype='object')"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# keys() on a DataFrame yields its column labels.\n",
"df.keys()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" State Station Month Min °C Max °C \\\n",
"0 TAS Hobart (Ellerslie Road) {station 094029} May 8.5 15.0 \n",
"1 TAS Hobart (Ellerslie Road) {station 094029} December 8.5 18.4 \n",
"2 TAS Hobart (Ellerslie Road) {station 094029} December 7.7 15.8 \n",
"3 TAS Hobart (Ellerslie Road) {station 094029} December 8.5 16.6 \n",
"4 TAS Devonport Airport {station 091126} May 6.6 14.5 \n",
"\n",
" Max Wind gust Dir Max wind gust Spd - km/h Temp °C- 9:00AM \\\n",
"0 NNW 57 10.1 \n",
"1 NW 56 12.8 \n",
"2 WNW 76 12.1 \n",
"3 WSW 69 12.2 \n",
"4 WSW 56 8.9 \n",
"\n",
" Dir - 9:00 AM - km/h MSLP- hPa - 9:00AM Temp °C- 3:00PM \\\n",
"0 NNW 989.1 14.6 \n",
"1 NNW 992.0 17.3 \n",
"2 WNW 987.4 14.1 \n",
"3 SE 984.7 12.2 \n",
"4 SSE 990.0 13.8 \n",
"\n",
" Dir - 3:00 PM - km/h Spd - 3:00PM - km/h MSLP- hPa - 3:00PM Rain(Y/N) \n",
"0 NW 7 984.5 Y \n",
"1 WSW 20 986.2 Y \n",
"2 NW 28 987.2 Y \n",
"3 SW 35 987.2 Y \n",
"4 NW 11 987.3 Y \n"
]
}
],
"source": [
"# Preview the first five rows (five is head()'s default).\n",
"print(df.head())"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
"\n",
"\n",
" | \n",
"State | \n",
"Station | \n",
"Month | \n",
"Min °C | \n",
"Max °C | \n",
"Max Wind gust Dir | \n",
"Max wind gust Spd - km/h | \n",
"Temp °C- 9:00AM | \n",
"Dir - 9:00 AM - km/h | \n",
"MSLP- hPa - 9:00AM | \n",
"Temp °C- 3:00PM | \n",
"Dir - 3:00 PM - km/h | \n",
"Spd - 3:00PM - km/h | \n",
"MSLP- hPa - 3:00PM | \n",
"Rain | \n",
"
\n",
"\n",
"\n",
"\n",
"0 | \n",
"TAS | \n",
"Hobart (Ellerslie Road) {station 094029} | \n",
"May | \n",
"8.5 | \n",
"15.0 | \n",
"NNW | \n",
"57 | \n",
"10.1 | \n",
"NNW | \n",
"989.1 | \n",
"14.6 | \n",
"NW | \n",
"7 | \n",
"984.5 | \n",
"Y | \n",
"
\n",
"\n",
"1 | \n",
"TAS | \n",
"Hobart (Ellerslie Road) {station 094029} | \n",
"December | \n",
"8.5 | \n",
"18.4 | \n",
"NW | \n",
"56 | \n",
"12.8 | \n",
"NNW | \n",
"992.0 | \n",
"17.3 | \n",
"WSW | \n",
"20 | \n",
"986.2 | \n",
"Y | \n",
"
\n",
"\n",
"2 | \n",
"TAS | \n",
"Hobart (Ellerslie Road) {station 094029} | \n",
"December | \n",
"7.7 | \n",
"15.8 | \n",
"WNW | \n",
"76 | \n",
"12.1 | \n",
"WNW | \n",
"987.4 | \n",
"14.1 | \n",
"NW | \n",
"28 | \n",
"987.2 | \n",
"Y | \n",
"
\n",
"\n",
"3 | \n",
"TAS | \n",
"Hobart (Ellerslie Road) {station 094029} | \n",
"December | \n",
"8.5 | \n",
"16.6 | \n",
"WSW | \n",
"69 | \n",
"12.2 | \n",
"SE | \n",
"984.7 | \n",
"12.2 | \n",
"SW | \n",
"35 | \n",
"987.2 | \n",
"Y | \n",
"
\n",
"\n",
"4 | \n",
"TAS | \n",
"Devonport Airport {station 091126} | \n",
"May | \n",
"6.6 | \n",
"14.5 | \n",
"WSW | \n",
"56 | \n",
"8.9 | \n",
"SSE | \n",
"990.0 | \n",
"13.8 | \n",
"NW | \n",
"11 | \n",
"987.3 | \n",
"Y | \n",
"
\n",
"\n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"... | \n",
"
\n",
"\n",
"9934 | \n",
"TAS | \n",
"Devonport Airport {station 091126} | \n",
"July | \n",
"5.4 | \n",
"14.6 | \n",
"ENE | \n",
"30 | \n",
"9.7 | \n",
"SSE | \n",
"1040.3 | \n",
"13.1 | \n",
"NE | \n",
"19 | \n",
"1036.7 | \n",
"N | \n",
"
\n",
"\n",
"9935 | \n",
"ACT | \n",
"Canberra Airport {station 070351} | \n",
"July | \n",
"2.5 | \n",
"13.3 | \n",
"E | \n",
"35 | \n",
"9.0 | \n",
"ESE | \n",
"1038.1 | \n",
"12.2 | \n",
"SE | \n",
"19 | \n",
"1036.7 | \n",
"N | \n",
"
\n",
"\n",
"9936 | \n",
"TAS | \n",
"Hobart (Ellerslie Road) {station 094029} | \n",
"July | \n",
"2.4 | \n",
"12.5 | \n",
"NNW | \n",
"35 | \n",
"3.0 | \n",
"NNW | \n",
"1041.0 | \n",
"11.5 | \n",
"NE | \n",
"7 | \n",
"1037.1 | \n",
"N | \n",
"
\n",
"\n",
"9937 | \n",
"TAS | \n",
"Devonport Airport {station 091126} | \n",
"July | \n",
"3.4 | \n",
"14.9 | \n",
"SSE | \n",
"28 | \n",
"5.4 | \n",
"SSE | \n",
"1040.8 | \n",
"14.6 | \n",
"ENE | \n",
"7 | \n",
"1039.8 | \n",
"N | \n",
"
\n",
"\n",
"9938 | \n",
"TAS | \n",
"Hobart (Ellerslie Road) {station 094029} | \n",
"July | \n",
"4.1 | \n",
"12.7 | \n",
"NNW | \n",
"28 | \n",
"4.5 | \n",
"NNW | \n",
"1041.1 | \n",
"12.3 | \n",
"NNE | \n",
"6 | \n",
"1040.1 | \n",
"N | \n",
"
\n",
"\n",
"
\n",
"
9939 rows × 15 columns
\n",
"
"
],
"text/plain": [
" State Station Month Min °C \\\n",
"0 TAS Hobart (Ellerslie Road) {station 094029} May 8.5 \n",
"1 TAS Hobart (Ellerslie Road) {station 094029} December 8.5 \n",
"2 TAS Hobart (Ellerslie Road) {station 094029} December 7.7 \n",
"3 TAS Hobart (Ellerslie Road) {station 094029} December 8.5 \n",
"4 TAS Devonport Airport {station 091126} May 6.6 \n",
"... ... ... ... ... \n",
"9934 TAS Devonport Airport {station 091126} July 5.4 \n",
"9935 ACT Canberra Airport {station 070351} July 2.5 \n",
"9936 TAS Hobart (Ellerslie Road) {station 094029} July 2.4 \n",
"9937 TAS Devonport Airport {station 091126} July 3.4 \n",
"9938 TAS Hobart (Ellerslie Road) {station 094029} July 4.1 \n",
"\n",
" Max °C Max Wind gust Dir Max wind gust Spd - km/h Temp °C- 9:00AM \\\n",
"0 15.0 NNW 57 10.1 \n",
"1 18.4 NW 56 12.8 \n",
"2 15.8 WNW 76 12.1 \n",
"3 16.6 WSW 69 12.2 \n",
"4 14.5 WSW 56 8.9 \n",
"... ... ... ... ... \n",
"9934 14.6 ENE 30 9.7 \n",
"9935 13.3 E 35 9.0 \n",
"9936 12.5 NNW 35 3.0 \n",
"9937 14.9 SSE 28 5.4 \n",
"9938 12.7 NNW 28 4.5 \n",
"\n",
" Dir - 9:00 AM - km/h MSLP- hPa - 9:00AM Temp °C- 3:00PM \\\n",
"0 NNW 989.1 14.6 \n",
"1 NNW 992.0 17.3 \n",
"2 WNW 987.4 14.1 \n",
"3 SE 984.7 12.2 \n",
"4 SSE 990.0 13.8 \n",
"... ... ... ... \n",
"9934 SSE 1040.3 13.1 \n",
"9935 ESE 1038.1 12.2 \n",
"9936 NNW 1041.0 11.5 \n",
"9937 SSE 1040.8 14.6 \n",
"9938 NNW 1041.1 12.3 \n",
"\n",
" Dir - 3:00 PM - km/h Spd - 3:00PM - km/h MSLP- hPa - 3:00PM Rain \n",
"0 NW 7 984.5 Y \n",
"1 WSW 20 986.2 Y \n",
"2 NW 28 987.2 Y \n",
"3 SW 35 987.2 Y \n",
"4 NW 11 987.3 Y \n",
"... ... ... ... ... \n",
"9934 NE 19 1036.7 N \n",
"9935 SE 19 1036.7 N \n",
"9936 NE 7 1037.1 N \n",
"9937 ENE 7 1039.8 N \n",
"9938 NNE 6 1040.1 N \n",
"\n",
"[9939 rows x 15 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# What are the target classes? How many samples are in each target class?\n",
"# This cell treats Rain(Y/N) as the target column; value_counts() lists every\n",
"# distinct class in it together with the number of samples carrying that class.\n",
"# df itself is left unmodified, so the column keeps its original name.\n",
"df[\"Rain(Y/N)\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"
\n",
"RangeIndex: 9939 entries, 0 to 9938\n",
"Data columns (total 15 columns):\n",
"State 9939 non-null object\n",
"Station 9939 non-null object\n",
"Month 9939 non-null object\n",
"Min °C 9939 non-null float64\n",
"Max °C 9939 non-null float64\n",
"Max Wind gust Dir 9939 non-null object\n",
"Max wind gust Spd - km/h 9939 non-null int64\n",
"Temp °C- 9:00AM 9939 non-null float64\n",
"Dir - 9:00 AM - km/h 9939 non-null object\n",
"MSLP- hPa - 9:00AM 9939 non-null float64\n",
"Temp °C- 3:00PM 9939 non-null float64\n",
"Dir - 3:00 PM - km/h 9939 non-null object\n",
"Spd - 3:00PM - km/h 9939 non-null int64\n",
"MSLP- hPa - 3:00PM 9939 non-null float64\n",
"Rain(Y/N) 9939 non-null object\n",
"dtypes: float64(6), int64(2), object(7)\n",
"memory usage: 1.1+ MB\n",
"None\n"
]
}
],
"source": [
"# What is the data type of each feature?\n",
"# info() writes its report straight to stdout and returns None, so it is called bare;\n",
"# wrapping it in print() would append a stray \"None\" line to the output.\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"#Show the value distribution of the following nominal attributes"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"(array([ 839., 1659., 830., 0., 833., 1241., 0., 1650., 1648.,\n",
" 1239.]),\n",
" array([0. , 0.7, 1.4, 2.1, 2.8, 3.5, 4.2, 4.9, 5.6, 6.3, 7. ]),\n",
" )"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png":...