Answer To: CIS XXXXXXXXXXProject #1 XXXXXXXXXXWeb Scraping, Data Frames, Numpy In a previous assignment we...
Sampad Swarup answered on Nov 09 2021
Order ID 71348.html
Importing the libraries
In [1]:
import pandas as pd
import numpy as np
import urllib.request
Initialising the link array
In [2]:
# Roster pages to scrape: one list per sport/gender, three school sites in each.
volleyball_men_team_links = [
    "https://www.brooklyncollegeathletics.com/sports/mens-volleyball/roster/2019",
    "https://athletics.baruch.cuny.edu/sports/mens-volleyball/roster",
    "https://yorkathletics.com/sports/mens-volleyball/roster",
]

volleyball_women_team_links = [
    "https://www.brooklyncollegeathletics.com/sports/womens-volleyball/roster/2019",
    "https://athletics.baruch.cuny.edu/sports/womens-volleyball/roster",
    "https://johnjayathletics.com/sports/womens-volleyball/roster",
]

swimming_men_team_links = [
    "https://www.brooklyncollegeathletics.com/sports/mens-swimming-and-diving/roster",
    "https://athletics.baruch.cuny.edu/sports/mens-swimming-and-diving/roster",
    "https://yorkathletics.com/sports/mens-swimming-and-diving/roster",
]

swimming_women_team_links = [
    "https://www.brooklyncollegeathletics.com/sports/womens-swimming-and-diving/roster",
    "https://athletics.baruch.cuny.edu/sports/womens-swimming-and-diving/roster",
    "https://queensknights.com/sports/womens-swimming-and-diving/roster",
]
Initialising the DataFrame arrays
In [3]:
# One independent list per team category; the scraping cell fills each with
# the raw roster tables read from that category's links.
volly_men_df_array, volly_women_df_array = [], []
swim_men_df_array, swim_women_df_array = [], []
Scraping the raw data and appending it to the DataFrame arrays
In [4]:
# Index of the table taken from each page's parsed tables. The same index is
# used for every site; presumably this is the roster table, since later cells
# read its name and "Ht." columns -- TODO confirm if a site changes layout.
ROSTER_TABLE_INDEX = 2


def _fetch_roster_table(link):
    """Download one roster page and return the table at ROSTER_TABLE_INDEX.

    The response is opened in a ``with`` block so the underlying connection
    is closed as soon as the page body has been read, instead of being left
    open for the garbage collector.

    Parameters
    ----------
    link : str
        URL of the roster page.

    Returns
    -------
    pandas.DataFrame
        The table found at position ROSTER_TABLE_INDEX on the page.

    Raises
    ------
    urllib.error.URLError
        If the page cannot be fetched.
    IndexError
        If the page has fewer than ROSTER_TABLE_INDEX + 1 tables.
    """
    with urllib.request.urlopen(link) as response:
        html = response.read()
    return pd.read_html(html)[ROSTER_TABLE_INDEX]


print("Getting mens' volleyball team data...")
for link in volleyball_men_team_links:
    volly_men_df_array.append(_fetch_roster_table(link))
print("Mens' vollyball team datas are scaraped.")

print("Getting womens' volleyball team data...")
for link in volleyball_women_team_links:
    volly_women_df_array.append(_fetch_roster_table(link))
print("Womens' Volleyball team data is scaraped.")

print("Getting mens' swimming team data...")
for link in swimming_men_team_links:
    swim_men_df_array.append(_fetch_roster_table(link))
print("Mens' swimming team datas are scaraped.")

print("Getting womens' swimming team data...")
for link in swimming_women_team_links:
    swim_women_df_array.append(_fetch_roster_table(link))
print("Womens' swimming team datas are scaraped.")
Getting mens' volleyball team data...
Mens' vollyball team datas are scaraped.
Getting womens' volleyball team data...
Womens' Volleyball team data is scaraped.
Getting mens' swimming team data...
Mens' swimming team datas are scaraped.
Getting womens' swimming team data...
Womens' swimming team datas are scaraped.
Initialising the empty DataFrames
In [5]:
# Every team category starts from its own empty frame; the joining cells
# below replace these with the combined roster data.
volly_men_df, volly_women_df, swim_men_df, swim_women_df = (
    pd.DataFrame() for _ in range(4)
)
Joining all men's volleyball data from the raw data
In [6]:
# Combine the name and height columns of every scraped men's volleyball
# roster into one frame. A single pd.concat replaces the per-table
# DataFrame.append loop: append was removed in pandas 2.0, and concatenating
# once avoids re-copying the growing frame on every iteration. Building the
# result only from the raw tables also makes this cell safe to re-run.
# ignore_index=True yields the same fresh 0..n-1 index as the old
# reset_index(drop=True).
volly_men_df = pd.concat(
    [df[["Full Name", "Ht."]] for df in volly_men_df_array],
    ignore_index=True,
)
volly_men_df.tail()
Out[6]:
Full Name Ht.
35 Olawale Kila 6-4
36 David Heyliger, Jr. 6-1
37 Guillermo Hernandez 6-4
38 Fatmir Glavatovic 6-0
39 Ti’Juan Boothe 5-8
Joining all women's volleyball data from the raw data
In [7]:
# Combine the name and height columns of every scraped women's volleyball
# roster into one frame.
volly_women_frames = []
for df in volly_women_df_array:
    # The original fallback renamed a "Name" column to "Full Name" when the
    # selection failed. Check for the column explicitly instead of using a
    # bare `except:`, which would also hide unrelated errors.
    if "Full Name" not in df.columns:
        df = df.rename(columns={"Name": "Full Name"})
    volly_women_frames.append(df[["Full Name", "Ht."]])

# One pd.concat instead of DataFrame.append in a loop (append was removed in
# pandas 2.0); ignore_index=True gives the same fresh 0..n-1 index as the old
# reset_index(drop=True).
volly_women_df = pd.concat(volly_women_frames, ignore_index=True)
volly_women_df.tail()
Out[7]:
Full Name Ht.
33 Freysha Rivera 5-6
34 Autumn Reeves 6-2
35 Sophia Wheelan 6-0
36 Karen Sumlang 5-8
37 Kimberly Rodriguez 5-7
Joining all men's swimming data from the raw data
In [8]:
# Combine the name and height columns of every scraped men's swimming roster
# into one frame.
swim_men_frames = []
for df in swim_men_df_array:
    # The original fallback renamed a "Name" column to "Full Name" when the
    # selection failed. Check for the column explicitly instead of using a
    # bare `except:`, which would also hide unrelated errors.
    if "Full Name" not in df.columns:
        df = df.rename(columns={"Name": "Full Name"})
    swim_men_frames.append(df[["Full Name", "Ht."]])

# One pd.concat instead of DataFrame.append in a loop (append was removed in
# pandas 2.0); ignore_index=True gives the same fresh 0..n-1 index as the old
# reset_index(drop=True).
swim_men_df = pd.concat(swim_men_frames, ignore_index=True)
swim_men_df.tail()
Out[8]:
Full Name Ht.
40 Christopher Mendez 5-7
41 Anthony Nazario, Jr. 5-9
42 Jeremy Sapeg 5-9
43 Jamaar Watson 6-1
44 Alex Winter 6-0
Joining all women's swimming data from the raw data
In [9]:
# Combine the name and height columns of every scraped women's swimming
# roster into one frame. A single pd.concat replaces the per-table
# DataFrame.append loop: append was removed in pandas 2.0, and concatenating
# once avoids re-copying the growing frame on every iteration. Building the
# result only from the raw tables also makes this cell safe to re-run.
# ignore_index=True yields the same fresh 0..n-1 index as the old
# reset_index(drop=True).
swim_women_df = pd.concat(
    [df[["Full Name", "Ht."]] for df in swim_women_df_array],
    ignore_index=True,
)
swim_women_df.tail()
Out[9]:
Full Name Ht.
35 Amanda Giordano 5-5
36 Naomi Oken 5-4
37 Allie Stanya 5-7
38 Gabriella Vicidomini 5-4
39 Lily Wilk 5-3
In [10]:
def _feet_inches_to_inches(height_text):
    """Convert a "feet-inches" string such as "6-4" to total inches."""
    parts = height_text.split('-')
    return 12 * int(parts[0]) + int(parts[1])


# Replace the textual "Ht." column with a numeric "Height(inch)" column in
# each team frame, one frame at a time.
for roster in (volly_men_df, volly_women_df, swim_men_df, swim_women_df):
    roster['Height(inch)'] = [_feet_inches_to_inches(ht) for ht in roster["Ht."]]
    roster.drop(['Ht.'], axis=1, inplace=True)
In [11]:
# Mean height, in inches, for each team category.
volly_men_avg = np.mean(volly_men_df["Height(inch)"])
volly_women_avg = np.mean(volly_women_df["Height(inch)"])
swim_men_avg = np.mean(swim_men_df["Height(inch)"])
swim_women_avg = np.mean(swim_women_df["Height(inch)"])

# Print every average in feet-inches, inches, centimetres and metres, with a
# rule before the first team and after each team.
rule = '-' * 70
team_averages = (
    ("mens' volleyball", volly_men_avg),
    ("womens' volleyball", volly_women_avg),
    ("mens' swimming", swim_men_avg),
    ("womens' swimming", swim_women_avg),
)

print(rule)
for team_label, avg_inches in team_averages:
    # Whole feet and the whole inches left over (both truncated, not rounded).
    feet = int(avg_inches / 12)
    inches = int(avg_inches % 12)
    print(f"Avrage height of {team_label} team is {feet}'{inches}\"(in Feet-Inch)")
    print(f"Avrage height of {team_label} team is {avg_inches:.2f}(in Inch)")
    print(f"Avrage height of {team_label} team is {2.54*avg_inches:.2f}(in Centimeter)")
    print(f"Avrage height of {team_label} team is {0.0254*avg_inches:.2f}(in Meter)")
    print(rule)
----------------------------------------------------------------------
Avrage height of mens' volleyball team is 6'0"(in Feet-Inch)
Avrage height of mens' volleyball team is 72.00(in Inch)
Avrage height of mens' volleyball team is 182.88(in Centimeter)
Avrage height of mens' volleyball team is 1.83(in Meter)
----------------------------------------------------------------------
Avrage height of womens' volleyball team is 5'6"(in Feet-Inch)
Avrage height of womens' volleyball team is 66.32(in Inch)
Avrage height of womens' volleyball team is 168.44(in Centimeter)
Avrage height of womens' volleyball team is 1.68(in Meter)
----------------------------------------------------------------------
Avrage height of mens' swimming team is 5'10"(in Feet-Inch)
Avrage height of mens' swimming team is 70.71(in Inch)
Avrage height of mens' swimming team is 179.61(in Centimeter)
Avrage height of mens' swimming team is 1.80(in Meter)
----------------------------------------------------------------------
Avrage height of womens' swimming team is 5'4"(in Feet-Inch)
Avrage height of womens' swimming team is 64.85(in Inch)
Avrage height of womens' swimming team is 164.72(in Centimeter)
Avrage height of womens' swimming team is 1.65(in Meter)
----------------------------------------------------------------------
In [12]:
# For each team category, order the players from tallest to shortest and keep
# the first five rows.
volly_top_5_height_men = (
    volly_men_df.sort_values(by="Height(inch)", ascending=False).head(5)
)
volly_top_5_height_women = (
    volly_women_df.sort_values(by="Height(inch)", ascending=False).head(5)
)
swim_top_5_height_men = (
    swim_men_df.sort_values(by="Height(inch)", ascending=False).head(5)
)
swim_top_5_height_women = (
    swim_women_df.sort_values(by="Height(inch)", ascending=False).head(5)
)
In [13]:
# Print each top-five table under its heading, with a rule above and below
# the heading.
rule = '-' * 45
top_five_sections = (
    ('Top Five Men Regarding Height In Vollyball', volly_top_5_height_men),
    ('Top Five Women Regarding Height In Vollyball', volly_top_5_height_women),
    ('Top Five Men Regarding Height In Swimming', swim_top_5_height_men),
    ('Top Five Women Regarding Height In Swimming', swim_top_5_height_women),
)
for heading, top_five in top_five_sections:
    print(rule)
    print(heading)
    print(rule)
    print(top_five.to_string(index=False))
---------------------------------------------
Top Five Men Regarding Height In Vollyball
---------------------------------------------
Full Name Height(inch)
Leon Petrovitsky 77
Artem Zinkin 77
Akil Vaughn 77
Carlos Rodriguez 76
Guillermo Hernandez 76
---------------------------------------------
Top Five Women Regarding Height In Vollyball
---------------------------------------------
Full Name Height(inch)
Autumn Reeves 74
Sophia Wheelan 72
Claudia Daporta 72
Mirialis Almanzar 72
Madison Cronk 71
---------------------------------------------
Top Five Men Regarding Height In Swimming
---------------------------------------------
Full Name Height(inch)
Gregory Becker 77
Kasper Gacek 77
Mark MacEachen 76
Morgan Welling 75
Evan Nikolic 75
---------------------------------------------
Top Five Women Regarding Height In Swimming
---------------------------------------------
Full Name Height(inch)
Shenaika Eliassaint 71
Onika George 70
Andrea Gasic 69
Asimina Hamakiotes 69
Alyssa Taylor 69
In [14]:
# Print the first row of each top-five table (the tallest listed player)
# under its heading, with a rule above and below the heading.
rule = '-' * 45
tallest_sections = (
    ('Tallest Men Regarding Height In Vollyball', volly_top_5_height_men),
    ('Tallest Women Regarding Height In Vollyball', volly_top_5_height_women),
    ('Tallest Men Regarding Height In Swimming', swim_top_5_height_men),
    ('Tallest Women Regarding Height In Swimming', swim_top_5_height_women),
)
for heading, top_five in tallest_sections:
    print(rule)
    print(heading)
    print(rule)
    print(top_five.head(1).to_string(index=False))
---------------------------------------------
Tallest Men Regarding Height In Vollyball
---------------------------------------------
Full Name Height(inch)
Leon Petrovitsky 77
---------------------------------------------
Tallest Women Regarding Height In Vollyball
---------------------------------------------
Full Name Height(inch)
Autumn Reeves 74
---------------------------------------------
Tallest Men Regarding Height In Swimming
---------------------------------------------
Full Name Height(inch)
Gregory Becker 77
---------------------------------------------
Tallest Women Regarding Height In Swimming
---------------------------------------------
Full Name Height(inch)
Shenaika Eliassaint 71
In [15]:
report = f'''
Avarge height of men is always grater then women. Avrage height of a male swimmer ({int(swim_men_avg/12)}\'{int(swim_men_avg%12)}) is less
then avrage height of a mele volleyball player({int(volly_men_avg/12)}\'{int(volly_men_avg%12)}). Avrage height of a female swimmer({int(swim_women_avg/12)}\'{int(swim_women_avg%12)}) is also
less then avrage height of a female volleyball...