Requirements: has to be done in Jupyter Notebook. You are required to use the dataset contained within...

Question

Requirements: this has to be done in a Jupyter Notebook. You are required to use the dataset contained within the file “us-names-by-decade.csv”, which contains the following features:

- Gender – Gender of Individual: (M) Male, (F) Female
- Name – First Name of Individual
- Decade – 10 Year Period, e.g. 1990 = 1990-1999
- Count – Number of Individuals that were given the name detailed in the Name feature in the decade indicated in the Decade feature, e.g. “F”, “Olivia”, “2010”, “69799” = the number of Females named Olivia in the years 2010-2019 was 69799

and then perform the following analysis:

- You are then required to explain what you plan on doing with the data, e.g., why did you choose the specific visualizations, etc. This must be detailed in the markup of the Jupyter Notebook and include the rationale for your choice.
- Generate a plot that details the top 5 number of Names for each of the Decades.
- Plot a graph depicting the distribution of the Names that are Female in decade 1980.
- Find out and visualize which decade had the MOST names.
- Observe and visualize the average number of names per decade.

No additional output will be graded. You must complete ALL data exploration PROGRAMMATICALLY and not using any other tool than Python.

Neha · Accepted Answer

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt
",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "filename = "us-names-by-decade.csv"
",
    "df = pd.read_csv(filename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
",
       "
",
       "    .dataframe tbody tr th:only-of-type {
",
       "        vertical-align: middle;
",
       "    }
",
       "
",
       "    .dataframe tbody tr th {
",
       "        vertical-align: top;
",
       "    }
",
       "
",
       "    .dataframe thead th {
",
       "        text-align: right;
",
       "    }
",
       "
",
       "
",
       "  
",
       "    
",
       "      
",
       "      gender
",
       "      name
",
       "      decade
",
       "      count
",
       "    
",
       "  
",
       "  
",
       "    
",
       "      0
",
       "      F
",
       "      Sophia
",
       "      2010
",
       "      85720
",
       "    
",
       "    
",
       "      1
",
       "      M
",
       "      Jacob
",
       "      2010
",
       "      79359
",
       "    
",
       "    
",
       "      2
",
       "      F
",
       "      Isabella
",
       "      2010
",
       "      79238
",
       "    
",
       "    
",
       "      3
",
       "      F
",
       "      Emma
",
       "      2010
",
       "      77736
",
       "    
",
       "    
",
       "      4
",
       "      M
",
       "      Mason
",
       "      2010
",
       "      70808
",
       "    
",
       "  
",
       "
",
       ""
      ],
      "text/plain": [
       "  gender      name  decade  count
",
       "0      F    Sophia    2010  85720
",
       "1      M     Jacob    2010  79359
",
       "2      F  Isabella    2010  79238
",
       "3      F      Emma    2010  77736
",
       "4      M     Mason    2010  70808"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
",
       "
",
       "    .dataframe tbody tr th:only-of-type {
",
       "        vertical-align: middle;
",
       "    }
",
       "
",
       "    .dataframe tbody tr th {
",
       "        vertical-align: top;
",
       "    }
",
       "
",
       "    .dataframe thead th {
",
       "        text-align: right;
",
       "    }
",
       "
",
       "
",
       "  
",
       "    
",
       "      
",
       "      gender
",
       "      name
",
       "      decade
",
       "      count
",
       "    
",
       "  
",
       "  
",
       "    
",
       "      0
",
       "      F
",
       "      Sophia
",
       "      2010
",
       "      85720
",
       "    
",
       "    
",
       "      1
",
       "      M
",
       "      Jacob
",
       "      2010
",
       "      79359
",
       "    
",
       "    
",
       "      2
",
       "      F
",
       "      Isabella
",
       "      2010
",
       "      79238
",
       "    
",
       "    
",
       "      3
",
       "      F
",
       "      Emma
",
       "      2010
",
       "      77736
",
       "    
",
       "    
",
       "      4
",
       "      M
",
       "      Mason
",
       "      2010
",
       "      70808
",
       "    
",
       "    
",
       "      14151
",
       "      M
",
       "      Jacob
",
       "      2000
",
       "      273591
",
       "    
",
       "    
",
       "      14152
",
       "      M
",
       "      Michael
",
       "      2000
",
       "      250318
",
       "    
",
       "    
",
       "      14153
",
       "      M
",
       "      Joshua
",
       "      2000
",
       "      231729
",
       "    
",
       "    
",
       "      14154
",
       "      F
",
       "      Emily
",
       "      2000
",
       "      223565
",
       "    
",
       "    
",
       "      14155
",
       "      M
",
       "      Matthew
",
       "      2000
",
       "      221369
",
       "    
",
       "    
",
       "      32013
",
       "      M
",
       "      Michael
",
       "      1990
",
       "      462265
",
       "    
",
       "    
",
       "      32014
",
       "      M
",
       "      Christopher
",
       "      1990
",
       "      360170
",
       "    
",
       "    
",
       "      32015
",
       "      M
",
       "      Matthew
",
       "      1990
",
       "      351569
",
       "    
",
       "    
",
       "      32016
",
       "      M
",
       "      Joshua
",
       "      1990
",
       "      329072
",
       "    
",
       "    
",
       "      32017
",
       "      F
",
       "      Jessica
",
       "      1990
",
       "      303053
",
       "    
",
       "    
",
       "      47733
",
       "      M
",
       "      Michael
",
       "      1980
",
       "      663445
",
       "    
",
       "    
",
       "      47734
",
       "      M
",
       "      Christopher
",
       "      1980
",
       "      554725
",
       "    
",
       "    
",
       "      47735
",
       "      F
",
       "      Jessica
",
       "      1980
",
       "      469415
",
       "    
",
       "    
",
       "      47736
",
       "      M
",
       "      Matthew
",
       "      1980
",
       "      458831
",
       "    
",
       "    
",
       "      47737
",
       "      F
",
       "      Jennifer
",
       "      1980
",
       "      440818
",
       "    
",
       "    
",
       "      60168
",
       "      M
",
       "      Michael
",
       "      1970
",
       "      707704
",
       "    
",
       "    
",
       "      60169
",
       "      F
",
       "      Jennifer
",
       "      1970
",
       "      581756
",
       "    
",
       "    
",
       "      60170
",
       "      M
",
       "      Christopher
",
       "      1970
",
       "      475681
",
       "    
",
       "    
",
       "      60171
",
       "      M
",
       "      Jason
",
       "      1970
",
       "      462926
",
       "    
",
       "    
",
       "      60172
",
       "      M
",
       "      David
",
       "      1970
",
       "      445967
",
       "    
",
       "    
",
       "      69919
",
       "      M
",
       "      Michael
",
       "      1960
",
       "      833395
",
       "    
",
       "    
",
       "      69920
",
       "      M
",
       "      David
",
       "      1960
",
       "      734176
",
       "    
",
       "    
",
       "      69921
",
       "      M
",
       "      John
",
       "      1960
",
       "      713636
",
       "    
",
       "    
",
       "      69922
",
       "      M
",
       "      James
",
       "      1960
",
       "      684985
",
       "    
",
       "    
",
       "      69923
",
       "      M
",
       "      Robert
",
       "      1960
",
       "      650985
",
       "    
",
       "    
",
       "      77302
",
       "      M
",
       "      James
",
       "      1950
",
       "      843189
",
       "    
",
       "    
",
       "      77303
",
       "      M
",
       "      Michael
",
       "      1950
",
       "      836913
",
       "    
",
       "    
",
       "      77304
",
       "      M
",
       "      Robert
",
       "      1950
",
       "      829819
",
       "    
",
       "    
",
       "      77305
",
       "      M
",
       "      John
",
       "      1950
",
       "      797331
",
       "    
",
       "    
",
       "      77306
",
       "      M
",
       "      David
",
       "      1950
",
       "      769391
",
       "    
",
       "    
",
       "      84003
",
       "      M
",
       "      James
",
       "      1940
",
       "      795557
",
       "    
",
       "    
",
       "      84004
",
       "      M
",
       "      Robert
",
       "      1940
",
       "      757894
",
       "    
",
       "    
",
       "      84005
",
       "      M
",
       "      John
",
       "      1940
",
       "      711411
",
       "    
",
       "    
",
       "      84006
",
       "      F
",
       "      Mary
",
       "      1940
",
       "      639971
",
       "    
",
       "    
",
       "      84007
",
       "      M
",
       "      William
",
       "      1940
",
       "      556286
",
       "    
",
       "    
",
       "      89923
",
       "      M
",
       "      Robert
",
       "      1930
",
       "      590599
",
       "    
",
       "    
",
       "      89924
",
       "      F
",
       "      Mary
",
       "      1930
",
       "      572868
",
       "    
",
       "    
",
       "      89925
",
       "      M
",
       "      James
",
       "      1930
",
       "      547275
",
       "    
",
       "    
",
       "      89926
",
       "      M
",
       "      John
",
       "      1930
",
       "      487777
",
       "    
",
       "    
",
       "      89927
",
       "      M
",
       "      William
",
       "      1930
",
       "      416559
",
       "    
",
       "    
",
       "      95806
",
       "      F
",
       "      Mary
",
       "      1920
",
       "      701709
",
       "    
",
       "    
",
       "      95807
",
       "      M
",
       "      Robert
",
       "      1920
",
       "      576322
",
       "    
",
       "    
",
       "      95808
",
       "      M
",
       "      John
",
       "      1920
",
       "      564033
",
       "    
",
       "    
",
       "      95809
",
       "      M
",
       "      James
",
       "      1920
",
       "      515296
",
       "    
",
       "    
",
       "      95810
",
       "      M
",
       "      William
",
       "      1920
",
       "      512373
",
       "    
",
       "    
",
       "      102705
",
       "      F
",
       "      Mary
",
       "      1910
",
       "      478634
",
       "    
",
       "    
",
       "      102706
",
       "      M
",
       "      John
",
       "      1910
",
       "      376321
",
       "    
",
       "    
",
       "      102707
",
       "      M
",
       "      William
",
       "      1910
",
       "      303027
",
       "    
",
       "    
",
       "      102708
",
       "      M
",
       "      James
",
       "      1910
",
       "      275075
",
       "    
",
       "    
",
       "      102709
",
       "      F
",
       "      Helen
",
       "      1910
",
       "      248150
",
       "    
",
       "  
",
       "
",
       ""
      ],
      "text/plain": [
       "       gender         name  decade   count
",
       "0           F       Sophia    2010   85720
",
       "1           M        Jacob    2010   79359
",
       "2           F     Isabella    2010   79238
",
       "3           F         Emma    2010   77736
",
       "4           M        Mason    2010   70808
",
       "14151       M        Jacob    2000  273591
",
       "14152       M      Michael    2000  250318
",
       "14153       M       Joshua    2000  231729
",
       "14154       F        Emily    2000  223565
",
       "14155       M      Matthew    2000  221369
",
       "32013       M      Michael    1990  462265
",
       "32014       M  Christopher    1990  360170
",
       "32015       M      Matthew    1990  351569
",
       "32016       M       Joshua    1990  329072
",
       "32017       F      Jessica    1990  303053
",
       "47733       M      Michael    1980  663445
",
       "47734       M  Christopher    1980  554725
",
       "47735       F      Jessica    1980  469415
",
       "47736       M      Matthew    1980  458831
",
       "47737       F     Jennifer    1980  440818
",
       "60168       M      Michael    1970  707704
",
       "60169       F     Jennifer    1970  581756
",
       "60170       M  Christopher    1970  475681
",
       "60171       M        Jason    1970  462926
",
       "60172       M        David    1970  445967
",
       "69919       M      Michael    1960  833395
",
       "69920       M        David    1960  734176
",
       "69921       M         John    1960  713636
",
       "69922       M        James    1960  684985
",
       "69923       M       Robert    1960  650985
",
       "77302       M        James    1950  843189
",
       "77303       M      Michael    1950  836913
",
       "77304       M       Robert    1950  829819
",
       "77305       M         John    1950  797331
",
       "77306       M        David    1950  769391
",
       "84003       M        James    1940  795557
",
       "84004       M       Robert    1940  757894
",
       "84005       M         John    1940  711411
",
       "84006       F         Mary    1940  639971
",
       "84007       M      William    1940  556286
",
       "89923       M       Robert    1930  590599
",
       "89924       F         Mary    1930  572868
",
       "89925       M        James    1930  547275
",
       "89926       M         John    1930  487777
",
       "89927       M      William    1930  416559
",
       "95806       F         Mary    1920  701709
",
       "95807       M       Robert    1920  576322
",
       "95808       M         John    1920  564033
",
       "95809       M        James    1920  515296
",
       "95810       M      William    1920  512373
",
       "102705      F         Mary    1910  478634
",
       "102706      M         John    1910  376321
",
       "102707      M      William    1910  303027
",
       "102708      M        James    1910  275075
",
       "102709      F        Helen    1910  248150"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "
",
    "df_dec_group = df.groupby("decade", sort= True)
",
    "df_dec_group.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[['Mary', 'John', 'William', 'James', 'Helen'], ['Mary', 'Robert', 'John', 'James', 'William'], ['Robert', 'Mary', 'James', 'John', 'William'], ['James', 'Robert', 'John', 'Mary', 'William'], ['James', 'Michael', 'Robert', 'John', 'David'], ['Michael', 'David', 'John', 'James', 'Robert'], ['Michael', 'Jennifer', 'Christopher', 'Jason', 'David'], ['Michael', 'Christopher', 'Jessica', 'Matthew', 'Jennifer'], ['Michael', 'Christopher', 'Matthew', 'Joshua', 'Jessica'], ['Jacob', 'Michael', 'Joshua', 'Emily', 'Matthew'], ['Sophia', 'Jacob', 'Isabella', 'Emma', 'Mason']]
",
      "[[478634, 376321, 303027, 275075, 248150], [701709, 576322, 564033, 515296, 512373], [590599, 572868, 547275, 487777, 416559], [795557, 757894, 711411, 639971, 556286], [843189, 836913, 829819, 797331, 769391], [833395, 734176, 713636, 684985, 650985], [707704, 581756, 475681, 462926, 445967], [663445, 554725, 469415, 458831, 440818], [462265, 360170, 351569, 329072, 303053], [273591, 250318, 231729, 223565, 221369], [85720, 79359, 79238, 77736, 70808]]
"
     ]
    }
   ],
   "source": [
    "names = []
",
    "lst = []
",
    "for i in range(1910,2020, 10):
",
    "    df_split = df_dec_group.get_group(i).nlargest(5, "count")
",
    "    names.append(list(df_split['name']))
",
    "    lst.append(list(df_split['count']))
",
    "
",
    "print(names)
",
    "print(lst)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Mary', 'Mary', 'Robert', 'James', 'James', 'Michael', 'Michael', 'Michael', 'Michael', 'Jacob', 'Sophia', 'John', 'Robert', 'Mary', 'Robert', 'Michael', 'David', 'Jennifer', 'Christopher', 'Christopher', 'Michael', 'Jacob', 'William', 'John', 'James', 'John', 'Robert', 'John', 'Christopher', 'Jessica', 'Matthew', 'Joshua', 'Isabella', 'James', 'James', 'John', 'Mary', 'John', 'James', 'Jason', 'Matthew', 'Joshua', 'Emily', 'Emma', 'Helen', 'William', 'William', 'William', 'David', 'Robert', 'David', 'Jennifer', 'Jessica', 'Matthew', 'Mason']
"
     ]
    }
   ],
   "source": [
    "#flatten the names list, since it is nested list
",
    "flatten_names = []
",
    "for j in range(len(names[0])):
",
    "    for i in range(len(names)):
",
    "        flatten_names.append(names[i][j])
",
    "print(flatten_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt 
",
    "import numpy as np 
",
    "  
",
    "# create data 
",
    "x = np.array(list(range(1910, 2020, 10)))
",
    "y1 = [lst[i][0] for i in range(0, 11)]
",
    "y2 = [lst[i][1] for i in range(0, 11)]
",
    "y3 = [lst[i][2] for i in range(0, 11)]
",
    "y4 = [lst[i][3] for i in range(0, 11)]
",
    "y5 = [lst[i][4] for i in range(0, 11)]
",
    "
",
    "width = 1.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png":

Requirements: has to be done in Jupyter Notebook. You are required to use the dataset contained within the file “us-names-by-decade.csv”, which contains the following features: Gender - Gender of...

Answer To: Requirements: has to be done in Jupyter Notebook. You are required to use the dataset contained within...

Answer To This Question Is Available To Download

Related Questions & Answers

Submit New Assignment

	gender	name	decade	count
0	F	Sophia	2010	85720
1	M	Jacob	2010	79359
2	F	Isabella	2010	79238
3	F	Emma	2010	77736
4	M	Mason	2010	70808