Answer to: Scalable Data Analytics, Homework 1, Spring 2021. Deadline: Feb. 15, 2021, noon. Deadlines: Homework 1 is...
Sanchi answered on Feb 17, 2021
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Read the CSV at the hard-coded local path into the DataFrame `data`.\n",
"csv_path = 'C:/Users/sanchi.kalra/Desktop/Greynodes/AS18/citibike-ltwimtfd.csv'\n",
"data = pd.read_csv(csv_path)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Convert the 'starttime' column to datetimes using an explicit format string.\n",
"timestamp_format = '%Y-%m-%d %H:%M:%S'\n",
"data['starttime'] = pd.to_datetime(data['starttime'], format=timestamp_format)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Bin the rows into daily buckets keyed on 'starttime', then take the\n",
"# per-bucket minimum (each column is reduced independently).\n",
"daily_bins = data.resample('D', on='starttime')\n",
"x = daily_bins.min()\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Start with an empty list; a later cell fills it from x['starttime'].\n",
"y = list()\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Append every value of x['starttime'] to y, in order.\n",
"y.extend(x['starttime'])\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the rows of `data` whose 'starttime' is one of the values in y.\n",
"is_selected = data['starttime'].isin(y)\n",
"df = data[is_selected]\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the 'starttime' and 'birth_year' columns of the filtered frame.\n",
"selected_columns = ['starttime', 'birth_year']\n",
"data1 = df[selected_columns]\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Group the rows by their 'starttime' value. An array passed as the group key\n",
"# must have exactly one entry per row, so use the full column here:\n",
"# .unique() is shorter than the frame whenever two rows share a start time,\n",
"# and groupby rejects a key whose length differs from the frame's.\n",
"# NOTE: `data1` is rebound from a DataFrame to a GroupBy object.\n",
"data1 = data1.groupby(data1['starttime'].to_numpy())\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"finaldata = data1.apply(lambda x: x)\n"
...