{
"cells": [
{
"cell_type": "code",
"execution_count": 104,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import numpy as np\n",
"import datetime\n",
"from sklearn import linear_model\n",
"reg = linear_model.LinearRegression()\n",
"import statsmodels.formula.api as smf \n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"# Load dataset.csv (path is relative to the notebook's working directory).\n",
"# index_col=False tells pandas not to use the first column as the index.\n",
"PISA = pd.read_csv(\n",
"    filepath_or_buffer='dataset.csv',\n",
"    index_col=False,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" year background cnt country female age diffgrade fisced misced \\\n",
"0 2012 Bolivia ARG Argentina 0 15.75 0 6.0 6.0 \n",
"1 2009 Bolivia ARG Argentina 1 15.33 1 5.0 0.0 \n",
"2 2009 Bolivia ARG Argentina 0 15.92 1 NaN NaN \n",
"3 2012 Bolivia ARG Argentina 1 16.00 0 2.0 4.0 \n",
"4 2009 Bolivia ARG Argentina 1 16.17 0 5.0 5.0 \n",
"5 2009 Bolivia ARG Argentina 1 16.08 1 5.0 0.0 \n",
"6 2012 Bolivia ARG Argentina 1 16.08 0 0.0 4.0 \n",
"7 2012 Bolivia ARG Argentina 0 15.33 0 1.0 4.0 \n",
"8 2012 Bolivia ARG Argentina 1 15.75 0 1.0 4.0 \n",
"9 2012 Bolivia ARG Argentina 1 15.50 0 1.0 1.0 \n",
"\n",
" momwork ... gdppc hdi lgdppc obs norigin stratum2003 \\\n",
"0 0.0 ... 3791.675 0.659 8.240563 131 4 NaN \n",
"1 NaN ... 3791.675 0.659 8.240563 131 4 NaN \n",
"2 1.0 ... 3791.675 0.659 8.240563 131 4 NaN \n",
"3 1.0 ... 3791.675 0.659 8.240563 131 4 NaN \n",
"4 0.0 ... 3791.675 0.659 8.240563 131 4 NaN \n",
"5 0.0 ... 3791.675 0.659 8.240563 131 4 NaN \n",
"6 0.0 ... 3791.675 0.659 8.240563 131 4 NaN \n",
"7 0.0 ... 3791.675 0.659 8.240563 131 4 NaN \n",
"8 0.0 ... 3791.675 0.659 8.240563 131 4 NaN \n",
"9 1.0 ... 3791.675 0.659 8.240563 131 4 NaN \n",
"\n",
" hostregion stratum2006 stratum2009 stratum2012 \n",
"0 3202 NaN NaN ARG0102 \n",
"1 3203 NaN 3203.0 NaN \n",
"2 3201 NaN 3201.0 NaN \n",
"3 3202 NaN NaN ARG0102 \n",
"4 3201 NaN 3201.0 NaN \n",
"5 3201 NaN 3201.0 NaN \n",
"6 3202 NaN NaN ARG0102 \n",
"7 3202 NaN NaN ARG0102 \n",
"8 3202 NaN NaN ARG0102 \n",
"9 3205 NaN NaN ARG0005 \n",
"\n",
"[10 rows x 37 columns]"
],
"text/html": "
\n\n
\n\n\n | \nyear | \nbackground | \ncnt | \ncountry | \nfemale | \nage | \ndiffgrade | \nfisced | \nmisced | \nmomwork | \n... | \ngdppc | \nhdi | \nlgdppc | \nobs | \nnorigin | \nstratum2003 | \nhostregion | \nstratum2006 | \nstratum2009 | \nstratum2012 | \n
\n\n\n\n0 | \n2012 | \nBolivia | \nARG | \nArgentina | \n0 | \n15.75 | \n0 | \n6.0 | \n6.0 | \n0.0 | \n... | \n3791.675 | \n0.659 | \n8.240563 | \n131 | \n4 | \nNaN | \n3202 | \nNaN | \nNaN | \nARG0102 | \n
\n\n1 | \n2009 | \nBolivia | \nARG | \nArgentina | \n1 | \n15.33 | \n1 | \n5.0 | \n0.0 | \nNaN | \n... | \n3791.675 | \n0.659 | \n8.240563 | \n131 | \n4 | \nNaN | \n3203 | \nNaN | \n3203.0 | \nNaN | \n
\n\n2 | \n2009 | \nBolivia | \nARG | \nArgentina | \n0 | \n15.92 | \n1 | \nNaN | \nNaN | \n1.0 | \n... | \n3791.675 | \n0.659 | \n8.240563 | \n131 | \n4 | \nNaN | \n3201 | \nNaN | \n3201.0 | \nNaN | \n
\n\n3 | \n2012 | \nBolivia | \nARG | \nArgentina | \n1 | \n16.00 | \n0 | \n2.0 | \n4.0 | \n1.0 | \n... | \n3791.675 | \n0.659 | \n8.240563 | \n131 | \n4 | \nNaN | \n3202 | \nNaN | \nNaN | \nARG0102 | \n
\n\n4 | \n2009 | \nBolivia | \nARG | \nArgentina | \n1 | \n16.17 | \n0 | \n5.0 | \n5.0 | \n0.0 | \n... | \n3791.675 | \n0.659 | \n8.240563 | \n131 | \n4 | \nNaN | \n3201 | \nNaN | \n3201.0 | \nNaN | \n
\n\n5 | \n2009 | \nBolivia | \nARG | \nArgentina | \n1 | \n16.08 | \n1 | \n5.0 | \n0.0 | \n0.0 | \n... | \n3791.675 | \n0.659 | \n8.240563 | \n131 | \n4 | \nNaN | \n3201 | \nNaN | \n3201.0 | \nNaN | \n
\n\n6 | \n2012 | \nBolivia | \nARG | \nArgentina | \n1 | \n16.08 | \n0 | \n0.0 | \n4.0 | \n0.0 | \n... | \n3791.675 | \n0.659 | \n8.240563 | \n131 | \n4 | \nNaN | \n3202 | \nNaN | \nNaN | \nARG0102 | \n
\n\n7 | \n2012 | \nBolivia | \nARG | \nArgentina | \n0 | \n15.33 | \n0 | \n1.0 | \n4.0 | \n0.0 | \n... | \n3791.675 | \n0.659 | \n8.240563 | \n131 | \n4 | \nNaN | \n3202 | \nNaN | \nNaN | \nARG0102 | \n
\n\n8 | \n2012 | \nBolivia | \nARG | \nArgentina | \n1 | \n15.75 | \n0 | \n1.0 | \n4.0 | \n0.0 | \n... | \n3791.675 | \n0.659 | \n8.240563 | \n131 | \n4 | \nNaN | \n3202 | \nNaN | \nNaN | \nARG0102 | \n
\n\n9 | \n2012 | \nBolivia | \nARG | \nArgentina | \n1 | \n15.50 | \n0 | \n1.0 | \n1.0 | \n1.0 | \n... | \n3791.675 | \n0.659 | \n8.240563 | \n131 | \n4 | \nNaN | \n3205 | \nNaN | \nNaN | \nARG0005 | \n
\n\n
\n
10 rows × 37 columns
\n
"
},
"metadata": {},
"execution_count": 106
}
],
"source": [
"# Show the first ten rows as a quick look at the loaded frame.\n",
"PISA.head(n=10)"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Build PISA_G: one row per `background` value, taken from rows where female == 1,\n",
"# reduced to the columns listed in KEEP_COLS and sorted by background.\n",
"# A single method chain (no inplace=True) keeps this cell idempotent on re-run.\n",
"KEEP_COLS = ['background', 'female', 'ggi', 'pv1math']\n",
"MGG_SEED = 0\n",
"\n",
"PISA_G = (\n",
"    PISA.loc[PISA['female'] == 1]\n",
"    # keep='first' is the default: the first row seen for each background is retained\n",
"    .drop_duplicates(subset='background')\n",
"    # Name the axis via columns=: the positional form drop(labels, 1) was\n",
"    # deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.\n",
"    .drop(columns=PISA.columns.difference(KEEP_COLS))\n",
"    .sort_values(by=['background'])\n",
")\n",
"\n",
"# Seeded local generator so Restart & Run All reproduces the same mgg values\n",
"# without touching NumPy's global random state.\n",
"# NOTE(review): mgg is filled with random integers in [-100, 50); this looks like\n",
"# a placeholder rather than a measured quantity - confirm before using it downstream.\n",
"PISA_G['mgg'] = np.random.RandomState(MGG_SEED).randint(-100, 50, PISA_G.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" background female pv1math ggi mgg\n",
"8617 Albania 1 314.542800 0.660122 34\n",
"8233 Australia 1 434.109700 0.728218 -99\n",
"8750 Austria 1 586.064819 0.703139 -82\n",
"4843 Belgium 1 550.171400 0.716538 -14\n",
"1 Bolivia 1 363.619995 0.669267 -2\n",
"134 Chile 1 428.812900 0.688381 28\n",
"235 China 1 685.706500 0.690679 13\n",
"2670 Croatia 1 436.910004 0.694358 -94\n",
"4052 Ethiopia 1 233.529999 0.594783 -21\n",
"8399 Fiji 1 410.063904 0.641400 -4\n",
"3419 France 1 456.566406 0.733143 -81\n",
"645 Germany 1 567.954529 0.744915 -42\n",
"670 Greece 1 471.576600 0.666238 -34\n",
"716 India 1 624.793600 0.615098 16\n",
"870 Italy 1 406.067900 0.679783 1\n",
"958 Korea 1 533.953918 0.614611 -48\n",
"3399 Macedonia 1 484.818500 0.694999 -51\n",
"990 Malaysia 1 517.401489 0.646744 16\n",
"7712 Morocco 1 407.549988 0.592550 17\n",
"3562 Netherlands 1 604.992981 0.748981 -44\n",
"1024 New Zealand 1 550.584228 0.788023 -77\n",
"155 Paraguay 1 443.379000 0.686812 -89\n",
"1399 Phillipines 1 544.002197 0.757864 -84\n",
"2786 Poland 1 540.902000 0.699762 34\n",
"5616 Portugal 1 436.680200 0.701284 19\n",
"2834 Romania 1 406.831299 0.680549 -11\n",
"4272 Russian Fed. 1 523.298000 0.698714 31\n",
"1932 South Africa 1 483.959992 0.770937 -64\n",
"10691 Spain 1 486.999500 0.734484 27\n",
"7904 Suriname 1 572.369995 0.672565 -98\n",
"2891 Turkey 1 440.598206 0.582830 -20\n",
"1992 United Kingdom 1 421.880400 0.740245 -73\n",
"2641 United States 1 654.469971 0.717335 43\n",
"219 Uruguay 1 475.320007 0.693637 -39\n",
"1639 Viet Nam 1 487.856400 0.680186 20"
],
"text/html": "
\n\n
\n\n\n | \nbackground | \nfemale | \npv1math | \nggi | \nmgg | \n
\n\n\n\n8617 | \nAlbania | \n1 | \n314.542800 | \n0.660122 | \n34 | \n
\n\n8233 | \nAustralia | \n1 | \n434.109700 | \n0.728218 | \n-99 | \n
\n\n8750 | \nAustria | \n1 | \n586.064819 | \n0.703139 | \n-82 | \n
\n\n4843 | \nBelgium | \n1 | \n550.171400 | \n0.716538 | \n-14 | \n
\n\n1 | \nBolivia | \n1 | \n363.619995 | \n0.669267 | \n-2 | \n
\n\n134 | \nChile | \n1 | \n428.812900 | \n0.688381 | \n28 | \n
\n\n235 | \nChina | \n1 | \n685.706500 | \n0.690679 | \n13 | \n
\n\n2670 | \nCroatia | \n1 | \n436.910004 | \n0.694358 | \n-94 | \n
\n\n4052 | \nEthiopia | \n1 | \n233.529999 | \n0.594783 | \n-21 | \n
\n\n8399 | \nFiji | \n1 | \n410.063904 | \n0.641400 | \n-4 | \n
\n\n3419 | \nFrance | \n1 | \n456.566406 | \n0.733143 | \n-81 | \n
\n\n645 | \nGermany | \n1 | \n567.954529 | \n0.744915 | \n-42 | \n
\n\n670 | \nGreece | \n1 | \n471.576600 | \n0.666238 | \n-34 | \n
\n\n716 | \nIndia | \n1 | \n624.793600 | \n0.615098 | \n16 | \n
\n\n870 | \nItaly | \n1 | \n406.067900 | \n0.679783 | \n1 | \n
\n\n958 | \nKorea | \n1 | \n533.953918 | \n0.614611 | \n-48 | \n
\n\n3399 | \nMacedonia | \n1 | \n484.818500 | \n0.694999 | \n-51 | \n
\n\n990 | \nMalaysia | \n1 | \n517.401489 | \n0.646744 | \n16 | \n
\n\n7712 | \nMorocco | \n1 | \n407.549988 | \n0.592550 | \n17 | \n
\n\n3562 | \nNetherlands | \n1 | \n604.992981 | \n0.748981 | \n-44 | \n
\n\n1024 | \nNew Zealand | \n1 | \n550.584228 | \n0.788023 | \n-77 | \n
\n\n155 | \nParaguay | \n1 | \n443.379000 | \n0.686812 | \n-89 | \n
\n\n1399 | \nPhillipines | \n1 | \n544.002197 | \n0.757864 | \n-84 | \n
\n\n2786 | \nPoland | \n1 | \n540.902000 | \n0.699762 | \n34 | \n
\n\n5616 | \nPortugal | \n1 | \n436.680200 | \n0.701284 | \n19 | \n
\n\n2834 | \nRomania | \n1 | \n406.831299 | \n0.680549 | \n-11 | \n