Please see the attached files and read the comments in the Python file.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Sep 25 15:09:01 2021

@author: Moha

Exploratory analysis of ``king_county.csv``: parts (f) and (g).

Part (f) lists the unique values of three discrete features and draws a
boxplot of ``price`` grouped by each of them.  Part (g) computes the
covariance matrix of four square-footage features and draws the two
requested pairwise scatter plots.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Instructor's guidance: use full matrix (vectorised) operations whenever
# possible rather than for loops.

dataset = pd.read_csv('king_county.csv')

# ---------------------------------------------------------------------------
# (f) Several features are listed as numerical but have only a small number
#     of discrete values.  This includes "bedrooms", "bathrooms" and
#     "floors".  For each of these three features, identify the unique
#     values that are observed (you can use the unique function from
#     Pandas).  For each of the 3 features, generate a boxplot (you can use
#     the boxplot function from Pandas) of the price.  Specifically, you
#     should generate one plot for each feature grouped by the feature value.
# ---------------------------------------------------------------------------
bed = dataset.bedrooms.unique()
bath = dataset.bathrooms.unique()
floor = dataset.floors.unique()

# unique() returns values in order of first appearance; sort them only for
# display so the printed lists are easy to read.
print('bedrooms unique values: ', np.sort(bed))
print('bathrooms unique values:', np.sort(bath))
print('floors unique values:   ', np.sort(floor))

# boxplot(by=...) groups the full price column by every distinct value of the
# feature, so the plots are made from the original frame.  The unique arrays
# are much shorter than the price column and cannot be paired with it row by
# row, so no frame needs to be built from them.
# This loop only walks over three column names; the per-row grouping is done
# inside pandas.
for feature in ('bedrooms', 'bathrooms', 'floors'):
    # rot=45 keeps the tick labels legible when a feature has many values.
    dataset.boxplot(by=feature, column=['price'], grid=False, rot=45)

# ---------------------------------------------------------------------------
# (g) Consider the following numerical features: sqft_living, sqft_lot,
#     sqft_living15, sqft_lot15.  Calculate the co-variance matrix of these
#     four features.  Generate a scatter plot of the data using sqft_living
#     and sqft_living15, and another scatter plot using sqft_lot with
#     sqft_lot15.
#     Question: what do you observe from the scatter plot?  Are these
#     features redundant?
# ---------------------------------------------------------------------------
df = dataset[['sqft_living', 'sqft_lot', 'sqft_living15', 'sqft_lot15']].copy()
print(df)

covMat = df.cov()
print(covMat)

# Covariance depends on the scale of each column; the correlation matrix is
# the scale-free view and is easier to read when judging redundancy.
print(df.corr())

# One scatter plot per requested pair.  The plots use the data columns, not
# covMat: a scatter matrix of the 4x4 covariance matrix only plots the
# sixteen covariance numbers against each other.
df.plot.scatter(x='sqft_living', y='sqft_living15', alpha=0.2,
                title='sqft_living vs sqft_living15')
df.plot.scatter(x='sqft_lot', y='sqft_lot15', alpha=0.2,
                title='sqft_lot vs sqft_lot15')

# Needed when the file is run as a script; without it the figures are
# created but never displayed.
plt.show()

# ---------------------------------------------------------------------------
# End of the Python script.  The lines below are not Python: they appear to
# be the pasted contents of the attached king_county.csv (header + rows).
# ---------------------------------------------------------------------------
id,date,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,price
3066410850,7/9/2014,4,2.5,2720,10006,2,0,0,3,9,2720,0,1989,0,98074,47.6295,-122.042,2720,10759,5.9495
9345400350,7/18/2014,2,2.5,2600,5000,1,0,0,5,8,1300,1300,1926,0,98126,47.5806,-122.379,2260,5000,6.65
7128300060,7/7/2014,5,1.75,1650,3000,1.5,0,0,3,8,1650,0,1902,0,98144,47.5955,-122.306,1740,4000,4.43
2155500030,4/28/2015,4,1.75,1720,9600,1,0,0,4,8,1720,0,1969,0,98059,47.4764,-122.155,1660,10720,3.8
3999300080,9/4/2014,6,2.25,3830,11180,1,0,2,5,9,2440,1390,1962,0,98008,47.5849,-122.113,2500,10400,8.87
1222069133,2/24/2015,4,2.5,2210,213008,1,0,0,4,7,1210,1000,1975,0,98038,47.4039,-121.98,2270,52707,4.15
6329000185,3/29/2015,3,2.5,2600,23361,1.5,1,4,3,8,2150,450,1912,0,98146,47.4997,-122.379,1700,14700,5.4
3336000296,11/13/2014,4,1.5,1220,4900,1,0,0,3,6,1220,0,1942,0,98118,47.5292,-122.269,1410,3000,2.5
3026059011,8/13/2014,3,2.75,3040,24192,2,0,0,4,10,3040,0,1987,0,98034,47.7108,-122.225,2770,5728,8.25
9197100101,5/4/2015,2,1,1010,5408,1,0,0,4,6,1010,0,1926,0,98032,47.3759,-122.238,980,7800,2.25
53500760,12/8/2014,4,2.5,2660,4082,2,0,0,3,7,2660,0,2010,0,98042,47.3414,-122.055,2390,4876,2.87
6117500460,6/30/2014,4,2.5,2680,12215,1,1,4,3,9,1590,1090,1956,0,98166,47.4396,-122.353,2960,19964,13.1
3754700170,4/23/2015,3,2,1640,9825,1,0,0,4,7,1090,550,1971,0,98034,47.7244,-122.2,1500,9750,4.55
424069010,7/21/2014,4,2.25,2470,17008,2,0,0,4,8,2470,0,1979,0,98075,47.5924,-122.048,2470,31798,6.25 9188200505,7/10/2014,4,2.5,1830,3868,2,0,0,3,7,1830,0,2007,0,98118,47.5186,-122.276,2330,3868,2.75 8856970530,12/8/2014,3,2.5,1860,5321,2,0,0,3,7,1860,0,2000,0,98038,47.3848,-122.033,1940,5205,3.26995 3902300100,5/12/2014,4,2.25,1800,8623,1,0,0,4,8,1360,440,1980,0,98033,47.692,-122.184,2370,8623,5.22 859000110,10/2/2014,1,1,500,7440,1,0,0,1,5,500,0,1928,0,98106,47.5252,-122.362,1350,7440,1.25 1588600040,2/6/2015,2,1,770,5680,1,0,0,4,6,770,0,1929,0,98117,47.6951,-122.366,1170,5514,3.65 7230000265,6/17/2014,3,2.5,2970,21907,2,0,0,3,9,2970,0,1998,2006,98059,47.4741,-122.099,2040,27917,4.995 114100314,3/18/2015,3,1.5,1480,7117,1,0,0,3,7,1170,310,1960,0,98028,47.7766,-122.248,2230,14775,2.85 9528103443,7/24/2014,2,1.5,1180,1034,2,0,0,3,7,1120,60,2001,0,98115,47.678,-122.322,1137,1034,4.1 704450070,7/7/2014,3,2.5,1990,12793,2,0,0,3,8,1990,0,1993,0,98028,47.7347,-122.226,2290,9035,4.5 629410180,12/8/2014,4,2.5,3220,6399,2,0,0,3,9,3220,0,2004,0,98075,47.5883,-121.991,2850,6399,6.97 1324079041,11/18/2014,3,1,1370,17859,1,0,0,4,7,1150,220,1930,0,98024,47.5617,-121.859,1460,47044,2.75 9270200160,10/28/2014,3,1,1570,2280,2,0,0,3,7,1570,0,1922,0,98119,47.6413,-122.364,1580,2640,6.85 5662100110,2/18/2015,3,2.5,1830,6807,2.5,0,0,5,7,1830,0,1954,0,98155,47.7613,-122.322,1340,6807,4.4 1081330210,9/11/2014,4,2.25,2150,27345,2,0,0,5,8,2150,0,1976,0,98059,47.469,-122.121,2200,11923,4.1 5113400364,1/26/2015,4,1.5,2480,6383,1,0,0,3,7,1380,1100,1946,0,98119,47.6445,-122.374,1440,6000,6.5 3226049530,1/22/2015,5,3,2010,7264,1,0,0,3,7,1290,720,1990,0,98103,47.6945,-122.33,1510,7326,4.65 952004725,11/6/2014,2,1,880,5750,1,0,0,3,6,880,0,1939,0,98126,47.5642,-122.379,1190,5750,2.8 5587000010,11/20/2014,3,2.25,1680,8450,1,0,0,3,8,1340,340,1960,0,98177,47.7575,-122.361,1850,8300,3.85 5272200045,11/13/2014,3,1.5,1000,6914,1,0,0,3,7,1000,0,1947,0,98125,47.7144,-122.319,1000,6947,3.78 
3450300020,3/18/2015,4,2,1850,9126,1,0,0,5,7,1850,0,1963,0,98059,47.5009,-122.164,1730,9110,3.29 9274203036,9/15/2014,3,3.25,2950,4446,2,0,0,3,9,2450,500,2001,0,98116,47.5852,-122.391,1930,4255,9.3 686400670,4/14/2015,3,1.75,1670,7210,1,0,0,5,8,1670,0,1967,0,98008,47.6344,-122.116,2200,7210,6.78 2925059260,5/6/2015,5,2.5,3000,10560,1,0,0,3,8,1500,1500,1966,0,98004,47.6249,-122.206,2690,11616,8 6054650070,10/7/2014,3,1.75,1370,9680,1,0,0,4,7,1370,0,1977,0,98074,47.6127,-122.045,1370,10208,4 2856101105,5/27/2014,3,2.5,1590,2550,3,0,0,3,7,1590,0,1985,0,98117,47.6772,-122.393,1260,5100,4.88 567000381,3/28/2015,2,1.5,980,853,2,0,0,3,7,820,160,2009,0,98144,47.5925,-122.295,1130,1270,3.78 100300530,9/25/2014,3,2.5,1520,3003,2,0,0,3,7,1520,0,2009,0,98059,47.4876,-122.153,1820,3030,3.3 5104520620,7/24/2014,4,2.5,1770,5000,2,0,0,3,7,1770,0,2004,0,98038,47.3503,-122.005,2080,5100,2.915 66000265,8/7/2014,2,1,820,6550,1,0,0,3,7,820,0,1949,2012,98126,47.5478,-122.381,1640,6550,3.7 1796360870,10/30/2014,3,1.75,1460,8372,1,0,0,4,7,1460,0,1981,0,98042,47.3683,-122.087,1220,7803,2.25 1832100030,6/25/2014,4,4,3570,8250,2,0,0,3,10,2860,710,2015,0,98040,47.5784,-122.226,2230,10000,5.97326 8564500020,1/27/2015,3,1,960,10181,1,0,0,3,7,960,0,1961,0,98034,47.7231,-122.229,1740,10194,3.22 723049197,6/27/2014,2,1,1020,8100,1,0,0,3,6,1020,0,1940,0,98168,47.4971,-122.334,1200,12500,1.95 9238500100,3/18/2015,4,2.25,2070,20280,2,0,0,4,7,2070,0,1968,0,98072,47.774,-122.134,2190,21560,4.95 1326069151,2/24/2015,3,1.75,2160,22702,1,0,0,4,7,2160,0,1981,0,98019,47.7355,-121.982,1820,22687,2.6 1072000400,10/23/2014,4,3,2120,13000,2,0,0,4,8,2120,0,1978,0,98059,47.4745,-122.141,2180,11440,3.85 8644300200,6/5/2014,4,2.75,2020,10720,1,0,0,4,8,1420,600,1976,0,98052,47.6373,-122.104,2190,10164,5.55 1232000810,3/26/2015,3,2.5,1400,4800,1,0,0,3,7,1200,200,1921,0,98117,47.6865,-122.379,1440,3840,5.37 6829900080,3/30/2015,3,1.5,1400,9750,1,0,0,4,6,1400,0,1964,0,98030,47.3768,-122.17,1160,9750,2.75 
6205500580,12/10/2014,3,2.5,2640,13775,1,0,0,3,8,1550,1090,1978,0,98005,47.5875,-122.177,2120,12432,5.3 7173700518,7/21/2014,3,1.5,2540,9520,1,0,0,3,8,1500,1040,1959,0,98115,47.6834,-122.306,1870,6800,6.9 1423089134,8/15/2014,3,2.25,2680,41250,2,0,0,3,7,2680,0,1984,0,98045,47.4817,-121.749,1940,47044,5.9 3819750170,3/10/2015,3,2.75,2080,9600,1,0,0,3,7,2080,0,1988,0,98028,47.7698,-122.238,2220,9600,4.15 1036700220,11/10/2014,4,2,2410,4680,2,0,0,3,9,2410,0,1974,0,98008,47.6234,-122.113,1910,4611,4.7 2207100405,5/6/2015,4,1.75,1730,7245,1,0,0,4,7,880,850,1955,0,98007,47.5995,-122.144,1550,7245,4.23 3222049151,10/30/2014,3,2.5,2990,10711,1,1,4,3,9,1560,1430,1976,1991,98198,47.3573,-122.324,2870,11476,8.2 1995200320,2/10/2015,3,2.25,1220,5739,1,0,0,3,7,790,430,1984,0,98115,47.6952,-122.326,1870,5739,2.8 2917200675,1/27/2015,2,1.75,1500,4158,1,0,0,4,7,1220,280,1947,0,98103,47.7006,-122.35,1270,4081,3.4 5437820020,8/7/2014,3,1.75,1580,7875,1,0,0,3,7,1580,0,1979,0,98022,47.1958,-122.003,1560,8314,1.95 9284801500,12/11/2014,3,3,1860,2875,2,0,0,3,8,1710,150,2009,0,98126,47.5511,-122.373,1350,4830,3.9995 6145600557