diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index b4b7209..dc45a15 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/data/scraped.csv b/data/scraped.csv index 51c10a1..49f8e0b 100644 --- a/data/scraped.csv +++ b/data/scraped.csv @@ -1,4 +1,4 @@ -,United States of America,Federal state,US USA 840,US,00,,,U.S.,U.S.,U.S.A.,,,,, +,United States of America,Federal state,".mw-parser-output .monospaced{font-family:monospace,monospace}USUSA840",US,00,,,U.S.,U.S.,U.S.A.,,,,, 12,Alabama,State,US-AL,AL,01,AL,AL,Ala.,Ala.,,,,,, 13,Alaska,State,US-AK,AK,02,AK,AK,Alaska,Alaska,Alas.,,,,, 14,Arizona,State,US-AZ,AZ,04,AZ,AZ,Ariz.,Ariz.,Az.,,,,, @@ -9,7 +9,7 @@ 19,Delaware,State,US-DE,DE,10,DE,DL,Del.,Del.,De.,,,,, 20,DistrictofColumbia,Federal district,US-DC,DC,11,DC,DC,D.C.,D.C.,Wash. D.C.,,,,, 21,Florida,State,US-FL,FL,12,FL,FL,Fla.,Fla.,"Fl., Flor.",,,,, -22,Georgia,State,US-GA,GA,13,GA,GA,Ga.,Ga.,,,,,, +22,Georgia,State,US-GA,GA,13,GA,GA,Ga.,Ga.,Geo.,,,,, 23,Hawaii,State,US-HI,HI,15,HI,HA,Hawaii,Hawaii,H.I.,,,,, 24,Idaho,State,US-ID,ID,16,ID,ID,Idaho,Idaho,"Id., Ida.",,,,, 25,Illinois,State,US-IL,IL,17,IL,IL,Ill.,Ill.,"Il., Ills., Ill's",,,,, @@ -50,12 +50,12 @@ 60,WestVirginia,State,US-WV,WV,54,WV,WV,W. Va.,W.Va.,"W.V., W. Virg.",,,,, 61,Wisconsin,State,US-WI,WI,55,WI,WS,Wis.,Wis.,"Wi., Wisc.",,,,, 62,Wyoming,State,US-WY,WY,56,WY,WY,Wyo.,Wyo.,Wy.,,,,, -63,AmericanSamoa,Insular area (Territory),AS ASM 016 US-AS,AS,60,AS,AS,A.S.,,,,,,, -64,Guam,Insular area (Territory),GU GUM 316 US-GU,GU,66,GU,GU,Guam,,,,,,, -65,NorthernMarianaIslands,Insular area (Commonwealth),MP MNP 580 US-MP,MP,69,MP,CM,M.P.,,CNMI[3],,,,, -66,PuertoRico,Insular area (Territory),PR PRI 630 US-PR,PR,72,PR,PR,P.R.,,,,,,, -67,U.S.VirginIslands,Insular area (Territory),VI VIR 850 US-VI,VI,78,VI,VI,V.I.,,U.S.V.I.,,,,, -68,U.S.MinorOutlyingIslands,Insular areas,UM UMI 581 US-UM,UM,74,,,,,,,,,, +63,AmericanSamoa,Insular area (Territory),ASASM016US-AS,AS,60,AS,AS,A.S.,,,,,,, +64,Guam,Insular area (Territory),GUGUM316US-GU,GU,66,GU,GU,Guam,,,,,,, +65,NorthernMarianaIslands,Insular area (Commonwealth),MPMNP580US-MP,MP,69,MP,CM,M.P.,,CNMI[3],,,,, +66,PuertoRico,Insular area (Territory),PRPRI630US-PR,PR,72,PR,PR,P.R.,,,,,,, +67,U.S.VirginIslands,Insular area (Territory),VIVIR850US-VI,VI,78,VI,VI,V.I.,,U.S.V.I.,,,,, +68,U.S.MinorOutlyingIslands,Insular areas,UMUMI581US-UM,UM,74,,,,,,,,,, 69,BakerIsland,island,UM-81,,81,,,,,XB[4],,,,, 70,HowlandIsland,island,UM-84,,84,,,,,XH[4],,,,, 71,JarvisIsland,island,UM-86,,86,,,,,XQ[4],,,,, @@ -65,14 +65,14 @@ 75,NavassaIsland,island,UM-76,,76,,,,,XV[4],,,,, 76,PalmyraAtoll[5],atoll[5],UM-95,,95,,,,,XL[4],,,,, 77,WakeIsland,atoll,UM-79,,79,,,,,QW[4],,,,, -78,Micronesia,Freely associated state,FM FSM 583,FM,64,FM,,,,,,,,, -79,MarshallIslands,Freely associated state,MH MHL 584,MH,68,MH,,,,,,,,, -80,Palau,Freely associated state,PW PLW 585,PW,70,PW,,,,,,,,, +78,Micronesia,Freely associated state,FMFSM583,FM,64,FM,,,,,,,,, +79,MarshallIslands,Freely associated state,MHMHL584,MH,68,MH,,,,,,,,, +80,Palau,Freely associated state,PWPLW585,PW,70,PW,,,,,,,,, 81,U.S.ArmedForces–Americas[6],US military mail code,,,,AA,,,,,,,,, 82,U.S.ArmedForces–Europe[7],US military mail code,,,,AE,,,,,,,,, 83,U.S.ArmedForces–Pacific[8],US military mail code,,,,AP,,,,,,,,, 84,NorthernMarianaIslands,Obsolete postal code[9],,,,CM,,,,,,,,, -85,PanamaCanalZone,Obsolete postal code,PZ PCZ 594,,,CZ,,,,,,,,, +85,PanamaCanalZone,Obsolete postal code,PZPCZ594,,,CZ,,,,,,,,, 86,Nebraska,Obsolete postal code[10],,,,NB,,,,,,,,, -87,PhilippineIslands,Obsolete postal code,PH PHL 608[11],,,PI,,,,,,,,, -88,TrustTerritoryofthePacificIslands,Obsolete postal code,PC PCI 582,,,TT,,,,,,,,, +87,PhilippineIslands,Obsolete postal code,PHPHL608[11],,,PI,,,,,,,,, +88,TrustTerritoryofthePacificIslands,Obsolete postal code,PCPCI582,,,TT,,,,,,,,, diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 92b3ac2..66c265f 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index e27baf6..6c96ba1 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index 69d7209..0c48593 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,5 +1,18 @@ +# %load q01_load_data/build.py import pandas as pd +import numpy as np + +def q01_load_data(path='data/excel-comp-data.xlsx'): + #'write your solution here' + df=pd.read_excel(path) + df=pd.DataFrame(df) + df['state']=df['state'].str.lower()#Series column lower case syntax is str.lower() + df['Total']=df['Jan']+df['Feb']+df['Mar']#+ operator works coz its overloaded + return df + +c=q01_load_data(path='data/excel-comp-data.xlsx') +c.shape + + -def q01_load_data(path): - "write your solution here" diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index 2a2dfc7..351559c 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/tests.cpython-36.pyc b/q01_load_data/tests/__pycache__/tests.cpython-36.pyc index 76e04c8..90884c9 100644 Binary files a/q01_load_data/tests/__pycache__/tests.cpython-36.pyc and b/q01_load_data/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q02_append_row/__pycache__/__init__.cpython-36.pyc b/q02_append_row/__pycache__/__init__.cpython-36.pyc index de0cf61..8ed224d 100644 Binary files a/q02_append_row/__pycache__/__init__.cpython-36.pyc and b/q02_append_row/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_append_row/__pycache__/build.cpython-36.pyc b/q02_append_row/__pycache__/build.cpython-36.pyc index 5088267..e5c539d 100644 Binary files a/q02_append_row/__pycache__/build.cpython-36.pyc and b/q02_append_row/__pycache__/build.cpython-36.pyc differ diff --git a/q02_append_row/build.py b/q02_append_row/build.py index af3701d..ba35ec5 100644 --- a/q02_append_row/build.py +++ b/q02_append_row/build.py @@ -1,11 +1,23 @@ +# %load q02_append_row/build.py import pandas as pd import sys, os #sys.path.append(os.path.join(os.path.dirname(os.curdir))) from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data +df= q01_load_data(path='data/excel-comp-data.xlsx') +def q02_append_row(path='data/excel-comp-data.xlsx'): + a=df.append(df[['Jan','Feb','Mar','total']].sum(),ignore_index=True) + return a +# a=df.append([df[['Jan','Feb','Mar']].sum()],ignore_index=True)<--This is how you create a new row in a dataframe + + -def q02_append_row(path): - "write your solution here" + + + + +c=q02_append_row(path='data/excel-comp-data.xlsx') +c diff --git a/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc b/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc index dab3eca..643f222 100644 Binary files a/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc and b/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_append_row/tests/__pycache__/tests.cpython-36.pyc b/q02_append_row/tests/__pycache__/tests.cpython-36.pyc index 742ee79..120a9b1 100644 Binary files a/q02_append_row/tests/__pycache__/tests.cpython-36.pyc and b/q02_append_row/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc b/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc index e99e173..137f514 100644 Binary files a/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc and b/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_scrape_clean/__pycache__/build.cpython-36.pyc b/q03_scrape_clean/__pycache__/build.cpython-36.pyc index cdec2c4..612568a 100644 Binary files a/q03_scrape_clean/__pycache__/build.cpython-36.pyc and b/q03_scrape_clean/__pycache__/build.cpython-36.pyc differ diff --git a/q03_scrape_clean/build.py b/q03_scrape_clean/build.py index a88e3e2..4d00532 100644 --- a/q03_scrape_clean/build.py +++ b/q03_scrape_clean/build.py @@ -1,9 +1,42 @@ +# %load q03_scrape_clean/build.py import pandas as pd import sys, os import requests sys.path.append(os.path.join(os.path.dirname(os.curdir))) +from bs4 import BeautifulSoup + +def q03_scrape_clean(url='https://en.wikipedia.org/wiki/List_of_U.S._state_abbreviations'): + a=requests.get(url) + soup=BeautifulSoup(a.content,'html.parser') + stat=soup.find_all('table',class_='wikitable')[0]#.tbody.tr.decompose() + #stat.tbody.tr.decompose() + df=pd.read_html(str(stat))[0] + df=pd.DataFrame(df) + df=df[9:] + df=df.drop([10,9,11])#<--Directly pass a number and not a list for a single row + df=df.reset_index(drop=True) + df[0].str.replace(' ','') + return df + + +# rows=stat[0].tbody.find_all('tr') +# headers=stat[0].tbody.find_all('td') +# for row in rows: +# for cell in headers: + +# res=requests.get(url) +# soup=BeautifulSoup(res.content,'lxml') +# table=soup.find_all('table')[0] +# return table + + + + + + + +c=q03_scrape_clean(url='https://en.wikipedia.org/wiki/List_of_U.S._state_abbreviations') +c -def q03_scrape_clean(url): - "write your solution here" diff --git a/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc b/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc index bee36fb..d4d1168 100644 Binary files a/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc and b/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc b/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc index 8529c87..cd8aa4c 100644 Binary files a/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc and b/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q04_mapping/__pycache__/__init__.cpython-36.pyc b/q04_mapping/__pycache__/__init__.cpython-36.pyc index ee0618f..833b033 100644 Binary files a/q04_mapping/__pycache__/__init__.cpython-36.pyc and b/q04_mapping/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_mapping/__pycache__/build.cpython-36.pyc b/q04_mapping/__pycache__/build.cpython-36.pyc index 8283165..4e2768f 100644 Binary files a/q04_mapping/__pycache__/build.cpython-36.pyc and b/q04_mapping/__pycache__/build.cpython-36.pyc differ diff --git a/q04_mapping/build.py b/q04_mapping/build.py index 914cfa8..04735bd 100644 --- a/q04_mapping/build.py +++ b/q04_mapping/build.py @@ -1,10 +1,71 @@ +# %load q04_mapping/build.py import pandas as pd import sys, os import numpy as np #sys.path.append(os.path.join(os.path.dirname(os.curdir))) +from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data from greyatomlib.pandas_guided_project.q02_append_row.build import q02_append_row -def q04_mapping(path1,path2): - "write your solution here" +from greyatomlib.pandas_guided_project.q03_scrape_clean.build import q03_scrape_clean +df= q01_load_data(path='data/excel-comp-data.xlsx') +a=df.append(df[['Jan','Feb','Mar','total']].sum(),ignore_index=True) +a +b=q03_scrape_clean(url='https://en.wikipedia.org/wiki/List_of_U.S._state_abbreviations' ) +b.reset_index(drop=True) +def q04_mapping(path1='data/excel-comp-data.xlsx',path2='data/scraped.csv'): +# mapping={} +# for i,j in zip(b['United States of America'],b['US']): +# mapping[i]=j + +# state=[] +# for i in (a['state']): +# state.append(i) + +# f=[] +# # for i,j in mapping.items(): +# # for k in state: +# # print(k) +# del state[15] +# for i in state: +# f.append(i.capitalize()) + +# for i,j in enumerate(f): +# if j=='Northcarolina': +# f[i]='NorthCarolina' +# elif j=='Mississipi': +# f[i]='Mississippi' +# elif j=='Rhodeisland': +# f[i]='RhodeIsland' +# elif j=='Tenessee': +# f[i]='Tennessee' +# elif j=='Northdakota': +# f[i]='NorthDakota' +# g=[] +# for j in f: +# for i in mapping.items(): +# if j==i[0]: +# g.append(i[1]) +# g.append('NaN') +# a.insert(loc=6,column='abbr',value=g) +# return a + df1=pd.read_excel(path1) + df1['total']=df1['Jan']+df1['Feb']+df1['Mar'] + df1.loc[len(df1),:]=df1.sum() + df2=pd.read_csv(path2) + abbr_dict=dict(zip(df2.iloc[:,[1,6]]['United States of America'],df2['US'])) + dftemp=df1['state'].map(abbr_dict) + df1.insert(loc=6,column='abbr',value=dftemp) + return df1 + + + + + + +c=q04_mapping(path1='data/excel-comp-data.xlsx',path2='data/scraped.csv') +c + + + diff --git a/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc b/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc index eef3d6b..5e7c93c 100644 Binary files a/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc and b/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_mapping/tests/__pycache__/test.cpython-36.pyc b/q04_mapping/tests/__pycache__/test.cpython-36.pyc index 7f7c96e..0786be8 100644 Binary files a/q04_mapping/tests/__pycache__/test.cpython-36.pyc and b/q04_mapping/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc b/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc index f50c1d5..6c153dd 100644 Binary files a/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc and b/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_replace_missing_values/__pycache__/build.cpython-36.pyc b/q05_replace_missing_values/__pycache__/build.cpython-36.pyc index 6a32964..04ed37e 100644 Binary files a/q05_replace_missing_values/__pycache__/build.cpython-36.pyc and b/q05_replace_missing_values/__pycache__/build.cpython-36.pyc differ diff --git a/q05_replace_missing_values/build.py b/q05_replace_missing_values/build.py index 97d9755..efdd825 100644 --- a/q05_replace_missing_values/build.py +++ b/q05_replace_missing_values/build.py @@ -1,3 +1,4 @@ +# %load q05_replace_missing_values/build.py import pandas as pd import numpy as np import sys @@ -8,6 +9,67 @@ path1 = 'data/excel-comp-data.xlsx' path2 = 'data/scraped.csv' def q05_replace_missing_values(path1,path2): + df1=pd.read_excel(path1) + df1['total']=df1['Jan']+df1['Feb']+df1['Mar'] + df1.loc[len(df1),:]=df1.sum() + df2=pd.read_csv(path2) + abbr_dict=dict(zip(df2.iloc[:,[1,6]]['United States of America'],df2['US']))# + f=list(df1['state']) + for i,j in enumerate(f): + if j=='Northcarolina': + f[i]='NorthCarolina' + elif j=='Mississipi': + f[i]='Mississippi' + elif j=='Rhodeisland': + f[i]='RhodeIsland' + elif j=='Tenessee': + f[i]='Tennessee' + elif j=='Northdakota': + f[i]='NorthDakota' + df1['state']=f + dftemp=df1['state'].map(abbr_dict) + df1.insert(loc=6,column='abbr',value=dftemp) + return df1 + +# mapping={} +# for i,j in zip(df2['United States of America'],df2['US']): +# mapping[i]=j + +# state=[] +# for i in (df1['state']): +# state.append(i) + +# f=[] +# # for i,j in mapping.items(): +# # for k in state: +# # print(k) +# # del state[15] +# for i in state: +# f.append(i.capitalize()) + +# for i,j in enumerate(f): +# if j=='Northcarolina': +# f[i]='NorthCarolina' +# elif j=='Mississipi': +# f[i]='Mississippi' +# elif j=='Rhodeisland': +# f[i]='RhodeIsland' +# elif j=='Tenessee': +# f[i]='Tennessee' +# elif j=='Northdakota': +# f[i]='NorthDakota' +# g=[] +# for j in f: +# for i in mapping.items(): +# if j==i[0]: +# g.append(i[1]) +# g.append('NaN') +# df1.insert(loc=6,column='abbr',value=g) +# return df1 + + + +c=q05_replace_missing_values(path1,path2) +c -#print(q05_replace_missing_values(path1,path2).shape) \ No newline at end of file diff --git a/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc b/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc index 03391a7..c4e0ccb 100644 Binary files a/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc and b/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc b/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc index 3b9d62a..edc4d21 100644 Binary files a/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc and b/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q06_sub_total/__pycache__/__init__.cpython-36.pyc b/q06_sub_total/__pycache__/__init__.cpython-36.pyc index f70134c..5e89ae5 100644 Binary files a/q06_sub_total/__pycache__/__init__.cpython-36.pyc and b/q06_sub_total/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_sub_total/__pycache__/build.cpython-36.pyc b/q06_sub_total/__pycache__/build.cpython-36.pyc index adaf0ce..6310ecb 100644 Binary files a/q06_sub_total/__pycache__/build.cpython-36.pyc and b/q06_sub_total/__pycache__/build.cpython-36.pyc differ diff --git a/q06_sub_total/build.py b/q06_sub_total/build.py index c420838..bde6837 100644 --- a/q06_sub_total/build.py +++ b/q06_sub_total/build.py @@ -1,3 +1,4 @@ +# %load q06_sub_total/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split @@ -10,7 +11,32 @@ path2 = 'data/scraped.csv' def q06_sub_total(path1,path2): - "write your solution here" + df1=pd.read_excel(path1) + df1['total']=df1['Jan']+df1['Feb']+df1['Mar'] + df1.loc[len(df1),:]=df1.sum() + df2=pd.read_csv(path2) + abbr_dict=dict(zip(df2.iloc[:,[1,6]]['United States of America'],df2['US']))# + f=list(df1['state']) + for i,j in enumerate(f): + if j=='Northcarolina': + f[i]='NorthCarolina' + elif j=='Mississipi': + f[i]='Mississippi' + elif j=='Rhodeisland': + f[i]='RhodeIsland' + elif j=='Tenessee': + f[i]='Tennessee' + elif j=='Northdakota': + f[i]='NorthDakota' + df1['state']=f + dftemp=df1['state'].map(abbr_dict) + df1.insert(loc=6,column='abbr',value=dftemp) + groupby_abbr_sum=df1.groupby('abbr')[['Jan','Feb','Mar','total']].sum() + return groupby_abbr_sum +c=q06_sub_total(path1,path2) +c + + diff --git a/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc b/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc index 93ecd56..7bab2b3 100644 Binary files a/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc and b/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_sub_total/tests/__pycache__/test.cpython-36.pyc b/q06_sub_total/tests/__pycache__/test.cpython-36.pyc index 691280a..b758ee9 100644 Binary files a/q06_sub_total/tests/__pycache__/test.cpython-36.pyc and b/q06_sub_total/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q07_symbols/__pycache__/__init__.cpython-36.pyc b/q07_symbols/__pycache__/__init__.cpython-36.pyc index 60b0cca..1f4a614 100644 Binary files a/q07_symbols/__pycache__/__init__.cpython-36.pyc and b/q07_symbols/__pycache__/__init__.cpython-36.pyc differ diff --git a/q07_symbols/__pycache__/build.cpython-36.pyc b/q07_symbols/__pycache__/build.cpython-36.pyc index d28eaa9..e1b34a9 100644 Binary files a/q07_symbols/__pycache__/build.cpython-36.pyc and b/q07_symbols/__pycache__/build.cpython-36.pyc differ diff --git a/q07_symbols/build.py b/q07_symbols/build.py index b8cbb92..5a9c283 100644 --- a/q07_symbols/build.py +++ b/q07_symbols/build.py @@ -1,18 +1,53 @@ +# %load q07_symbols/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split import sys import os -sys.path.append(os.path.join(os.path.dirname(os.curdir))) -from greyatomlib.pandas_guided_project.q06_sub_total.build import q06_sub_total +# sys.path.append(os.path.join(os.path.dirname(os.curdir))) +# from greyatomlib.pandas_guided_project.q06_sub_total.build import q06_sub_total path1 = 'data/excel-comp-data.xlsx' path2 = 'data/scraped.csv' def q07_symbols(path1,path2): - "write your solution here" + df1=pd.read_excel(path1) + df1['total']=df1['Jan']+df1['Feb']+df1['Mar'] + df1.loc[len(df1),:]=df1.sum() + df2=pd.read_csv(path2) + abbr_dict=dict(zip(df2.iloc[:,[1,6]]['United States of America'],df2['US']))# + f=list(df1['state']) + for i,j in enumerate(f): + if j=='Northcarolina': + f[i]='NorthCarolina' + elif j=='Mississipi': + f[i]='Mississippi' + elif j=='Rhodeisland': + f[i]='RhodeIsland' + elif j=='Tenessee': + f[i]='Tennessee' + elif j=='Northdakota': + f[i]='NorthDakota' + df1['state']=f + dftemp=df1['state'].map(abbr_dict) + df1.insert(loc=6,column='abbr',value=dftemp) + groupby_abbr_sum=df1.groupby('abbr')[['Jan','Feb','Mar','total']].sum() + df1[['Jan','Feb','Mar','total']]=df1[['Jan','Feb','Mar','total']].applymap(lambda x:('$'+str(int(x)))) + f=list(df1['total']) + for i,j in enumerate(f): + if j=='05000': + f[i]='05,000' + df1['total']=pd.DataFrame(f) + return df1 + + + -#print(q07_symbols(path1,path2)) + +c=q07_symbols(path1,path2) + +c + diff --git a/q07_symbols/tests/__pycache__/__init__.cpython-36.pyc b/q07_symbols/tests/__pycache__/__init__.cpython-36.pyc index f854b4a..9bf6250 100644 Binary files a/q07_symbols/tests/__pycache__/__init__.cpython-36.pyc and b/q07_symbols/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q07_symbols/tests/__pycache__/test.cpython-36.pyc b/q07_symbols/tests/__pycache__/test.cpython-36.pyc index 1a8a9c3..f2e1ad0 100644 Binary files a/q07_symbols/tests/__pycache__/test.cpython-36.pyc and b/q07_symbols/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q09_pie_chart_jan/__pycache__/__init__.cpython-36.pyc b/q09_pie_chart_jan/__pycache__/__init__.cpython-36.pyc index a0e3add..fdb6fce 100644 Binary files a/q09_pie_chart_jan/__pycache__/__init__.cpython-36.pyc and b/q09_pie_chart_jan/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_pie_chart_jan/__pycache__/build.cpython-36.pyc b/q09_pie_chart_jan/__pycache__/build.cpython-36.pyc index 25a6c03..78e8454 100644 Binary files a/q09_pie_chart_jan/__pycache__/build.cpython-36.pyc and b/q09_pie_chart_jan/__pycache__/build.cpython-36.pyc differ diff --git a/q09_pie_chart_jan/build.py b/q09_pie_chart_jan/build.py index 6483bc6..25335ae 100644 --- a/q09_pie_chart_jan/build.py +++ b/q09_pie_chart_jan/build.py @@ -1,15 +1,45 @@ +# %load q09_pie_chart_jan/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split -import sys,os -sys.path.append(os.path.join(os.path.dirname(os.curdir))) -from greyatomlib.pandas_guided_project.q06_sub_total.build import q06_sub_total +# import sys,os +# sys.path.append(os.path.join(os.path.dirname(os.curdir))) +# from greyatomlib.pandas_guided_project.q06_sub_total.build import q06_sub_total import matplotlib.pyplot as plt plt.switch_backend('agg') +path1 = 'data/excel-comp-data.xlsx' +path2 = 'data/scraped.csv' + def q09_pie_chart_jan(path1,path2): + df1=pd.read_excel(path1) + df1['total']=df1['Jan']+df1['Feb']+df1['Mar'] + df1.loc[len(df1),:]=df1.sum() + df2=pd.read_csv(path2) + abbr_dict=dict(zip(df2.iloc[:,[1,6]]['United States of America'],df2['US']))# + f=list(df1['state']) + for i,j in enumerate(f): + if j=='Northcarolina': + f[i]='NorthCarolina' + elif j=='Mississipi': + f[i]='Mississippi' + elif j=='Rhodeisland': + f[i]='RhodeIsland' + elif j=='Tenessee': + f[i]='Tennessee' + elif j=='Northdakota': + f[i]='NorthDakota' + df1['state']=f + dftemp=df1['state'].map(abbr_dict) + df1.insert(loc=6,column='abbr',value=dftemp) + groupby_abbr_sum=df1.groupby('abbr')[['Jan','Feb','Mar','total']].sum() + return plt.pie(df1['Jan']) + + + - "write your solution here" +c=q09_pie_chart_jan(path1,path2) +c diff --git a/q09_pie_chart_jan/tests/__pycache__/__init__.cpython-36.pyc b/q09_pie_chart_jan/tests/__pycache__/__init__.cpython-36.pyc index 07ab367..60696c3 100644 Binary files a/q09_pie_chart_jan/tests/__pycache__/__init__.cpython-36.pyc and b/q09_pie_chart_jan/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_pie_chart_jan/tests/__pycache__/tests.cpython-36.pyc b/q09_pie_chart_jan/tests/__pycache__/tests.cpython-36.pyc index b3b93c2..84abaae 100644 Binary files a/q09_pie_chart_jan/tests/__pycache__/tests.cpython-36.pyc and b/q09_pie_chart_jan/tests/__pycache__/tests.cpython-36.pyc differ