diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index b4b7209..cc1a6ca 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/data/scraped.csv b/data/scraped.csv index 51c10a1..49f8e0b 100644 --- a/data/scraped.csv +++ b/data/scraped.csv @@ -1,4 +1,4 @@ -,United States of America,Federal state,US USA 840,US,00,,,U.S.,U.S.,U.S.A.,,,,, +,United States of America,Federal state,".mw-parser-output .monospaced{font-family:monospace,monospace}USUSA840",US,00,,,U.S.,U.S.,U.S.A.,,,,, 12,Alabama,State,US-AL,AL,01,AL,AL,Ala.,Ala.,,,,,, 13,Alaska,State,US-AK,AK,02,AK,AK,Alaska,Alaska,Alas.,,,,, 14,Arizona,State,US-AZ,AZ,04,AZ,AZ,Ariz.,Ariz.,Az.,,,,, @@ -9,7 +9,7 @@ 19,Delaware,State,US-DE,DE,10,DE,DL,Del.,Del.,De.,,,,, 20,DistrictofColumbia,Federal district,US-DC,DC,11,DC,DC,D.C.,D.C.,Wash. D.C.,,,,, 21,Florida,State,US-FL,FL,12,FL,FL,Fla.,Fla.,"Fl., Flor.",,,,, -22,Georgia,State,US-GA,GA,13,GA,GA,Ga.,Ga.,,,,,, +22,Georgia,State,US-GA,GA,13,GA,GA,Ga.,Ga.,Geo.,,,,, 23,Hawaii,State,US-HI,HI,15,HI,HA,Hawaii,Hawaii,H.I.,,,,, 24,Idaho,State,US-ID,ID,16,ID,ID,Idaho,Idaho,"Id., Ida.",,,,, 25,Illinois,State,US-IL,IL,17,IL,IL,Ill.,Ill.,"Il., Ills., Ill's",,,,, @@ -50,12 +50,12 @@ 60,WestVirginia,State,US-WV,WV,54,WV,WV,W. Va.,W.Va.,"W.V., W. Virg.",,,,, 61,Wisconsin,State,US-WI,WI,55,WI,WS,Wis.,Wis.,"Wi., Wisc.",,,,, 62,Wyoming,State,US-WY,WY,56,WY,WY,Wyo.,Wyo.,Wy.,,,,, -63,AmericanSamoa,Insular area (Territory),AS ASM 016 US-AS,AS,60,AS,AS,A.S.,,,,,,, -64,Guam,Insular area (Territory),GU GUM 316 US-GU,GU,66,GU,GU,Guam,,,,,,, -65,NorthernMarianaIslands,Insular area (Commonwealth),MP MNP 580 US-MP,MP,69,MP,CM,M.P.,,CNMI[3],,,,, -66,PuertoRico,Insular area (Territory),PR PRI 630 US-PR,PR,72,PR,PR,P.R.,,,,,,, -67,U.S.VirginIslands,Insular area (Territory),VI VIR 850 US-VI,VI,78,VI,VI,V.I.,,U.S.V.I.,,,,, -68,U.S.MinorOutlyingIslands,Insular areas,UM UMI 581 US-UM,UM,74,,,,,,,,,, +63,AmericanSamoa,Insular area (Territory),ASASM016US-AS,AS,60,AS,AS,A.S.,,,,,,, +64,Guam,Insular area (Territory),GUGUM316US-GU,GU,66,GU,GU,Guam,,,,,,, +65,NorthernMarianaIslands,Insular area (Commonwealth),MPMNP580US-MP,MP,69,MP,CM,M.P.,,CNMI[3],,,,, +66,PuertoRico,Insular area (Territory),PRPRI630US-PR,PR,72,PR,PR,P.R.,,,,,,, +67,U.S.VirginIslands,Insular area (Territory),VIVIR850US-VI,VI,78,VI,VI,V.I.,,U.S.V.I.,,,,, +68,U.S.MinorOutlyingIslands,Insular areas,UMUMI581US-UM,UM,74,,,,,,,,,, 69,BakerIsland,island,UM-81,,81,,,,,XB[4],,,,, 70,HowlandIsland,island,UM-84,,84,,,,,XH[4],,,,, 71,JarvisIsland,island,UM-86,,86,,,,,XQ[4],,,,, @@ -65,14 +65,14 @@ 75,NavassaIsland,island,UM-76,,76,,,,,XV[4],,,,, 76,PalmyraAtoll[5],atoll[5],UM-95,,95,,,,,XL[4],,,,, 77,WakeIsland,atoll,UM-79,,79,,,,,QW[4],,,,, -78,Micronesia,Freely associated state,FM FSM 583,FM,64,FM,,,,,,,,, -79,MarshallIslands,Freely associated state,MH MHL 584,MH,68,MH,,,,,,,,, -80,Palau,Freely associated state,PW PLW 585,PW,70,PW,,,,,,,,, +78,Micronesia,Freely associated state,FMFSM583,FM,64,FM,,,,,,,,, +79,MarshallIslands,Freely associated state,MHMHL584,MH,68,MH,,,,,,,,, +80,Palau,Freely associated state,PWPLW585,PW,70,PW,,,,,,,,, 81,U.S.ArmedForces–Americas[6],US military mail code,,,,AA,,,,,,,,, 82,U.S.ArmedForces–Europe[7],US military mail code,,,,AE,,,,,,,,, 83,U.S.ArmedForces–Pacific[8],US military mail code,,,,AP,,,,,,,,, 84,NorthernMarianaIslands,Obsolete postal code[9],,,,CM,,,,,,,,, -85,PanamaCanalZone,Obsolete postal code,PZ PCZ 594,,,CZ,,,,,,,,, +85,PanamaCanalZone,Obsolete postal code,PZPCZ594,,,CZ,,,,,,,,, 86,Nebraska,Obsolete postal code[10],,,,NB,,,,,,,,, -87,PhilippineIslands,Obsolete postal code,PH PHL 608[11],,,PI,,,,,,,,, -88,TrustTerritoryofthePacificIslands,Obsolete postal code,PC PCI 582,,,TT,,,,,,,,, +87,PhilippineIslands,Obsolete postal code,PHPHL608[11],,,PI,,,,,,,,, +88,TrustTerritoryofthePacificIslands,Obsolete postal code,PCPCI582,,,TT,,,,,,,,, diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 92b3ac2..6d17fae 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index e27baf6..4669216 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index 061a01f..e8ba622 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,5 +1,14 @@ +# %load q01_load_data/build.py import pandas as pd path = 'data/excel-comp-data.xlsx' def q01_load_data(path): - "write your solution here" + 'write your solution here' + df = pd.read_excel(path) + df['state'] = df['state'].apply(lambda x: x.lower()) + df['total'] = df['Jan'] + df['Feb'] + df['Mar'] + return df + +q01_load_data(path) + + diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index 2a2dfc7..3d19641 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/tests.cpython-36.pyc b/q01_load_data/tests/__pycache__/tests.cpython-36.pyc index 76e04c8..2772771 100644 Binary files a/q01_load_data/tests/__pycache__/tests.cpython-36.pyc and b/q01_load_data/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q02_append_row/__pycache__/__init__.cpython-36.pyc b/q02_append_row/__pycache__/__init__.cpython-36.pyc index de0cf61..21d8928 100644 Binary files a/q02_append_row/__pycache__/__init__.cpython-36.pyc and b/q02_append_row/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_append_row/__pycache__/build.cpython-36.pyc b/q02_append_row/__pycache__/build.cpython-36.pyc index 5088267..43a9a0d 100644 Binary files a/q02_append_row/__pycache__/build.cpython-36.pyc and b/q02_append_row/__pycache__/build.cpython-36.pyc differ diff --git a/q02_append_row/build.py b/q02_append_row/build.py index f54f37e..3c47202 100644 --- a/q02_append_row/build.py +++ b/q02_append_row/build.py @@ -1,14 +1,46 @@ -import pandas as pd -import sys, os +# %load q02_append_row/build.py +#import pandas as pd +#import sys, os #sys.path.append(os.path.join(os.path.dirname(os.curdir))) -from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data +#from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data +#from q01_load_data.build import q01_load_data +#path = 'data/excel-comp-data.xlsx' -path = 'data/excel-comp-data.xlsx' + +#import pandas as pd +#import sys, os +#sys.path.append(os.path.join(os.path.dirname(os.curdir))) +''' def q02_append_row(path): - "write your solution here" + 'write your solution here' + df = q01_load_data(path) + sum_row = df[['Jan', 'Feb', 'Mar', 'total']].sum() + df_sum = pd.DataFrame(data=sum_row).T + df_final = df.append(df_sum, ignore_index=True) + return df_final +''' +import pandas as pd +import sys, os +sys.path.append(os.path.join(os.path.dirname(os.curdir))) +from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data +#from q01_load_data.build import q01_load_data +path = 'data/excel-comp-data.xlsx' +def q02_append_row(path): + df = q01_load_data(path) + df1 = df[['Jan', 'Feb', 'Mar', 'total']].sum(axis=0) + df1 = pd.DataFrame(df1) + df2 = pd.DataFrame(columns=['Jan', 'Feb', 'Mar', 'total']) + df2['Jan'] = df1.loc['Jan'] + df2['Feb'] = df1.loc['Feb'] + df2['Mar'] = df1.loc['Mar'] + df2['total'] = df1.loc['total'] + df3 = df.append(df2, ignore_index=True) + return df3 + +q02_append_row(path) diff --git a/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc b/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc index dab3eca..97142d4 100644 Binary files a/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc and b/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_append_row/tests/__pycache__/tests.cpython-36.pyc b/q02_append_row/tests/__pycache__/tests.cpython-36.pyc index 742ee79..55ef229 100644 Binary files a/q02_append_row/tests/__pycache__/tests.cpython-36.pyc and b/q02_append_row/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc b/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc index e99e173..b0dcfb8 100644 Binary files a/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc and b/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_scrape_clean/__pycache__/build.cpython-36.pyc b/q03_scrape_clean/__pycache__/build.cpython-36.pyc index cdec2c4..4cc9117 100644 Binary files a/q03_scrape_clean/__pycache__/build.cpython-36.pyc and b/q03_scrape_clean/__pycache__/build.cpython-36.pyc differ diff --git a/q03_scrape_clean/build.py b/q03_scrape_clean/build.py index a88e3e2..ae4bdb7 100644 --- a/q03_scrape_clean/build.py +++ b/q03_scrape_clean/build.py @@ -1,9 +1,21 @@ +# %load q03_scrape_clean/build.py import pandas as pd import sys, os import requests sys.path.append(os.path.join(os.path.dirname(os.curdir))) - +url = 'https://en.wikipedia.org/wiki/List_of_U.S._state_abbreviations' def q03_scrape_clean(url): - "write your solution here" + 'write your solution here' + response = requests.get(url) + df1 = pd.read_html(response.content)[0] + df1 = df1.iloc[11:, :] + df1 = df1.rename(columns=df1.iloc[0, :]).iloc[1:, :] + df1['United States of America'] = df1['United States of America'].apply(lambda x: x.replace(' ', '')).astype(object) + df1.to_csv('data/scraped.csv') + return df1 + + +q03_scrape_clean(url) + diff --git a/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc b/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc index bee36fb..5fdb06a 100644 Binary files a/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc and b/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc b/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc index 8529c87..fe27bfe 100644 Binary files a/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc and b/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q04_mapping/__pycache__/__init__.cpython-36.pyc b/q04_mapping/__pycache__/__init__.cpython-36.pyc index ee0618f..b76b932 100644 Binary files a/q04_mapping/__pycache__/__init__.cpython-36.pyc and b/q04_mapping/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_mapping/__pycache__/build.cpython-36.pyc b/q04_mapping/__pycache__/build.cpython-36.pyc index 8283165..af94689 100644 Binary files a/q04_mapping/__pycache__/build.cpython-36.pyc and b/q04_mapping/__pycache__/build.cpython-36.pyc differ diff --git a/q04_mapping/build.py b/q04_mapping/build.py index 70c43fe..1c624bd 100644 --- a/q04_mapping/build.py +++ b/q04_mapping/build.py @@ -1,12 +1,30 @@ +# %load q04_mapping/build.py import pandas as pd import sys, os import numpy as np +from greyatomlib.pandas_guided_project.q02_append_row.build import q02_append_row, q01_load_data +sys.path.append(os.path.join(os.path.dirname(os.curdir))) path1 = 'data/excel-comp-data.xlsx' path2 = 'data/scraped.csv' -#sys.path.append(os.path.join(os.path.dirname(os.curdir))) -from greyatomlib.pandas_guided_project.q02_append_row.build import q02_append_row + def q04_mapping(path1,path2): - "write your solution here" + 'write your solution here' + df_final= q02_append_row(path1) + scraped = pd.read_csv(path2) + print(df_final.head()) + print(scraped.head()) + scraped['United States of America'] = scraped['United States of America'].astype(str).apply(lambda x: x.lower()) + scraped['US'] = scraped['US'].astype(str) + mapping = scraped.set_index('United States of America')['US'].to_dict() + df_final.insert(6, 'abbr', np.nan) + df_final['abbr'] = df_final['state'].map(mapping) + return df_final + + +q04_mapping(path1,path2) +# Df=pd.read_excel(path1) +# print(Df.head(5)) +q04_mapping(path1,path2) diff --git a/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc b/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc index eef3d6b..0b47c82 100644 Binary files a/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc and b/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_mapping/tests/__pycache__/test.cpython-36.pyc b/q04_mapping/tests/__pycache__/test.cpython-36.pyc index 7f7c96e..165730a 100644 Binary files a/q04_mapping/tests/__pycache__/test.cpython-36.pyc and b/q04_mapping/tests/__pycache__/test.cpython-36.pyc differ