diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index b4b7209..8afdb94 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/data/scraped.csv b/data/scraped.csv index 51c10a1..49f8e0b 100644 --- a/data/scraped.csv +++ b/data/scraped.csv @@ -1,4 +1,4 @@ -,United States of America,Federal state,US USA 840,US,00,,,U.S.,U.S.,U.S.A.,,,,, +,United States of America,Federal state,".mw-parser-output .monospaced{font-family:monospace,monospace}USUSA840",US,00,,,U.S.,U.S.,U.S.A.,,,,, 12,Alabama,State,US-AL,AL,01,AL,AL,Ala.,Ala.,,,,,, 13,Alaska,State,US-AK,AK,02,AK,AK,Alaska,Alaska,Alas.,,,,, 14,Arizona,State,US-AZ,AZ,04,AZ,AZ,Ariz.,Ariz.,Az.,,,,, @@ -9,7 +9,7 @@ 19,Delaware,State,US-DE,DE,10,DE,DL,Del.,Del.,De.,,,,, 20,DistrictofColumbia,Federal district,US-DC,DC,11,DC,DC,D.C.,D.C.,Wash. D.C.,,,,, 21,Florida,State,US-FL,FL,12,FL,FL,Fla.,Fla.,"Fl., Flor.",,,,, -22,Georgia,State,US-GA,GA,13,GA,GA,Ga.,Ga.,,,,,, +22,Georgia,State,US-GA,GA,13,GA,GA,Ga.,Ga.,Geo.,,,,, 23,Hawaii,State,US-HI,HI,15,HI,HA,Hawaii,Hawaii,H.I.,,,,, 24,Idaho,State,US-ID,ID,16,ID,ID,Idaho,Idaho,"Id., Ida.",,,,, 25,Illinois,State,US-IL,IL,17,IL,IL,Ill.,Ill.,"Il., Ills., Ill's",,,,, @@ -50,12 +50,12 @@ 60,WestVirginia,State,US-WV,WV,54,WV,WV,W. Va.,W.Va.,"W.V., W. Virg.",,,,, 61,Wisconsin,State,US-WI,WI,55,WI,WS,Wis.,Wis.,"Wi., Wisc.",,,,, 62,Wyoming,State,US-WY,WY,56,WY,WY,Wyo.,Wyo.,Wy.,,,,, -63,AmericanSamoa,Insular area (Territory),AS ASM 016 US-AS,AS,60,AS,AS,A.S.,,,,,,, -64,Guam,Insular area (Territory),GU GUM 316 US-GU,GU,66,GU,GU,Guam,,,,,,, -65,NorthernMarianaIslands,Insular area (Commonwealth),MP MNP 580 US-MP,MP,69,MP,CM,M.P.,,CNMI[3],,,,, -66,PuertoRico,Insular area (Territory),PR PRI 630 US-PR,PR,72,PR,PR,P.R.,,,,,,, -67,U.S.VirginIslands,Insular area (Territory),VI VIR 850 US-VI,VI,78,VI,VI,V.I.,,U.S.V.I.,,,,, -68,U.S.MinorOutlyingIslands,Insular areas,UM UMI 581 US-UM,UM,74,,,,,,,,,, +63,AmericanSamoa,Insular area (Territory),ASASM016US-AS,AS,60,AS,AS,A.S.,,,,,,, +64,Guam,Insular area (Territory),GUGUM316US-GU,GU,66,GU,GU,Guam,,,,,,, +65,NorthernMarianaIslands,Insular area (Commonwealth),MPMNP580US-MP,MP,69,MP,CM,M.P.,,CNMI[3],,,,, +66,PuertoRico,Insular area (Territory),PRPRI630US-PR,PR,72,PR,PR,P.R.,,,,,,, +67,U.S.VirginIslands,Insular area (Territory),VIVIR850US-VI,VI,78,VI,VI,V.I.,,U.S.V.I.,,,,, +68,U.S.MinorOutlyingIslands,Insular areas,UMUMI581US-UM,UM,74,,,,,,,,,, 69,BakerIsland,island,UM-81,,81,,,,,XB[4],,,,, 70,HowlandIsland,island,UM-84,,84,,,,,XH[4],,,,, 71,JarvisIsland,island,UM-86,,86,,,,,XQ[4],,,,, @@ -65,14 +65,14 @@ 75,NavassaIsland,island,UM-76,,76,,,,,XV[4],,,,, 76,PalmyraAtoll[5],atoll[5],UM-95,,95,,,,,XL[4],,,,, 77,WakeIsland,atoll,UM-79,,79,,,,,QW[4],,,,, -78,Micronesia,Freely associated state,FM FSM 583,FM,64,FM,,,,,,,,, -79,MarshallIslands,Freely associated state,MH MHL 584,MH,68,MH,,,,,,,,, -80,Palau,Freely associated state,PW PLW 585,PW,70,PW,,,,,,,,, +78,Micronesia,Freely associated state,FMFSM583,FM,64,FM,,,,,,,,, +79,MarshallIslands,Freely associated state,MHMHL584,MH,68,MH,,,,,,,,, +80,Palau,Freely associated state,PWPLW585,PW,70,PW,,,,,,,,, 81,U.S.ArmedForces–Americas[6],US military mail code,,,,AA,,,,,,,,, 82,U.S.ArmedForces–Europe[7],US military mail code,,,,AE,,,,,,,,, 83,U.S.ArmedForces–Pacific[8],US military mail code,,,,AP,,,,,,,,, 84,NorthernMarianaIslands,Obsolete postal code[9],,,,CM,,,,,,,,, -85,PanamaCanalZone,Obsolete postal code,PZ PCZ 594,,,CZ,,,,,,,,, +85,PanamaCanalZone,Obsolete postal code,PZPCZ594,,,CZ,,,,,,,,, 86,Nebraska,Obsolete postal code[10],,,,NB,,,,,,,,, -87,PhilippineIslands,Obsolete postal code,PH PHL 608[11],,,PI,,,,,,,,, -88,TrustTerritoryofthePacificIslands,Obsolete postal code,PC PCI 582,,,TT,,,,,,,,, +87,PhilippineIslands,Obsolete postal code,PHPHL608[11],,,PI,,,,,,,,, +88,TrustTerritoryofthePacificIslands,Obsolete postal code,PCPCI582,,,TT,,,,,,,,, diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 92b3ac2..fecd44e 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index e27baf6..8ddeefc 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index 69d7209..6128325 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,5 +1,13 @@ +# %load q01_load_data/build.py import pandas as pd - +path = 'data/excel-comp-data.xlsx' def q01_load_data(path): - "write your solution here" + 'write your solution here' + df = pd.read_excel(path) + df['state'] = df['state'].apply(lambda x: x.lower()) + df['total'] = df['Jan'] + df['Feb'] + df['Mar'] + return df + +q01_load_data(path) + diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index 2a2dfc7..60b4e69 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/tests.cpython-36.pyc b/q01_load_data/tests/__pycache__/tests.cpython-36.pyc index 76e04c8..8be6b29 100644 Binary files a/q01_load_data/tests/__pycache__/tests.cpython-36.pyc and b/q01_load_data/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q02_append_row/__pycache__/__init__.cpython-36.pyc b/q02_append_row/__pycache__/__init__.cpython-36.pyc index de0cf61..3c1f6f5 100644 Binary files a/q02_append_row/__pycache__/__init__.cpython-36.pyc and b/q02_append_row/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_append_row/__pycache__/build.cpython-36.pyc b/q02_append_row/__pycache__/build.cpython-36.pyc index 5088267..c5ebbf4 100644 Binary files a/q02_append_row/__pycache__/build.cpython-36.pyc and b/q02_append_row/__pycache__/build.cpython-36.pyc differ diff --git a/q02_append_row/build.py b/q02_append_row/build.py index af3701d..c5fdaa7 100644 --- a/q02_append_row/build.py +++ b/q02_append_row/build.py @@ -1,12 +1,23 @@ +# %load q02_append_row/build.py import pandas as pd import sys, os #sys.path.append(os.path.join(os.path.dirname(os.curdir))) from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data +path = 'data/excel-comp-data.xlsx' + def q02_append_row(path): - "write your solution here" + 'write your solution here' + df = q01_load_data(path) + sum_row = df[['Jan', 'Feb', 'Mar', 'total']].sum() + df_sum = pd.DataFrame(data=sum_row).T + df_final = df.append(df_sum, ignore_index=True) + return df_final + + +q02_append_row(path) diff --git a/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc b/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc index dab3eca..b40e302 100644 Binary files a/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc and b/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_append_row/tests/__pycache__/tests.cpython-36.pyc b/q02_append_row/tests/__pycache__/tests.cpython-36.pyc index 742ee79..c0c4799 100644 Binary files a/q02_append_row/tests/__pycache__/tests.cpython-36.pyc and b/q02_append_row/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc b/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc index e99e173..c72b3dd 100644 Binary files a/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc and b/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_scrape_clean/__pycache__/build.cpython-36.pyc b/q03_scrape_clean/__pycache__/build.cpython-36.pyc index cdec2c4..1519139 100644 Binary files a/q03_scrape_clean/__pycache__/build.cpython-36.pyc and b/q03_scrape_clean/__pycache__/build.cpython-36.pyc differ diff --git a/q03_scrape_clean/build.py b/q03_scrape_clean/build.py index a88e3e2..a98c92a 100644 --- a/q03_scrape_clean/build.py +++ b/q03_scrape_clean/build.py @@ -1,9 +1,19 @@ +# %load q03_scrape_clean/build.py import pandas as pd import sys, os import requests sys.path.append(os.path.join(os.path.dirname(os.curdir))) - +url = 'https://en.wikipedia.org/wiki/List_of_U.S._state_abbreviations' def q03_scrape_clean(url): - "write your solution here" + 'write your solution here' + + response = requests.get(url) + df1 = pd.read_html(response.content)[0] + df1 = df1.iloc[11:, :] + df1 = df1.rename(columns=df1.iloc[0, :]).iloc[1:, :] + df1['United States of America'] = df1['United States of America'].apply(lambda x: x.replace(' ', '')).astype(object) + df1.to_csv('data/scraped.csv') + return df1 + diff --git a/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc b/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc index bee36fb..4e54604 100644 Binary files a/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc and b/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc b/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc index 8529c87..9df1b98 100644 Binary files a/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc and b/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q04_mapping/__pycache__/__init__.cpython-36.pyc b/q04_mapping/__pycache__/__init__.cpython-36.pyc index ee0618f..c8ff61e 100644 Binary files a/q04_mapping/__pycache__/__init__.cpython-36.pyc and b/q04_mapping/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_mapping/__pycache__/build.cpython-36.pyc b/q04_mapping/__pycache__/build.cpython-36.pyc index 8283165..6972f0b 100644 Binary files a/q04_mapping/__pycache__/build.cpython-36.pyc and b/q04_mapping/__pycache__/build.cpython-36.pyc differ diff --git a/q04_mapping/build.py b/q04_mapping/build.py index 914cfa8..2846e81 100644 --- a/q04_mapping/build.py +++ b/q04_mapping/build.py @@ -1,10 +1,33 @@ +# %load q04_mapping/build.py import pandas as pd import sys, os import numpy as np #sys.path.append(os.path.join(os.path.dirname(os.curdir))) from greyatomlib.pandas_guided_project.q02_append_row.build import q02_append_row + +path1 = 'data/excel-comp-data.xlsx' +path2 = 'data/scraped.csv' def q04_mapping(path1,path2): - "write your solution here" + 'write your solution here' + + df1 = q02_append_row(path1) + df1['abbr'] = np.nan + df2 = pd.read_csv(path2) + ab = df2.iloc[:,7] + name = df2['United States of America'] + d = {} + for i in range(0,ab.shape[0]): + d[name[i].lower()] = ab[i] + + for i in range(0,df1.shape[0]): + if df1.iloc[i,:]['state'] in d.keys(): + df1.iloc[i,-1] = d[df1.iloc[i,:]['state']] + + df2 = df1.iloc[:,0:5] + df2['total'] = df1['total'] + df2['abbr'] = df1['abbr'] + return df2 +q04_mapping(path1, path2) diff --git a/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc b/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc index eef3d6b..4d23b9f 100644 Binary files a/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc and b/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_mapping/tests/__pycache__/test.cpython-36.pyc b/q04_mapping/tests/__pycache__/test.cpython-36.pyc index 7f7c96e..4c4ecdb 100644 Binary files a/q04_mapping/tests/__pycache__/test.cpython-36.pyc and b/q04_mapping/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q06_sub_total/__pycache__/__init__.cpython-36.pyc b/q06_sub_total/__pycache__/__init__.cpython-36.pyc index f70134c..df9b3b8 100644 Binary files a/q06_sub_total/__pycache__/__init__.cpython-36.pyc and b/q06_sub_total/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_sub_total/__pycache__/build.cpython-36.pyc b/q06_sub_total/__pycache__/build.cpython-36.pyc index adaf0ce..1119488 100644 Binary files a/q06_sub_total/__pycache__/build.cpython-36.pyc and b/q06_sub_total/__pycache__/build.cpython-36.pyc differ diff --git a/q06_sub_total/build.py b/q06_sub_total/build.py index c420838..bee03ab 100644 --- a/q06_sub_total/build.py +++ b/q06_sub_total/build.py @@ -1,3 +1,4 @@ +# %load q06_sub_total/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split @@ -10,7 +11,13 @@ path2 = 'data/scraped.csv' def q06_sub_total(path1,path2): - "write your solution here" + df_final = q05_replace_missing_values(path1,path2) + #df_final['abbr'] = df_final['abbr'].astype(str) + df_sub=df_final[['abbr', 'Jan', 'Feb', 'Mar', 'total']].groupby('abbr').sum() + return df_sub + +q06_sub_total(path1,path2) + diff --git a/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc b/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc index 93ecd56..549b3a7 100644 Binary files a/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc and b/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_sub_total/tests/__pycache__/test.cpython-36.pyc b/q06_sub_total/tests/__pycache__/test.cpython-36.pyc index 691280a..6b45702 100644 Binary files a/q06_sub_total/tests/__pycache__/test.cpython-36.pyc and b/q06_sub_total/tests/__pycache__/test.cpython-36.pyc differ