def q01_load_data(path):
    """Read the Excel workbook at ``path`` and derive two columns.

    The ``state`` column is converted to lower case, and a new ``total``
    column is added holding the row-wise sum ``Jan + Feb + Mar``.

    Parameters
    ----------
    path : str
        Location of the Excel file; passed straight to ``pandas.read_excel``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with ``state`` lower-cased and ``total`` appended.
    """
    frame = pd.read_excel(path)
    frame['state'] = frame['state'].str.lower()
    # Series.add without fill_value propagates NaN exactly like the + operator.
    frame['total'] = frame['Jan'].add(frame['Feb']).add(frame['Mar'])
    return frame
def q02_append_row(path):
    """Load the data with ``q01_load_data`` and append a row of column sums.

    Parameters
    ----------
    path : str
        Location of the Excel file; forwarded to ``q01_load_data``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with one extra, final row holding the sum of every
        numeric column. Non-numeric columns are NaN in that row. The index
        is renumbered from 0.
    """
    df = q01_load_data(path)
    totals = df.sum(numeric_only=True)
    # DataFrame.append is deprecated since pandas 1.4 and removed in 2.0.
    # Turning the totals Series into a one-row frame and concatenating gives
    # the same extra row on every pandas version.
    return pd.concat([df, totals.to_frame().T], ignore_index=True)
def q03_scrape_clean(url):
    """Fetch ``url``, keep its first HTML table from row 12 onward, and save it.

    The trimmed table is also written to ``scrapeddata.csv`` in the current
    working directory (with its index) before being returned.

    Parameters
    ----------
    url : str
        Address of the page containing the table.

    Returns
    -------
    pandas.DataFrame
        Rows 12 and later (by position) of the first table on the page.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within 30 seconds.
    """
    from io import StringIO

    # The earlier version sent auth=('user', 'pass'), the placeholder pair
    # from the requests quick-start. It is dropped here so dummy Basic-auth
    # credentials are no longer sent to whatever URL the caller supplies.
    response = requests.get(url, timeout=30)
    # Fail on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    # read_html accepts a file-like object; passing literal HTML as a plain
    # string is deprecated in newer pandas releases.
    tables = pd.read_html(StringIO(response.text))
    df = tables[0].iloc[12:, :]
    df.to_csv('scrapeddata.csv')
    return df
def q04_mapping(path1, path2):
    """Add an ``abbr`` column by looking each state up in a scraped CSV.

    Parameters
    ----------
    path1 : str
        Excel file, forwarded to ``q02_append_row``.
    path2 : str
        CSV file with a ``United States of America`` column (the lookup key,
        lower-cased before use) and a ``US`` column (the value).

    Returns
    -------
    pandas.DataFrame
        The frame from ``q02_append_row`` with ``abbr`` inserted at column
        position 6. States with no entry in the lookup get NaN.
    """
    report = q02_append_row(path1)
    lookup = pd.read_csv(path2)

    # Both columns are stringified first, so missing cells become 'nan'.
    names = lookup['United States of America'].astype(str).str.lower()
    codes = lookup['US'].astype(str)
    # On duplicate names the last occurrence wins.
    abbr_by_state = dict(zip(names, codes))

    report.insert(6, 'abbr', report['state'].map(abbr_by_state))
    return report
def q05_replace_missing_values(path1, path2):
    """Fill in the abbreviations that ``q04_mapping`` leaves missing.

    Rows whose ``state`` is ``'mississipi'`` or ``'tenessee'`` and whose
    ``abbr`` is missing get ``'MS'`` and ``'TN'`` respectively. These
    spellings are presumably how the states appear in the input workbook,
    which would explain why the name lookup finds no match for them.

    Parameters
    ----------
    path1 : str
        Excel file, forwarded to ``q04_mapping``.
    path2 : str
        Scraped CSV file, forwarded to ``q04_mapping``.

    Returns
    -------
    pandas.DataFrame
        The frame from ``q04_mapping`` with the two abbreviations filled in.
    """
    df_final = q04_mapping(path1, path2)

    # The state strings must match the data literally; do not "correct" them.
    fixes = {'mississipi': 'MS', 'tenessee': 'TN'}
    for state, abbr in fixes.items():
        # Select rows by label rather than by hard-coded positions (6 and 10),
        # and touch only the abbr column so NaN in other columns is kept.
        missing = (df_final['state'] == state) & df_final['abbr'].isna()
        df_final.loc[missing, 'abbr'] = abbr
    return df_final
def q06_sub_total(path1, path2):
    """Sum the monthly and total figures per state abbreviation.

    Parameters
    ----------
    path1 : str
        Excel file, forwarded to ``q05_replace_missing_values``.
    path2 : str
        Scraped CSV file, forwarded to ``q05_replace_missing_values``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``abbr``, with the summed ``Jan``, ``Feb``, ``Mar`` and
        ``total`` columns for each abbreviation.
    """
    value_columns = ['Jan', 'Feb', 'Mar', 'total']
    cleaned = q05_replace_missing_values(path1, path2)
    return cleaned.groupby('abbr')[value_columns].sum()
district,US-DC,DC,11,DC,DC,D.C.,D.C.,Wash. D.C.,,,,, +21,Florida,State,US-FL,FL,12,FL,FL,Fla.,Fla.,"Fl., Flor.",,,,, +22,Georgia,State,US-GA,GA,13,GA,GA,Ga.,Ga.,Geo.,,,,, +23,Hawaii,State,US-HI,HI,15,HI,HA,Hawaii,Hawaii,H.I.,,,,, +24,Idaho,State,US-ID,ID,16,ID,ID,Idaho,Idaho,"Id., Ida.",,,,, +25,Illinois,State,US-IL,IL,17,IL,IL,Ill.,Ill.,"Il., Ills., Ill's",,,,, +26,Indiana,State,US-IN,IN,18,IN,IN,Ind.,Ind.,In.,,,,, +27,Iowa,State,US-IA,IA,19,IA,IA,Iowa,Iowa,"Ia., Ioa.[1]",,,,, +28,Kansas,State,US-KS,KS,20,KS,KA,Kans.,Kan.,"Ks., Ka.",,,,, +29,Kentucky,State (Commonwealth),US-KY,KY,21,KY,KY,Ky.,Ky.,"Ken., Kent.",,,,, +30,Louisiana,State,US-LA,LA,22,LA,LA,La.,La.,,,,,, +31,Maine,State,US-ME,ME,23,ME,ME,Maine,Maine,Me.,,,,, +32,Maryland,State,US-MD,MD,24,MD,MD,Md.,Md.,,,,,, +33,Massachusetts,State (Commonwealth),US-MA,MA,25,MA,MS,Mass.,Mass.,,,,,, +34,Michigan,State,US-MI,MI,26,MI,MC,Mich.,Mich.,,,,,, +35,Minnesota,State,US-MN,MN,27,MN,MN,Minn.,Minn.,Mn.,,,,, +36,Mississippi,State,US-MS,MS,28,MS,MI,Miss.,Miss.,,,,,, +37,Missouri,State,US-MO,MO,29,MO,MO,Mo.,Mo.,,,,,, +38,Montana,State,US-MT,MT,30,MT,MT,Mont.,Mont.,,,,,, +39,Nebraska,State,US-NE,NE,31,NE,NB,Nebr.,Neb.,,,,,, +40,Nevada,State,US-NV,NV,32,NV,NV,Nev.,Nev.,Nv.,,,,, +41,New Hampshire,State,US-NH,NH,33,NH,NH,N.H.,N.H.,,,,,, +42,New Jersey,State,US-NJ,NJ,34,NJ,NJ,N.J.,N.J.,N.Jersey,,,,, +43,New Mexico,State,US-NM,NM,35,NM,NM,N. Mex.,N.M.,New M.,,,,, +44,New York,State,US-NY,NY,36,NY,NY,N.Y.,N.Y.,N. York,,,,, +45,North Carolina,State,US-NC,NC,37,NC,NC,N.C.,N.C.,N. Car.,,,,, +46,North Dakota,State,US-ND,ND,38,ND,ND,N. Dak.,N.D.,NoDak,,,,, +47,Ohio,State,US-OH,OH,39,OH,OH,Ohio,Ohio,"O., Oh.",,,,, +48,Oklahoma,State,US-OK,OK,40,OK,OK,Okla.,Okla.,Ok.,,,,, +49,Oregon,State,US-OR,OR,41,OR,OR,Oreg.,Ore.,Or.,,,,, +50,Pennsylvania,State (Commonwealth),US-PA,PA,42,PA,PA,Pa.,Pa.,"Penn., Penna.",,,,, +51,Rhode Island,State,US-RI,RI,44,RI,RI,R.I.,R.I.,"R.I. & P.P., R. 
Isl.",,,,, +52,South Carolina,State,US-SC,SC,45,SC,SC,S.C.,S.C.,S. Car.,,,,, +53,South Dakota,State,US-SD,SD,46,SD,SD,S. Dak.,S.D.,SoDak,,,,, +54,Tennessee,State,US-TN,TN,47,TN,TN,Tenn.,Tenn.,,,,,, +55,Texas,State,US-TX,TX,48,TX,TX,Tex.,Texas,Tx.,,,,, +56,Utah,State,US-UT,UT,49,UT,UT,Utah,Utah,Ut.,,,,, +57,Vermont,State,US-VT,VT,50,VT,VT,Vt.,Vt.,,,,,, +58,Virginia,State (Commonwealth),US-VA,VA,51,VA,VA,Va.,Va.,Virg.,,,,, +59,Washington,State,US-WA,WA,53,WA,WN,Wash.,Wash.,"Wa., Wn.[2]",,,,, +60,West Virginia,State,US-WV,WV,54,WV,WV,W. Va.,W.Va.,"W.V., W. Virg.",,,,, +61,Wisconsin,State,US-WI,WI,55,WI,WS,Wis.,Wis.,"Wi., Wisc.",,,,, +62,Wyoming,State,US-WY,WY,56,WY,WY,Wyo.,Wyo.,Wy.,,,,, +63,American Samoa,Insular area (Territory),ASASM016US-AS,AS,60,AS,AS,A.S.,,,,,,, +64,Guam,Insular area (Territory),GUGUM316US-GU,GU,66,GU,GU,Guam,,,,,,, +65,Northern Mariana Islands,Insular area (Commonwealth),MPMNP580US-MP,MP,69,MP,CM,M.P.,,CNMI[3],,,,, +66,Puerto Rico,Insular area (Territory),PRPRI630US-PR,PR,72,PR,PR,P.R.,,,,,,, +67,U.S. Virgin Islands,Insular area (Territory),VIVIR850US-VI,VI,78,VI,VI,V.I.,,U.S.V.I.,,,,, +68,U.S. Minor Outlying Islands,Insular areas,UMUMI581US-UM,UM,74,,,,,,,,,, +69,Baker Island,island,UM-81,,81,,,,,XB[4],,,,, +70,Howland Island,island,UM-84,,84,,,,,XH[4],,,,, +71,Jarvis Island,island,UM-86,,86,,,,,XQ[4],,,,, +72,Johnston Atoll,atoll,UM-67,,67,,,,,XU[4],,,,, +73,Kingman Reef,atoll,UM-89,,89,,,,,XM[4],,,,, +74,Midway Islands,atoll,UM-71,,71,,,,,QM[4],,,,, +75,Navassa Island,island,UM-76,,76,,,,,XV[4],,,,, +76,Palmyra Atoll[5],atoll[5],UM-95,,95,,,,,XL[4],,,,, +77,Wake Island,atoll,UM-79,,79,,,,,QW[4],,,,, +78,Micronesia,Freely associated state,FMFSM583,FM,64,FM,,,,,,,,, +79,Marshall Islands,Freely associated state,MHMHL584,MH,68,MH,,,,,,,,, +80,Palau,Freely associated state,PWPLW585,PW,70,PW,,,,,,,,, +81,U.S. Armed Forces – Americas[6],US military mail code,,,,AA,,,,,,,,, +82,U.S. 
Armed Forces – Europe[7],US military mail code,,,,AE,,,,,,,,, +83,U.S. Armed Forces – Pacific[8],US military mail code,,,,AP,,,,,,,,, +84,Northern Mariana Islands,Obsolete postal code[9],,,,CM,,,,,,,,, +85,Panama Canal Zone,Obsolete postal code,PZPCZ594,,,CZ,,,,,,,,, +86,Nebraska,Obsolete postal code[10],,,,NB,,,,,,,,, +87,Philippine Islands,Obsolete postal code,PHPHL608[11],,,PI,,,,,,,,, +88,Trust Territory of the Pacific Islands,Obsolete postal code,PCPCI582,,,TT,,,,,,,,,