diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index b4b7209..c89ef72 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 92b3ac2..795f111 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index e27baf6..82881a9 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index 69d7209..8c5236f 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,5 +1,14 @@ +# %load q01_load_data/build.py import pandas as pd - +path = 'data/excel-comp-data.xlsx' def q01_load_data(path): - "write your solution here" + df = pd.read_excel(path) + df['state'] = df['state'].str.lower() + df['total'] = df.iloc[:,6:9].sum(axis=1) + + return df + +q01_load_data(path) + + diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index 2a2dfc7..440c350 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/tests.cpython-36.pyc b/q01_load_data/tests/__pycache__/tests.cpython-36.pyc index 76e04c8..c40fafa 100644 Binary files a/q01_load_data/tests/__pycache__/tests.cpython-36.pyc and b/q01_load_data/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q02_append_row/__pycache__/__init__.cpython-36.pyc b/q02_append_row/__pycache__/__init__.cpython-36.pyc index de0cf61..d0b7f04 100644 Binary files a/q02_append_row/__pycache__/__init__.cpython-36.pyc and b/q02_append_row/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_append_row/__pycache__/build.cpython-36.pyc b/q02_append_row/__pycache__/build.cpython-36.pyc index 5088267..418a5ba 100644 Binary files a/q02_append_row/__pycache__/build.cpython-36.pyc and b/q02_append_row/__pycache__/build.cpython-36.pyc differ diff --git a/q02_append_row/build.py b/q02_append_row/build.py index af3701d..8e546dd 100644 --- a/q02_append_row/build.py +++ b/q02_append_row/build.py @@ -1,12 +1,19 @@ +# %load q02_append_row/build.py import pandas as pd import sys, os #sys.path.append(os.path.join(os.path.dirname(os.curdir))) from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data +path = 'data/excel-comp-data.xlsx' def q02_append_row(path): - "write your solution here" + df = q01_load_data(path) + df.loc[len(df)] = df.iloc[:,6:10].sum(axis=0) + df.fillna(value=0, inplace=True, axis=0) + + return df +q02_append_row(path) diff --git a/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc b/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc index dab3eca..5ef57e6 100644 Binary files a/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc and b/q02_append_row/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_append_row/tests/__pycache__/tests.cpython-36.pyc b/q02_append_row/tests/__pycache__/tests.cpython-36.pyc index 742ee79..702cde9 100644 Binary files a/q02_append_row/tests/__pycache__/tests.cpython-36.pyc and b/q02_append_row/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc b/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc index e99e173..7aebc1c 100644 Binary files a/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc and b/q03_scrape_clean/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_scrape_clean/__pycache__/build.cpython-36.pyc b/q03_scrape_clean/__pycache__/build.cpython-36.pyc index cdec2c4..68ca8d0 100644 Binary files a/q03_scrape_clean/__pycache__/build.cpython-36.pyc and b/q03_scrape_clean/__pycache__/build.cpython-36.pyc differ diff --git a/q03_scrape_clean/build.py b/q03_scrape_clean/build.py index a88e3e2..05ef7a7 100644 --- a/q03_scrape_clean/build.py +++ b/q03_scrape_clean/build.py @@ -1,9 +1,20 @@ +# %load q03_scrape_clean/build.py import pandas as pd import sys, os import requests +from bs4 import BeautifulSoup sys.path.append(os.path.join(os.path.dirname(os.curdir))) def q03_scrape_clean(url): - "write your solution here" + response = requests.get(url) + soup = BeautifulSoup(response.content,'lxml') + table = soup.find_all('table')[0] + df = pd.read_html(str(table))[0] + df = df.iloc[12:,:] + + return df + +q03_scrape_clean('https://en.wikipedia.org/wiki/List_of_U.S._state_abbreviations') + diff --git a/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc b/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc index bee36fb..30f2ae6 100644 Binary files a/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc and b/q03_scrape_clean/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc b/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc index 8529c87..7727a1c 100644 Binary files a/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc and b/q03_scrape_clean/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q04_mapping/__pycache__/__init__.cpython-36.pyc b/q04_mapping/__pycache__/__init__.cpython-36.pyc index ee0618f..5f06884 100644 Binary files a/q04_mapping/__pycache__/__init__.cpython-36.pyc and b/q04_mapping/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_mapping/__pycache__/build.cpython-36.pyc b/q04_mapping/__pycache__/build.cpython-36.pyc index 8283165..a163f45 100644 Binary files a/q04_mapping/__pycache__/build.cpython-36.pyc and b/q04_mapping/__pycache__/build.cpython-36.pyc differ diff --git a/q04_mapping/build.py b/q04_mapping/build.py index 914cfa8..fa1f227 100644 --- a/q04_mapping/build.py +++ b/q04_mapping/build.py @@ -1,10 +1,27 @@ +# %load q04_mapping/build.py import pandas as pd import sys, os import numpy as np #sys.path.append(os.path.join(os.path.dirname(os.curdir))) +from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data from greyatomlib.pandas_guided_project.q02_append_row.build import q02_append_row -def q04_mapping(path1,path2): - "write your solution here" +path1 = 'data/excel-comp-data.xlsx' +path2 = 'data/scraped.csv' +def q04_mapping(path1, path2): + df_02 = q01_load_data(path1) + df_02.loc[len(df_02)] = df_02.iloc[:,6:10].sum(axis=0) + df_02.fillna(value=0, inplace=True, axis=0) + + #df_02 = q02_append_row(path1) + df_03 = pd.read_csv(path2) + mapping = dict(zip(df_03['United States of America'].str.lower(), df_03['Unnamed: 6'])) + + df_02.insert(loc=6, column='abbr', value='') + df_02.iloc[:,6]=df_02['state'].map(mapping) + + return df_02 + +q04_mapping(path1, path2) diff --git a/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc b/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc index eef3d6b..198b648 100644 Binary files a/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc and b/q04_mapping/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_mapping/tests/__pycache__/test.cpython-36.pyc b/q04_mapping/tests/__pycache__/test.cpython-36.pyc index 7f7c96e..23e6172 100644 Binary files a/q04_mapping/tests/__pycache__/test.cpython-36.pyc and b/q04_mapping/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc b/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc index f50c1d5..5dad85d 100644 Binary files a/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc and b/q05_replace_missing_values/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_replace_missing_values/__pycache__/build.cpython-36.pyc b/q05_replace_missing_values/__pycache__/build.cpython-36.pyc index 6a32964..d93cb0c 100644 Binary files a/q05_replace_missing_values/__pycache__/build.cpython-36.pyc and b/q05_replace_missing_values/__pycache__/build.cpython-36.pyc differ diff --git a/q05_replace_missing_values/build.py b/q05_replace_missing_values/build.py index 97d9755..2baaa03 100644 --- a/q05_replace_missing_values/build.py +++ b/q05_replace_missing_values/build.py @@ -1,13 +1,32 @@ +# %load q05_replace_missing_values/build.py import pandas as pd import numpy as np import sys import os #sys.path.append(os.path.join(os.path.dirname(os.curdir))) +from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data from greyatomlib.pandas_guided_project.q04_mapping.build import q04_mapping path1 = 'data/excel-comp-data.xlsx' path2 = 'data/scraped.csv' def q05_replace_missing_values(path1,path2): + df_02 = q01_load_data(path1) + df_02.loc[len(df_02)] = df_02.iloc[:,6:10].sum(axis=0) + df_02.fillna(value=0, inplace=True, axis=0) + + df_03 = pd.read_csv(path2) + mapping = dict(zip(df_03['United States of America'].str.lower(), df_03['Unnamed: 6'])) + + df_02.insert(loc=6, column='abbr', value='') + df_02.iloc[:,6]=df_02['state'].map(mapping) + + #df_02 = q04_mapping (path1, path2) + df_02.iloc[6,6] = 'MS' + df_02.iloc[10,6] = 'TN' + + return df_02 + +print(q05_replace_missing_values(path1,path2).shape) + -#print(q05_replace_missing_values(path1,path2).shape) \ No newline at end of file diff --git a/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc b/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc index 03391a7..41f854d 100644 Binary files a/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc and b/q05_replace_missing_values/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc b/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc index 3b9d62a..bf533d4 100644 Binary files a/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc and b/q05_replace_missing_values/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q06_sub_total/__pycache__/__init__.cpython-36.pyc b/q06_sub_total/__pycache__/__init__.cpython-36.pyc index f70134c..487ba67 100644 Binary files a/q06_sub_total/__pycache__/__init__.cpython-36.pyc and b/q06_sub_total/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_sub_total/__pycache__/build.cpython-36.pyc b/q06_sub_total/__pycache__/build.cpython-36.pyc index adaf0ce..8692710 100644 Binary files a/q06_sub_total/__pycache__/build.cpython-36.pyc and b/q06_sub_total/__pycache__/build.cpython-36.pyc differ diff --git a/q06_sub_total/build.py b/q06_sub_total/build.py index c420838..1a2457c 100644 --- a/q06_sub_total/build.py +++ b/q06_sub_total/build.py @@ -1,16 +1,35 @@ +# %load q06_sub_total/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split import sys import os #sys.path.append(os.path.join(os.path.dirname(os.curdir))) +from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data from greyatomlib.pandas_guided_project.q05_replace_missing_values.build import q05_replace_missing_values path1 = 'data/excel-comp-data.xlsx' path2 = 'data/scraped.csv' def q06_sub_total(path1,path2): - "write your solution here" + df_02 = q01_load_data(path1) + df_02.loc[len(df_02)] = df_02.iloc[:,6:10].sum(axis=0) + df_02.fillna(value=0, inplace=True, axis=0) + + df_03 = pd.read_csv(path2) + mapping = dict(zip(df_03['United States of America'].str.lower(), df_03['Unnamed: 6'])) + df_02.insert(loc=6, column='abbr', value='') + df_02.iloc[:,6]=df_02['state'].map(mapping) + + df_02.iloc[6,6] = 'MS' + df_02.iloc[10,6] = 'TN' + + #df_02 = q05_replace_missing_values(path1,path2) + df_sub = df_02.groupby(['abbr'])[['account', 'Jan', 'Feb', 'Mar']].sum() + + return df_sub + +q06_sub_total(path1,path2) diff --git a/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc b/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc index 93ecd56..03102d1 100644 Binary files a/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc and b/q06_sub_total/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_sub_total/tests/__pycache__/test.cpython-36.pyc b/q06_sub_total/tests/__pycache__/test.cpython-36.pyc index 691280a..8baba72 100644 Binary files a/q06_sub_total/tests/__pycache__/test.cpython-36.pyc and b/q06_sub_total/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q07_symbols/__pycache__/__init__.cpython-36.pyc b/q07_symbols/__pycache__/__init__.cpython-36.pyc index 60b0cca..19f1e6a 100644 Binary files a/q07_symbols/__pycache__/__init__.cpython-36.pyc and b/q07_symbols/__pycache__/__init__.cpython-36.pyc differ diff --git a/q07_symbols/__pycache__/build.cpython-36.pyc b/q07_symbols/__pycache__/build.cpython-36.pyc index d28eaa9..878e48a 100644 Binary files a/q07_symbols/__pycache__/build.cpython-36.pyc and b/q07_symbols/__pycache__/build.cpython-36.pyc differ diff --git a/q07_symbols/build.py b/q07_symbols/build.py index b8cbb92..aff8145 100644 --- a/q07_symbols/build.py +++ b/q07_symbols/build.py @@ -1,18 +1,36 @@ +# %load q07_symbols/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split import sys import os sys.path.append(os.path.join(os.path.dirname(os.curdir))) -from greyatomlib.pandas_guided_project.q06_sub_total.build import q06_sub_total + +from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data +#from greyatomlib.pandas_guided_project.q06_sub_total.build import q06_sub_total path1 = 'data/excel-comp-data.xlsx' path2 = 'data/scraped.csv' def q07_symbols(path1,path2): - "write your solution here" + df_02 = q01_load_data(path1) + df_02.loc[len(df_02)] = df_02.iloc[:,6:10].sum(axis=0) + df_02.fillna(value=0, inplace=True, axis=0) + + df_03 = pd.read_csv(path2) + mapping = dict(zip(df_03['United States of America'].str.lower(), df_03['Unnamed: 6'])) + + df_02.insert(loc=6, column='abbr', value='') + df_02.iloc[:,6]=df_02['state'].map(mapping) + + df_02.iloc[6,6] = 'MS' + df_02.iloc[10,6] = 'TN' + #df_sub = q06_sub_total(path1,path2) + df_sub = df_02.groupby(['abbr'])[['account', 'Jan', 'Feb', 'Mar', 'total']].sum().applymap(lambda x: '$%s'% '{:,}'.format(int(x))) + + return df_sub +q07_symbols(path1,path2) -#print(q07_symbols(path1,path2)) diff --git a/q07_symbols/tests/__pycache__/__init__.cpython-36.pyc b/q07_symbols/tests/__pycache__/__init__.cpython-36.pyc index f854b4a..8097d1d 100644 Binary files a/q07_symbols/tests/__pycache__/__init__.cpython-36.pyc and b/q07_symbols/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q07_symbols/tests/__pycache__/test.cpython-36.pyc b/q07_symbols/tests/__pycache__/test.cpython-36.pyc index 1a8a9c3..90238df 100644 Binary files a/q07_symbols/tests/__pycache__/test.cpython-36.pyc and b/q07_symbols/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q08_append_subtotals/__pycache__/__init__.cpython-36.pyc b/q08_append_subtotals/__pycache__/__init__.cpython-36.pyc index df1c3a2..58169e2 100644 Binary files a/q08_append_subtotals/__pycache__/__init__.cpython-36.pyc and b/q08_append_subtotals/__pycache__/__init__.cpython-36.pyc differ diff --git a/q08_append_subtotals/__pycache__/build.cpython-36.pyc b/q08_append_subtotals/__pycache__/build.cpython-36.pyc index d03d4af..834d15e 100644 Binary files a/q08_append_subtotals/__pycache__/build.cpython-36.pyc and b/q08_append_subtotals/__pycache__/build.cpython-36.pyc differ diff --git a/q08_append_subtotals/build.py b/q08_append_subtotals/build.py index 96e2f9e..bd45537 100644 --- a/q08_append_subtotals/build.py +++ b/q08_append_subtotals/build.py @@ -1,15 +1,37 @@ +# %load q08_append_subtotals/build.py import pandas as pd import numpy as np import sys,os #sys.path.append(os.path.join(os.path.dirname(os.curdir))) -from greyatomlib.pandas_guided_project.q06_sub_total.build import q06_sub_total -from greyatomlib.pandas_guided_project.q07_symbols.build import q07_symbols - +from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data +#from greyatomlib.pandas_guided_project.q06_sub_total.build import q06_sub_total +#from greyatomlib.pandas_guided_project.q07_symbols.build import q07_symbols path1 = 'data/excel-comp-data.xlsx' path2 = 'data/scraped.csv' def q08_append_subtotals(path1,path2): - "write your solution here" + df_02 = q01_load_data(path1) + df_02.loc[len(df_02)] = df_02.iloc[:,6:10].sum(axis=0) + df_02.fillna(value=0, inplace=True, axis=0) + + df_03 = pd.read_csv(path2) + mapping = dict(zip(df_03['United States of America'].str.lower(), df_03['Unnamed: 6'])) + + df_02.insert(loc=6, column='abbr', value='') + df_02.iloc[:,6]=df_02['state'].map(mapping) + + df_02.iloc[6,6] = 'MS' + df_02.iloc[10,6] = 'TN' + + #df_sub = q06_sub_total(path1,path2) + df_sub = df_02.groupby(['abbr'])[['Jan', 'Feb', 'Mar', 'total']].sum() + df_sub.loc[len(df_sub)] = df_sub.iloc[:].sum(axis=0) + + df_sub = df_sub.applymap(lambda x: '$%s'% '{:,}'.format(int(x))) + + return df_sub + +q08_append_subtotals(path1,path2) diff --git a/q08_append_subtotals/tests/__pycache__/__init__.cpython-36.pyc b/q08_append_subtotals/tests/__pycache__/__init__.cpython-36.pyc index 21f4cd0..4c5baa7 100644 Binary files a/q08_append_subtotals/tests/__pycache__/__init__.cpython-36.pyc and b/q08_append_subtotals/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q08_append_subtotals/tests/__pycache__/tests.cpython-36.pyc b/q08_append_subtotals/tests/__pycache__/tests.cpython-36.pyc index da1ab93..155e7b8 100644 Binary files a/q08_append_subtotals/tests/__pycache__/tests.cpython-36.pyc and b/q08_append_subtotals/tests/__pycache__/tests.cpython-36.pyc differ diff --git a/q09_pie_chart_jan/__pycache__/__init__.cpython-36.pyc b/q09_pie_chart_jan/__pycache__/__init__.cpython-36.pyc index a0e3add..715a652 100644 Binary files a/q09_pie_chart_jan/__pycache__/__init__.cpython-36.pyc and b/q09_pie_chart_jan/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_pie_chart_jan/__pycache__/build.cpython-36.pyc b/q09_pie_chart_jan/__pycache__/build.cpython-36.pyc index 25a6c03..22e770b 100644 Binary files a/q09_pie_chart_jan/__pycache__/build.cpython-36.pyc and b/q09_pie_chart_jan/__pycache__/build.cpython-36.pyc differ diff --git a/q09_pie_chart_jan/build.py b/q09_pie_chart_jan/build.py index 6483bc6..6ff47c2 100644 --- a/q09_pie_chart_jan/build.py +++ b/q09_pie_chart_jan/build.py @@ -1,15 +1,33 @@ +# %load q09_pie_chart_jan/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split import sys,os sys.path.append(os.path.join(os.path.dirname(os.curdir))) -from greyatomlib.pandas_guided_project.q06_sub_total.build import q06_sub_total +from greyatomlib.pandas_guided_project.q01_load_data.build import q01_load_data +#from greyatomlib.pandas_guided_project.q06_sub_total.build import q06_sub_total import matplotlib.pyplot as plt plt.switch_backend('agg') +path1 = 'data/excel-comp-data.xlsx' +path2 = 'data/scraped.csv' + def q09_pie_chart_jan(path1,path2): + df_02 = q01_load_data(path1) + df_02.loc[len(df_02)] = df_02.iloc[:,6:10].sum(axis=0) + df_02.fillna(value=0, inplace=True, axis=0) + + df_03 = pd.read_csv(path2) + mapping = dict(zip(df_03['United States of America'].str.lower(), df_03['Unnamed: 6'])) + + df_02.insert(loc=6, column='abbr', value='') + df_02.iloc[:,6]=df_02['state'].map(mapping) - "write your solution here" + df_02.iloc[6,6] = 'MS' + df_02.iloc[10,6] = 'TN' + + return df_02[['Jan', 'Feb', 'Mar', 'total']].Jan.value_counts().plot(kind='pie', y='Jan') +q09_pie_chart_jan(path1,path2) diff --git a/q09_pie_chart_jan/tests/__pycache__/__init__.cpython-36.pyc b/q09_pie_chart_jan/tests/__pycache__/__init__.cpython-36.pyc index 07ab367..6b7af34 100644 Binary files a/q09_pie_chart_jan/tests/__pycache__/__init__.cpython-36.pyc and b/q09_pie_chart_jan/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_pie_chart_jan/tests/__pycache__/tests.cpython-36.pyc b/q09_pie_chart_jan/tests/__pycache__/tests.cpython-36.pyc index b3b93c2..a6c4fae 100644 Binary files a/q09_pie_chart_jan/tests/__pycache__/tests.cpython-36.pyc and b/q09_pie_chart_jan/tests/__pycache__/tests.cpython-36.pyc differ