From 71ae781754f0207d3c774c86b36a82362c4b05cc Mon Sep 17 00:00:00 2001
From: ZijuanZhang
Date: Sat, 19 Nov 2016 17:06:26 -0600
Subject: [PATCH 1/2] add pandas_Series.py file

---
Review note: payload lines were corrected in place (line counts unchanged),
so the blob ids on the "index" line below predate those corrections.

 pandas_Series.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 pandas_Series.py

diff --git a/pandas_Series.py b/pandas_Series.py
new file mode 100644
index 0000000..7dde728
--- /dev/null
+++ b/pandas_Series.py
@@ -0,0 +1,54 @@
+"""
+One of the most popular Python libraries to manipulate and analyze data.
+"""
+
+import pandas as pd
+import numpy as np
+
+# one of the two core pandas structures, Series: 1-D labeled array
+# holding any data type: str, int, float, python object etc.
+# method 1: create from a list or ndarray; if an index is given,
+# its length must be the same as the data length
+x = list(range(100, 105))
+sr1 = pd.Series(x, name="ID",
+                index=['a', 'b', 'c', 'd', 'e'])
+print(sr1)
+
+# method 2: create from dict
+d = {'a': 0, 'b': 1, 'c': 2, 'd': 3.0}
+sr2 = pd.Series(d)
+print(sr2)
+
+# data type changed
+d = {'a': 0, 'b': 1, 'c': 'c', 'd': 3.0}
+sr3 = pd.Series(d)
+print(sr3)
+
+# NaN is standard missing data marker in pandas
+# pay attention to the order
+sr4 = pd.Series(d, index=['a', 'b', 'c', 'e', 'd'])
+print(sr4)
+
+# from scalar value, index must be provided to tell the length
+sr5 = pd.Series(2, index=['a', 'b', 'c'], name='scalar')
+print(sr5)
+
+# ndarray-like and dict-like, it is mutable,
+# and can be manipulated like dict and ndarray
+# object dtype (python ints) can not take np.sqrt, np.exp !!!!
+sr3['c'] = 2
+print(sr3)
+print(sr3.iloc[1])
+print(sr3+sr3)
+print(sr3*3)
+# print(np.sqrt(sr3)) raises AttributeError/TypeError on object dtype
+print(sr3[sr3 >= 2])
+print(sr3)
+sr3['e'] = 4.0
+sr3 = pd.Series(sr3, dtype=float, name='example')
+print(sr3)
+print(np.sqrt(sr3))
+print(sr3.name)
+sr6 = sr3.rename("changed")  # scalar rename needs pandas >= 0.18.1
+print(sr6)
+print(pd.__version__)

From e2dba9abbd46463ea5457db6f92bb77d1479263e Mon Sep 17 00:00:00 2001
From: Zijuan Zhang
Date: Sat, 10 Dec 2016 10:35:02 -0600
Subject: [PATCH 2/2] make pandas_series.py print clear and add pandas dataframe.py file

---
Review note: payload and context lines were corrected in place (line counts
unchanged), so the blob ids on the "index" lines below predate those
corrections. Context lines match the corrected PATCH 1/2.

 pandas_DataFrame.py | 78 +++++++++++++++++++++++++++++++++++++++++++++
 pandas_Series.py    | 10 ++++++
 2 files changed, 88 insertions(+)
 create mode 100644 pandas_DataFrame.py

diff --git a/pandas_DataFrame.py b/pandas_DataFrame.py
new file mode 100644
index 0000000..66ca00e
--- /dev/null
+++ b/pandas_DataFrame.py
@@ -0,0 +1,78 @@
+"""
+DataFrame is a 2-D labeled data structure whose columns may differ in type.
+"""
+
+import pandas as pd
+import numpy as np
+import random
+
+# one of the two core pandas structures, DataFrame: 2-D labeled data
+# create from dict
+df1 = pd.DataFrame({'id': [100, 101, 102], 'color': ['red', 'blue', 'green']})
+print('--------------df1-------------')
+print(df1)
+# a dict may not keep its key order, so to get ordered columns
+# pass the columns= and index= label arguments explicitly
+df2 = pd.DataFrame({'id': [100, 101, 102],
+                    'color': ['red', 'blue', 'green']},
+                   columns=['id', 'color'],
+                   index=['a', 'b', 'c'])
+print('--------------df2-------------')
+print(df2)
+
+# create from list of lists,
+# default index and column name will start from 0
+df3 = pd.DataFrame([[100, 'red'], [101, 'blue'], [102, 'green']])
+print('--------------df3-------------')
+print(df3)
+df4 = pd.DataFrame([[100, 'red'], [101, 'blue'], [102, 'green']],
+                   columns=['id', 'color'],
+                   index=['a', 'b', 'c'])
+print('--------------df4-------------')
+print(df4)
+
+# create from ndarray
+arr = np.random.rand(4, 2)  # create a 4*2 ndarray of random numbers
+print('--------------arr-------------')
+print(arr)
+df5 = pd.DataFrame(arr, columns=['one', 'two'])
+print('--------------df5-------------')
+print(df5)
+
+df6 = pd.DataFrame({'Id': np.arange(100, 110, 1),
+                    'scores': np.random.randint(60, 101, 10)})
+print('--------------df6-------------')
+print(df6)
+# to use the 'Id' column as the index
+df7 = df6.set_index('Id')
+print('--------------df7-------------')
+print(df7)
+# to drop index name
+df8 = df7.rename_axis(None)
+print('--------------df8-------------')
+print(df8)
+
+# create from structured or record array
+data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f8'), ('C', 'S10')])
+# i4--int32, f8--float64, S10--10 byte string ('a10' is a deprecated alias)
+print('--------------data-------------')
+print(data)
+data[:] = [(1, 2, 'Hello'), (3, 4., 'World')]
+print('--------------revised data-------------')
+print(data)
+df9 = pd.DataFrame.from_records(data, index='C')
+print('--------------df9-------------')
+print(df9)
+
+# column selection, addition, deletion
+print('------DataFrame column Selection-------')
+print(df9['A'])
+print(df9.B)  # attribute access fails if the column name has whitespace
+df9['D'] = [2, 3.5]
+df9['E'] = df9.A * df9.B
+print('------DataFrame column addition-------')
+print(df9)
+del df9['B']
+df9.pop('A')
+print('------DataFrame column deletion-------')
+print(df9)
diff --git a/pandas_Series.py b/pandas_Series.py
index 7dde728..5b9c31a 100644
--- a/pandas_Series.py
+++ b/pandas_Series.py
@@ -12,31 +12,37 @@
 x = list(range(100, 105))
 sr1 = pd.Series(x, name="ID",
                 index=['a', 'b', 'c', 'd', 'e'])
+print('-----sr1-----')
 print(sr1)
 
 # method 2: create from dict
 d = {'a': 0, 'b': 1, 'c': 2, 'd': 3.0}
 sr2 = pd.Series(d)
+print('-----sr2-----')
 print(sr2)
 
 # data type changed
 d = {'a': 0, 'b': 1, 'c': 'c', 'd': 3.0}
 sr3 = pd.Series(d)
+print('-----sr3-----')
 print(sr3)
 
 # NaN is standard missing data marker in pandas
 # pay attention to the order
 sr4 = pd.Series(d, index=['a', 'b', 'c', 'e', 'd'])
+print('-----sr4-----')
 print(sr4)
 
 # from scalar value, index must be provided to tell the length
 sr5 = pd.Series(2, index=['a', 'b', 'c'], name='scalar')
+print('-----sr5-----')
 print(sr5)
 
 # ndarray-like and dict-like, it is mutable,
 # and can be manipulated like dict and ndarray
 # object dtype (python ints) can not take np.sqrt, np.exp !!!!
 sr3['c'] = 2
+print('-----changed sr3-----')
 print(sr3)
 print(sr3.iloc[1])
 print(sr3+sr3)
@@ -46,9 +52,13 @@
 print(sr3)
 sr3['e'] = 4.0
 sr3 = pd.Series(sr3, dtype=float, name='example')
+print('---sr3 add item and name, change type---')
 print(sr3)
 print(np.sqrt(sr3))
 print(sr3.name)
 sr6 = sr3.rename("changed")  # scalar rename needs pandas >= 0.18.1
+print('-----sr6-----')
 print(sr6)
+print(sr6.name)
+print(sr3)
 print(pd.__version__)