InCodeLearning · ZijuanZhang · Nov 19, 2016 · Dec 10, 2016
diff --git a/pandas_DataFrame.py b/pandas_DataFrame.py
@@ -0,0 +1,78 @@
+"""
+A DataFrame is a 2-D labeled data structure whose columns may hold different types.
+"""
+
+import pandas as pd
+import numpy as np
+import random
+
+# DataFrame is one of the two core pandas structures: a 2-D labeled table.
+# Build one from a dict that maps each column name to its list of values.
+records = {'id': [100, 101, 102], 'color': ['red', 'blue', 'green']}
+df1 = pd.DataFrame(records)
+print('--------------df1-------------')
+print(df1)
+# A dict may not keep its key order, so pass `columns` to pin the column
+# order and `index` to supply explicit row labels.
+df2 = pd.DataFrame(records,
+                   columns=['id', 'color'],
+                   index=['a', 'b', 'c'])
+print('--------------df2-------------')
+print(df2)
+
+# Build from a list of rows (each row is itself a list).
+# With no labels given, both the index and the columns default to 0, 1, ...
+rows = [[100, 'red'], [101, 'blue'], [102, 'green']]
+df3 = pd.DataFrame(rows)
+print('--------------df3-------------')
+print(df3)
+# Same rows, this time with explicit column names and row labels.
+df4 = pd.DataFrame(rows, columns=['id', 'color'], index=['a', 'b', 'c'])
+print('--------------df4-------------')
+print(df4)
+
+# Build from a NumPy ndarray.
+n_rows, n_cols = 4, 2
+arr = np.random.rand(n_rows, n_cols)  # random floats, shape (4, 2)
+print('--------------arr-------------', arr, sep='\n')
+# Column labels are given explicitly; the row index defaults to 0..3.
+df5 = pd.DataFrame(data=arr, columns=['one', 'two'])
+print('--------------df5-------------', df5, sep='\n')
+
+df6 = pd.DataFrame({'Id': np.arange(100, 110),
+                    'scores': np.random.randint(60, 101, size=10)})
+print('--------------df6-------------')
+print(df6)
+# set_index (not reset_index) promotes the 'Id' column to the row index
+df7 = df6.set_index('Id')
+print('--------------df7-------------')
+print(df7)
+# rename_axis(None) drops the index name ('Id') inherited from the column
+df8 = df7.rename_axis(None)
+print('--------------df8-------------')
+print(df8)
+
+# Build from a structured (record) array: i4 = int32, f8 = float64,
+# S10 = 10-byte string ('S' replaces the 'a' alias deprecated in NumPy 2.0)
+data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f8'), ('C', 'S10')])
+print('--------------data-------------')
+print(data)
+data[:] = [(1, 2, 'Hello'), (3, 4., 'World')]
+print('--------------revised data-------------')
+print(data)
+df9 = pd.DataFrame.from_records(data, index='C')
+print('--------------df9-------------')
+print(df9)
+
+# Column selection, addition and deletion, demonstrated on df9.
+print('------DataFrame column Selection-------')
+print(df9['A'])
+print(df9.B)  # attribute access needs an identifier-like name (no spaces)
+df9['D'] = [2, 3.5]
+df9['E'] = df9['A'] * df9['B']
+print('------DataFrame column addition-------')
+print(df9)
+df9.pop('A')  # pop returns the removed column; the result is unused here
+del df9['B']
+print('------DataFrame column deletion-------')
+print(df9)
diff --git a/pandas_Series.py b/pandas_Series.py
@@ -0,0 +1,64 @@
+"""
+pandas is one of the most popular Python libraries for manipulating and analyzing data.
+"""
+
+import pandas as pd
+import numpy as np
+
+# Series is one of the two core pandas structures: a 1-D labeled array
+# that can hold str, int, float, arbitrary Python objects, etc.
+# Method 1: create from a list (or ndarray). If an index is given,
+# its length must equal the length of the data.
+x = list(range(100, 105))
+sr1 = pd.Series(x, name="ID",
+                index=['a', 'b', 'c', 'd', 'e'])
+print('-----sr1-----')
+print(sr1)
+
+# Method 2: create from a dict; the keys become the index labels.
+d = dict(a=0, b=1, c=2, d=3.0)
+sr2 = pd.Series(data=d)
+print('-----sr2-----')
+print(sr2)
+
+# Mixing a str with numbers makes the Series fall back to dtype object.
+d = dict(a=0, b=1, c='c', d=3.0)
+sr3 = pd.Series(data=d)
+print('-----sr3-----')
+print(sr3)
+
+# An index label missing from the dict ('e') is filled with NaN, the
+# standard pandas missing-data marker; rows follow the order of `index`.
+sr4 = pd.Series(data=d, index=list('abced'))
+print('-----sr4-----')
+print(sr4)
+
+# Broadcasting a scalar: the index is required because it fixes the length.
+sr5 = pd.Series(data=2, index=list('abc'), name='scalar')
+print('-----sr5-----')
+print(sr5)
+
+# A Series is both ndarray-like and dict-like: it is mutable and supports
+# label assignment, positional access, arithmetic and boolean masks.
+# sr3 still has dtype object here, so NumPy ufuncs (sqrt, exp) fail on it.
+sr3['c'] = 2
+print('-----changed sr3-----')
+print(sr3)
+print(sr3.iloc[1])  # by position; sr3[1] is deprecated on a label index
+print(sr3 + sr3)
+print(sr3 * 3)
+# NOTE: np.sqrt(sr3) raises at this point because the dtype is object
+print(sr3[sr3 >= 2])
+print(sr3)
+sr3['e'] = 4.0  # assigning to a new label appends an item
+sr3 = pd.Series(sr3, dtype=float, name='example')
+print('---sr3 add item and name, change type---')
+print(sr3)
+print(np.sqrt(sr3))  # works now that the dtype is float
+print(sr3.name)
+sr6 = sr3.rename("changed")  # scalar rename needs pandas >= 0.18
+print('-----sr6-----')
+print(sr6)
+print(sr6.name)
+print(sr3)  # unchanged: rename returned a new Series
+print(pd.__version__)