Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 24 additions & 19 deletions adspy_shared_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ def load_crime_dataset():

crime = pd.read_table('CommViolPredUnnormalizedData.txt', sep=',', na_values='?')
# remove features with poor coverage or lower relevance, and keep ViolentCrimesPerPop target column
columns_to_keep = [5, 6] + list(range(11,26)) + list(range(32, 103)) + [145]
columns_to_keep = [5, 6] + list(range(11,26)) + list(range(32, 103)) + [145]
crime = crime.ix[:,columns_to_keep].dropna()

X_crime = crime.ix[:,range(0,88)]
X_crime = crime.ix[:, range(88)]
Comment on lines -21 to +24
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function load_crime_dataset refactored with the following changes:

- Replace `range(0, 88)` with the equivalent `range(88)`.

y_crime = crime['ViolentCrimesPerPop']

return (X_crime, y_crime)
Expand Down Expand Up @@ -67,9 +67,11 @@ def plot_labelled_scatter(X, y, class_labels):
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)

h = []
for c in range(0, num_labels):
h.append(mpatches.Patch(color=color_array[c], label=class_labels[c]))
h = [
mpatches.Patch(color=color_array[c], label=class_labels[c])
for c in range(num_labels)
]

Comment on lines -70 to +74
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

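Function plot_labelled_scatter refactored with the following changes:

- Convert the `for` loop that appends to `h` into a list comprehension.
- Replace `range(0, num_labels)` with the equivalent `range(num_labels)`.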

plt.legend(handles=h)

plt.show()
Expand All @@ -80,8 +82,8 @@ def plot_class_regions_for_classifier_subplot(clf, X, y, X_test, y_test, title,
numClasses = numpy.amax(y) + 1
color_list_light = ['#FFFFAA', '#EFEFEF', '#AAFFAA', '#AAAAFF']
color_list_bold = ['#EEEE00', '#000000', '#00CC00', '#0000CC']
cmap_light = ListedColormap(color_list_light[0:numClasses])
cmap_bold = ListedColormap(color_list_bold[0:numClasses])
cmap_light = ListedColormap(color_list_light[:numClasses])
cmap_bold = ListedColormap(color_list_bold[:numClasses])
Comment on lines -83 to +86
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

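Function plot_class_regions_for_classifier_subplot refactored with the following changes:

- Replace the slices `[0:numClasses]` with the equivalent `[:numClasses]`.
- Replace `range(0, len(target_names))` with the equivalent `range(len(target_names))`.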


h = 0.03
k = 0.5
Expand Down Expand Up @@ -115,7 +117,7 @@ def plot_class_regions_for_classifier_subplot(clf, X, y, X_test, y_test, title,

if (target_names is not None):
legend_handles = []
for i in range(0, len(target_names)):
for i in range(len(target_names)):
patch = mpatches.Patch(color=color_list_bold[i], label=target_names[i])
legend_handles.append(patch)
subplot.legend(loc=0, handles=legend_handles)
Expand All @@ -126,8 +128,8 @@ def plot_class_regions_for_classifier(clf, X, y, X_test=None, y_test=None, title
numClasses = numpy.amax(y) + 1
color_list_light = ['#FFFFAA', '#EFEFEF', '#AAFFAA', '#AAAAFF']
color_list_bold = ['#EEEE00', '#000000', '#00CC00', '#0000CC']
cmap_light = ListedColormap(color_list_light[0:numClasses])
cmap_bold = ListedColormap(color_list_bold[0:numClasses])
cmap_light = ListedColormap(color_list_light[:numClasses])
cmap_bold = ListedColormap(color_list_bold[:numClasses])
Comment on lines -129 to +132
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

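Function plot_class_regions_for_classifier refactored with the following changes:

- Replace the slices `[0:numClasses]` with the equivalent `[:numClasses]`.
- Replace `range(0, len(target_names))` with the equivalent `range(len(target_names))`.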


h = 0.03
k = 0.5
Expand Down Expand Up @@ -159,7 +161,7 @@ def plot_class_regions_for_classifier(clf, X, y, X_test=None, y_test=None, title

if (target_names is not None):
legend_handles = []
for i in range(0, len(target_names)):
for i in range(len(target_names)):
patch = mpatches.Patch(color=color_list_bold[i], label=target_names[i])
legend_handles.append(patch)
plt.legend(loc=0, handles=legend_handles)
Expand All @@ -181,10 +183,10 @@ def plot_fruit_knn(X, y, n_neighbors, weights):

# Plot the decision boundary by assigning a color in the color map
# to each mesh point.

mesh_step_size = .01 # step size in the mesh
plot_symbol_size = 50

Comment on lines -184 to +189
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Found the following improvement in Function plot_fruit_knn:

x_min, x_max = X_mat[:, 0].min() - 1, X_mat[:, 0].max() + 1
y_min, y_max = X_mat[:, 1].min() - 1, X_mat[:, 1].max() + 1
xx, yy = numpy.meshgrid(numpy.arange(x_min, x_max, mesh_step_size),
Expand All @@ -207,10 +209,10 @@ def plot_fruit_knn(X, y, n_neighbors, weights):
patch3 = mpatches.Patch(color='#AFAFAF', label='lemon')
plt.legend(handles=[patch0, patch1, patch2, patch3])


plt.xlabel('height (cm)')
plt.ylabel('width (cm)')

plt.show()

def plot_two_class_knn(X, y, n_neighbors, weights, X_test, y_test):
Expand All @@ -226,10 +228,10 @@ def plot_two_class_knn(X, y, n_neighbors, weights, X_test, y_test):

# Plot the decision boundary by assigning a color in the color map
# to each mesh point.

mesh_step_size = .01 # step size in the mesh
plot_symbol_size = 50

Comment on lines -229 to +234
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

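Function plot_two_class_knn refactored with the following changes:

- Replace the `str.format` call that builds the initial `title` with an f-string.
- Replace `title = title + ...` with the augmented assignment `title += ...`.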

x_min, x_max = X_mat[:, 0].min() - 1, X_mat[:, 0].max() + 1
y_min, y_max = X_mat[:, 1].min() - 1, X_mat[:, 1].max() + 1
xx, yy = numpy.meshgrid(numpy.arange(x_min, x_max, mesh_step_size),
Expand All @@ -246,11 +248,14 @@ def plot_two_class_knn(X, y, n_neighbors, weights, X_test, y_test):
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())

title = "Neighbors = {}".format(n_neighbors)
title = f"Neighbors = {n_neighbors}"
if (X_test is not None):
train_score = clf.score(X_mat, y_mat)
test_score = clf.score(X_test, y_test)
title = title + "\nTrain score = {:.2f}, Test score = {:.2f}".format(train_score, test_score)
title += "\nTrain score = {:.2f}, Test score = {:.2f}".format(
train_score, test_score
)


patch0 = mpatches.Patch(color='#FFFF00', label='class 0')
patch1 = mpatches.Patch(color='#000000', label='class 1')
Expand Down
7 changes: 5 additions & 2 deletions visuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ def pca_results(good_data, pca):
Visualizes the PCA results
'''
# Dimension indexing
dimensions = dimensions = ['Dimension {}'.format(i) for i in range(1,len(pca.components_)+1)]
dimensions = dimensions = [
f'Dimension {i}' for i in range(1, len(pca.components_) + 1)
]

Comment on lines -24 to +27
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function pca_results refactored with the following changes:

- Replace the `'Dimension {}'.format(i)` call with the f-string `f'Dimension {i}'`.

# PCA components
components = pd.DataFrame(np.round(pca.components_, 4), columns = good_data.keys())
components.index = dimensions
Expand Down Expand Up @@ -97,7 +100,7 @@ def biplot(good_data, reduced_data, pca):
# scatterplot of the reduced data
ax.scatter(x=reduced_data.loc[:, 'Dimension 1'], y=reduced_data.loc[:, 'Dimension 2'],
facecolors='b', edgecolors='b', s=70, alpha=0.5)

Comment on lines -100 to +103
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Found the following improvement in Function biplot:

feature_vectors = pca.components_.T

# we use scaling factors to make the arrows easier to see
Expand Down