diff --git a/adspy_shared_utilities.py b/adspy_shared_utilities.py
index 82fc273..758024f 100644
--- a/adspy_shared_utilities.py
+++ b/adspy_shared_utilities.py
@@ -18,10 +18,10 @@ def load_crime_dataset():
     crime = pd.read_table('CommViolPredUnnormalizedData.txt', sep=',', na_values='?')
     # remove features with poor coverage or lower relevance, and keep ViolentCrimesPerPop target column
-    columns_to_keep = [5, 6] + list(range(11,26)) + list(range(32, 103)) + [145]
+    columns_to_keep = [5, 6] + list(range(11,26)) + list(range(32, 103)) + [145]
     crime = crime.ix[:,columns_to_keep].dropna()
-    X_crime = crime.ix[:,range(0,88)]
+    X_crime = crime.ix[:, range(88)]
     y_crime = crime['ViolentCrimesPerPop']
     return (X_crime, y_crime)
@@ -67,9 +67,11 @@ def plot_labelled_scatter(X, y, class_labels):
     plt.xlim(x_min, x_max)
     plt.ylim(y_min, y_max)
 
-    h = []
-    for c in range(0, num_labels):
-        h.append(mpatches.Patch(color=color_array[c], label=class_labels[c]))
+    h = [
+        mpatches.Patch(color=color_array[c], label=class_labels[c])
+        for c in range(num_labels)
+    ]
+
     plt.legend(handles=h)
 
     plt.show()
@@ -80,8 +82,8 @@ def plot_class_regions_for_classifier_subplot(clf, X, y, X_test, y_test, title,
     numClasses = numpy.amax(y) + 1
     color_list_light = ['#FFFFAA', '#EFEFEF', '#AAFFAA', '#AAAAFF']
     color_list_bold = ['#EEEE00', '#000000', '#00CC00', '#0000CC']
-    cmap_light = ListedColormap(color_list_light[0:numClasses])
-    cmap_bold = ListedColormap(color_list_bold[0:numClasses])
+    cmap_light = ListedColormap(color_list_light[:numClasses])
+    cmap_bold = ListedColormap(color_list_bold[:numClasses])
 
     h = 0.03
     k = 0.5
@@ -115,7 +117,7 @@ def plot_class_regions_for_classifier_subplot(clf, X, y, X_test, y_test, title,
 
     if (target_names is not None):
         legend_handles = []
-        for i in range(0, len(target_names)):
+        for i in range(len(target_names)):
             patch = mpatches.Patch(color=color_list_bold[i], label=target_names[i])
             legend_handles.append(patch)
         subplot.legend(loc=0, handles=legend_handles)
@@ -126,8 +128,8 @@ def plot_class_regions_for_classifier(clf, X, y, X_test=None, y_test=None, title
     numClasses = numpy.amax(y) + 1
     color_list_light = ['#FFFFAA', '#EFEFEF', '#AAFFAA', '#AAAAFF']
     color_list_bold = ['#EEEE00', '#000000', '#00CC00', '#0000CC']
-    cmap_light = ListedColormap(color_list_light[0:numClasses])
-    cmap_bold = ListedColormap(color_list_bold[0:numClasses])
+    cmap_light = ListedColormap(color_list_light[:numClasses])
+    cmap_bold = ListedColormap(color_list_bold[:numClasses])
 
     h = 0.03
     k = 0.5
@@ -159,7 +161,7 @@ def plot_class_regions_for_classifier(clf, X, y, X_test=None, y_test=None, title
 
     if (target_names is not None):
         legend_handles = []
-        for i in range(0, len(target_names)):
+        for i in range(len(target_names)):
             patch = mpatches.Patch(color=color_list_bold[i], label=target_names[i])
             legend_handles.append(patch)
         plt.legend(loc=0, handles=legend_handles)
@@ -181,10 +183,10 @@ def plot_fruit_knn(X, y, n_neighbors, weights):
     # Plot the decision boundary by assigning a color in the color map
     # to each mesh point.
-    
+
     mesh_step_size = .01  # step size in the mesh
     plot_symbol_size = 50
-    
+
     x_min, x_max = X_mat[:, 0].min() - 1, X_mat[:, 0].max() + 1
     y_min, y_max = X_mat[:, 1].min() - 1, X_mat[:, 1].max() + 1
     xx, yy = numpy.meshgrid(numpy.arange(x_min, x_max, mesh_step_size),
@@ -207,10 +209,10 @@ def plot_fruit_knn(X, y, n_neighbors, weights):
     patch3 = mpatches.Patch(color='#AFAFAF', label='lemon')
     plt.legend(handles=[patch0, patch1, patch2, patch3])
-    
+
     plt.xlabel('height (cm)')
     plt.ylabel('width (cm)')
-    
+
     plt.show()
 
 def plot_two_class_knn(X, y, n_neighbors, weights, X_test, y_test):
@@ -226,10 +228,10 @@ def plot_two_class_knn(X, y, n_neighbors, weights, X_test, y_test):
     # Plot the decision boundary by assigning a color in the color map
     # to each mesh point.
-    
+
    mesh_step_size = .01  # step size in the mesh
    plot_symbol_size = 50
-    
+
    x_min, x_max = X_mat[:, 0].min() - 1, X_mat[:, 0].max() + 1
    y_min, y_max = X_mat[:, 1].min() - 1, X_mat[:, 1].max() + 1
    xx, yy = numpy.meshgrid(numpy.arange(x_min, x_max, mesh_step_size),
@@ -246,11 +248,14 @@ def plot_two_class_knn(X, y, n_neighbors, weights, X_test, y_test):
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
 
-    title = "Neighbors = {}".format(n_neighbors)
+    title = f"Neighbors = {n_neighbors}"
    if (X_test is not None):
        train_score = clf.score(X_mat, y_mat)
        test_score = clf.score(X_test, y_test)
-        title = title + "\nTrain score = {:.2f}, Test score = {:.2f}".format(train_score, test_score)
+        title += "\nTrain score = {:.2f}, Test score = {:.2f}".format(
+            train_score, test_score
+        )
+
    patch0 = mpatches.Patch(color='#FFFF00', label='class 0')
    patch1 = mpatches.Patch(color='#000000', label='class 1')
diff --git a/visuals.py b/visuals.py
index 8b8fc2f..7320bd4 100644
--- a/visuals.py
+++ b/visuals.py
@@ -21,7 +21,10 @@ def pca_results(good_data, pca):
 	Visualizes the PCA results
 	'''
 
 	# Dimension indexing
-	dimensions = dimensions = ['Dimension {}'.format(i) for i in range(1,len(pca.components_)+1)]
+	dimensions = dimensions = [
+	    f'Dimension {i}' for i in range(1, len(pca.components_) + 1)
+	]
+
 	# PCA components
 	components = pd.DataFrame(np.round(pca.components_, 4), columns = good_data.keys())
 	components.index = dimensions
@@ -97,7 +100,7 @@ def biplot(good_data, reduced_data, pca):
     # scatterplot of the reduced data
     ax.scatter(x=reduced_data.loc[:, 'Dimension 1'], y=reduced_data.loc[:, 'Dimension 2'],
         facecolors='b', edgecolors='b', s=70, alpha=0.5)
-    
+
     feature_vectors = pca.components_.T
 
     # we use scaling factors to make the arrows easier to see