[英]Issues with plotting the decision boundaries for the Iris Dataset with KNearestNeighbors
我正在嘗試為 Iris 數據集繪制 Scikit-learn 中 KNeighborsClassifier 的決策邊界。但是,我得到的圖表對我來說沒有多大意義。
我原本預期深藍色和淺藍色區域之間的邊界,會沿着我在圖片上繪制的綠線的方向延伸。
我用來生成它的代碼可以在下面找到。它的靈感來自於 scikit-learn 的示例「繪制 VotingClassifier 的決策邊界」。
我錯過了什么或不理解什么?
# -*- coding: utf-8 -*-
"""
Created on Sat May 30 14:22:05 2020
@author: KamKam
Plotting the decision boundaries for KNearestNeighbours.
"""
# Import required modules.
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from matplotlib.colors import ListedColormap
# Neighbour counts to compare; one subplot is drawn per value.
n_neighbors = [1, 3, 9]

# Load the iris dataset.
iris = datasets.load_iris()
# Keep only the last two features. According to iris.feature_names these are
# petal length (column 0 of X, the x axis) and petal width (column 1 of X,
# the y axis).
X = iris.data[:, 2:4]
y = iris.target

# Split the samples by class so that each class gets its own scatter call and
# therefore its own legend entry. Boolean masks replace the manual
# per-sample copy loop.
X0 = X[y == 0]
X1 = X[y == 1]
X2 = X[y == 2]

h = 0.02  # Step size of the mesh.
plotCount = 0  # Index of the subplot that is currently being drawn.

# Colour map used for the predicted-class background.
cmap_bold = ListedColormap(['darkorange', 'c', 'darkblue'])

# The mesh [x_min, x_max] x [y_min, y_max] depends only on the data, not on
# the number of neighbours, so it is built once outside the loop.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
grid_points = np.c_[xx.ravel(), yy.ravel()]

# Initialize plotting. Close all the currently open plots, then create the
# figure with one subplot per classifier.
plt.close('all')
fig, axs = plt.subplots(1, 3)
axs = axs.ravel()

for j in n_neighbors:
    # Create the neighbours classifier and fit it on the two petal features.
    knn = KNeighborsClassifier(n_neighbors=j)
    knn.fit(X, y)

    # Predict a class for every mesh point and put the result into a color
    # plot.
    Z = knn.predict(grid_points)
    Z = Z.reshape(xx.shape)
    axs[plotCount].pcolormesh(xx, yy, Z, cmap=cmap_bold)

    # Plot the training points, one scatter call per class.
    axs[plotCount].scatter(X0[:, 0], X0[:, 1], c='k', marker='o',
                           label=iris.target_names[0])
    axs[plotCount].scatter(X1[:, 0], X1[:, 1], c='r', marker='o',
                           label=iris.target_names[1])
    # Both coordinates must come from X2; mixing X1's x values with X2's y
    # values placed the third class at the wrong positions.
    axs[plotCount].scatter(X2[:, 0], X2[:, 1], c='y', marker='o',
                           label=iris.target_names[2])

    # Column 0 of X is petal length and column 1 is petal width.
    axs[plotCount].set_xlabel('Petal Length')
    axs[plotCount].set_ylabel('Petal Width')
    axs[plotCount].legend()
    axs[plotCount].set_title('n_neighbours = ' + str(j))
    plotCount += 1

fig.suptitle('Petal Width vs Length')
plt.show()
引入數組 X0、X1 和 X2 似乎使事情變得過於復雜,也讓代碼難以寫得 Pythonic。
Python中應該避免的一些事情:
`plotCount` 僅用於遍歷各個子圖的軸,可以省略,並替換為 `for j, ax in zip(n_neighbors, axs)`。
`X0`、`X1` 和 `X2` 的內容可以通過 `X[:, 0][y == y_val], X[:, 1][y == y_val]` 直接獲得,此外還可以輕松地在一個循環中編寫散點圖。您可以在 numpy 文檔中閱讀有關高級索引(advanced indexing)的更多信息。
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from matplotlib.colors import ListedColormap
# Neighbour counts to compare; one subplot is drawn per value.
n_neighbors = [1, 3, 9]

# Load the iris dataset.
iris = datasets.load_iris()
# Keep only the last two features. According to iris.feature_names these are
# petal length (column 0 of X, the x axis) and petal width (column 1 of X,
# the y axis).
X = iris.data[:, 2:4]
y = iris.target

h = 0.02  # Step size of the mesh; also used as the margin around the data.

# Colour map used for the predicted-class background.
cmap_bold = ListedColormap(['darkorange', 'c', 'darkblue'])

# The mesh [x_min, x_max] x [y_min, y_max] depends only on the data, not on
# the number of neighbours, so it is built once outside the loop.
x_min, x_max = X[:, 0].min() - h, X[:, 0].max() + h
y_min, y_max = X[:, 1].min() - h, X[:, 1].max() + h
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
grid_points = np.c_[xx.ravel(), yy.ravel()]

# Initialize plotting. Close all the currently open plots, then create the
# figure with one subplot per classifier.
plt.close('all')
fig, axs = plt.subplots(1, 3)
axs = axs.ravel()

for j, ax in zip(n_neighbors, axs):
    # Create the neighbours classifier and fit it on the two petal features.
    knn = KNeighborsClassifier(n_neighbors=j)
    knn.fit(X, y)

    # Predict a class for every mesh point and put the result into a color
    # plot.
    Z = knn.predict(grid_points).reshape(xx.shape)
    ax.pcolormesh(xx, yy, Z, cmap=cmap_bold)

    # Plot the training points; a boolean mask selects the samples of each
    # class, so every class gets its own colour and legend entry.
    for y_val, (color, name) in enumerate(zip(['k', 'r', 'y'],
                                              iris.target_names)):
        mask = y == y_val
        ax.scatter(X[mask, 0], X[mask, 1], c=color, marker='o', label=name)

    # Column 0 of X is petal length and column 1 is petal width.
    ax.set_xlabel('Petal Length')
    ax.set_ylabel('Petal Width')
    ax.legend()
    ax.set_title(f'n_neighbours = {j}')

fig.suptitle('Petal Width vs Length')
plt.show()
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.