[英]When importing a module, I get a NameError about certain variable not being defined (even though it is)
因此,我試圖將模塊/腳本(.py文件)導入Jupyter筆記本,主要是為了提高可讀性和簡潔性。 但是,當我嘗試在腳本中運行該類時,出現以下錯誤消息:
NameError Traceback (most recent call last)
<ipython-input-48-4d8cbba46ed0> in <module>()
8
9 test_KMeans = KMeans(k=3, maxiter=1000, tol=1e-9)
---> 10 cluster_center = test_KMeans.fit(X)
11 clusters = test_KMeans.predict(X)
12
~/KMeans.py in fit(self, X)
42 #Choose k random rows of X as the initial cluster centers.
43 initial_cluster_centers = []
---> 44
45 sample = np.random.randint(0,m,size=k)
46
NameError: name 'maxiter' is not defined
這是我的腳本:
import numpy as np
from sklearn.decomposition import PCA
k = 3
maxiter = 1000
tol = 1e-9
class KMeans:
"""A K-Means object class. Implements basic k-means clustering.
Attributes:
k (int): The number of clusters
maxiter (int): The maximum number of iterations
tol (float): A convergence tolerance
"""
def __init__(self, k, maxiter, tol):
"""Set the paramters.
Parameters:
k (int): The number of clusters
maxiter (int): The maximum number of iterations
tol (float): A convergence tolerance
"""
k = 3
maxiter = 1000
tol = 1e-9
self.k = k # Initialize some attributes.
self.maxiter = maxiter
self.tol = tol
def fit(self, X):
"""Accepts an mxn matrix X of m data points with n features.
"""
m,n = X.shape
k = 3
maxiter = 1000
tol = 1e-9
self.m = m
self.n = n
#Choose k random rows of X as the initial cluster centers.
initial_cluster_centers = []
sample = np.random.randint(0,m,size=k)
initial_cluster_centers = X[sample, :]
# Run the k-means iteration until consecutive centers are within the convergence tolerance, or until
# iterating the maximum number of times.
iterations = 0
old_cluster = np.zeros(initial_cluster_centers.shape)
new_cluster = initial_cluster_centers
while iterations < maxiter or np.linalg.norm(old_cluster - new_cluster) >= tol:
#assign each data point to the cluster center that is closest, forming k clusters
clusters = np.zeros(m)
for i in range(0,m):
distances = np.linalg.norm(X[i] - initial_cluster_centers, ord=2, axis=1) # axis=1 was crucial
cluster = np.argmin(distances) #in getting this to work
clusters[i] = cluster
# Store the old/initial centroid values
old_cluster = np.copy(new_cluster)
#Recompute the cluster centers as the means of the new clusters
for i in range(k):
points = [X[j] for j in range(m) if clusters[j] == i]
new_cluster[i] = np.mean(points, axis=0)
#If a cluster is empty, reassign the cluster center as a random row of X.
if new_cluster[i] == []:
new_cluster[i] = X[np.random.randint(0,m,size=1)]
iterations += 1
#Save the cluster centers as attributes.
self.new_cluster = new_cluster
#print("New cluster centers:\n", new_cluster)
return new_cluster
def predict(self, X):
"""Accept an l × n matrix X of data.
"""
# Return an array of l integers where the ith entry indicates which
# cluster center the ith row of X is closest to.
clusters = np.zeros(self.m)
for i in range(0,self.m):
distances = np.linalg.norm(X[i] - self.new_cluster, ord=2, axis=1)
cluster = np.argmin(distances)
clusters[i] = cluster
print("\nClusters:", clusters)
return clusters
然后,我嘗試執行以下操作:
from KMeans import KMeans
X = features_scaled
# k = 3
# maxiter = 1000
# tol = 1e-9
test_KMeans = KMeans(k=3, maxiter=1000, tol=1e-9)
cluster_center = test_KMeans.fit(X)
clusters = test_KMeans.predict(X)
pca = PCA(n_components=2)
pr_components = pca.fit_transform(X) # these are the first 2 principal components
#plot the first two principal components as a scatter plot, where the color of each point is det by the clusters
plt.scatter(pr_components[:,0], pr_components[:,1],
c=clusters, edgecolor='none', alpha=0.5, #color by clusters
cmap=plt.cm.get_cmap('tab10', 3))
plt.xlabel('principal component 1')
plt.ylabel('principal component 2')
plt.colorbar()
plt.title("K-Means Clustering:")
plt.show()
運行以上代碼后,我得到了我描述的NameError。 我不明白為什么它告訴我maxiter
。 您會看到我在腳本中多次定義了變量k, maxiter, tol
,試圖使其正常工作,但是沒有任何作用。 我曾經有過self.maxiter
和self.tol
,但這也不能解決。
我知道此代碼有效,因為我已經多次使用它。 最初,我只是定義了變量k,maxiter和tol.。然后實例化該類並稱為fit和預測方法,由於它們與self一起存儲為屬性,所以一切正常。 但是現在我嘗試將其作為模塊導入,我不知道為什么它不起作用。
謝謝你的幫助!
編輯:這是我的代碼在Jupyter筆記本中的單個單元格中的樣子。.在這種情況下,它確實可以運行並起作用:
from sklearn.decomposition import PCA
class KMeans:
"""A K-Means object class. Implements basic k-means clustering.
Attributes:
k (int): The number of clusters
maxiter (int): The maximum number of iterations
tol (float): A convergence tolerance
"""
def __init__(self, k, maxiter, tol):
"""Set the paramters.
Parameters:
k (int): The number of clusters
maxiter (int): The maximum number of iterations
tol (float): A convergence tolerance
"""
self.k = k # Initialize some attributes.
self.maxiter = maxiter
self.tol = tol
def fit(self, X):
"""Accepts an mxn matrix X of m data points with n features.
"""
m,n = X.shape
self.m = m
self.n = n
#Choose k random rows of X as the initial cluster centers.
initial_cluster_centers = []
sample = np.random.randint(0,m,size=self.k)
initial_cluster_centers = X[sample, :]
# Run the k-means iteration until consecutive centers are within the convergence tolerance, or until
# iterating the maximum number of times.
iterations = 0
old_cluster = np.zeros(initial_cluster_centers.shape)
new_cluster = initial_cluster_centers
while iterations < maxiter or np.linalg.norm(old_cluster - new_cluster) >= tol:
#assign each data point to the cluster center that is closest, forming k clusters
clusters = np.zeros(m)
for i in range(0,m):
distances = np.linalg.norm(X[i] - initial_cluster_centers, ord=2, axis=1) # axis=1 was crucial
cluster = np.argmin(distances) #in getting this to work
clusters[i] = cluster
# Store the old/initial centroid values
old_cluster = np.copy(new_cluster)
#Recompute the cluster centers as the means of the new clusters
for i in range(k):
points = [X[j] for j in range(m) if clusters[j] == i]
new_cluster[i] = np.mean(points, axis=0)
#If a cluster is empty, reassign the cluster center as a random row of X.
if new_cluster[i] == []:
new_cluster[i] = X[np.random.randint(0,m,size=1)]
iterations += 1
#Save the cluster centers as attributes.
self.new_cluster = new_cluster
#print("New cluster centers:\n", new_cluster)
return new_cluster
def predict(self, X):
"""Accept an l × n matrix X of data.
"""
# Return an array of l integers where the ith entry indicates which
# cluster center the ith row of X is closest to.
clusters = np.zeros(self.m)
for i in range(0,self.m):
distances = np.linalg.norm(X[i] - self.new_cluster, ord=2, axis=1)
cluster = np.argmin(distances)
clusters[i] = cluster
print("\nClusters:", clusters)
return clusters
X = features_scaled
k = 3
maxiter = 1000
tol = 1e-9
test_KMeans = KMeans(k,maxiter,tol)
test_KMeans.fit(X)
clusters = test_KMeans.predict(X)
pca = PCA(n_components=2)
pr_components = pca.fit_transform(X) # these are the first 2 principal components
#plot the first two principal components as a scatter plot, where the color of each point is det by the clusters
plt.scatter(pr_components[:,0], pr_components[:,1],
c=clusters, edgecolor='none', alpha=0.5, #color by clusters
cmap=plt.cm.get_cmap('tab10', 3))
plt.xlabel('principal component 1')
plt.ylabel('principal component 2')
plt.colorbar()
plt.title("K-Means Clustering:")
plt.show()
追溯似乎表明Jupyter與Kmeans.py中的當前代碼狀態不同步(因為它指向第44行...這是空的)。 因此,如果計算時間不長,您可以嘗試退出並重新啟動Jupyter,以解決問題。
導入模塊時,Python執行模塊的代碼。 如果在導入模塊后對模塊的代碼進行了更改,則這些更改不會反映在Python解釋器的狀態中。 這可以解釋為什么Jupyter筆記本的錯誤似乎與Kmeans.py的狀態不同步。
除了退出並重新啟動Python外,您還可以重新加載modules 。 例如,在Python3.4或更高版本中,您可以使用
import sys
import importlib
from Kmeans import Kmeans
# make changes to Kmeans.py
importlib.reload(sys.modules['Kmeans'])
# now the Python interpreter should be aware of changes made to Kmeans.py
但是,使用IPython,有一種更簡單的方法。 您可以啟用自動重新加載 :
從命令行運行:
ipython profile create
然后通過添加~/.ipython/profile_default/ipython_config.py
c.InteractiveShellApp.extensions = ['autoreload']
c.InteractiveShellApp.exec_lines = ['%autoreload 2']
退出並重新啟動IPython以使更改生效。 現在,當對定義該模塊的基礎代碼進行更改時,IPython將自動重新加載任何模塊。 在大多數情況下,自動重新加載效果很好,但是在某些情況下,自動重新加載可能會失敗。 有關自動重載及其注意事項的更多信息,請參閱文檔 。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.