简体   繁体   中英

MNIST Python numpy eigen vectors visualization error

I am trying to perform PCA on MNIST dataset, as part of the process I need to generate the eigen vectors and visualize the top features. Following is my algorithm:

  1. Load images
  2. Subtract mean
  3. Generate Covariance matrix
  4. Derive eigen vectors and eigen values

It's fairly a simple algorithm to run; my first task is to visualize the top 10 eigen vectors as images. Following is the code that I have so far:

__author__      =   "Ajay Krishna Teja Kavuri"


import numpy as np 
import random
from mnist import MNIST
import matplotlib.pylab as plt

class PCAMNIST:


    #Initialization
    def __init__(self):
        #Load MNIST datset
        mnistData = MNIST('./mnistData')
        self.imgTrain,self.lblTrain=mnistData.load_training()
        self.imgTrainSmpl=self.imgTrain[:60000]
        np.seterr(all='warn')


    #1. Subtract the mean because the PCA will work better
    def subMean(self):
        try:
            self.sumImg = np.empty([784,])
            #calculate the sum
            for img in self.imgTrainSmpl:
                imgArr = np.asarray(img)
                self.sumImg = np.add(imgArr,self.sumImg)

            #Calculate the mean array
            self.meanImg = self.sumImg/(len(self.imgTrainSmpl))
            self.meanImg = np.nan_to_num(self.meanImg)

            #subtract it out
            index=0
            for img in self.imgTrainSmpl:
                imgArr = np.asarray(img)
                self.imgTrainSmpl[index] = np.subtract(imgArr,self.meanImg).tolist()
                index += 1

            #for img in self.imgTrainSmpl:
                #print img
        except:
            print Exception 


    #2. get the covaraince matrix for each digit
    def getCov(self):
        self.imgCov=[]
        dgtArr = np.asarray(self.imgTrainSmpl).T
        dgtCov = np.cov(dgtArr)
        self.imgCov.append(dgtCov)
        #for img in self.imgCov:
            #print img

    #3. get the eigen vectors from the covariance matrix
    def getEigen(self):
        self.eigVec=[]
        self.eigVal=[]
        dgtArr = np.asarray(self.imgCov)
        tmpEigVal,tmpEigVec=np.linalg.eig(dgtArr)
        self.eigVal.append(tmpEigVal.tolist())
        self.eigVec.append(tmpEigVec.tolist())

        #print "\nEigen values:\n"
        #for img in self.eigVal:
            #print img

        #print "\nEigen vectors:\n"
        #for img in self.eigVec:
            #print img


    def sortEV(self):
        self.eigValArr = np.asarray(self.eigVal[0][0])
        self.eigVecArr = np.asarray(self.eigVec[0][0])
        self.srtdInd = np.argsort(np.abs(self.eigValArr))
        self.srtdEigValArr = self.eigValArr[self.srtdInd]
        self.srtdEigVecArr = self.eigVecArr[self.srtdInd]
        self.srtdEigVec = self.srtdEigVecArr.real.tolist()
        #print self.srtdEigValArr[0]
        print len(self.srtdInd.tolist())
        #print self.eigVec[self.srtdInd[0]]
        #print np.asarray(self.srtdEigVec).shape
        #for img in self.srtdEigVecArr:
            #print img
        #self.drawEig()

    def plotVal(self):
        """
        plt.figure()
        plt.scatter(np.asarray(self.eigVal).real)
        plt.show()
        """

    def drawEig(self):
        for vec in self.srtdEigVec[:10]:
            self.drawEigV(vec)


    def drawEigV(self,digit):
        plt.figure()
        fig=plt.imshow(np.asarray(digit).reshape(28,28),origin='upper')
        fig.set_cmap('gray_r')
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.savefig(str(random.randint(0,10000))+".png")
        #plt.show()
        plt.close()



    def drawChar(self,digit):
        plt.figure()
        fig=plt.imshow(np.asarray(digit).reshape(28,28),clim=(-1,1.0),origin='upper')
        fig.set_cmap('gray_r')
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.show()
        plt.close()


    def drawSmpl(self):
        for img in self.imgTrainSmpl:
            self.drawChar(img) 


    def singleStep(self):
        self.val, self.vec = np.linalg.eig(np.cov(np.array(self.imgTrainSmpl).transpose()))
        self.srtd = np.argsort(self.val)[::-1]
        print self.val


#asnmnt4=PCAMNIST()
#asnmnt4.singleStep()
asnmnt4=PCAMNIST()
asnmnt4.subMean()
asnmnt4.getCov()
asnmnt4.getEigen()
asnmnt4.sortEV()
asnmnt4.drawEig()
#asnmnt4.plotVal()
"""
asnmnt4.getSorted()
asnmnt4.printTopEigenVal()
"""

Although the above code runs perfectly and all the array sizes match the given dataset, it generates the following images a eigen vectors: 特征向量作为图像

Clearly the eigen vectors make no sense as they have to represent the features of the dataset which in this case should be digits. Any help is appreciated. If you are trying to run this code you might have to install the MNIST package and download data from link.

You're plotting the rows of the eigenvector matrix. The eigenvectors are in the columns of the matrix, as you can see in the np.linalg.eig documentation .

You should change

self.eigVec.append(tmpEigVec.tolist())

to

self.eigVec.append(np.transpose(tmpEigVec).tolist())

and I believe it will work as expected.

As pointed out by several users the problem was with the eigenvectors to make it work instead of changing the append logic, simply modify the draw function as:

def drawEig(self):
        for vec in self.srtdEigVecArr.T[:10]:
            self.drawEigV(vec)

Now, the eigenvectors make sense: 在此处输入图片说明

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM