
MNIST Neural Network not learning - Michael Nielsen Example

I have been trying to write a neural network to recognize MNIST digits, following Michael Nielsen's example here: http://neuralnetworksanddeeplearning.com/chap1.html

The original was written for Python 2.7, I believe, and I am using v3. The network runs through the examples and updates the weights and biases, but it is not learning: it gets roughly 10% of the test examples right (no better than random guessing).

I also tried simply copying the code from the site and running it in Python 2.7, and it works fine (up to about 95% accuracy). The only notable differences in my network are the dataset (I am using one downloaded directly from the MNIST site two days ago) and two places where I switched np.dot to np.outer, just to make it easier to keep track of array shapes (I try to stick with (N,) rather than (N,1)). That part seems fine, though, because the layer sizes differ and the multiplications go through. I also use the same learning rate and layer sizes as in the example.
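
For reference, here is a minimal illustration of the shape difference I mean (the array names are made up for the example, not taken from the network code):

import numpy as np

delta = np.random.randn(10)        # (10,) error vector for an output layer
activation = np.random.randn(30)   # (30,) activations from the previous layer

# np.outer builds the full 2-D, weight-shaped gradient even from 1-D inputs:
grad_w = np.outer(delta, activation)
print(grad_w.shape)                # (10, 30)

# np.dot on the same two 1-D arrays would instead attempt an inner product,
# which fails here because the lengths differ:
# np.dot(delta, activation)  ->  ValueError: shapes not aligned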

I cannot see what is throwing the network off. If anyone has tried this, or knows what might be going on, I would appreciate the help.

Thanks

The code:

import matplotlib.pyplot as plt
import numpy as np
import idx2numpy

import random



### LOAD DATASET ###


train = idx2numpy.convert_from_file("mnist/train-images.idx3-ubyte")

train_labels = idx2numpy.convert_from_file("mnist/train-labels.idx1-ubyte")

test = idx2numpy.convert_from_file("mnist/t10k-images.idx3-ubyte")

test_labels = idx2numpy.convert_from_file("mnist/t10k-labels.idx1-ubyte")


def vectorize(x):
    e = np.zeros(10)
    e[x] = 1.0
    return e

training_images = [np.reshape(i, (784))/255 for i in train]
training_labels = [vectorize(i) for i in train_labels]
training_set = list(zip(training_images,training_labels))

test_images = [np.reshape(i, (784))/255 for i in test]
test_set = list(zip(training_images,test_labels))


### NETWORK CLASS ###


class myNet():

    def __init__(self, sizes):

        self.sizes = sizes

        self.N = len(sizes)

        self.w = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

        self.b = [np.random.randn(i) for i in sizes[1:]]


    def sigmoid(self, z):

        return 1.0/(1.0 + np.exp(-z))

    def sigmoid_prime(self, z):

        return self.sigmoid(z)*(1 - self.sigmoid(z))

    def cost_derivative(self, output_activations, y):

        return (output_activations - y)

    def feedforward(self, a):

        for bb, ww in zip(self.b, self.w):
            a = self.sigmoid(np.dot(ww, a) + bb)

        return a


    def backprop(self, x, y):

        nabla_b = [np.zeros(bb.shape) for bb in self.b]
        nabla_w = [np.zeros(ww.shape) for ww in self.w]

        activation = x

        activations = [x]  # list to store all the activations, layer by layer
        zs = []  # list to store all the z vectors, layer by layer

        for bb, ww in zip(self.b, self.w):
            z = np.dot(ww, activation) + bb

            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)
        # backward pass

        delta = self.cost_derivative(activations[-1], y) * self.sigmoid_prime(zs[-1])

        nabla_b[-1] = delta
        nabla_w[-1] = np.outer(delta, activations[-2])
        # Note that the variable l in the loop below is used a little
        # differently: l = 1 means the last layer of neurons, l = 2 the
        # second-last layer, and so on.

        for l in range(2, self.N):

            z = zs[-l]
            sp = self.sigmoid_prime(z)
            delta = np.dot(self.w[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.outer(delta, activations[-l-1])

        return (nabla_b, nabla_w)

    def update(self, mini_batch, eta):

        nabla_b = [np.zeros(bb.shape) for bb in self.b]
        nabla_w = [np.zeros(ww.shape) for ww in self.w]

        for x, y in mini_batch:

            delta_nabla_b, delta_nabla_w = self.backprop(x, y)

            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        self.w = [ww - (eta/len(mini_batch))*nw
                  for ww, nw in zip(self.w, nabla_w)]
        self.b = [bb - (eta/len(mini_batch))*nb
                  for bb, nb in zip(self.b, nabla_b)]

        return

    
    def gradient_descent(self, training_data, epochs, mini_batch_size, eta, test_data):

        n = len(training_data)

        for j in range(epochs):

            random.shuffle(training_data)

            mini_batches = [
                training_data[k:k + mini_batch_size]
                for k in range(0, n, mini_batch_size)]

            for mini_batch in mini_batches:
                self.update(mini_batch, eta)

            print("Epoch {0}: {1}".format(
                j, self.evaluate(test_data)))

        return

    def evaluate(self, test_data):

        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]

        return sum(int(x == y) for (x, y) in test_results)
    

sizes =[28*28, 30, 10]

net = myNet(sizes)
    
net.gradient_descent(training_set,30,10,3.0,test_set)

I found the mistake... I had mistakenly zipped the training images together with the test labels to form the test set, which is obviously not how it should be. Now that the test set is formed correctly, everything works, and the accuracy goes up to around 95%. Here is the complete corrected code (works in Python 3); a small sanity check that would have caught the mix-up is sketched after it.

import matplotlib.pyplot as plt
import numpy as np
import idx2numpy

import random


def vectorize(x):
    e = np.zeros(10)
    e[x] = 1.0
    return e


### LOAD DATASET ###


train_images = idx2numpy.convert_from_file("mnist/train-images.idx3-ubyte")/255
train_labels = idx2numpy.convert_from_file("mnist/train-labels.idx1-ubyte")

train_images = [np.reshape(x,(784)).astype('float32') for x in train_images]
train_labels = [vectorize(i) for i in train_labels]

test_images = idx2numpy.convert_from_file("mnist/t10k-images.idx3-ubyte")/255
test_labels = idx2numpy.convert_from_file("mnist/t10k-labels.idx1-ubyte")

test_images = [np.reshape(x,(784)).astype('float32') for x in test_images]


training_set = list(zip(train_images,train_labels))

test_set = list(zip(test_images,test_labels)) ## THIS IS WHERE I MESSED UP


### NETWORK CLASS ###


class myNet():

    def __init__(self, sizes):

        self.sizes = sizes

        self.N = len(sizes)

        self.w = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

        self.b = [np.random.randn(i) for i in sizes[1:]]


    def sigmoid(self, z):

        return 1.0/(1.0 + np.exp(-z))

    def sigmoid_prime(self, z):

        return self.sigmoid(z)*(1 - self.sigmoid(z))

    def cost_derivative(self, output_activations, y):

        return (output_activations - y)

    def feedforward(self, a):

        for bb, ww in zip(self.b, self.w):
            a = self.sigmoid(np.dot(ww, a) + bb)

        return a


    def backprop(self, x, y):

        nabla_b = [np.zeros(bb.shape) for bb in self.b]
        nabla_w = [np.zeros(ww.shape) for ww in self.w]

        activation = x

        activations = [x]  # list to store all the activations, layer by layer
        zs = []  # list to store all the z vectors, layer by layer

        for bb, ww in zip(self.b, self.w):
            z = np.dot(ww, activation) + bb

            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)
        # backward pass

        delta = self.cost_derivative(activations[-1], y) * self.sigmoid_prime(zs[-1])

        nabla_b[-1] = delta
        nabla_w[-1] = np.outer(delta, activations[-2])
        # Note that the variable l in the loop below is used a little
        # differently: l = 1 means the last layer of neurons, l = 2 the
        # second-last layer, and so on.

        for l in range(2, self.N):

            z = zs[-l]
            sp = self.sigmoid_prime(z)
            delta = np.dot(self.w[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.outer(delta, activations[-l-1])

        return (nabla_b, nabla_w)

    def update(self, mini_batch, eta):

        nabla_b = [np.zeros(bb.shape) for bb in self.b]
        nabla_w = [np.zeros(ww.shape) for ww in self.w]

        for x, y in mini_batch:

            delta_nabla_b, delta_nabla_w = self.backprop(x, y)

            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        self.w = [ww - (eta/len(mini_batch))*nw
                  for ww, nw in zip(self.w, nabla_w)]
        self.b = [bb - (eta/len(mini_batch))*nb
                  for bb, nb in zip(self.b, nabla_b)]

        return

    
    def gradient_descent(self, training_data, epochs, mini_batch_size, eta, test_data):

        n = len(training_data)

        for j in range(epochs):

            random.shuffle(training_data)

            mini_batches = [
                training_data[k:k + mini_batch_size]
                for k in range(0, n, mini_batch_size)]

            for mini_batch in mini_batches:
                self.update(mini_batch, eta)

            print("Epoch {0}: {1}".format(
                j, self.evaluate(test_data)))

        return

    def evaluate(self, test_data):

        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]

        return sum(int(x == y) for (x, y) in test_results)
    

sizes =[28*28, 30, 10]

net = myNet(sizes)
    
net.gradient_descent(training_set,30,10,3.0,test_set)
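
For reference, here is the small sanity check mentioned above that would have caught the original mix-up (a sketch reusing the variables defined earlier; the plotted digit should match the printed label):

# Each pair in test_set should hold a test image together with its own test label.
assert len(test_set) == len(test_labels)
assert np.array_equal(test_set[0][0], test_images[0])
assert test_set[0][1] == test_labels[0]

# Visual spot check: the displayed digit should agree with the title.
plt.imshow(test_set[0][0].reshape(28, 28), cmap="gray")
plt.title("label: {}".format(test_set[0][1]))
plt.show()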
