Neural Network for MNIST digits is not learning at all - problem with backpropagation (Michael Nielsen example)
I have been trying to code up the neural network for recognizing MNIST digits that Michael Nielsen presents here: http://neuralnetworksanddeeplearning.com/chap1.html
The original was written for Python 2.7, I believe; I am using Python 3. The network does go through the examples and update the weights and biases, but it is not learning, and it gets only about 10% of the test examples right (as good as random guessing).
I also tried simply copying the code from the site and running it under Python 2.7, and it works fine (up to about 95% accuracy). The only notable differences in my network are the dataset (I am using one downloaded directly from MNIST two days ago) and the two places where I switched np.dot to np.outer, just to make it easier to keep track of array shapes (I tried to stick with (N,) shapes rather than (N,1)). That part seems fine, though, since the layer sizes differ and the multiplications go through; a minimal shape sketch is below. I also use the same learning rate and layer sizes as in the example.
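For reference, here is a minimal sketch (my own illustration, not part of Nielsen's code) of why np.outer is the drop-in replacement when the vectors are flat: the per-layer weight gradient is the outer product of a layer's delta with the previous layer's activations, which has the same (out, in) shape as the weight matrix.

import numpy as np

# With flat (N,) vectors, np.dot of two vectors is an inner product (and
# requires equal lengths), so the weight gradient must use np.outer.
delta = np.random.randn(30)         # delta at a layer with 30 units
activation = np.random.randn(784)   # activations of the previous layer

grad_w = np.outer(delta, activation)
print(grad_w.shape)                 # (30, 784) - same shape as the weights

# With (N,1) column vectors, as in Nielsen's original code, the same
# gradient is written as a matrix product instead:
delta_col = delta.reshape(-1, 1)
act_col = activation.reshape(-1, 1)
print(np.dot(delta_col, act_col.transpose()).shape)   # also (30, 784)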
I cannot see what is throwing the net off. If anyone has tried this, or has any insight into it, I would appreciate it.
Thanks
The code:
import matplotlib.pyplot as plt
import numpy as np
import idx2numpy
import random

### LOAD DATASET ###
train = idx2numpy.convert_from_file("mnist/train-images.idx3-ubyte")
train_labels = idx2numpy.convert_from_file("mnist/train-labels.idx1-ubyte")
test = idx2numpy.convert_from_file("mnist/t10k-images.idx3-ubyte")
test_labels = idx2numpy.convert_from_file("mnist/t10k-labels.idx1-ubyte")

def vectorize(x):
    """Turn a digit label into a 10-dimensional one-hot vector."""
    e = np.zeros(10)
    e[x] = 1.0
    return e

training_images = [np.reshape(i, (784,)) / 255 for i in train]
training_labels = [vectorize(i) for i in train_labels]
training_set = list(zip(training_images, training_labels))

test_images = [np.reshape(i, (784,)) / 255 for i in test]
test_set = list(zip(training_images, test_labels))

### NETWORK CLASS ###
class myNet():
    def __init__(self, sizes):
        self.sizes = sizes
        self.N = len(sizes)
        self.w = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
        self.b = [np.random.randn(i) for i in sizes[1:]]

    def sigmoid(self, z):
        return 1.0 / (1.0 + np.exp(-z))

    def sigmoid_prime(self, z):
        return self.sigmoid(z) * (1 - self.sigmoid(z))

    def cost_derivative(self, output_activations, y):
        return (output_activations - y)

    def feedforward(self, a):
        for bb, ww in zip(self.b, self.w):
            a = self.sigmoid(np.dot(ww, a) + bb)
        return a

    def backprop(self, x, y):
        nabla_b = [np.zeros(bb.shape) for bb in self.b]
        nabla_w = [np.zeros(ww.shape) for ww in self.w]
        # forward pass
        activation = x
        activations = [x]  # list to store all the activations, layer by layer
        zs = []  # list to store all the z vectors, layer by layer
        for bb, ww in zip(self.b, self.w):
            z = np.dot(ww, activation) + bb
            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * self.sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.outer(delta, activations[-2])
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book: here
        # l = 1 means the last layer of neurons, l = 2 the second-last,
        # and so on.
        for l in range(2, self.N):
            z = zs[-l]
            sp = self.sigmoid_prime(z)
            delta = np.dot(self.w[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.outer(delta, activations[-l - 1])
        return (nabla_b, nabla_w)

    def update(self, mini_batch, eta):
        """Apply one gradient-descent step, averaging the gradients
        over a single mini-batch."""
        nabla_b = [np.zeros(bb.shape) for bb in self.b]
        nabla_w = [np.zeros(ww.shape) for ww in self.w]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.w = [ww - (eta / len(mini_batch)) * nw
                  for ww, nw in zip(self.w, nabla_w)]
        self.b = [bb - (eta / len(mini_batch)) * nb
                  for bb, nb in zip(self.b, nabla_b)]
        return

    def gradient_descent(self, training_data, epochs, mini_batch_size, eta, test_data):
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k + mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update(mini_batch, eta)
            print("Epoch {0}: {1}".format(
                j, self.evaluate(test_data)))
        return

    def evaluate(self, test_data):
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

sizes = [28*28, 30, 10]
net = myNet(sizes)
net.gradient_descent(training_set, 30, 10, 3.0, test_set)
I found the error... I had mistakenly zipped the training images together with the test labels to form the test set, which is obviously not how it should be. Now that the test set is formed correctly, everything works, with accuracy up to around 95%. Below is the full corrected code (works in Python 3); a small sanity check that would have caught the mistake is sketched after the listing.
import matplotlib.pyplot as plt
import numpy as np
import idx2numpy
import random

def vectorize(x):
    """Turn a digit label into a 10-dimensional one-hot vector."""
    e = np.zeros(10)
    e[x] = 1.0
    return e

### LOAD DATASET ###
train_images = idx2numpy.convert_from_file("mnist/train-images.idx3-ubyte") / 255
train_labels = idx2numpy.convert_from_file("mnist/train-labels.idx1-ubyte")
train_images = [np.reshape(x, (784,)).astype('float32') for x in train_images]
train_labels = [vectorize(i) for i in train_labels]

test_images = idx2numpy.convert_from_file("mnist/t10k-images.idx3-ubyte") / 255
test_labels = idx2numpy.convert_from_file("mnist/t10k-labels.idx1-ubyte")
test_images = [np.reshape(x, (784,)).astype('float32') for x in test_images]

training_set = list(zip(train_images, train_labels))
test_set = list(zip(test_images, test_labels))  ## THIS IS WHERE I MESSED UP

### NETWORK CLASS ###
class myNet():
    def __init__(self, sizes):
        self.sizes = sizes
        self.N = len(sizes)
        self.w = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
        self.b = [np.random.randn(i) for i in sizes[1:]]

    def sigmoid(self, z):
        return 1.0 / (1.0 + np.exp(-z))

    def sigmoid_prime(self, z):
        return self.sigmoid(z) * (1 - self.sigmoid(z))

    def cost_derivative(self, output_activations, y):
        return (output_activations - y)

    def feedforward(self, a):
        for bb, ww in zip(self.b, self.w):
            a = self.sigmoid(np.dot(ww, a) + bb)
        return a

    def backprop(self, x, y):
        nabla_b = [np.zeros(bb.shape) for bb in self.b]
        nabla_w = [np.zeros(ww.shape) for ww in self.w]
        # forward pass
        activation = x
        activations = [x]  # list to store all the activations, layer by layer
        zs = []  # list to store all the z vectors, layer by layer
        for bb, ww in zip(self.b, self.w):
            z = np.dot(ww, activation) + bb
            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * self.sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.outer(delta, activations[-2])
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book: here
        # l = 1 means the last layer of neurons, l = 2 the second-last,
        # and so on.
        for l in range(2, self.N):
            z = zs[-l]
            sp = self.sigmoid_prime(z)
            delta = np.dot(self.w[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.outer(delta, activations[-l - 1])
        return (nabla_b, nabla_w)

    def update(self, mini_batch, eta):
        """Apply one gradient-descent step, averaging the gradients
        over a single mini-batch."""
        nabla_b = [np.zeros(bb.shape) for bb in self.b]
        nabla_w = [np.zeros(ww.shape) for ww in self.w]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.w = [ww - (eta / len(mini_batch)) * nw
                  for ww, nw in zip(self.w, nabla_w)]
        self.b = [bb - (eta / len(mini_batch)) * nb
                  for bb, nb in zip(self.b, nabla_b)]
        return

    def gradient_descent(self, training_data, epochs, mini_batch_size, eta, test_data):
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k + mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update(mini_batch, eta)
            print("Epoch {0}: {1}".format(
                j, self.evaluate(test_data)))
        return

    def evaluate(self, test_data):
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

sizes = [28*28, 30, 10]
net = myNet(sizes)
net.gradient_descent(training_set, 30, 10, 3.0, test_set)
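As an aside, here is the quick sanity check mentioned above (my own addition, not part of Nielsen's code): every image stored in test_set should be identical to the corresponding entry of test_images. On the buggy version, which zipped training_images with test_labels, the assertion fails on the first pair. Note that zip() silently truncates to the shorter input, so the wrong pairing raised no error and only showed up as ~10% accuracy.

# Hypothetical sanity check: confirm the evaluation pairs were built
# from the test images rather than the training images.
for (img, _), ref in zip(test_set, test_images):
    assert np.array_equal(img, ref), "test_set was built from the wrong images!"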