
Simple backpropagation Neural Network algorithm (Python)

I am trying to understand backpropagation, so I wrote some Python code, but it does not work properly. When I train it on the XOR inputs/outputs, the error does not converge. However, if I change the value of the last XOR output, it does converge.

If I set some of the target output values greater than 1, the network converges as if the target were 1, which is not correct.
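For context, the output neuron here is a sigmoid unit, so its activation is confined to the open interval (0, 1); a target greater than 1 can therefore never be reached exactly, no matter how the weights are trained. A quick standalone check (plain numpy, independent of the class below):

import numpy as np

def sigmoid(z):
    # Logistic sigmoid: the output always lies strictly between 0 and 1.
    return 1 / (1 + np.exp(-z))

# Even for very large inputs the activation only saturates towards 1,
# so an output target of, say, 2.0 is unreachable.
print(sigmoid(np.array([-10.0, 0.0, 10.0])))  # approx. [4.54e-05, 0.5, 0.99995]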

import numpy as np
import random

class neural_network():

    activation = [] #List of values with the values of activation of each layers
    weightsIn = []
    weightsOut = []

    def __init__(self, sizeOfLayers):
        '''
            sizeOfLayers: Tuple with numbers of neurons of each layer
            (in, hidden, out)
        '''
        if len(sizeOfLayers) > 3:
            raise ValueError('Wrong number of layers')

        self.sizeOfLayers = sizeOfLayers
        for i in range(len(sizeOfLayers)):
            if i == 0:
                #input layer + bias
                self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])
            else:
                self.activation.append(sizeOfLayers[i]*[0.0])
        # Wi = len(Hid) x len(IN)+1(bias)
        self.weightsIn = np.random.random((sizeOfLayers[1], sizeOfLayers[0] + 1))
        # Wo = len(OUT) x len(Hid)
        self.weightsOut = np.random.random((sizeOfLayers[2], sizeOfLayers[1]))

    def forward(self, X):
        '''
            X: Input vector
        '''
        #In+bias add ativation vector
        self.activation[0] = np.vstack((np.array([X]).T, np.array([1])))
        #sum of (weights x in)
        self.sumHidden = self.weightsIn.dot(self.activation[0])
        #Ativation of hidden layer
        self.activation[1] = (self.sigmoid(self.sumHidden))
        #sum of(out weights x activation of last layer)
        self.sumOut = self.weightsOut.dot(self.activation[1])
        #activation of output
        self.activation[2] = (self.sigmoid(self.sumOut))
        return self.activation[2].T

    def backPropagate(self, Y, trainRate = 0.1):
        '''
            Y: output target
            trainRate:
        '''
        if len(Y) != self.sizeOfLayers[2]:
            raise ValueError('Wrong number of inputs')

        #Calc of output delta
        error_o = Y.T - self.activation[2].T
        out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T
        #Calc of hidden delta
        error_h = out_delta.T.dot(self.weightsOut)
        hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T

        # update output weights output
        change_o = self.activation[1] * out_delta.T
        for i in range(self.sizeOfLayers[2]):
            for j in range(self.sizeOfLayers[1]):
                self.weightsOut[i][j] = self.weightsOut[i][j] + trainRate*change_o[j][i]
        # update Input weights
        change_h = self.activation[0] * hiden_delta.T
        for i in range(self.sizeOfLayers[1]):
            for j in range(self.sizeOfLayers[0]):
                self.weightsIn[i][j] = self.weightsIn[i][j] + trainRate*change_h[j][i]

        #Error
        return np.sum((Y.T - self.activation[2].T)**2)/0.5

    def sigmoid(self, z, derv = False):
        if derv == False:
            return 1/(1+np.exp(-z))

    def sigmoidPrime(self, z):
        return self.sigmoid(z)*(1-self.sigmoid(z))

    def train(self, target, trainRate = 0.001, it = 50000):
        for i in range(it):
            error = 0.0
            for t in target:
                inputs = np.array(t[0])
                targets = np.array([t[1]])
                self.forward(inputs)
                error = error + self.backPropagate(targets, trainRate)

nn = neural_network((2,6,1))
xor = [
    [[0,0], [0]],
    [[0,1], [1]],
    [[1,0], [1]],
    [[1,1], [0]] #If I change this to 1 it converges
    ]

nn.train(xor)

EDIT: I modified the code following Diego Stefano's answer (thanks, Diego), but the error still does not converge.

import numpy as np
import math
import random
from scipy.special import expit
from sklearn.preprocessing import normalize


class neural_network(object):
    activation = []
    weightsIn = []
    weightsOut = []

    def __init__(self, sizeOfLayers):
        '''
            sizeOfLayers: Tuple with numbers of neurons of each layer
            (in, hidden, out)
        '''
        self.sizeOfLayers = sizeOfLayers
        for i in range(len(sizeOfLayers)):
            self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])

        self.weightsIn = np.random.normal(scale=0.1, size = (sizeOfLayers[1], sizeOfLayers[0] + 1))
        self.weightsOut = np.random.normal(scale=0.1,  size = (sizeOfLayers[2], sizeOfLayers[1] + 1))


    def forward(self, X):
        '''
            X: Input vector
        '''
        #In+bias add ativation vector
        self.activation[0] = np.vstack((np.array([X]).T, np.array([1])))
        #sum of (weights x in)
        self.sumHidden = self.weightsIn.dot(self.activation[0])
        #+bias add ativation vector
        self.activation[1] = np.vstack((expit(self.sumHidden), np.array([1])))
        #sum of(out weights x activation of last layer)
        self.sumOut = self.weightsOut.dot(self.activation[1])
        #activation of output
        self.activation[2] = (expit(self.sumOut))
        return self.activation[2].T

    def backPropagate(self, X, Y, trainRate = 0.1):
        self.forward(X)
        #Calc of output delta
        error_o = Y - self.activation[2].T
        out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T
        #Calc of hidden delta
        error_h = out_delta.T.dot(self.weightsOut)
        hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T

        # update output weights output
        change_o = self.activation[1] * np.transpose(out_delta)

        self.weightsOut = self.weightsOut + trainRate*change_o.T
        # update hidden weights output
        change_h = self.activation[0].dot( hiden_delta[:-1].T)
        self.weightsIn = self.weightsIn + trainRate*change_h.T
        #error
        return np.sum((Y - self.activation[2].T)**2)*0.5


    def train(self, input_list, epochs):
        for epoch in range(epochs):
            ErrAcc = 0.0
            for inputs, targets in input_list:
                Err = self.backPropagate(np.array(inputs), np.array(targets), 0.2)
                ErrAcc = ErrAcc + Err
            if epoch % 1000 == 0:
                print 'Epoch =', epoch, 'ErrAcc =', ErrAcc

    def sigmoidPrime(self,x):
      return expit(x)*(1-expit(x))


nn = neural_network((2,10,1))
xor = [
    [[0,0], [0]],
    [[0,1], [1]],
    [[1,0], [1]],
    [[1,1], [0]] #If I change this to 1 it converges
    ]
nn.train(xor, 300000)

Here are the modifications I made to your code that make it work:

1. Add the bias to the output neurons as well. Every neuron in the network should have one, because the bias detaches the activation from the origin and lets the activation function shift left and right, which greatly improves the chances of learning successfully.

2. Instead of initializing the weights with np.random.random, which generates numbers in the interval [0.0, 1.0), use np.random.uniform to generate uniform random floats in [-1.0, 1.0).

3. Center the input space around the origin (i.e., remove the mean) and normalize it (a minimal sketch follows right after this list).
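For point 3, a minimal sketch of what centering and normalizing the XOR inputs could look like (the helper center_and_scale is illustrative only, not part of the original code):

import numpy as np

def center_and_scale(X):
    # Remove the mean of each column so the inputs are centered on the origin,
    # then scale each column so its values span roughly [-1, 1].
    X = np.asarray(X, dtype=float)
    X = X - X.mean(axis=0)
    span = X.max(axis=0) - X.min(axis=0)
    span[span == 0] = 1.0  # guard against constant columns
    return 2.0 * X / span

raw_xor_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
print(center_and_scale(raw_xor_inputs))
# [[-1. -1.]
#  [-1.  1.]
#  [ 1. -1.]
#  [ 1.  1.]]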

This is how the initialization now looks:

    for i in range(len(sizeOfLayers)):
        self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])

    self.weightsIn = np.random.uniform(-1,1,(sizeOfLayers[1], sizeOfLayers[0] + 1))
    self.weightsOut = np.random.uniform(-1,1,(sizeOfLayers[2], sizeOfLayers[1] + 1))

Then you also have to append a 1 to the activation vector in the forward function:

self.activation[1] = np.vstack((self.sigmoid(self.sumHidden), np.array([1])))

You may have to change the learning rate for it to work (about 0.5 worked for me). Also, your mean squared error computation is wrong: you should multiply by 0.5, not divide by it.
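To make that last fix explicit, the error term as a small standalone helper (the function name squared_error is mine, for illustration only):

import numpy as np

def squared_error(target, output):
    # Conventional 1/2 * sum of squared differences: multiply by 0.5, don't divide by it.
    return 0.5 * np.sum((np.asarray(target, dtype=float) - np.asarray(output, dtype=float)) ** 2)

print(squared_error([0.0], [0.25]))  # 0.03125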

And here is the modified code:

import numpy as np
import random

class neural_network():

    activation = [] #List of values with the values of activation of each layers
    weightsIn = []
    weightsOut = []

    def __init__(self, sizeOfLayers):
        '''
            sizeOfLayers: Tuple with numbers of neurons of each layer
            (in, hidden, out)
        '''
        if len(sizeOfLayers) > 3:
            raise ValueError('Wrong number of layers')

        self.sizeOfLayers = sizeOfLayers
        for i in range(len(sizeOfLayers)):
            #input layer + bias
            self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])

        # Wi = len(Hid) x len(IN)+1(bias)
        self.weightsIn = np.random.uniform(-1,1,(sizeOfLayers[1], sizeOfLayers[0] + 1))

        # Wo = len(OUT) x len(Hid)
        self.weightsOut = np.random.uniform(-1,1,(sizeOfLayers[2], sizeOfLayers[1] + 1))

    def forward(self, X):
        '''
            X: Input vector
        '''
        #In+bias add ativation vector
        self.activation[0] = np.vstack((np.array([X]).T, np.array([1])))
        #sum of (weights x in)
        self.sumHidden = self.weightsIn.dot(self.activation[0])
        #Ativation of hidden layer
        self.activation[1] =  np.vstack( ( self.sigmoid(self.sumHidden), np.array([1]) ) )
        #sum of(out weights x activation of last layer)
        self.sumOut = self.weightsOut.dot(self.activation[1])
        #activation of output
        self.activation[2] = (self.sigmoid(self.sumOut))
        return self.activation[2].T

    def backPropagate(self, Y, trainRate = 0.1):
        '''
            Y: output target
            trainRate:
        '''
        if len(Y) != self.sizeOfLayers[2]:
            raise ValueError('Wrong number of inputs')

        #Calc of output delta
        error_o = Y.T - self.activation[2].T
        out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T
        #Calc of hidden delta
        error_h = out_delta.T.dot(self.weightsOut)
        hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T

        # update output weights output
        change_o = self.activation[1] * out_delta.T
        for i in range(self.sizeOfLayers[2]):
            for j in range(self.sizeOfLayers[1]):
                self.weightsOut[i][j] = self.weightsOut[i][j] + trainRate*change_o[j][i]
        # update Input weights
        change_h = self.activation[0] * hiden_delta.T
        for i in range(self.sizeOfLayers[1]):
            for j in range(self.sizeOfLayers[0]):
                self.weightsIn[i][j] = self.weightsIn[i][j] + trainRate*change_h[j][i]

        #Error
        return np.sum((Y.T - self.activation[2].T)**2)*0.5

    def sigmoid(self, z, derv = False):
        if derv == False:
            return 1/(1+np.exp(-z))

    def sigmoidPrime(self, z):
        return self.sigmoid(z)*(1-self.sigmoid(z))

    def train(self, target, trainRate = 0.5, it = 50000):
        for i in range(it):
            error = 0.0
            for t in target:
                inputs = np.array(t[0])
                targets = np.array([t[1]])
                self.forward(inputs)
                error = error + self.backPropagate(targets, trainRate)

nn = neural_network((2,5,1))
xor = [
    [[-1.0, -1.0], [0]],
    [[-1.0,  1.0], [1]],
    [[ 1.0, -1.0], [1]],
    [[ 1.0,  1.0], [0]] #If I change this to 1 it converges
]

nn.train(xor)

for e in xor:
    nn.forward(e[0])
    print nn.activation[2]

Good luck!
