神经网络反向传播不起作用

Question

I have coded a neural network in JavaScript and implemented the Backpropagation algorithm described here . 我用JavaScript编写了一个神经网络，并实现了这里描述的Backpropagation算法。 Here is the code (typescript): 这是代码（typescript）：

/**
 * Net
 */


export class Net {
    private layers: Layer[] = [];
    private inputLayer: Layer;
    private outputLayer: Layer;
    public error: number = Infinity;

    private eta: number = 0.15;
    private alpha: number = 0.5;

    constructor(...topology: number[]) {
        topology.forEach((topologyLayer, iTL) => {
            var nextLayerNeuronNumber = topology[iTL + 1] || 0;
            this.layers.push(new Layer(topologyLayer, nextLayerNeuronNumber));
        });

        this.inputLayer = this.layers[0];
        this.outputLayer = this.layers[this.layers.length - 1];

    }

    public loadWeights(weights) {
        /*
        [
            [Layer
                [Node weights, ..., ...]
            ]
        ]
        */

        for (var iL = 0; iL < weights.length; iL++) {
            var neuronWeights = weights[iL];
            var layer = this.layers[iL];
            for (var iN = 0; iN < neuronWeights.length; iN++) {

                // Neuron

                var connections = neuronWeights[iN];
                for (var iC = 0; iC < connections.length; iC++) {
                    var connection = connections[iC];
                    this.layer(iL).neuron(iN).setWeights(iC, connection);

                }

            }
        }

    }


    public train(data: number[][], iterartions = 2000) {

        var inputs = this.inputLayer.neurons.length - 1;

        for (var ite = 0; ite < iterartions; ite++) {

            data.forEach(node => {

                var inputData = [];
                var outputData = [];

                for (var i = 0; i < node.length; i++) {
                    if (i < inputs) {
                        inputData.push(node[i])
                    } else {
                        outputData.push(node[i])
                    }
                }

                this.feedForward(...inputData);
                this.backProb(...outputData);


            });


        }


        return this.calcDataError(data);

    }

    private calcDataError(data){
        var overallDataErrorSum = 0;
        var inputs = this.inputLayer.neurons.length - 1;

        data.forEach(node => {
            var outputData = node.splice(inputs);
            var inputData = node;

            this.feedForward(...inputData);
            overallDataErrorSum += this.getNetError(outputData);
        });

        overallDataErrorSum /= data.length;

        return overallDataErrorSum;
    }

    public saveWeights() {
        // Ignore output layer
        var ret = []
        for (var iL = 0; iL < this.layers.length - 1; iL++) {
            var layer = this.layers[iL];
            var layer_ret = [];

            layer.neurons.forEach(neuron => {
                layer_ret.push(neuron.connections.map(c => c.weight));
            });

            ret.push(layer_ret);
        }
        return ret;
    }

    feedForward(...inputs: number[]) {
        if (inputs.length != this.inputLayer.neurons.length - 1) return false;

        this.inputLayer.neurons.forEach((neuron, i) => {
            if (!neuron.isBias) {
                neuron.output(inputs[i]);
            }
        });

        this.layers.forEach((layer, i) => {
            // Skip Input Layer
            if (i > 0) {
                var prevLayer = this.layers[i - 1]
                layer.neurons.forEach(neuron => {
                    neuron.calcOutput(prevLayer);
                });
            }
        });

    }

    public getNetError(targetVals) {
        // Calc delta error of outputs
        var deltas = [];

        this.outputLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                neuron.calcOutputDelta(targetVals[iN]);
                deltas.push(neuron.delta);
            }
        });

        deltas = deltas.map(d => Math.pow(d, 2));


        var sum = 0;

        deltas.forEach(d => sum += d);

        return sum / deltas.length;


    }

    backProb(...targetVals: number[]) {



        // Calc delta error of outputs
        this.outputLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                neuron.calcOutputDelta(targetVals[iN]);
            }
        });

        // Backprop delta error through hidden layers

        for (var iL = this.layers.length - 2; iL > 0; iL--) {
            var layer = this.layers[iL];
            var nextLayer = this.layers[iL + 1]
            layer.neurons.forEach(neuron => {
                neuron.calcHiddenDelta(nextLayer);
            });

        }

        // Update weights 

        for (var iL = 1; iL < this.layers.length; iL++) {
            var layer = this.layers[iL];
            var prevLayer = this.layers[iL - 1];

            layer.neurons.forEach(neuron => {
                if (!neuron.isBias) {
                    neuron.updateWeights(prevLayer, this.eta);
                }
            });
        }

        this.error = this.getNetError(targetVals);

        return this.error;

    }

    getOutputs(...inputs: number[]) {

        var ret = [];
        this.outputLayer.neurons.forEach(neuron => {
            if (!neuron.isBias) {
                ret.push(neuron.output())
            }
        });
        return ret;

    }

    getResults(...inputs: number[]) {
        this.feedForward(...inputs)
        return this.getOutputs();
    }

    layer(i) {
        return this.layers[i];
    }
}

/**
 * Layer
 */
class Layer {
    public neurons: Neuron[] = [];
    constructor(neuronNumber: number, nextLayerNeuronNumber: number) {
        for (var iN = 0; iN < neuronNumber + 1; iN++) {
            // +1 for bias neuron, which is last
            if (iN < neuronNumber) {
                // Create normal neuron
                this.neurons.push(new Neuron(nextLayerNeuronNumber, iN, false));
            } else {
                this.neurons.push(new Neuron(nextLayerNeuronNumber, iN, true));
            }
        }
    }

    neuron(i) {
        return this.neurons[i];
    }

    bias() {
        return this.neurons[this.neurons.length - 1];
    }
}

/**
 * Neuron
 */
class Neuron {
    public connections: Connection[] = [];
    private outputVal: number;
    public delta: number;

    constructor(outputsTo: number, private index, public isBias = false) {

        // Creates connections
        for (var c = 0; c < outputsTo; c++) {
            this.connections.push(new Connection());
        }

        this.outputVal = isBias ? 1 : 0;

    }

    calcOutput(prevLayer: Layer) {

        // Only calcOutput when neuron is not a bias neuron

        if (!this.isBias) {
            var sum = 0;

            prevLayer.neurons.forEach(prevLayerNeuron => {
                sum += prevLayerNeuron.output() * prevLayerNeuron.getWeights(this.index).weight;
            });

            this.output(this.activationFunction(sum));
        }

    }

    private activationFunction(x) {

        //return Math.tanh(x);
        return 1 / (1 + Math.exp(-x))
        //return x;
    };

    private activationFunctionDerivative(x) {
        // Small approximation of tanh derivative
        //return 1 - x * x

        // Sigmoid
        var s = this.activationFunction(x);
        return s * (1 - s);

        // With general derivative formula where h = 1e-10
        /*var h = 0.0001;
        var dx = ((this.activationFunction(x + h) - this.activationFunction(x))/h)
        return dx;*/

        //return 1
    };

    // Backprop // Todo // Understand


    public calcOutputDelta(targetVal) {

        // Bias output neurons do not have delta error
        if (!this.isBias) {
            this.delta = targetVal - this.output();
        }
    }

    public calcHiddenDelta(nextLayer: Layer) {
        var sum = 0;

        // Go through all neurons of next layer excluding bias
        nextLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                sum += neuron.delta * this.getWeights(iN).weight;
            }
        });

        this.delta = sum;
    }

    public updateWeights(prevLayer: Layer, eta: number) {

        prevLayer.neurons.forEach((neuron, iN) => {
            var weight = neuron.getWeights(this.index).weight;
            var newWeight =
                weight + // old weight
                eta *   // learning weight
                this.delta * // delta error
                this.activationFunctionDerivative(neuron.output())
            neuron.getWeights(this.index).weight = newWeight;
        });


    }


    // Backprop end

    output(s?) {
        if (s && !this.isBias) {
            this.outputVal = s;
            return this.outputVal;
        } else {
            return this.outputVal;
        }
    }

    getWeights(i) {
        return this.connections[i];
    }

    setWeights(i, s) {
        return this.connections[i].weight = s;
    }
}

/**
 * Connection
 */
class Connection {
    public weight: number;
    public deltaWeight: number;

    constructor() {
        this.weight = Math.random();
        this.deltaWeight = 0;
    }
}

When training it for just one set of data, it works just fine. 当仅针对一组数据进行训练时，它可以正常工作。 (example from here ) （例子来自这里）

import {Net} from './ml';

var myNet = new Net(2, 2, 2);


var weights = [
    [
        [0.15, 0.25],
        [0.20, 0.30],
        [0.35, 0.35]
    ],
    [
        [0.40, 0.50],
        [0.45, 0.55],
        [0.60, 0.60]
    ]
];

// Just loads the weights given in the example

myNet.loadWeights(weights)

var error = myNet.train([[0.05, 0.10, 0.01, 0.99]]);
console.log('Error: ', error);

console.log(myNet.getResults(0.05, 0.10));

Console prints: 控制台打印：

Error:  0.0000020735174706210714
[ 0.011556397089327321, 0.9886867357304885 ]

Basically, that's pretty good, right? 基本上，这很不错，对吧？

Then, I wanted to teach the network the XOR problem: 然后，我想教网络XOR问题：

import {Net} from './ml';

var myNet = new Net(2, 3, 1);


var trainigData = [
    [0, 0, 0],
    [1, 0, 1],
    [0, 1, 1],
    [1, 1, 0]
]

var error = myNet.train(trainigData)
console.log('Error: ', error);

console.log('Input: 0, 0: ', myNet.getResults(0, 0));
console.log('Input: 1, 0: ', myNet.getResults(1, 0));

Here the network fails: 这里网络失败：

Error:  0.2500007370167383
Input: 0, 0:  [ 0.5008584967899313 ]
Input: 1, 0:  [ 0.5008584967899313 ]

What am I doing wrong? 我究竟做错了什么？

Answer 1

Firstly perform gradient checks on the entire batch (meaining on the function calculating gradients on the batch), if you have not done so already. 如果您还没有这样做，首先对整个批次执行梯度检查（计算批次上计算梯度的函数）。 This will ensure you know what the problem is. 这将确保您知道问题所在。

If gradients are not correctly computed, taking into account that your implementation works on single data sets, you are most likely mixing some values in the backwards pass. 如果未正确计算渐变，考虑到您的实现适用于单个数据集，则很可能在向后传递中混合某些值。

If gradients are correctly computed, there is an error in your update function. 如果正确计算了渐变，则更新函数中会出错。

A working implementation of backpropagation for neural networks in javaScript can be found here 可以在此处找到javaScript中神经网络的反向传播的工作实现

Here is the code snippet of the trainStep function using backpropagation 这是使用反向传播的trainStep函数的代码片段

    function trainStepBatch(details){
//we compute forward pass 
//for each training sample in the batch
//and stored in the batch array 
    var batch=[];
    var ks=[];
    for(var a=0;a<details.data.in.length;a++){
    var results=[];
    var k=1;
    results[0]={output:details.data.in[a]};
    for(var i=1;i<this.layers.length;i++){
        results[i]=layers[this.layers[i].type].evalForGrad(this.layers[i],results[i-1].output);
        k++;
    }
    batch[a]=results;
    ks[a]=k;
    }
//We compute the backward pass
//first derivative of the cost function given the output
    var grad=[];
    for(i in batch)grad[i]={grad:costs[details.cost].df(batch[i][ks[i]-1].output,details.data.out[i])};
//for each layer we compute the backwards pass
//on the results of all forward passes at a given layer
    for(var i=this.layers.length-1;i>0;i--){
    var grads=[];
    var test=true;
    for(a in batch){
        grads[a]=layers[this.layers[i].type].grad(this.layers[i],batch[a][i],batch[a][i-1],grad[a]);
        if(grads[a]==null)test=false;
        else grads[a].layer=i;
    }
//we perform the update
    if(test)stepBatch(this.layers[i].par,grads,details.stepSize);
    }
}

And for the stepBatch function 并为stepBatch功能

function stepBatch(params,grads, stepSize){
for(i in params.w){
    for(j in params.w[i]){
        for(a in grads){
            params.w[i][j]-=stepSize*grads[a].dw[i][j];
        }
    }
}
for(i in params.b){
    for(a in grads){
        params[a]-=stepSize*grads[a].db[i];
    }
}
function stepBatch(params,grads, stepSize){
    for(i in params.w){
        for(j in params.w[i]){
            for(a in grads){
                params.w[i][j]-=stepSize*grads[a].dw[i][j];
            }
        }
    }
    for(i in params.b){
        for(a in grads){
            params[a]-=stepSize*grads[a].db[i];
        }
    }
}

神经网络反向传播不起作用

问题描述

1 个解决方案

解决方案1
0 2016-11-11 14:35:50

神经网络反向传播不起作用

问题描述

1 个解决方案

解决方案1 0 2016-11-11 14:35:50

解决方案1
0 2016-11-11 14:35:50