
Recurrent NN for prediction doesn't learn

I am trying to build a recurrent neural network for prediction, and I am doing it in PyBrain.

I have created two simple scripts to test the ideas and techniques before implementing them in something more complex.

I have tried to follow, as closely as possible, code that has been shown to work, e.g. on Stack Overflow and GitHub.

In the first example, I try to predict values of sin over a time horizon, given past values:

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""An example of a simple RNN."""

import time
import math
import matplotlib.pyplot as plt

from normalizator import Normalizator

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import LSTMLayer
from pybrain.structure import LinearLayer, SigmoidLayer
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.supervised import RPropMinusTrainer
from pybrain.datasets import SupervisedDataSet
from pybrain.datasets import SequentialDataSet
import pybrain.datasets.sequential


class Network(object):
    """Sieć neuronowa."""

    def __init__(self, inputs, hidden, outputs):
        """Just a constructor."""
        self.inputs = inputs
        self.outputs = outputs
        self.hidden = hidden
        self.network = self.build_network(inputs, hidden, outputs)
        self.norm = Normalizator()

    def build_network(self, inputs, hidden, outputs):
        """Builds the network."""
        network = buildNetwork(inputs, hidden, outputs,
                               hiddenclass=LSTMLayer,
                               #hiddenclass=SigmoidLayer,
                               outclass=SigmoidLayer,
                               bias=True,
                               outputbias=False, recurrent=True)
        network.sortModules()
        print "Constructed network:"
        print network
        return network

    def train(self, learning_set, max_iterations=100):
        """Trains the network."""
        print "\nThe network is learning..."
        time_s = time.time()
        self.network.randomize()
        #trainer = RPropMinusTrainer(self.network, dataset=learning_set,
        #                            verbose=True)
        learning_rate = 0.05
        trainer = BackpropTrainer(self.network, learning_set, verbose=True,
                                  momentum=0.8, learningrate=learning_rate)
        errors = trainer.trainUntilConvergence(maxEpochs=max_iterations)
        #print "Last error in learning:", errors[-1]
        time_d = time.time() - time_s
        print "Learning took %d seconds." % time_d
        return errors, learning_rate

    def test(self, data):
        """Tests the network."""
        print ("X\tCorrect\tOutput\t\tOutDenorm\tError")
        mse = 0.0
        outputs = []
        #self.network.reset()
        for item in data:
            x_val = self.norm.denormalize("x", item[0])
            sin_val = self.norm.denormalize("sin", item[1])
            #get the output from the network
            output = self.network.activate(item[0])[0]
            out_denorm = self.norm.denormalize("sin", output)
            outputs.append(out_denorm)
            #compute the error
            error = sin_val - out_denorm
            mse += error**2
            print "%f\t%f\t%f\t%f\t%f" % \
                (round(x_val, 2), sin_val, output, out_denorm, error)
        mse = mse / float(len(data))
        print "MSE:", mse
        return outputs, mse

    def show_plot(self, correct, outputs, learn_x, test_x,
                  learning_targets, mse):
        """Plots some useful stuff :)"""
        #print "learn_x:", learn_x
        #print "test_x:", test_x
        #print "output:", outputs
        #print "correct:", correct
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(test_x, outputs, label="Prediction", color="red")
        ax.plot(test_x, correct, ":", label="Original data")
        ax.legend(loc='upper left')
        plt.xlabel('X')
        plt.ylabel('Sinus')
        plt.title('Sinus... (mse=%f)' % mse)
        #plot a portion of the learning data
        learning_plt = fig.add_subplot(111)
        learn_index = int(0.9 * len(learning_targets))
        learning_plt.plot(learn_x[learn_index:], learning_targets[learn_index:],
                          label="Learning values", color="blue")
        learning_plt.legend(loc='upper left')
        plt.show()

    def prepare_data(self):
        """Prepares the data."""
        learn_inputs = [round(x, 2) for x in [y * 0.05 for y in range(0, 4001)]]
        learn_targets = [math.sin(z) for z in learn_inputs]

        test_inputs = [round(x, 2) for x in [y * 0.05 for y in range(4001, 4101)]]
        test_targets = [math.sin(z) for z in test_inputs]

        self.norm.add_feature("x", learn_inputs + test_inputs)
        self.norm.add_feature("sin", learn_targets + test_targets)

        #learning_set = pybrain.datasets.sequential.SupervisedDataSet(1, 1)
        learning_set = SequentialDataSet(1, 1)
        targ_close_to_zero = 0
        for inp, targ in zip(learn_inputs, learn_targets):
            if abs(targ) < 0.01:
                targ_close_to_zero += 1
            #if inp % 1 == 0.0:
            if targ_close_to_zero == 2:
                print "New sequence at", (inp, targ)
                targ_close_to_zero = 0
                learning_set.newSequence()
            learning_set.appendLinked(self.norm.normalize("x", inp),
                                      self.norm.normalize("sin", targ))

        testing_set = []
        for inp, targ in zip(test_inputs, test_targets):
            testing_set.append([self.norm.normalize("x", inp),
                               self.norm.normalize("sin", targ), inp, targ])
        return learning_set, testing_set, learn_inputs, test_inputs, learn_targets

if __name__ == '__main__':
    nnetwork = Network(1, 20, 1)
    learning_set, testing_set, learning_inputs, testing_inputs, learn_targets = \
        nnetwork.prepare_data()
    errors, rate = nnetwork.train(learning_set, 125)
    outputs, mse = nnetwork.test(testing_set)
    correct = [element[3] for element in testing_set]
    nnetwork.show_plot(correct, outputs,
                       learning_inputs, testing_inputs, learn_targets, mse)

The results are dismal, to say the least.

X           Correct     Output      OutDenorm   Error
200.050000  -0.847857   0.490775    -0.018445   -0.829411
200.100000  -0.820297   0.490774    -0.018448   -0.801849
200.150000  -0.790687   0.490773    -0.018450   -0.772237
200.200000  -0.759100   0.490772    -0.018452   -0.740648
200.250000  -0.725616   0.490770    -0.018454   -0.707162

This is just crazy.

The second one is similar, based on sunspot data:

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""An example of a simple RNN."""

import argparse
import sys
import operator
import time

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import FullConnection
from pybrain.structure.modules import LSTMLayer
from pybrain.structure import LinearLayer, SigmoidLayer
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.supervised import RPropMinusTrainer
from pybrain.datasets import SupervisedDataSet
import pybrain.datasets.sequential

import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

from normalizator import Normalizator


class Network(object):
    """Neural network."""

    def __init__(self, inputs, hidden, outputs):
        """Constructor."""
        self.inputs = inputs
        self.outputs = outputs
        self.hidden = hidden
        self.network = self.build_network(inputs, hidden, outputs)
        self.norm = Normalizator()

    def build_network(self, inputs, hidden, outputs):
        """Builds the network."""
        network = buildNetwork(inputs, hidden, outputs, bias=True,
                               hiddenclass=LSTMLayer,
                               #hiddenclass=SigmoidLayer,
                               outclass=SigmoidLayer,
                               outputbias=False, fast=False, recurrent=True)
        #network.addRecurrentConnection(
        #    FullConnection(network['hidden0'], network['hidden0'], name='c3'))
        network.sortModules()
        network.randomize()
        print "Constructed network:"
        print network
        return network

    def train(self, learning_set, max_iterations=100):
        """Trains the network."""
        print "\nThe network is learning..."
        time_s = time.time()
        trainer = RPropMinusTrainer(self.network, dataset=learning_set,
                                    verbose=True)
        learning_rate = 0.001
        #trainer = BackpropTrainer(self.network, learning_set, verbose=True,
        #          batchlearning=True, momentum=0.8, learningrate=learning_rate)
        errors = trainer.trainUntilConvergence(maxEpochs=max_iterations)
        #print "Last error in learning:", errors[-1]
        time_d = time.time() - time_s
        print "Learning took %d seconds." % time_d
        return errors, learning_rate

    def test(self, data):
        """Tests the network."""
        print ("Year\tMonth\tCount\tCount_norm\t" +
                "Output\t\tOutDenorm\tError")
        # do the testing
        mse = 0.0
        outputs = []
        #print "Test data:", data
        for item in data:
            #month = self.norm.denormalize("month", item[1])
            #year = self.norm.denormalize("year", item[2])
            year, month = self.norm.denormalize("ym", item[5])
            count = self.norm.denormalize("count", item[3])
            #get the output from the network
            output = self.network.activate((item[1], item[2]))
            out_denorm = self.norm.denormalize("count", output[0])
            outputs.append(out_denorm)
            #compute the error
            error = count - out_denorm
            mse += error**2
            print "%d\t%d\t%s\t%f\t%f\t%f\t%f" % \
                (year, month, count, item[3],
                 output[0], out_denorm, error)
        mse /= len(data)
        print "MSE:", mse
        #corrects = [self.norm.denormalize("count", item[3]) for item in data]
        #print "corrects:", len(corrects)
        return outputs, mse

    def show_plot(self, correct, outputs, learn_x, test_x,
                  learning_targets, mse):
        """Rysuje wykres :)"""
        #print "x_axis:", x_axis
        #print "output:", output
        #print "correct:", correct
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(test_x, outputs, label="Prediction", color="red")
        ax.plot(test_x, correct, ":", label="Correct")
        #                                               int(201000.0 / 100)
        ax.xaxis.set_major_formatter(FormatStrFormatter('%s'))
        ax.legend(loc='upper left')
        learn_index = int(0.8 * len(learn_x))
        learn_part_x = learn_x[learn_index:]
        learn_part_vals = learning_targets[learn_index:]
        learning_plt = fig.add_subplot(111)
        learning_plt.plot(learn_part_x, learn_part_vals,
                          label="Learning values", color="blue")
        learning_plt.legend(loc='upper left')
        plt.xlabel('Year-Month')
        plt.ylabel('Values')
        plt.title('... (mse=%f)' % mse)
        plt.show()

    def read_data(self, learnfile, testfile):
        """Wczytuje dane uczące oraz testowe."""
        #read learning data
        data_learn_tmp = []
        for line in learnfile:
            if line[0] == "#":  # skip comment lines
                continue
            row = line.split()
            year = float(row[0][0:4])
            month = float(row[0][4:6])
            yearmonth = int(row[0])
            count = float(row[2])
            data_learn_tmp.append([month, year, count, yearmonth])
        data_learn_tmp = sorted(data_learn_tmp, key=operator.itemgetter(1, 0))
        # read test data
        data_test_tmp = []
        for line in testfile:
            if line[0] == "#":
                continue
            row = line.split()
            year = float(row[0][0:4])
            month = float(row[0][4:6])
            count = float(row[2])
            year_month = int(row[0])
            data_test_tmp.append([month, year, count, year_month])
        data_test_tmp = sorted(data_test_tmp, key=operator.itemgetter(1, 0))
        # prepare data for normalization
        months = [item[0] for item in data_learn_tmp + data_test_tmp]
        years = [item[1] for item in data_learn_tmp + data_test_tmp]
        counts = [item[2] for item in data_learn_tmp + data_test_tmp]
        self.norm.add_feature("month", months)
        self.norm.add_feature("year", years)
        ym = [(years[index], months[index]) for index in xrange(0, len(years))]
        self.norm.add_feature("ym", ym, ranked=True)
        self.norm.add_feature("count", counts)
        #build learning data set
        learning_set = pybrain.datasets.sequential.SequentialDataSet(2, 1)
        #learning_set = pybrain.datasets.sequential.SupervisedDataSet(2, 1)
        # add items to the learning dataset proper
        last_year = -1
        for item in data_learn_tmp:
            if last_year != item[1]:
                learning_set.newSequence()
                last_year = item[1]
            year_month = self.norm.normalize("ym", (item[1], item[0]))
            count = self.norm.normalize("count", item[2])
            learning_set.appendLinked((year_month), (count))
        #build testing data set proper
        words = ["N/A"] * len(data_test_tmp)
        testing_set = []
        # each data_test_tmp row is [month, year, count, yearmonth]
        for index in range(len(data_test_tmp)):
            month = self.norm.normalize("month", data_test_tmp[index][0])
            year = self.norm.normalize("year", data_test_tmp[index][1])
            year_month = self.norm.normalize("ym",
                        (data_test_tmp[index][1], data_test_tmp[index][0]))
            count = self.norm.normalize("count", data_test_tmp[index][2])
            testing_set.append((words[index], month, year,
                                count, data_test_tmp[index][3], year_month))
        #learning_set, testing_set, learn_inputs, test_inputs, learn_targets
        learn_x = [element[3] for element in data_learn_tmp]
        test_x = [element[3] for element in data_test_tmp]
        learn_targets = [element[2] for element in data_learn_tmp]
        test_targets = [element[2] for element in data_test_tmp]
        return (learning_set, testing_set, learn_x, test_x,
                learn_targets, test_targets)


def get_args():
    """Buduje parser cli."""
    parser = argparse.ArgumentParser(
        description='Trains a simple recurrent neural network.')

    parser.add_argument('--inputs', type=int, default=2,
                        help='Number of input neurons.')
    parser.add_argument('--hidden', type=int, default=5,
                        help='Number of hidden neurons.')
    parser.add_argument('--outputs', type=int, default=1,
                        help='Number of output neurons.')

    parser.add_argument('--iterations', type=int, default=100,
                help='Maximum number of epochs in the training phase.')

    parser.add_argument('trainfile', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin, help="File with learning dataset.")
    parser.add_argument('testfile', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin, help="File with testing dataset.")

    parser.add_argument('--version', action='version', version='%(prog)s 1.0')

    return parser.parse_args()

if __name__ == '__main__':
    args = get_args()
    nnetwork = Network(args.inputs, args.hidden, args.outputs)
    learning_set, testing_set, learn_x, test_x, learn_targets, test_targets = \
        nnetwork.read_data(args.trainfile, args.testfile)
    errors, rate = nnetwork.train(learning_set, args.iterations)
    outputs, mse = nnetwork.test(testing_set)
    nnetwork.show_plot(test_targets, outputs,
                       learn_x, test_x, learn_targets, mse)

And here I see only chaos, which I cannot show you in a plot because I do not have enough reputation points. But basically, the predicted function is a periodic, saw-tooth-like curve with little correlation to the inputs or the past data.

Year    Month   Count   Count_norm  Output      OutDenorm   Error
2009    9       4.3     0.016942    0.216687    54.995108   -50.695108
2009    10      4.8     0.018913    0.218810    55.534015   -50.734015
2009    11      4.1     0.016154    0.221876    56.312243   -52.212243
2009    12      10.8    0.042553    0.224774    57.047758   -46.247758
2010    1       13.2    0.052009    0.184361    46.790833   -33.590833
2010    2       18.8    0.074074    0.181018    45.942258   -27.142258
2010    3       15.4    0.060678    0.183226    46.502806   -31.102806

I have tried two different learning algorithms and many combinations of hidden-unit counts, learning rates, and ways of splitting the learning dataset into sequences, but to no avail.

I am completely lost now.

If you use a logistic (sigmoid) activation function in the output layer, the output is constrained to the range (0, 1). But your sin function produces outputs in the range (-1, 1). I think that is why your sin training has a hard time converging to a small error. You cannot even predict the sin function correctly on the training data, right? You may need to scale your input/output sets before training and testing.
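Two ways to act on that advice, as a minimal sketch (the TanhLayer swap uses PyBrain's own pybrain.structure.modules.TanhLayer; the scale/unscale helpers are hypothetical names introduced here, not part of the original scripts): either give the output layer a range that matches the raw targets, or rescale the targets into the sigmoid's (0, 1) range before training and invert the scaling when reading predictions.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Sketch: match the output layer's range to the target range."""

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import LSTMLayer, TanhLayer

# Option 1: a tanh output layer covers (-1, 1), so raw sin targets
# fit without any rescaling.
network = buildNetwork(1, 20, 1,
                       hiddenclass=LSTMLayer,
                       outclass=TanhLayer,  # (-1, 1) instead of sigmoid's (0, 1)
                       recurrent=True)

# Option 2: keep the sigmoid output and min-max scale the targets into
# (0, 1) before training; invert the mapping when reading predictions.
def scale(value, lo=-1.0, hi=1.0):
    """Map a target from [lo, hi] into [0, 1]."""
    return (value - lo) / (hi - lo)

def unscale(value, lo=-1.0, hi=1.0):
    """Map a network output from [0, 1] back into [lo, hi]."""
    return value * (hi - lo) + lo

The posted scripts already route targets through Normalizator, so checking the bounds that class actually produces is a quick way to confirm whether this range mismatch applies to them.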
