简体   繁体   中英

tensorflow/keras LSTM model is throwing value errors

My script:

import sqlite3
from tensorflow import keras
from sklearn.model_selection import train_test_split as tts
import numpy as np
import pickle

def batchSequencer(tokenedCommentList,sequenceLength,incrementSize):
    filler = -1
    sequence = []
    index = 0
    if len(tokenedCommentList) > sequenceLength:
        while index <= len(tokenedCommentList)-sequenceLength:
            sequence.append(tokenedCommentList[index:index+sequenceLength])
            index +=incrementSize
    else:
        _slice = []
        for token in tokenedCommentList:
            _slice.append(token)
        for _ in range(len(_slice),sequenceLength):
            _slice.append(filler)
        sequence.append(_slice)
    return np.array(sequence)



class batch_generator():
    def __init__(self,tFeatures,tLabels,k_tk,Length,iSize):
        self.features = tFeatures
        self.labels = tLabels
        self.tk = k_tk
        self.length = Length
        self.iSize = iSize
        self.index = 0
        self.internalIter = 0
        self.storedSequences = []
        self.sequenceIndex = 0
        self.currentLabel = []

    def generate(self):
        result = batchSequencer(self.features[self.index],self.length,self.iSize)
        y_index = self.index
        self.index +=1
        if self.index > len(self.features):
            self.index = 0
        return result, self.labels[y_index]


    def batchGenerate(self):
        x = self.tk.texts_to_matrix(self.features[self.index])
        result = batchSequencer(x,self.length,self.iSize)
        y_index = self.index
        self.index +=1
        if self.index > len(self.features):
            self.index = 0

        self.storedSequences = result
        self.currentLabel = self.labels[y_index]
        #return np.array(result),np.array(self.labels[y_index])



    def miniSequencer(self):
        if self.sequenceIndex >= len(self.storedSequences):
            self.batchGenerate()
            self.internalIter = 0
            self.sequenceIndex = 0
        result = np.array(self.storedSequences[self.sequenceIndex])
        self.sequenceIndex +=1
        return result,np.array(self.currentLabel)



def Main():
    connection = sqlite3.connect('chatDataset.db')
    c = connection.cursor()
    trainFeatures = []
    trainLabels = []
    testFeatures = []
    testLabels = []
    vocabSize = 10000
    sequenceSize = 6
    hiddenSize = 500
    goodCount = 0
    badCount = 0
    num_epochs = 10
    for row in c.execute('SELECT * FROM posts'):
        if row[3] < 0:
            trainFeatures.append(row[1])
            trainLabels.append([1,0])
            badCount +=1
        else:
            if goodCount <= badCount:
                trainFeatures.append(row[1])
                trainLabels.append([0,1])
                goodCount +=1

    try:
        tk = pickle.load( open( "tokenizer2.pkl", "rb" ) )
        trainFeatures = tk.texts_to_sequences(trainFeatures)
        print("tokenizer loaded")

    except:
        print("no tokenizer found, creating new one")
        tk = keras.preprocessing.text.Tokenizer(num_words=vocabSize, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~ ', lower=True, split=' ', char_level=False, oov_token=None)
        tk.fit_on_texts(trainFeatures)
        trainFeatures = tk.texts_to_sequences(trainFeatures)
        pickle.dump(tk,open( "tokenizer2.pkl", "wb" ))

    try:
        model = keras.models.load_model("LSTM_Model.mdl")
        print("loaded model successfully!")

    except:
        print("No model information found, creating new model!")
        trainFeatures,testFeatures,trainLabels,testLabels = tts(trainFeatures,trainLabels,test_size = 0.01)

        checkpointer = keras.callbacks.ModelCheckpoint(filepath='/model-{epoch:02d}.hdf5', verbose=1)
        model = keras.Sequential()
        model.add(keras.layers.Embedding(vocabSize, hiddenSize, input_length = sequenceSize))
        model.add(keras.layers.LSTM(hiddenSize, return_sequences=True))
        model.add(keras.layers.LSTM(hiddenSize, return_sequences=True))
        model.add(keras.layers.Flatten())
        model.add(keras.layers.Dense(2, activation='relu')) 
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["accuracy"])


        trainGenerator = batch_generator(trainFeatures,trainLabels, tk,sequenceSize,1)
        validGenerator = batch_generator(testFeatures,testLabels, tk,sequenceSize,1)
        model.fit_generator(trainGenerator.generate(), len(trainFeatures), num_epochs,
                        validation_data=validGenerator.generate(),
                        validation_steps=len(testFeatures),callbacks=[checkpointer])

        model.save("LSTM_Model.mdl")


    print("done training model")



if __name__ == "__main__":
    Main()

full traceback error:

Traceback (most recent call last):

  File "<ipython-input-2-dfff27295b93>", line 1, in <module>
    runfile('D:/coding projects/py practice/machine learning/autoReporter/sequencer.py', wdir='D:/coding projects/py practice/machine learning/autoReporter')

  File "C:\Users\oxrock\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 701, in runfile
    execfile(filename, namespace)

  File "C:\Users\oxrock\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 101, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "D:/coding projects/py practice/machine learning/autoReporter/sequencer.py", line 147, in <module>
    Main()

  File "D:/coding projects/py practice/machine learning/autoReporter/sequencer.py", line 137, in Main
    validation_steps=len(testFeatures),callbacks=[checkpointer])

  File "C:\Users\oxrock\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1779, in fit_generator
    initial_epoch=initial_epoch)

  File "C:\Users\oxrock\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_generator.py", line 136, in fit_generator
    val_x, val_y, val_sample_weight)

  File "C:\Users\oxrock\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py", line 917, in _standardize_user_data
    exception_prefix='target')

  File "C:\Users\oxrock\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_utils.py", line 191, in standardize_input_data
    ' but got array with shape ' + str(data_shape))

ValueError: Error when checking target: expected dense_1 to have shape (2,) but got array with shape (1,)

The model's purpose is to separate reddit comments into negative/positive posts. The sql database that the training data is being pulled from contains posts I've pulled and classified from a reddit rip. I have to break the data into small batches via my batch generator class because holding it all in memory just isn't possible.

I'm having a hard time getting this model to train with "ValueError: Error when checking target: expected dense_1 to have shape (2,) but got array with shape (1,)" being as far as I've been able to get.

I've been stuck on this a while and have gotten to the point where I'm changing things randomly hoping for a miracle. A helping hand would be appreciated. If any additional info is required I'll be happy to post it.

There are currently some things wrong with your code. At first, your data generator should not only return one value but all values of the dataset. Usually, you do this with a loop and use yield inside the loop to return single data.

Secondly, it might help to not pass python lists but np.array s to keras . Please try out, what happens if you cast the labels before training the model. Furthermore, make sure, that your data has the right shape, ie the target labels should have the shape (batch size, 2) and the input data something like (batch size, sequenceSize, vocabSize) . Please print the shapes of the training/validation data (input and target data) and make sure, that this is correct.

Lastly, if you are doing some classification task, the last activation function of your network should be a softmax. Therefore, please replace

model.add(keras.layers.Dense(2, activation='relu'))

with

model.add(keras.layers.Dense(2, activation='softmax'))

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM