tensorflow/keras LSTM model is throwing value errors
My script:
```python
import sqlite3
from tensorflow import keras
from sklearn.model_selection import train_test_split as tts
import numpy as np
import pickle


def batchSequencer(tokenedCommentList, sequenceLength, incrementSize):
    """Slide a window of sequenceLength over a token list; pad short lists with -1."""
    filler = -1
    sequence = []
    index = 0
    if len(tokenedCommentList) > sequenceLength:
        while index <= len(tokenedCommentList) - sequenceLength:
            sequence.append(tokenedCommentList[index:index + sequenceLength])
            index += incrementSize
    else:
        _slice = []
        for token in tokenedCommentList:
            _slice.append(token)
        for _ in range(len(_slice), sequenceLength):
            _slice.append(filler)
        sequence.append(_slice)
    return np.array(sequence)


class batch_generator():
    def __init__(self, tFeatures, tLabels, k_tk, Length, iSize):
        self.features = tFeatures
        self.labels = tLabels
        self.tk = k_tk
        self.length = Length
        self.iSize = iSize
        self.index = 0
        self.internalIter = 0
        self.storedSequences = []
        self.sequenceIndex = 0
        self.currentLabel = []

    def generate(self):
        result = batchSequencer(self.features[self.index], self.length, self.iSize)
        y_index = self.index
        self.index += 1
        if self.index > len(self.features):
            self.index = 0
        return result, self.labels[y_index]

    def batchGenerate(self):
        x = self.tk.texts_to_matrix(self.features[self.index])
        result = batchSequencer(x, self.length, self.iSize)
        y_index = self.index
        self.index += 1
        if self.index > len(self.features):
            self.index = 0
        self.storedSequences = result
        self.currentLabel = self.labels[y_index]
        # return np.array(result), np.array(self.labels[y_index])

    def miniSequencer(self):
        if self.sequenceIndex >= len(self.storedSequences):
            self.batchGenerate()
            self.internalIter = 0
            self.sequenceIndex = 0
        result = np.array(self.storedSequences[self.sequenceIndex])
        self.sequenceIndex += 1
        return result, np.array(self.currentLabel)


def Main():
    connection = sqlite3.connect('chatDataset.db')
    c = connection.cursor()
    trainFeatures = []
    trainLabels = []
    testFeatures = []
    testLabels = []
    vocabSize = 10000
    sequenceSize = 6
    hiddenSize = 500
    goodCount = 0
    badCount = 0
    num_epochs = 10
    # Build a balanced set of negative ([1, 0]) and positive ([0, 1]) posts.
    for row in c.execute('SELECT * FROM posts'):
        if row[3] < 0:
            trainFeatures.append(row[1])
            trainLabels.append([1, 0])
            badCount += 1
        else:
            if goodCount <= badCount:
                trainFeatures.append(row[1])
                trainLabels.append([0, 1])
                goodCount += 1
    try:
        tk = pickle.load(open("tokenizer2.pkl", "rb"))
        trainFeatures = tk.texts_to_sequences(trainFeatures)
        print("tokenizer loaded")
    except:
        print("no tokenizer found, creating new one")
        tk = keras.preprocessing.text.Tokenizer(num_words=vocabSize,
                                                filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~ ',
                                                lower=True, split=' ',
                                                char_level=False, oov_token=None)
        tk.fit_on_texts(trainFeatures)
        trainFeatures = tk.texts_to_sequences(trainFeatures)
        pickle.dump(tk, open("tokenizer2.pkl", "wb"))
    try:
        model = keras.models.load_model("LSTM_Model.mdl")
        print("loaded model successfully!")
    except:
        print("No model information found, creating new model!")
        trainFeatures, testFeatures, trainLabels, testLabels = tts(trainFeatures, trainLabels,
                                                                   test_size=0.01)
        checkpointer = keras.callbacks.ModelCheckpoint(filepath='/model-{epoch:02d}.hdf5',
                                                       verbose=1)
        model = keras.Sequential()
        model.add(keras.layers.Embedding(vocabSize, hiddenSize, input_length=sequenceSize))
        model.add(keras.layers.LSTM(hiddenSize, return_sequences=True))
        model.add(keras.layers.LSTM(hiddenSize, return_sequences=True))
        model.add(keras.layers.Flatten())
        model.add(keras.layers.Dense(2, activation='relu'))
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["accuracy"])
        trainGenerator = batch_generator(trainFeatures, trainLabels, tk, sequenceSize, 1)
        validGenerator = batch_generator(testFeatures, testLabels, tk, sequenceSize, 1)
        model.fit_generator(trainGenerator.generate(), len(trainFeatures), num_epochs,
                            validation_data=validGenerator.generate(),
                            validation_steps=len(testFeatures), callbacks=[checkpointer])
        model.save("LSTM_Model.mdl")
        print("done training model")


if __name__ == "__main__":
    Main()
```
Full traceback:
```
Traceback (most recent call last):
  File "<ipython-input-2-dfff27295b93>", line 1, in <module>
    runfile('D:/coding projects/py practice/machine learning/autoReporter/sequencer.py', wdir='D:/coding projects/py practice/machine learning/autoReporter')
  File "C:\Users\oxrock\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 701, in runfile
    execfile(filename, namespace)
  File "C:\Users\oxrock\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 101, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)
  File "D:/coding projects/py practice/machine learning/autoReporter/sequencer.py", line 147, in <module>
    Main()
  File "D:/coding projects/py practice/machine learning/autoReporter/sequencer.py", line 137, in Main
    validation_steps=len(testFeatures),callbacks=[checkpointer])
  File "C:\Users\oxrock\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1779, in fit_generator
    initial_epoch=initial_epoch)
  File "C:\Users\oxrock\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_generator.py", line 136, in fit_generator
    val_x, val_y, val_sample_weight)
  File "C:\Users\oxrock\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py", line 917, in _standardize_user_data
    exception_prefix='target')
  File "C:\Users\oxrock\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_utils.py", line 191, in standardize_input_data
    ' but got array with shape ' + str(data_shape))
ValueError: Error when checking target: expected dense_1 to have shape (2,) but got array with shape (1,)
```
The purpose of the model is to classify Reddit comments as negative/positive posts. The SQL database the training data is pulled from contains posts I ripped from Reddit and classified myself. I have to split the data into mini-batches via the batch-generator class because it is impossible to hold all of it in memory.

I am stuck training this model, getting "ValueError: Error when checking target: expected dense_1 to have shape (2,) but got array with shape (1,)".

I have been stuck on this for a while and have reached the point of changing things at random in the hope of a miracle. A helping hand would be much appreciated. If any additional information is needed, I will happily post it.
Currently, there are a few things wrong with your code. First, the data generator should not return just a single value; it should be able to produce every sample in the dataset. Usually you do this with a loop, using `yield` inside the loop to return one batch at a time.
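For example, a minimal sketch of such a generator (assuming the sequences have already been padded/truncated to a fixed `sequenceSize`; the name `data_generator` and the `batch_size` parameter are illustrative, not part of the original script):

```python
import numpy as np

def data_generator(features, labels, batch_size):
    """Loop over the dataset forever, yielding one (inputs, targets) batch per step."""
    while True:  # Keras consumes generators indefinitely, one batch per training step
        for start in range(0, len(features) - batch_size + 1, batch_size):
            x = np.array(features[start:start + batch_size])  # shape: (batch_size, sequenceSize)
            y = np.array(labels[start:start + batch_size])    # shape: (batch_size, 2)
            yield x, y
```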
Second, it may help to pass `np.array`s to Keras instead of Python lists. Try it and see what happens if you cast the labels before training the model. Also, make sure your data has the correct shape: the target labels should have shape `(batch_size, 2)`, and the input data should have a shape like `(batch_size, sequenceSize, vocabSize)`. Please print the shapes of your training/validation data (both inputs and targets) and verify that they are correct.
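A quick sanity check along those lines might look like the sketch below (assuming `trainFeatures` has already been converted to fixed-length sequences; note that the `Embedding` layer in the question's model takes integer token ids of shape `(batch_size, sequenceSize)` rather than one-hot rows):

```python
# Cast a small slice of the data to arrays and inspect the shapes before training.
x_check = np.array(trainFeatures[:32])
y_check = np.array(trainLabels[:32])
print("inputs:", x_check.shape)   # should match what the first layer expects
print("targets:", y_check.shape)  # should be (32, 2) to match the Dense(2) output
```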
Finally, if you are doing a classification task, the last activation function of the network should be softmax. So replace

```python
model.add(keras.layers.Dense(2, activation='relu'))
```

with

```python
model.add(keras.layers.Dense(2, activation='softmax'))
```
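Putting the pieces together, the training call could then be wired up roughly like this (a sketch built on the `data_generator` outlined above; `batch_size` and the `steps_per_epoch` arithmetic are assumptions, not taken from the original script):

```python
batch_size = 32  # illustrative choice, tune to available memory
train_gen = data_generator(trainFeatures, trainLabels, batch_size)
valid_gen = data_generator(testFeatures, testLabels, batch_size)
model.fit_generator(train_gen,
                    steps_per_epoch=max(1, len(trainFeatures) // batch_size),
                    epochs=num_epochs,
                    validation_data=valid_gen,
                    validation_steps=max(1, len(testFeatures) // batch_size),
                    callbacks=[checkpointer])
```

With `softmax` paired with the `categorical_crossentropy` loss, the two output units form a proper probability distribution over the negative/positive classes.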