简体   繁体   中英

keras use weight to load model, issue ValueError: Dimension 1 in both shapes must be equal, but are 124 and 121

I've been training this model with keras for a text generator

# Small LSTM Network to Generate Text for Alice in Wonderland
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

# load ascii text and covert to lowercase
filename = "lyrics_small.txt"
raw_text = open(filename).read()
raw_text = raw_text.lower()

# create mapping of unique chars to integers
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))

# summarize the loaded data
n_chars = len(raw_text)
n_vocab = len(chars)
print ("Total Characters: ", n_chars)
print ("Total Vocab: ", n_vocab)

# prepare the dataset of input to output pairs encoded as integers
seq_length = 100
dataX = []
dataY = []

for i in range(0, n_chars - seq_length, 1):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])

n_patterns = len(dataX)
print ("Total Patterns: ", n_patterns)

# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))

# normalize
X = X / float(n_vocab)

# one hot encode the output variable
y = np_utils.to_categorical(dataY)

model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

# define the checkpoint
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

# fit the model
model.fit(X, y, epochs=20, batch_size=128, callbacks=callbacks_list)

once it finished running I realized it actually didn't save the model.

Not a problem because you can load the weights into the model and use it and the blog on this had instructions on this

# load ascii text and covert to lowercase
filename = "lyrics_small.txt"
raw_text = open(filename).read()
raw_text = raw_text.lower()

# create mapping of unique chars to integers
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))

# summarize the loaded data
n_chars = len(raw_text)
n_vocab = len(chars)
print ("Total Characters: ", n_chars)
print ("Total Vocab: ", n_vocab)

# prepare the dataset of input to output pairs encoded as integers
seq_length = 100
dataX = []
dataY = []

for i in range(0, n_chars - seq_length, 1):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])

n_patterns = len(dataX)
print ("Total Patterns: ", n_patterns)

# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))

# normalize
X = X / float(n_vocab)

# one hot encode the output variable
y = np_utils.to_categorical(dataY)

model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))

# load the network weights
filename = "weights-improvement-17-1.7150.hdf5"
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

# pick a random seed
start = numpy.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

# generate characters
for i in range(1000):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    sys.stdout.write(result)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]

print ("\nDone.")

but when I ran the model it throws an incompatibility error

Using TensorFlow backend.
Total Characters:  1571440
Total Vocab:  124
Total Patterns:  1571340
Traceback (most recent call last):
File "/home/sam/code/envs/data/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1628, in _create_c_op
    c_op = c_api.TF_FinishOperation(op_desc)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Dimension 1 in both shapes must be equal, but are 124 and 121. Shapes are [256,124] and [256,121]. for 'Assign_6' (op: 'Assign') with input shapes: [256,124], [256,121].

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "new_model_trained.py", line 58, in <module>
    model.load_weights(filename)
File "/home/sam/code/envs/data/lib/python3.6/site-packages/keras/engine/network.py", line 1166, in load_weights
    f, self.layers, reshape=reshape)
File "/home/sam/code/envs/data/lib/python3.6/site-packages/keras/engine/saving.py", line 1058, in load_weights_from_hdf5_group
    K.batch_set_value(weight_value_tuples)
File "/home/sam/code/envs/data/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2465, in batch_set_value
    assign_op = x.assign(assign_placeholder)
File "/home/sam/code/envs/data/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 1718, in assign
    name=name)
File "/home/sam/code/envs/data/lib/python3.6/site-packages/tensorflow/python/ops/state_ops.py", line 221, in assign
    validate_shape=validate_shape)
File "/home/sam/code/envs/data/lib/python3.6/site-packages/tensorflow/python/ops/gen_state_ops.py", line 61, in assign
    use_locking=use_locking, name=name)
File "/home/sam/code/envs/data/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
File "/home/sam/code/envs/data/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
File "/home/sam/code/envs/data/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
File "/home/sam/code/envs/data/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1792, in __init__
    control_input_ops)
File "/home/sam/code/envs/data/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1631, in _create_c_op
    raise ValueError(str(e))
ValueError: Dimension 1 in both shapes must be equal, but are 124 and 121. Shapes are [256,124] and [256,121]. for 'Assign_6' (op: 'Assign') with input shapes: [256,124], [256,121].

because the model get's it input_shape from the shape of X this leads me to the assumption the model doesn't split the content in a consistent way.

Is there a way to fix this?

probably datasets youre using for codes is diffrent you can try change this line:

y = np_utils.to_categorical(dataY)

to this:

np_utils.to_categorical(y_train, num_classes=121)

you can load model like that but dont wait for accuracy when datasets are diffrent

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM