I am working with TensorFlow 2.0.0 and TensorFlow Hub. I am currently using the tf.data API to load my data, which is stored in a TFRecords file.
I am successfully loading the dataset and compiling the model, but when I try to fit the data to the model I get the error:
Error when checking input: expected inputs_input to have 1 dimensions, but got array with shape (64, 1)
This is how I am loading my data:
def _dataset_parser(value):
    """Parse a record from value."""
    featdef = {
        'id': tf.io.FixedLenFeature([1], tf.int64),
        'question': tf.io.FixedLenFeature([1], tf.string),
        'label': tf.io.FixedLenFeature([1], tf.int64)
    }
    example = tf.io.parse_single_example(value, featdef)
    label = tf.cast(example['label'], tf.int32)
    question = tf.cast(example['question'], tf.string)
    return example['question'], example['label']

def _input(epochs, batch_size, filenames):
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.repeat(epochs)
    dataset = dataset.prefetch(1)
    # Parse records.
    dataset = dataset.map(_dataset_parser)
    dataset = dataset.shuffle(100)
    # Batch it up.
    dataset = dataset.batch(batch_size)
    iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
    question_batch, label_batch = iterator.get_next()
    label_batch = tf.one_hot(label_batch, NUM_CLASSES)
    return question_batch, label_batch
train_ds = _input(20, 64, ['train_xs.tfrecords'])
This is my model:
model = tf.keras.Sequential([
    hub.KerasLayer(HUB_URL, dtype=tf.string, input_shape=[], output_shape=[WIDTH], name='inputs'),
    tf.keras.layers.Dense(256, 'relu', name='layer_1'),
    tf.keras.layers.Dense(128, 'relu', name='layer_2'),
    tf.keras.layers.Dense(NUM_CLASSES, activation='softmax', name='output')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
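For reference, the hub layer declared with input_shape=[] consumes a 1-D batch of strings (one string per example) and returns a (batch_size, WIDTH) embedding. A minimal sketch of that calling convention, using the public nnlm-en-dim50 module as a stand-in for my HUB_URL:

import tensorflow as tf
import tensorflow_hub as hub

# Stand-in for HUB_URL; any TF2 text-embedding module is called the same way.
embed = hub.KerasLayer("https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1",
                       dtype=tf.string, input_shape=[], output_shape=[50])

batch = tf.constant([b'Name', b'Type of house'])  # shape (2,): one string per example
print(embed(batch).shape)                         # (2, 50)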
I already tried setting the input shape of the first layer to (None, 1), but it keeps failing. I am not sure whether the problem is caused by TensorFlow Hub, so I tried running this example from the Hands-On ML book:
model = tf.keras.Sequential([
    hub.KerasLayer("https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1",
                   dtype=tf.string, input_shape=[], output_shape=[50]),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])
model.compile(loss="binary_crossentropy", optimizer="adam",
              metrics=["accuracy"])
datasets, info = tfds.load("imdb_reviews", as_supervised=True, with_info=True)
train_size = info.splits["train"].num_examples
batch_size = 32
train_set = datasets["train"].repeat().batch(batch_size).prefetch(1)
history = model.fit(train_set, steps_per_epoch=5, epochs=5)  # or steps_per_epoch=train_size // batch_size
and it works fine. However, one difference I found is that when I print train_set from the book example I get:
<PrefetchDataset shapes: ((None,), (None,)), types: (tf.string, tf.int64)>
Whereas with my code I get this when I print the dataset that I am feeding to the model:
(<tf.Tensor: id=11409, shape=(64, 1), dtype=string, numpy=
 array([[b'Restroom score out of 9'],
        [b'Name'],
        [b'Lastname'],
        [b'Type of house'],
        [b'Inspection date'],
        [b'Pet'],
        [b'Phone'],
        ...], dtype=object)>,
 <tf.Tensor: id=11414, shape=(64, 1, 80), dtype=float32, numpy=
 array([[[0., 0., 0., ..., 0., 0., 0.]],
        [[0., 0., 0., ..., 0., 0., 0.]],
        [[0., 0., 0., ..., 0., 0., 0.]],
        ...,
        [[0., 0., 0., ..., 0., 0., 0.]],
        [[0., 0., 0., ..., 0., 0., 0.]],
        [[0., 0., 0., ..., 0., 0., 0.]]], dtype=float32)>)
Does someone know why the difference in the shape of the data?
If anyone is interested, this is the final code that worked for me:
model = tf.keras.Sequential([
    hub.KerasLayer(HUB_URL, dtype=tf.string, input_shape=[], output_shape=[WIDTH], name='inputs'),
    tf.keras.layers.Dense(256, 'relu', name='layer_1'),
    tf.keras.layers.Dense(128, 'relu', name='layer_2'),
    tf.keras.layers.Dense(NUM_CLASSES, activation='softmax', name='output')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
def _dataset_parser(value):
    """Parse a record from value."""
    featdef = {
        'id': tf.io.FixedLenFeature([], tf.int64),
        'question': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64)
    }
    example = tf.io.parse_single_example(value, featdef)
    label = tf.cast(example['label'], tf.int64)
    question = tf.cast(example['question'], tf.string)
    return question, tf.one_hot(label, NUM_CLASSES)

def _input(epochs, batch_size, filenames):
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.repeat(epochs)
    dataset = dataset.prefetch(1)
    # Parse records.
    dataset = dataset.map(_dataset_parser)
    dataset = dataset.shuffle(100)
    # Batch it up.
    dataset = dataset.batch(batch_size)
    return dataset

train_ds = _input(1, 10, ['train_xs.tfrecords'])
model.fit(train_ds, epochs=1)
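As a sanity check, printing the dataset now shows 1-D string batches and 2-D one-hot labels, matching the shapes the book example produced (a sketch; NUM_CLASSES is 80 in my case, judging from the dump above):

print(train_ds)
# <BatchDataset shapes: ((None,), (None, 80)), types: (tf.string, tf.float32)>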
I was not sure why at first, but the change that made it work was the feature definition:

featdef = {
    'id': tf.io.FixedLenFeature([], tf.int64),
    'question': tf.io.FixedLenFeature([], tf.string),
    'label': tf.io.FixedLenFeature([], tf.int64)
}
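As far as I can tell, the difference comes down to the requested feature shape: tf.io.FixedLenFeature([1], ...) parses each feature as a length-1 vector, while tf.io.FixedLenFeature([], ...) parses it as a scalar, so after batching the questions have shape (batch_size, 1) in the first case and (batch_size,) in the second, and only the latter matches the hub layer's input_shape=[]. A small self-contained sketch (with a made-up serialized record) illustrating the two specs:

import tensorflow as tf

# Hypothetical single-record Example, just to compare the two feature specs.
serialized = tf.train.Example(features=tf.train.Features(feature={
    'question': tf.train.Feature(bytes_list=tf.train.BytesList(value=[b'Name'])),
})).SerializeToString()

as_vector = tf.io.parse_single_example(
    serialized, {'question': tf.io.FixedLenFeature([1], tf.string)})
as_scalar = tf.io.parse_single_example(
    serialized, {'question': tf.io.FixedLenFeature([], tf.string)})

print(as_vector['question'].shape)  # (1,)  -> (batch_size, 1) after batching
print(as_scalar['question'].shape)  # ()    -> (batch_size,)   after batching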