
How can I continue training a Keras model from a checkpoint?

I trained the first checkpoint model for 2 epochs with the following code:

from keras import backend as K
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.optimizers import Adam

def check_units(y_true, y_pred):
    if y_pred.shape[1] != 1:
        y_pred = y_pred[:, 1:2]
        y_true = y_true[:, 1:2]
    return y_true, y_pred

def precision(y_true, y_pred):
    y_true, y_pred = check_units(y_true, y_pred)
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def recall(y_true, y_pred):
    y_true, y_pred = check_units(y_true, y_pred)
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def fmeasure(y_true, y_pred):
    def recall(y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    y_true, y_pred = check_units(y_true, y_pred)
    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

classifier = Sequential()

classifier.add(Conv2D(6, (3, 3), input_shape = (30, 30, 3), data_format="channels_last", activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))

classifier.add(Conv2D(6, (3, 3), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))

classifier.add(Flatten())

classifier.add(Dense(units = 128, activation = 'relu'))
classifier.add(Dense(units = 64, activation = 'relu'))
classifier.add(Dense(units = 1, activation = 'sigmoid'))

opt = Adam(learning_rate = 0.001, beta_1 = 0.9, beta_2 = 0.999, epsilon = 1e-08, decay = 0.0)
classifier.compile(optimizer = opt, loss = 'binary_crossentropy', metrics = ['accuracy', precision, recall, fmeasure])

from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale = 1./255,
                               horizontal_flip = True,
                               vertical_flip = True,
                               rotation_range = 180)

validation_datagen = ImageDataGenerator(rescale = 1./255)

training_set = train_datagen.flow_from_directory('/home/dataset/training_set',
                                             target_size = (30, 30),
                                             batch_size = 32,
                                             class_mode = 'binary')

validation_set = validation_datagen.flow_from_directory('/home/dataset/validation_set',
                                                    target_size = (30, 30),
                                                    batch_size = 32,
                                                    class_mode = 'binary')

history = classifier.fit_generator(training_set,
                               steps_per_epoch = 208170,
                               epochs = 2,
                               validation_data = validation_set,
                               validation_steps = 89140)

classifier.save('/content/gdrive/My Drive/Checkpoints/Checkpoint_1/Model.h5')

To continue training (the second checkpoint model) from the first checkpoint, I use this code:

from keras import backend as K
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator

def check_units(y_true, y_pred):
    if y_pred.shape[1] != 1:
        y_pred = y_pred[:, 1:2]
        y_true = y_true[:, 1:2]
    return y_true, y_pred

def precision(y_true, y_pred):
    y_true, y_pred = check_units(y_true, y_pred)
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def recall(y_true, y_pred):
    y_true, y_pred = check_units(y_true, y_pred)
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def fmeasure(y_true, y_pred):
    def recall(y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    y_true, y_pred = check_units(y_true, y_pred)
    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

train_datagen = ImageDataGenerator(rescale = 1./255,
                               horizontal_flip = True,
                               vertical_flip = True,
                               rotation_range = 180)

validation_datagen = ImageDataGenerator(rescale = 1./255)

training_set = train_datagen.flow_from_directory('/home/dataset/training_set',
                                             target_size = (30, 30),
                                             batch_size = 32,
                                             class_mode = 'binary')

validation_set = validation_datagen.flow_from_directory('/home/dataset/validation_set',
                                                    target_size = (30, 30),
                                                    batch_size = 32,
                                                    class_mode = 'binary')

custom_metrics = {
    'precision': precision,
    'recall': recall,
    'fmeasure': fmeasure
}

classifier = load_model('/content/gdrive/My Drive/Checkpoints/Checkpoint_1/Model.h5', custom_objects = custom_metrics)

history = classifier.fit(training_set,
                     steps_per_epoch = 208170,
                     epochs = 2,
                     validation_data = validation_set,
                     validation_steps = 89140)

classifier.save('/content/gdrive/My Drive/Checkpoints/Checkpoint_2/Model.h5')

But I got this error. Why?

WARNING:tensorflow:Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches (in this case, 416340 batches). You may need to use the repeat() function when building your dataset.
WARNING:tensorflow:Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches (in this case, 89140 batches). You may need to use the repeat() function when building your dataset.

I noticed that your steps_per_epoch value is too large; try using an expression that derives it from the data size instead of a hard-coded number:

steps_per_epoch = len(input_train)//BATCH_SIZE

Given your dataset and input size, the expression above should let training start. See the sketch below for the generator case.
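
If the data comes from flow_from_directory rather than an in-memory array, the same idea can read the counts straight off the iterator. A minimal sketch, using the training_set and validation_set names from the question (a Keras DirectoryIterator exposes samples and batch_size attributes):

# Derive the step counts from the generators instead of hard-coding them;
# a Keras DirectoryIterator knows its own sample count and batch size.
steps_per_epoch = training_set.samples // training_set.batch_size
validation_steps = validation_set.samples // validation_set.batch_size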

From what I can see in your code above, you create both your training and validation datasets with the code below.

training_set = train_datagen.flow_from_directory('/home/dataset/training_set',
                                             target_size = (30, 30),
                                             batch_size = 32,
                                             class_mode = 'binary')

validation_set = validation_datagen.flow_from_directory('/home/dataset/validation_set',
                                                    target_size = (30, 30),
                                                    batch_size = 32,
                                                    class_mode = 'binary')

An easy fix if you are using Tensorflow 1.X is to do something similar to what Manik suggested: adjust your fit call to use int(steps_per_epoch / batch_size). steps_per_epoch counts batches, not samples, so passing the raw sample counts asks for far more batches than the generator can provide.

batch_size = 32  # must match the batch_size passed to flow_from_directory

history = classifier.fit(training_set,
                         steps_per_epoch = int(208170/batch_size),
                         epochs = 2,
                         validation_data = validation_set,
                         validation_steps = int(89140/batch_size))
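
With batch_size = 32 this works out to int(208170 / 32) = 6505 training steps and int(89140 / 32) = 2785 validation steps per epoch, which is within what the generators can actually yield.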

If you are using Tensorflow 2.X+, the following approach may suit you better. I found it in this GitHub issue: https://github.com/tensorflow/tensorflow/issues/25254

import tensorflow as tf

# Get your data
training_set...
validation_set...

# Declare the types and shapes of your data
# (these shapes come from the linked issue; for the question's setup they
# would be (30, 30, 3) images with scalar binary labels)
types = (tf.float32, tf.int32)
shapes = ((512, 512, 3), (2,))

# steps_per_epoch here serves as the shuffle buffer size; batch_size must
# be defined to match your generator
ds_train = tf.data.Dataset.from_generator(lambda: training_set, types, shapes).shuffle(steps_per_epoch).batch(batch_size)
ds_test = tf.data.Dataset.from_generator(lambda: validation_set, types, shapes).shuffle(steps_per_epoch).batch(batch_size)

# usage in model: fit on the wrapped datasets, not the raw generators
# (visualize and tensorboard are placeholder callbacks from the linked issue)
model.fit(ds_train, validation_data=ds_test, epochs=10, verbose=True, callbacks=[visualize, tensorboard])
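
The warning's own suggestion also works: call repeat() on the tf.data pipeline so it can serve steps_per_epoch * epochs batches. A minimal sketch, assuming the ds_train / ds_test datasets built above and the step counts derived from the question's sample sizes:

# repeat() makes the dataset loop indefinitely, so steps_per_epoch bounds
# each epoch instead of exhausting the underlying generator.
model.fit(ds_train.repeat(),
          steps_per_epoch = 6505,      # 208170 samples // 32 per batch
          validation_data = ds_test.repeat(),
          validation_steps = 2785,     # 89140 samples // 32 per batch
          epochs = 2)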
