
Reducing validation loss in a CNN model

import pickle
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential, model_from_json, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

# Opening the files about data
X = pickle.load(open("X.pickle", "rb"))
y = pickle.load(open("y.pickle", "rb"))

# normalizing data (a pixel goes from 0 to 255)
X = X/255.0

# Building the model
model = Sequential()
# 3 convolutional layers
model.add(Conv2D(32, (3, 3), input_shape = X.shape[1:]))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2)))


model.add(Conv2D(64, (3, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.9))

# 5 hidden layers
model.add(Flatten())

model.add(Dense(128))
model.add(Activation("relu"))

model.add(Dense(128))
model.add(Activation("relu"))

model.add(Dense(128))
model.add(Activation("relu"))

model.add(Dense(128))
model.add(Activation("relu"))

model.add(Dense(128))
model.add(Activation("relu"))

# The output layer with 13 neurons, for 13 classes
model.add(Dense(13))
model.add(Activation("softmax"))

# Compiling the model using some basic parameters
model.compile(loss="sparse_categorical_crossentropy",
                optimizer="adam",
                metrics=["accuracy"])

# Training the model
# validation_split is the fraction of the images held out for the validation phase

print("X = " + str(len(X)))
print("y = " + str(len(y)))

history = model.fit(X, y, batch_size=32, epochs=1000, validation_split=0.1)

# Saving the model
model_json = model.to_json()
with open("model.json", "w") as json_file :
    json_file.write(model_json)

model.save_weights("model.h5")

print("Saved model to disk")

model.save('CNN.model')

# Plotting the training and validation accuracy over the epochs
print(history.history.keys())

plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

The problem is that I am getting low training loss but very high validation loss, and the validation accuracy is also extremely low. How can I solve this issue? I have tried increasing the dropout rate up to 0.9, but the validation loss is still much higher than the training loss. I also tried using a linear activation function, but that did not help.

Please help.

The model loss looks like this: [training vs. validation loss plot]

As already mentioned, it is pretty hard to give good advice without seeing the data.

What I would try is the following (a sketch follows after this list):

  - remove the Dropout after the max-pooling layer
  - remove some of the dense layers
  - if it's then still overfitting, add dropout between the dense layers
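For concreteness, here is a minimal sketch of what that revised architecture could look like, assuming X and y are loaded and normalized as in the question (the layer sizes and the 13-class output are taken from the question; the 0.5 dropout rates are illustrative assumptions, not tuned values):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

model = Sequential()

# Convolutional base as in the question, but with no Dropout after the last pooling layer
model.add(Conv2D(32, (3, 3), input_shape=X.shape[1:]))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

# Fewer dense layers, with moderate dropout between them
model.add(Dense(128))
model.add(Activation("relu"))
model.add(Dropout(0.5))  # illustrative rate, much lower than the 0.9 tried in the question

model.add(Dense(128))
model.add(Activation("relu"))
model.add(Dropout(0.5))  # illustrative rate

model.add(Dense(13))
model.add(Activation("softmax"))

model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])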

Edit: After seeing the loss and accuracy plot, I would suggest the following:

  1. the highest priority is to get more data.
  2. then use data augmentation to further enlarge your dataset.
  3. further reduce the complexity of your neural network if additional data doesn't help (though with more data, training will slow down and the validation loss should also keep decreasing over a longer period of epochs).

Data augmentation is one of the most effective techniques for reducing overfitting. Try data generators for the training and validation sets to reduce the validation loss and increase accuracy.

To learn more about Augmentation, and the available transforms, check out https://github.com/keras-team/keras-preprocessing

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Add our data-augmentation parameters to ImageDataGenerator
train_datagen = ImageDataGenerator(rescale = 1./255.,
                               rotation_range = 40,
                               width_shift_range = 0.2,
                               height_shift_range = 0.2,
                               shear_range = 0.2,
                               zoom_range = 0.2,
                               horizontal_flip = True)

# Note that the validation data should not be augmented!
test_datagen = ImageDataGenerator(rescale = 1./255.)

# Flow training images in batches of 20 using the train_datagen generator
# (for the 13-class problem in the question, class_mode should be 'sparse' to match
#  sparse_categorical_crossentropy, and target_size should match the model's input size)
train_generator = train_datagen.flow_from_directory(train_dir,
                                                batch_size = 20,
                                                class_mode = 'binary',
                                                target_size = (150, 150))

# Flow validation images in batches of 20 using test_datagen generator
validation_generator =  test_datagen.flow_from_directory(validation_dir,
                                                      batch_size  = 20,
                                                      class_mode  = 'binary', 
                                                      target_size = (150, 150)) 

# Now fit the model with the training and validation generators
# (fit_generator is deprecated in recent TensorFlow versions; model.fit accepts generators directly.
#  `callbacks` is assumed to be a Keras callback defined elsewhere.)
history = model.fit_generator(train_generator,
        validation_data = validation_generator,
        steps_per_epoch = 100,
        epochs = 3,
        validation_steps = 50,
        verbose = 2,
        callbacks = [callbacks])
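Note that the question loads its data as NumPy arrays (X and y) from pickle files rather than from directories. Below is a minimal sketch of the same augmentation idea using ImageDataGenerator.flow, assuming X has shape (samples, height, width, channels) and was already divided by 255 as in the question (so no rescale is applied here); the split fraction and epoch count are illustrative:

import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

y = np.asarray(y)  # flow() expects arrays, not Python lists

# Manual 90/10 split, mirroring validation_split=0.1 from the question
# (shuffle X and y together beforehand if the classes are stored in order)
split = int(0.9 * len(X))
X_train, y_train = X[:split], y[:split]
X_val, y_val = X[split:], y[split:]

# Augment only the training data
train_datagen = ImageDataGenerator(rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)
val_datagen = ImageDataGenerator()  # validation data is not augmented

train_generator = train_datagen.flow(X_train, y_train, batch_size=32)
val_generator = val_datagen.flow(X_val, y_val, batch_size=32)

# model.fit accepts generators directly in recent TensorFlow/Keras versions
history = model.fit(train_generator,
                    validation_data=val_generator,
                    epochs=40)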
