
CNN for image classification performing poorly

I am building a CNN for image classification of knee X-rays, but on every run, after the 7th or 8th epoch the validation and training loss/accuracy stop lining up, and after a while the validation loss starts increasing. I tried increasing my dataset through data augmentation. Now I'm wondering whether the problem could be the quality of the images, my actual architecture, or the way I'm saving my data. For my data pre-processing I save each image as a NumPy array and then write it to a pickle file. I was also not too sure how to determine the number of layers and the hyperparameters, but I did get guidance from other CNNs. I'm not sure what else to try. I would be grateful for suggestions on how to improve this model.
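Roughly, the preprocessing looks like the sketch below (the folder layout, file names, and use of Pillow here are simplified placeholders, not my exact code):

import os
import pickle
import numpy as np
from PIL import Image

# Illustrative only: assumes a local folder of knee X-ray images
image_dir = 'knee_xrays'
images = []
for fname in sorted(os.listdir(image_dir)):
    img = Image.open(os.path.join(image_dir, fname)).convert('L')  # grayscale
    img = img.resize((224, 224))
    images.append(np.asarray(img, dtype='uint8'))

# write the whole list of image arrays to a single pickle file
with open('KneeKL224AllReduced.pkl', 'wb') as f:
    pickle.dump(images, f)

Here is my full script: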

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.pyplot import imshow
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import numpy as np
import random as rd
import boto3
import tempfile
import os
import io
import pickle
import tensorflow as tf
import keras
from keras.utils import to_categorical
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Input, InputLayer
from keras.models import Model
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.pooling import GlobalMaxPooling2D
from keras.models import model_from_json
from keras.layers import MaxPool2D, InputLayer, BatchNormalization

s3_client = boto3.client('s3')
my_array2 = []
labels = []
labels2 = []

x_train = []
x_test = [] 
y_train = []
y_test = []

def load_data():
    # download without using disk
    global my_array2
    my_array_data2 = io.BytesIO()
    s3_client.download_fileobj('msckneeoadata', 'KneeKL224AllReduced.pkl', my_array_data2)
    my_array_data2.seek(0)
    my_array2 = pickle.load(my_array_data2)

def load_labels():
    # five classes (0-4), 3000 images each, stored in class order
    for label in range(5):
        labels.extend([label] * 3000)

def ShuffleData():
    global labels
    global my_array2
    my_array2, labels = shuffle(my_array2, labels, random_state=0)

def SplitData():
    # split into 80% for train and 20% for test
    global x_train 
    global x_test  
    global y_train
    global y_test 
    global my_array2
    x_train, x_test, y_train, y_test = train_test_split(my_array2, labels, test_size=0.20, random_state=0)

    x_train = np.array(x_train)
    y_train = np.array(y_train,dtype='uint8')
    x_test = np.array(x_test)
    y_test = np.array(y_test,dtype='uint8')
    
    #one-hot encode target column
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    
    # reshape to (samples, 224, 224, 1) so the CNN gets a channel dimension
    x_train = x_train.reshape(12000,224,224,1)
    x_test = x_test.reshape(3000,224,224,1)

    x_train = x_train.astype('float64')
    x_test = x_test.astype('float64')

    x_train/=255
    x_test/=255
def create_model():
    global x_train
    global x_test
    global y_train
    global y_test

    #create model
    model = Sequential()
    #add model layers


    model.add(Conv2D(input_shape=(224,224,1), filters=64, kernel_size=(11,11), padding="same", activation="relu", name="Conv1_1"))
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), name="Pool1_1"))

    model.add(Conv2D(filters=128, kernel_size=(7,7), padding="same", activation="relu", name="Conv2_1"))
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), name="Pool2_1"))

    model.add(Conv2D(filters=256, kernel_size=(5,5), padding="same", activation="relu", name="Conv3_1"))
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), name="Pool3_1"))

    model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu", name="Conv4_1"))
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), name="Pool4_1"))

    model.add(Flatten())
    model.add(Dense(units=1024, activation="relu"))
    model.add(Dropout(0.1))
    model.add(Dense(units=5, activation="softmax", name="fc6"))

    # compile model using accuracy to measure model performance
    from keras.optimizers import SGD
    opt = SGD(lr=0.1)

    model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['accuracy'])
    model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=15, batch_size=150)

    # Evaluate the model on the test data using `evaluate`
    print('\n# Evaluate on test data')
    results = model.evaluate(x_test, y_test, batch_size=100)
    print('test loss, test acc:', results)

def main():
    load_data()
    load_labels()
    ShuffleData()
    SplitData()
    create_model()

if __name__ == '__main__':
    main()

Here are the results from this execution: (output not reproduced here)

I would start by adjusting your kernel size, as that sticks out the most. The idea is to create some simple features at the top of the network and allow them to build in complexity through the number of filters as the data trickles down the network.

model.add(Conv2D(input_shape=(224,224,1), filters=32, kernel_size=2, padding="same", activation="relu", name="Conv1_1"))
model.add(MaxPooling2D(pool_size=2, name="Pool1_1"))

model.add(Conv2D(filters=64,kernel_size=2,padding="same", activation="relu",name="Conv2_1"))
model.add(MaxPooling2D(pool_size=2, name="Pool2_1"))

model.add(Conv2D(filters=128,kernel_size=2,padding="same", activation="relu",name="Conv3_1"))
model.add(MaxPooling2D(pool_size=2, name="Pool3_1"))

You could even remove one of your convolution layers and just try 3.

Other ideas:

  • Try adding BatchNormalization between convolutions
  • Try adding dropout between convolutions (I would use SpatialDropout2D); see the sketch of both ideas after this list
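A rough sketch of one convolution block with both additions (the dropout rate and layer names are just placeholders):

from keras.layers import BatchNormalization, SpatialDropout2D

model.add(Conv2D(filters=32, kernel_size=2, padding="same", activation="relu", name="Conv1_1"))
model.add(BatchNormalization(name="BN1_1"))        # normalize the conv layer's activations
model.add(SpatialDropout2D(0.2, name="Drop1_1"))   # drops whole feature maps rather than single units
model.add(MaxPooling2D(pool_size=2, name="Pool1_1"))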

Also, I noticed that you aren't actually using the optimizer you set the custom LR for; you could try lowering the LR a little, since you are currently using the default settings for Adam.
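For example (just a sketch; the exact value is something to tune), you could pass Adam explicitly with a learning rate a bit below its default of 1e-3:

from keras.optimizers import Adam

opt = Adam(lr=3e-4)  # slightly below the default 1e-3
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])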

Final note: you could also add a callback for early stopping, so it will keep the weights from the best epoch.
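Something like this (the patience value and checkpoint file name are just placeholders):

from keras.callbacks import EarlyStopping, ModelCheckpoint

callbacks = [
    # stop when val_loss has not improved for 3 epochs and roll back to the best weights
    EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    # also save the best model seen so far to disk
    ModelCheckpoint('best_knee_cnn.h5', monitor='val_loss', save_best_only=True),
]
model.fit(x_train, y_train, validation_data=(x_test, y_test),
          epochs=50, batch_size=150, callbacks=callbacks)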

This tutorial from TensorFlow provides a good example of evaluating different architectures: https://www.tensorflow.org/tutorials/keras/overfit_and_underfit

Another very important point is the learning rate. In your code you create an SGD optimizer with learning rate 0.1, but in the end you don't use it. I would suggest a lower learning rate, like 1e-3. You can also try training for a small number of epochs first; if the model is still improving, you can train further.
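Concretely (a sketch, with 1e-3 just as a starting point), that means actually passing the configured optimizer to compile():

from keras.optimizers import SGD

opt = SGD(lr=1e-3)  # much lower than the unused 0.1 in the question
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
# train for a few epochs first; if validation loss is still improving, continue training
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5, batch_size=150)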

If you have difficulty finding a good learning rate, this is a great help: https://gist.github.com/WittmannF/c55ed82d27248d18799e2be324a79473
