
CNN Regression model predicting values far outside the expected range

I am training a multi-output CNN regression model that predicts x and y coordinate values from a single image input. Both the image data and the associated x and y target labels have been normalized to the range 0 to 1. However, when I use the model to predict the x and y values of a given image, it returns values far outside this range, for example:

[114, -17] as opposed to [0.83, 0.16]

I am unable to figure out the reason for this and would appreciate it if someone could offer some insight. The code I used is below:

# Imports were not shown in the original snippet; the standard tf.keras API is assumed
# for the layers, optimizer and image utilities used below.
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.layers import Input, Conv2D, Activation, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

IMAGESTRAIN_PATH = os.path.sep.join([BASE_PATH, "RegressionDatasetGrey"])
ANNOTSTRAIN_PATH = os.path.sep.join([BASE_PATH, "RegressionDatasetGrey.csv"])
IMAGESTEST_PATH = os.path.sep.join([BASE_PATH, "RegressionDSGreyTesting"])
ANNOTSTEST_PATH = os.path.sep.join([BASE_PATH, "RegressionDSGreyTesting.csv"])

print("[INFO] loading training dataset...")
rows = open(ANNOTSTRAIN_PATH).read().strip().split("\n")

data = []
targets = []
filenames = []

for row in rows:

   row = row.split(",")
   (filename, coordX, coordY) = row

   coordX = float(int(coordX)/1920)
   coordY = float(int(coordY)/1080)

   imagePath = os.path.sep.join([IMAGESTRAIN_PATH, filename])
   img = load_img(imagePath, color_mode="grayscale", target_size=(64, 80))
   # img = img.size(1, 64, 80, 1)
   img = img_to_array(img)

   data.append(img)
   targets.append((coordX, coordY))
   filenames.append(filename)

trainImages = np.array(data, dtype="float32") / 255.0
trainCoords = np.array(targets, dtype="float32")
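
# Sanity check (illustrative addition, not part of the original script): given the
# /1920 and /1080 division above, the normalised targets should already lie in [0, 1].
print("[INFO] target range:", trainCoords.min(), trainCoords.max())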

print("[INFO] loading testing dataset...")
rows1 = open(ANNOTSTEST_PATH).read().strip().split("\n")

data1 = []
targets1 = []
filenames1 = []

for row1 in rows1:

   row1 = row1.split(",")
   (filename1, coordX1, coordY1) = row1

   coordX1 = float(int(coordX1)/1920)
   coordY1 = float(int(coordY1)/1080)

   imagePath1 = os.path.sep.join([IMAGESTEST_PATH, filename1])
   image1 = load_img(imagePath1, color_mode="grayscale", target_size=(64, 80))
   image1 = img_to_array(image1)

   data1.append(image1)
   targets1.append((coordX1, coordY1))
   filenames1.append(filename1)

testImages = np.array(data1, dtype="float32") / 255.0
testCoords = np.array(targets1, dtype="float32")

inputs = Input(shape=(64, 80, 1))
x = inputs

x = Conv2D(16, (3, 3), padding='same')(x)
x = Activation('relu')(x)
# x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(32, (3, 3), padding='same')(x)
# x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), padding='same')(x)
# x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

x = Flatten()(x)
x = Dense(128)(x)
x = Activation('relu')(x)
x = Dropout(0.3)(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dropout(0.3)(x)
x = Dense(2)(x)
x = Activation('linear')(x)

model = Model(inputs, x)
opt = Adam(lr=0.001)
model.compile(loss="mse", optimizer=opt)

# print(model.summary())

print("[INFO] training Regressor...")
History = model.fit(
    trainImages, trainCoords,
    validation_data=(testImages, testCoords),
    batch_size=32,
    epochs=50,
    verbose=1, shuffle=True)

print("[INFO] Saving Model...")
model.save('ModelV12.h5')

N = 50
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, N), History.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), History.history["val_loss"], label="val_loss")
plt.title("Loss on Training Set")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.legend(loc="lower left")
plt.savefig('RegressionPlot.png')
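
For reference, this is roughly how I would expect a single prediction to be made, reusing the same preprocessing as in training (a minimal sketch; "sample.png" is just a placeholder filename, and the 1920 and 1080 factors map the normalized output back to pixel coordinates):

# Minimal prediction sketch; "sample.png" is a placeholder path.
# The image is preprocessed the same way as the training data:
# grayscale, resized to 64x80, scaled by 255.
testImg = load_img("sample.png", color_mode="grayscale", target_size=(64, 80))
testImg = img_to_array(testImg) / 255.0
testImg = np.expand_dims(testImg, axis=0)   # shape (1, 64, 80, 1)

predCoords = model.predict(testImg)[0]      # expected to lie roughly within [0, 1]
predX = predCoords[0] * 1920                # map back to pixel coordinates
predY = predCoords[1] * 1080
print(predX, predY)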
