I'm trying to fit my Keras model on quite a large amount of data.
To do this, I'm using a custom data generator and the model.fit_generator function.
However, I can't seem to understand if I'm doing this correctly.
Here's what I have:
from os.path import join
import cv2
import numpy as np
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau
# The function returns a list of image names from folder
from data.preprocessing import get_list_of_images
class VGG19(object):
    """Cat-vs-dog binary classifier built around a Keras ``Sequential`` model.

    Training and validation images are streamed from disk in mini-batches, so
    the whole dataset never has to be held in memory at once.  Image file
    names are expected to look like ``dog.64.jpg`` / ``cat.124.jpg``: the part
    before the first dot decides the label.
    """

    def __init__(self, weights_path=None, train_folder='data/train', validation_folder='data/val'):
        """Build the model, optionally load saved weights, and compile it.

        Args:
            weights_path: Optional path to a weights file passed to
                ``model.load_weights``.  Skipped when falsy.
            train_folder: Folder holding the training images.
            validation_folder: Folder holding the validation images.
        """
        self.weights_path = weights_path
        # Folders, the augmenter and compilation are set up unconditionally so
        # that ``fit`` also works on an instance created from saved weights
        # (e.g. to continue training).
        self.train_folder = train_folder
        self.validation_folder = validation_folder
        self.datagen = self._init_datagen()
        self.model = self._init_model()
        if weights_path:
            self.model.load_weights(weights_path)
        self.model.compile(
            loss='binary_crossentropy',
            optimizer='adam',
            metrics=['accuracy']
        )

    def fit(self, batch_size=32, nb_epoch=10):
        """Train the model on ``train_folder`` and validate on ``validation_folder``.

        Args:
            batch_size: Number of images per mini-batch yielded to Keras.
            nb_epoch: Number of passes over the training folder.

        Raises:
            ValueError: If either folder contains no images.
        """
        # One epoch == one full pass over the folder, so the sample counts
        # handed to Keras are the folder sizes, not the batch size.
        train_count = len(get_list_of_images(self.train_folder))
        validation_count = len(get_list_of_images(self.validation_folder))
        if not train_count:
            raise ValueError('No training images found in {0}'.format(self.train_folder))
        if not validation_count:
            raise ValueError('No validation images found in {0}'.format(self.validation_folder))

        self.model.fit_generator(
            self._generate_data_from_folder(self.train_folder, batch_size),
            train_count,  # samples_per_epoch (Keras 1.x positional argument)
            nb_epoch,
            verbose=1,
            callbacks=[
                TensorBoard(log_dir='./logs', write_images=True),
                ModelCheckpoint(filepath='weights.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_loss'),
                # NOTE(review): with optimizer='adam' the starting learning
                # rate is presumably Keras' default of 0.001, in which case
                # min_lr=0.001 leaves no room for a reduction -- confirm.
                ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=0.001)
            ],
            validation_data=self._generate_data_from_folder(self.validation_folder, batch_size),
            nb_val_samples=validation_count
        )

    def predict(self, X, batch_size=32, verbose=1):
        """Return the underlying model's ``predict`` output for ``X``."""
        return self.model.predict(X, batch_size=batch_size, verbose=verbose)

    def predict_proba(self, X, batch_size=32, verbose=1):
        """Return the underlying model's ``predict_proba`` output for ``X``."""
        return self.model.predict_proba(X, batch_size=batch_size, verbose=verbose)

    def _init_model(self):
        """Create and return the (still uncompiled) ``Sequential`` model."""
        model = Sequential()
        # model definition goes here...
        return model

    def _init_datagen(self):
        """Create the ``ImageDataGenerator`` holding the augmentation settings.

        NOTE(review): the generator is stored on ``self.datagen`` but is not
        applied by ``_generate_data_from_folder``.  The feature-wise options
        presumably need ``datagen.fit`` on sample data before use -- confirm
        against the Keras documentation before wiring it in.
        """
        return ImageDataGenerator(
            featurewise_center=True,
            samplewise_center=False,
            featurewise_std_normalization=True,
            samplewise_std_normalization=False,
            zca_whitening=False,
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True,
            vertical_flip=True
        )

    @staticmethod
    def _label_for_image(image_name):
        """Return 0 when the file name starts with ``dog.``, otherwise 1."""
        return 0 if image_name.split('.')[0] == 'dog' else 1

    @staticmethod
    def _load_image(folder_path, image_name):
        """Read one image with OpenCV, failing loudly if it cannot be read."""
        image_path = join(folder_path, image_name)
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise IOError('Could not read image: {0}'.format(image_path))
        return image

    def _generate_data_from_folder(self, folder_path, batch_size=32):
        """Yield ``(images, labels)`` mini-batches from a folder, forever.

        Each pass over the folder yields consecutive slices of at most
        ``batch_size`` files; the last slice of a pass may be smaller.

        Args:
            folder_path: Folder whose images are listed by
                ``get_list_of_images``.
            batch_size: Maximum number of images per yielded batch.

        Yields:
            A tuple ``(x, y)`` of NumPy arrays: the stacked images and their
            0/1 labels.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1 or the folder
                contains no images (which would otherwise loop forever).
            IOError: If an image cannot be read.
        """
        if batch_size < 1:
            raise ValueError('batch_size must be at least 1, got {0}'.format(batch_size))
        while True:
            image_names = list(get_list_of_images(folder_path))
            if not image_names:
                raise ValueError('No images found in {0}'.format(folder_path))
            for start in range(0, len(image_names), batch_size):
                batch_names = image_names[start:start + batch_size]
                # assumes every image already has the shape the model expects;
                # differently sized images cannot be stacked -- TODO confirm
                # or resize here.
                x = np.array([self._load_image(folder_path, name) for name in batch_names])
                y = np.array([self._label_for_image(name) for name in batch_names])
                yield (x, y)
My dataset consists of images with names like:
cat.[number].jpg, e.g. cat.124.jpg
dog.[number].jpg, e.g. dog.64.jpg
So, basically, I'm trying to train a model to perform a binary cat-dog classification.
Is my _generate_data_from_folder function correctly implemented for mini-batch optimization?
How can I use the ImageDataGenerator (created in the _init_datagen function) inside my _generate_data_from_folder function?
Okay, here's the github link to my final version of the project that I got working:
https://github.com/yakovenkodenis/dogs-vs-cats-kaggle
Hope it helps somebody.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.