I am new to TensorFlow and am trying to train a custom CNN estimator whose inputs are read from TFRecord files.
The `Load_input()` function is supposed to look in `DATA_DIR` for the TFRecords file and decode it through a call to the `read_and_decode` function (which is supposed to do the actual decoding of the records), store the information in an instance of `_image_object`, and return it.
`cnn_model` is where I have defined the CNN architecture, and `generate_input_fn` is supposed to create the batches and feed them to `estimator.train` during training.
I only have an abstract understanding of the code and no idea of its internal mechanics, which is the primary reason I am not able to debug it.
Here is my code:
import tensorflow as tf
import numpy as np
import os
DATA_DIR = "./TFRecords/train" #path to tfrecords directory
# Not referenced by any of the code visible in this listing.
TRAINING_SET_SIZE = 3
# Default batch size of generate_input_fn; also used there to size the
# shuffle queue (capacity and min_after_dequeue).
BATCH_SIZE = 3
# Height and width that read_and_decode crops or pads every decoded image to.
IMAGE_SIZE = 224
def _int64_feature(value):
    """Build a ``tf.train.Feature`` carrying *value* as an int64 list.

    ``value`` is handed to ``tf.train.Int64List`` as-is; unlike
    ``_bytes_feature`` it is not wrapped in a list first.
    """
    int64_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int64_list)
def _bytes_feature(value):
    """Build a ``tf.train.Feature`` carrying the single item *value* as a bytes list."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
# image object from protobuf
class _image_object:
def __init__(self):
self.image = tf.Variable([], dtype = tf.string)
self.height = tf.Variable([], dtype = tf.int64)
self.width = tf.Variable([], dtype = tf.int64)
self.filename = tf.Variable([], dtype = tf.string)
self.label = tf.Variable([], dtype = tf.int32)
def read_and_decode(filename_queue):
    """Read one record from *filename_queue* and unpack it into an ``_image_object``.

    The record is parsed with a fixed feature spec (encoded image, height,
    width, filename, class label).  The encoded image is decoded as a
    3-channel JPEG and cropped or padded to IMAGE_SIZE x IMAGE_SIZE; the
    label is cast to int64.

    Returns:
        The populated ``_image_object``.
    """
    # Schema of a single serialized example: every entry is a scalar feature.
    feature_spec = {
        "image/encoded": tf.FixedLenFeature([], tf.string),
        "image/height": tf.FixedLenFeature([], tf.int64),
        "image/width": tf.FixedLenFeature([], tf.int64),
        "image/filename": tf.FixedLenFeature([], tf.string),
        "image/class/label": tf.FixedLenFeature([], tf.int64),
    }

    record_reader = tf.TFRecordReader()
    _, serialized = record_reader.read(filename_queue)
    parsed = tf.parse_single_example(serialized, features=feature_spec)

    decoded = tf.image.decode_jpeg(parsed["image/encoded"], channels=3)

    record = _image_object()
    # Crop or pad (no rescaling) so every image has the same spatial size.
    record.image = tf.image.resize_image_with_crop_or_pad(
        decoded, IMAGE_SIZE, IMAGE_SIZE)
    record.height = parsed["image/height"]
    record.width = parsed["image/width"]
    record.filename = parsed["image/filename"]
    record.label = tf.cast(parsed["image/class/label"], tf.int64)
    return record
def Load_input():
    """Build the graph ops that yield one standardized image, its label and filename.

    Only shard 0 ("train-00000-of-00002.tfrecord" under DATA_DIR) is used.

    Returns:
        A tuple ``(image, label, filename)`` of tensors, where ``image`` has
        been passed through ``tf.image.per_image_standardization``.

    Raises:
        ValueError: if a shard file does not exist.
    """
    print('Generating data from tfrecords...')

    shard_paths = [
        os.path.join(DATA_DIR, "train-0000%d-of-00002.tfrecord" % shard)
        for shard in range(1)
    ]
    for path in shard_paths:
        if tf.gfile.Exists(path):
            continue
        raise ValueError("Failed to find file: " + path)

    queue = tf.train.string_input_producer(shard_paths)
    print('decoding queue contents ::{}'.format(queue))

    decoded = read_and_decode(queue)
    # Per-image standardization is the normalisation in use; the raw image
    # and an adjust_gamma rescale were earlier alternatives.
    standardized = tf.image.per_image_standardization(decoded.image)
    return standardized, decoded.label, decoded.filename
def cnn_model(features, labels, mode):
    """Estimator model function: two conv/pool stages, one dense layer, 2-unit logits.

    ``features`` is reshaped to [-1, 224, 224, 3] before the first
    convolution.  The loss is the output of
    ``tf.nn.softmax_cross_entropy_with_logits`` and the train op is built
    with plain gradient descent, whatever ``mode`` is.  The ``predictions``
    dict is created (which also creates the op named 'softmax_tensor') but is
    not passed to the returned spec.

    Returns:
        A ``tf.estimator.EstimatorSpec`` with ``mode``, ``loss`` and ``train_op``.
    """
    def conv_relu(inputs, filters):
        # 5x5 'same' convolution followed by ReLU.
        return tf.layers.conv2d(inputs=inputs, filters=filters,
                                kernel_size=[5, 5], padding='same',
                                activation=tf.nn.relu)

    def halve(inputs):
        # 2x2 max pooling with stride 2.
        return tf.layers.max_pooling2d(inputs=inputs, pool_size=[2, 2],
                                       strides=2)

    print('creating layers...')
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Input layer
    inp = tf.reshape(features, [-1, 224, 224, 3])
    print('input shape ::{}'.format(inp.shape))

    # Convolution + pooling, stage 1
    conv1 = conv_relu(inp, 32)
    print('convolution-1 shape ::{}'.format(conv1.shape))
    pool1 = halve(conv1)
    print('Pool-1 shape ::{}'.format(pool1.shape))

    # Convolution + pooling, stage 2
    conv2 = conv_relu(pool1, 64)
    print('convolution-2 shape ::{}'.format(conv2.shape))
    pool2 = halve(conv2)
    print('Pool-2 shape ::{}'.format(pool2.shape))

    # Dense layer on the flattened feature map
    # (HEIGHT * WIDTH * CHANNELS of the last pooled layer).
    flat = tf.reshape(pool2, [-1, 56 * 56 * 64])
    dense = tf.layers.dense(inputs=flat, units=1024, activation=tf.nn.relu)
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=is_training)

    # Logits layer, shape [batch_size, no_of_labels]
    logits = tf.layers.dense(inputs=dropout, units=2)
    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor'),
    }
    print('Logits shape ::{}'.format(logits.shape))
    print('Labels shape ::{}'.format(labels.shape))

    # Loss and training op
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                   logits=logits)
    sgd = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    train_op = sgd.minimize(loss=loss,
                            global_step=tf.train.get_global_step())
    print('Layers created...')
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
def generate_input_fn(image, label, batch_size=BATCH_SIZE):
    """Return a zero-argument input function for the estimator.

    NOTE: ``image`` and ``label`` are accepted but never referenced in the
    body.  The returned function creates two placeholders shaped
    [batch_size, 224, 224, 3] and [batch_size, 1] and feeds those to
    ``tf.train.shuffle_batch``.

    Returns:
        A callable that, when invoked, returns ``(image_batch, label_batch)``.
    """
    print("Filling queue with images before starting to train. "
          "This will take a few minutes.")
    num_preprocess_threads = 1

    def _input_fn():
        image_placeholder = tf.placeholder(
            tf.float32, shape=[batch_size, 224, 224, 3])
        label_placeholder = tf.placeholder(
            tf.int64, shape=[batch_size, 1])

        # Queue sizing is based on the module-level BATCH_SIZE, not on the
        # batch_size argument.
        queue_options = dict(
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=8 * BATCH_SIZE,
            min_after_dequeue=4 * BATCH_SIZE,
        )
        image_batch, label_batch = tf.train.shuffle_batch(
            [image_placeholder, label_placeholder], **queue_options)
        return image_batch, label_batch

    return _input_fn
def main(unused_argv):
    """Load the input tensors, build the estimator and train it for two steps.

    The 'softmax_tensor' op is logged on every iteration through a
    ``LoggingTensorHook``.  Checkpoints go to the current directory.
    """
    print('program started...')
    image_data, label_data, _filename = Load_input()
    print('image_data::{} label_data::{}'.format(
        type(image_data), type(label_data)))

    estimator = tf.estimator.Estimator(model_fn=cnn_model, model_dir='./')
    print('Estimator ready...')

    logging_hook = tf.train.LoggingTensorHook(
        tensors={'probabilities': 'softmax_tensor'}, every_n_iter=1)
    print('Logs ready...')

    print('Starting training...')
    input_fn = generate_input_fn(image=image_data, label=label_data)
    estimator.train(input_fn=input_fn, steps=2, hooks=[logging_hook])
# Script entry point: hand control to TensorFlow's app runner, which calls main().
if __name__ == '__main__':
    tf.app.run()
    # NOTE(review): tf.app.run() is expected to exit the process once main()
    # returns, so this message is presumably never printed - confirm.
    print('Program ended...')
It gives me the following error:
ValueError: Dimension 0 in both shapes must be equal, but are 9 and 3. Shapes are [9,2] and [3,3]. for 'softmax_cross_entropy_with_logits_sg' (op: 'SoftmaxCrossEntropyWithLogits') with input shapes: [9,2], [3,3].
Also, the layer shapes are as follows:
conv1 output shape :: (9, 224, 224, 32)
pool1 shape :: (9, 112, 112, 32)
conv2 shape ::(9, 112, 112, 64)
pool2 shape :: (9, 56, 56, 64)
Logits shape :: (9, 2)
Labels shape :: (3, 3)
I don't understand why the batch size is 9 even though I explicitly set it to 3 in the code.
Note: If anyone has a better or easier solution, please post it. The aim is to use TFRecords to train a custom CNN.
The error was in `generate_input_fn`.
Change:
image_placeholder=tf.placeholder(tf.float32,shape=[batch_size,224,224,3])
label_placeholder=tf.placeholder(tf.int64,shape=[batch_size,1])
to :
image_placeholder=tf.placeholder(tf.float32,shape=image.shape)
label_placeholder=tf.placeholder(tf.int64,shape=label.shape)
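i.e., the placeholders should have the shape of a single image (and a single label), because that is what the `tensors` argument of `tf.train.shuffle_batch` requires. `shuffle_batch` adds the batch dimension itself, so placeholders that already include `batch_size` end up with an extra leading dimension; after the reshape in `cnn_model` that becomes 3 x 3 = 9 images.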
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.