
Tensorflow batch training of single dimension numpy array without converting it into multiple dimension numpy arrays

I have some confusion about converting a numpy array to a tensor....

My code:

import os
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import numpy as np
import random

n = 2500
y = np.zeros(n, dtype=np.int32)

for i in range(n):
    y[i] = random.randint(0, 1)

print("Before Batch Training:")
print("len(y):", len(y))
print("y:", y)
print("y[9]:", y[9])

batch_size = 10
num_preprocess_threads = 1
min_queue_examples = 256

y_batch = tf.train.batch(
    [y],
    batch_size=batch_size,
    num_threads=num_preprocess_threads,
    capacity=min_queue_examples + 3 * batch_size,
    allow_smaller_final_batch=True)

print("After Batch Training:")
print("y_batch:", y_batch)
print("y_batch[9]:", y_batch[9])

with tf.Session() as sess:

    tf.global_variables_initializer().run()

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    y_proccessed = sess.run(y_batch)

    print("After Session Run:")
    print("y_proccessed:", y_proccessed)
    print("y_proccessed[9]:", y_proccessed[9])
    print("y_proccessed[0][9]:", y_proccessed[0][9])
    print("y_proccessed[1][9]:", y_proccessed[1][9])
    print("y_proccessed[2][9]:", y_proccessed[2][9])
    print("y_proccessed[3][9]:", y_proccessed[3][9])
    print("y_proccessed[4][9]:", y_proccessed[4][9])
    print("y_proccessed[5][9]:", y_proccessed[5][9])

    coord.request_stop()
    coord.join(threads)


Result after execution:

Before Batch Training:
len(y): 2500
y:  [0 0 1 ..., 1 1 1]
y[9]:  1
After Batch Training:
y_batch: Tensor("batch:0", shape=(?, 2500), dtype=int32)
y_batch[9]:  Tensor("strided_slice:0", shape=(2500,), dtype=int32)
After Session Run:
y_proccessed: [[0 0 1 ..., 1 1 1]
 [0 0 1 ..., 1 1 1]
 [0 0 1 ..., 1 1 1]
 ..., 
 [0 0 1 ..., 1 1 1]
 [0 0 1 ..., 1 1 1]
 [0 0 1 ..., 1 1 1]]
y_proccessed[9]:  [0 0 1 ..., 1 1 1]
y_proccessed[0][9]:  1
y_proccessed[1][9]:  1
y_proccessed[2][9]:  1
y_proccessed[3][9]:  1
y_proccessed[4][9]:  1
y_proccessed[5][9]:  1

My confusion is this: I expected y_proccessed[9] to be a single value of 1, just like y[9], but instead it produces the whole array [0 0 1 ..., 1 1 1]. Why?

On the other hand, if you look at y_proccessed, which produces

    [[0 0 1 ..., 1 1 1]
     [0 0 1 ..., 1 1 1]
     [0 0 1 ..., 1 1 1]
     ..., 
     [0 0 1 ..., 1 1 1]
     [0 0 1 ..., 1 1 1]
     [0 0 1 ..., 1 1 1]]

it redundantly repeats the same full array in every row of the first batch. Shouldn't each slot of the batch instead advance to the next 10 elements of the data?

Thanks

Managed to fix it:

import os
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import numpy as np
import random

n = 2500
y = np.zeros(n, dtype=np.int32)

for i in range(n):
    y[i] = random.randint(0, 1)

print("Before Batch Training:")
print("len(y):", len(y))
print("y:", y)
print("y[9]:", y[9])

batch_size = 10
num_preprocess_threads = 1
min_queue_examples = 256

# adding enqueue_many=True into the tf.train.batch
y_batch = tf.train.batch(
    [y],
    batch_size=batch_size,
    num_threads=num_preprocess_threads,
    capacity=min_queue_examples + 3 * batch_size,
    enqueue_many=True,
    allow_smaller_final_batch=True)

print("After Batch Training:")
print("y_batch:", y_batch)
print("y_batch[9]:", y_batch[9])

with tf.Session() as sess:

    tf.global_variables_initializer().run()

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    y_proccessed = sess.run(y_batch)

    print("After Session Run:")
    print("y_proccessed:", y_proccessed)
    print("y_proccessed[9]:", y_proccessed[9])
    print("y_proccessed[0][9]:", y_proccessed[0][9])
    print("y_proccessed[1][9]:", y_proccessed[1][9])
    print("y_proccessed[2][9]:", y_proccessed[2][9])
    print("y_proccessed[3][9]:", y_proccessed[3][9])
    print("y_proccessed[4][9]:", y_proccessed[4][9])
    print("y_proccessed[5][9]:", y_proccessed[5][9])

    coord.request_stop()
    coord.join(threads)

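The reason the fix works, as I understand it: without enqueue_many=True, tf.train.batch treats the entire (2500,) array as one single example, so each of the 10 slots in a batch receives a copy of the whole vector; with enqueue_many=True, every one of the 2500 scalars is its own example, and a batch is just the next 10 of them. A plain-numpy sketch of the shape difference (no TensorFlow queues involved, just an illustration):

```python
import numpy as np

y = np.random.randint(0, 2, size=2500).astype(np.int32)
batch_size = 10

# enqueue_many=False: the whole array is a single example,
# so a batch of 10 stacks 10 copies of the full vector.
batch_without = np.stack([y] * batch_size)
print(batch_without.shape)   # (10, 2500)

# enqueue_many=True: each scalar is its own example,
# so a batch is simply the next 10 scalars from the array.
batch_with = y[:batch_size]
print(batch_with.shape)      # (10,)
```

That is why y_proccessed[9] was a full 2500-element row before the fix, and a single 0/1 value after it.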

I'll leave some additional resources here for reference:

Passing a numpy array to a tensorflow Queue

http://ischlag.github.io/2016/11/07/tensorflow-input-pipeline-for-large-datasets/

How to handle different queue batch size and feed value batch size in tensorflow?

https://github.com/dennybritz/tf-rnn/blob/master/sequence_example.ipynb
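One more note on the looping behaviour asked about above: with enqueue_many=True and allow_smaller_final_batch=True, successive sess.run(y_batch) calls walk through the data in consecutive batch-sized chunks, and the last chunk may be shorter. A plain-numpy sketch of that chunking (assuming no shuffling, which tf.train.batch does not do):

```python
import numpy as np

y = np.arange(25, dtype=np.int32)
batch_size = 10

# Successive batches are consecutive slices of the input;
# allow_smaller_final_batch=True keeps the trailing partial batch.
batches = [y[i:i + batch_size] for i in range(0, len(y), batch_size)]
print([len(b) for b in batches])   # [10, 10, 5]
```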

By the way, if anyone has a better solution, please post it.

Thanks
