
Tensorflow MapDataset iterator fails

I am trying to implement the method suggested by the TensorFlow documentation here ( https://www.tensorflow.org/tutorials/load_data/images ) to load images from a local directory as a TensorFlow dataset. In particular, I am interested in loading the data with tf.data as a tf.data.Dataset object, since the documentation suggests the performance is better that way. I took the code almost verbatim from the documentation page and also made sure that my TensorFlow version matches the one used in the documentation.

The problem happens when I try to iterate over the MapDataset object using take().

import os
import sys
import pathlib

import IPython.display as display
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf

AUTOTUNE = tf.data.experimental.AUTOTUNE

BATCH_SIZE = 32
IMG_HEIGHT = 224
IMG_WIDTH = 224
STEPS_PER_EPOCH = np.ceil(3670/BATCH_SIZE)
CLASS_NAMES = None

#https://www.tensorflow.org/tutorials/load_data/images

def get_label(file_path):
    # convert the path to a list of path components
    #parts = tf.strings.split(file_path, result_type = 'RaggedTensor')
    parts = tf.strings.split(file_path)

    # The second to last is the class-directory
    return parts[-2] == CLASS_NAMES

def decode_img(img):
    # convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)

    # Use `convert_image_dtype` to convert to floats in the [0,1] range.
    img = tf.image.convert_image_dtype(img, tf.float32)

    # resize the image to the desired size.
    return tf.image.resize(img, [IMG_WIDTH, IMG_HEIGHT])

def process_path(file_path):
    label = get_label(file_path)

    # load the raw data from the file as a string
    img = tf.io.read_file(file_path)
    img = decode_img(img)

    return img, label

def test():

    data_dir = tf.keras.utils.get_file(origin='https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
                                         fname='flower_photos', untar=True)

    data_dir = pathlib.Path(data_dir)

    global CLASS_NAMES
    CLASS_NAMES = np.array([item.name for item in data_dir.glob('*') if item.name != "LICENSE.txt"])

    list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*'))

    labeled_ds = list_ds.map(process_path)
    print('type: ', type(labeled_ds))

    for image, label in labeled_ds.take(1):
        print("Image shape: ", image.numpy().shape)
        print("Label: ", label.numpy())

def main():
    test()  

if __name__ == '__main__':
    main()

I get the following error and have no idea how to go about resolving it:

2020-04-17 09:47:53.816123: W tensorflow/core/framework/op_kernel.cc:1655] OP_REQUIRES failed at strided_slice_op.cc:108 : Invalid argument: slice index -1 of dimension 0 out of bounds.
2020-04-17 09:47:53.820082: W tensorflow/core/framework/op_kernel.cc:1655] OP_REQUIRES failed at iterator_ops.cc:941 : Invalid argument: slice index -1 of dimension 0 out of bounds.
         [[{{node strided_slice}}]]
Traceback (most recent call last):
  File "C:\Users\VVJ3281\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow_core\python\eager\context.py", line 1897, in execution_mode
    yield
  File "C:\Users\VVJ3281\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow_core\python\data\ops\iterator_ops.py", line 659, in _next_internal
    output_shapes=self._flat_output_shapes)
  File "C:\Users\VVJ3281\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow_core\python\ops\gen_dataset_ops.py", line 2478, in iterator_get_next_sync
    _ops.raise_from_not_ok_status(e, name)
  File "C:\Users\VVJ3281\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow_core\python\framework\ops.py", line 6606, in raise_from_not_ok_status
    six.raise_from(core._status_to_exception(e.code, message), None)
  File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.InvalidArgumentError: slice index -1 of dimension 0 out of bounds.
         [[{{node strided_slice}}]] [Op:IteratorGetNextSync]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File ".\img_sub_model.py", line 150, in <module>
    main()
  File ".\img_sub_model.py", line 145, in main
    test()
  File ".\img_sub_model.py", line 136, in test
    for image, label in labeled_ds.take(1):
  File "C:\Users\VVJ3281\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow_core\python\data\ops\iterator_ops.py", line 630, in __next__
    return self.next()
  File "C:\Users\VVJ3281\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow_core\python\data\ops\iterator_ops.py", line 674, in next
    return self._next_internal()
  File "C:\Users\VVJ3281\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow_core\python\data\ops\iterator_ops.py", line 665, in _next_internal
    return structure.from_compatible_tensor_list(self._element_spec, ret)
  File "C:\Users\VVJ3281\AppData\Local\Continuum\anaconda3\lib\contextlib.py", line 130, in __exit__
    self.gen.throw(type, value, traceback)
  File "C:\Users\VVJ3281\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow_core\python\eager\context.py", line 1900, in execution_mode
    executor_new.wait()
  File "C:\Users\VVJ3281\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow_core\python\eager\executor.py", line 67, in wait
    pywrap_tensorflow.TFE_ExecutorWaitForAllPendingNodes(self._handle)
tensorflow.python.framework.errors_impl.InvalidArgumentError: slice index -1 of dimension 0 out of bounds.
         [[{{node strided_slice}}]]

By some random coincidence I found that when CLASS_NAMES is set to None, the code runs, and the label element of labeled_ds has the value False.

See output below

type:  <class 'tensorflow.python.data.ops.dataset_ops.MapDataset'>
Image shape:  (224, 224, 3)
Label:  False

The error occurs because get_label performs an out-of-bounds index access on the split result:

def get_label(file_path):
    # convert the path to a list of path components
    parts = tf.strings.split(file_path)

    # The second to last is the class-directory
    return parts[-2] == CLASS_NAMES

parts has size 1. This is because tf.strings.split splits on whitespace unless you specify a delimiter. To split the path into its components, it should be parts = tf.strings.split(file_path, "/")
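
For completeness, here is a minimal sketch of the corrected get_label with that change applied. Using os.path.sep instead of a hard-coded "/" is my own assumption, so that the same code also handles the Windows-style backslash paths shown in the traceback; the rest follows the answer above.

def get_label(file_path):
    # split the path into its components using the platform's separator
    # (os.path.sep is an assumption; the answer suggests "/")
    parts = tf.strings.split(file_path, os.path.sep)

    # the second-to-last component is the class directory; comparing it
    # against CLASS_NAMES yields a boolean vector with True at the matching class
    return parts[-2] == CLASS_NAMES

With this change, the label printed by the take(1) loop should be a boolean vector over CLASS_NAMES rather than a single False.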

To debug this sort of issue, you can add tf.print statements to your functions, e.g.

def get_label(file_path):
    # convert the path to a list of path components
    parts = tf.strings.split(file_path)

    # The second to last is the class-directory
    tf.print(file_path)
    tf.print(len(parts))
    return parts[-2] == CLASS_NAMES
