Dali: Can't use tensorflow example with TFRecord file or FileReader

Created on 3 Jul 2018 · 13Comments · Source: NVIDIA/DALI

The directory "example/tensorflow" shows how to use DALI with TensorFlow, but it is implemented with ops.MXNetReader to read images and labels. When I try to use ops.FileReader or ops.TFRecordReader to read the files instead, the following error occurs:

DALI data_tensor_shape = ShapeAt(&pipe_handle_, 0) failed: [/opt/dali/dali/pipeline/data/tensor.h:188] Assert on "tl->IsDenseTensor()" failed: All tensors in the input TensorList must have the same shape and be densely packed.

I have no idea what the error means. Each pipeline class returns the same data structure, so why does MXNetReader work while the others do not?

my code(which is mostly copied from the example):

class RN50Pipeline(Pipeline):
    """Pipeline that feeds images from ``ops.MXNetReader`` through decode, resize and crop.

    The reader is sharded with ``shard_id = device_id`` and
    ``num_shards = num_gpus``. ``rec_files`` and ``idx_files`` are expected
    to be defined at module level.
    """

    def __init__(self, batch_size, num_threads, device_id, num_gpus):
        super(RN50Pipeline, self).__init__(batch_size, num_threads, device_id)

        # Reader: one shard per device.
        self.input = ops.MXNetReader(
            path=rec_files,
            index_path=idx_files,
            shard_id=device_id,
            num_shards=num_gpus,
        )

        # "mixed" decoder takes the reader output and produces RGB images.
        self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

        # Randomized resize configured with resize_a = 256 and resize_b = 480.
        self.resize = ops.Resize(
            device="gpu",
            random_resize=True,
            resize_a=256,
            resize_b=480,
            image_type=types.RGB,
            interp_type=types.INTERP_LINEAR,
        )

        # 227x227 float crop, configured with mean 128 and std 1 per channel.
        self.cmn = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT,
            crop=(227, 227),
            image_type=types.RGB,
            mean=[128., 128., 128.],
            std=[1., 1., 1.],
        )

        # Source of the crop position arguments, drawn from (0.0, 1.0).
        self.uniform = ops.Uniform(range=(0.0, 1.0))

    def define_graph(self):
        """Wire the operators together and return ``(images, labels)``."""
        encoded, labels = self.input(name="Reader")
        decoded = self.decode(encoded)
        resized = self.resize(decoded)
        # Two separate draws: one for each crop coordinate.
        pos_x = self.uniform()
        pos_y = self.uniform()
        output = self.cmn(resized, crop_pos_x=pos_x, crop_pos_y=pos_y)
        # Labels are moved to the GPU alongside the images.
        gpu_labels = labels.gpu()
        return (output, gpu_labels)
class FileReadPipeline(Pipeline):
    """Pipeline that reads files with ``ops.FileReader`` and emits 227x227 float crops.

    Files are taken from the module-level ``image_dir``; the base pipeline
    is created with a fixed ``seed = 12``.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super(FileReadPipeline, self).__init__(
            batch_size, num_threads, device_id, seed=12
        )

        # Shuffled file reader rooted at image_dir.
        self.input = ops.FileReader(
            file_root=image_dir,
            random_shuffle=True,
            initial_fill=21,
        )

        # "mixed" decoder producing RGB images.
        self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

        # Randomized resize configured with resize_a = 256 and resize_b = 480.
        self.resize = ops.Resize(
            device="gpu",
            random_resize=True,
            resize_a=256,
            resize_b=480,
            image_type=types.RGB,
            interp_type=types.INTERP_LINEAR,
        )

        # 227x227 float crop, configured with mean 128 and std 1 per channel.
        self.cmn = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT,
            crop=(227, 227),
            image_type=types.RGB,
            mean=[128., 128., 128.],
            std=[1., 1., 1.],
        )

        # Source of the crop position arguments, drawn from (0.0, 1.0).
        self.uniform = ops.Uniform(range=(0.0, 1.0))

    def define_graph(self):
        """Build the graph and return ``(images, labels)`` with labels moved to the GPU."""
        jpegs, labels = self.input()
        decoded = self.decode(jpegs)
        resized = self.resize(decoded)
        # Two separate draws: one for each crop coordinate.
        pos_x = self.uniform()
        pos_y = self.uniform()
        output = self.cmn(resized, crop_pos_x=pos_x, crop_pos_y=pos_y)
        # The image branch runs on GPU operators; labels are moved there too.
        return (output, labels.gpu())

class TFRecordPipeline(Pipeline):
    """Pipeline that reads a TFRecord file and emits 227x227 float crops plus labels.

    The label output is the numeric ``image/class/label`` feature, declared
    as a fixed-length int64 of shape ``[1]``. The earlier version returned
    ``image/class/text``, a human-readable string that is not meant for
    training. Returning it triggered the "All tensors in the input
    TensorList must have the same shape and be densely packed" assertion
    when the TensorFlow op queried the output shape.

    ``tfrecord`` and ``tfrecord_idx`` are expected to be defined at module
    level.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super(TFRecordPipeline, self).__init__(batch_size,
                                               num_threads,
                                               device_id)
        self.input = ops.TFRecordReader(
            path=tfrecord,
            index_path=tfrecord_idx,
            features={
                "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
                # Fixed-shape numeric label, so every sample in the batch
                # has the same shape.
                "image/class/label": tfrec.FixedLenFeature([1], tfrec.int64, -1),
            })
        self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
        self.resize = ops.Resize(device="gpu", random_resize=True,
                                 resize_a=256, resize_b=480,
                                 image_type=types.RGB,
                                 interp_type=types.INTERP_LINEAR)
        self.cmn = ops.CropMirrorNormalize(device="gpu",
                                           output_dtype=types.FLOAT,
                                           crop=(227, 227),
                                           image_type=types.RGB,
                                           mean=[128., 128., 128.],
                                           std=[1., 1., 1.])
        self.uniform = ops.Uniform(range=(0.0, 1.0))

    def define_graph(self):
        """Build the graph and return ``(images, labels)`` with labels moved to the GPU."""
        inputs = self.input()
        images = self.decode(inputs["image/encoded"])
        resized_images = self.resize(images)
        output = self.cmn(resized_images, crop_pos_x=self.uniform(),
                          crop_pos_y=self.uniform())
        # Labels are moved to the GPU alongside the images.
        return (output, inputs["image/class/label"].gpu())

def get_batch_test_dali(batch_size):
    """Create one DALI pipeline per device and return the TensorFlow ops that read from them.

    Args:
        batch_size: Batch size used both for the DALI pipelines and for the
            DALI TensorFlow op, so the two always agree.

    Returns:
        ``[images, labels]`` where each element is a list with one
        TensorFlow tensor per device (``DEVICES`` entries).
    """
    # FileReadPipeline is the active choice. The alternatives are kept for
    # reference; each pipeline gets its own device_id.
    pipes = [FileReadPipeline(batch_size=batch_size, num_threads=2, device_id=device_id)
             for device_id in range(DEVICES)]
    # pipes = [RN50Pipeline(batch_size=batch_size, num_threads=2, device_id=device_id, num_gpus=DEVICES)
    #          for device_id in range(DEVICES)]
    # pipes = [TFRecordPipeline(batch_size=batch_size, num_threads=2, device_id=device_id)
    #          for device_id in range(DEVICES)]

    # The DALI TensorFlow op is initialized from the serialized pipeline,
    # so the Python pipeline objects can be dropped afterwards.
    serialized_pipes = [pipe.serialize() for pipe in pipes]
    del pipes

    daliop = dali_tf.DALIIterator()
    images = []
    labels = []
    for d in range(DEVICES):
        with tf.device('/gpu:%i' % d):
            image, label = daliop(serialized_pipeline=serialized_pipes[d],
                                  batch_size=batch_size,
                                  height=227,
                                  width=227,
                                  device_id=d)
            images.append(image)
            labels.append(label)

    return [images, labels]

def main_run():
    """Fetch batches from the DALI ops and feed the first device's images to ``softmax``.

    Relies on the module-level names ``BATCH_SIZE``, ``NUM_DATA``,
    ``softmax`` and ``x``.
    """
    test_batch = get_batch_test_dali(BATCH_SIZE)
    # The `with` block closes the session on exit; no explicit close is needed.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        while step < NUM_DATA / BATCH_SIZE + 1:
            print('step', step)
            # get_batch mirrors test_batch: [images_per_device, labels_per_device].
            get_batch = sess.run(test_batch)
            first_device_images = get_batch[0][0]
            # Presumably converts each image from CHW to HWC and adds back the
            # mean of 128 configured in the pipeline. TODO confirm the layout.
            imgs = [img.transpose((1, 2, 0)) + 128 for img in first_device_images]
            maxx = sess.run(softmax, feed_dict={x: imgs})
            step = step + 1

examples

Source

jxmelody

Most helpful comment

I just prepared an example based on your experience: https://github.com/NVIDIA/DALI/pull/58.

JanuszL on 19 Jul 2018

😄1 👍1

All 13 comments

Hmm, that's strange - @Kh4L could you look at it?

ptrendx on 3 Jul 2018

Hi,
For me the following code works (I corrected some errors from your example):

class FileReadPipeline(Pipeline):
    """File-based pipeline: ``ops.FileReader`` -> decode -> resize -> crop/normalize.

    Reads from the module-level ``image_dir`` and creates the base pipeline
    with ``seed = 12``.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super(FileReadPipeline, self).__init__(
            batch_size,
            num_threads,
            device_id,
            seed=12,
        )
        self.input = ops.FileReader(
            file_root=image_dir,
            random_shuffle=True,
            initial_fill=21,
        )
        self.decode = ops.nvJPEGDecoder(
            device="mixed",
            output_type=types.RGB,
        )
        self.resize = ops.Resize(
            device="gpu",
            random_resize=True,
            resize_a=256,
            resize_b=480,
            image_type=types.RGB,
            interp_type=types.INTERP_LINEAR,
        )
        self.cmn = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT,
            crop=(227, 227),
            image_type=types.RGB,
            mean=[128., 128., 128.],
            std=[1., 1., 1.],
        )
        # Supplies the crop_pos_x / crop_pos_y arguments, drawn from (0.0, 1.0).
        self.uniform = ops.Uniform(range=(0.0, 1.0))

    def define_graph(self):
        """Return ``(output, labels)`` where ``output`` comes from CropMirrorNormalize."""
        jpegs, labels = self.input()
        resized_images = self.resize(self.decode(jpegs))
        # Draw the two crop coordinates independently, x first and then y.
        crop_x = self.uniform()
        crop_y = self.uniform()
        output = self.cmn(resized_images, crop_pos_x=crop_x, crop_pos_y=crop_y)
        # Labels are moved to the GPU as well.
        return (output, labels.gpu())

I will check TFRecordPipeline too later.

JanuszL on 3 Jul 2018

Hi @JanuszL ,
Thanks for your reply.
I checked my code and found it was somewhat confusing, because I had changed it before raising this issue... so I have now modified it.
Actually, what confuses me most is the serialization part... What is the difference between a serialized pipeline and a non-serialized pipeline? Must the pipeline be serialized when using TensorFlow?
Finally, I noticed that your code above differs from mine in the last line:

return (output, labels.gpu())
#return (output,labels)

So, is the .gpu() necessary?

(Sorry, I can't run the code right now, but I will check whether the change works tomorrow (9 hours later...). Thanks again ;))

jxmelody on 3 Jul 2018

Hi @jxmelody ,
For TFRecordPipeline you have to define the graph by overriding def define_graph(self):.

As @JanuszL wrote, in FileReadPipeline you are returning images, that is, images = self.decode(jpegs). You actually want to return output (from CropMirrorNormalize).

Yes, .gpu() is necessary because FileReader returns CPU tensors and tensorflow-gpu expects only GPU tensors as output.
So your define_graph methods should return

return (output, labels.gpu())

The serialized pipeline is a string representing your pipeline in protobuf format: it contains parameters and the graph defined in Python. Our DALI-Tensorflow op needs this serialized pipeline to build and run internally the actual DALI Pipeline.

Kh4L on 3 Jul 2018

Hi @Kh4L ,
Sorry... I forgot to paste the define_graph function of TFRecordPipeline, but I did write this function and it does not work... Anyway, I will add it to my code above so you can check it.

For FileReadPipeline, after correcting the errors as @JanuszL wrote, it works now. It was my mistake. Thank you both!

But TFRecordPipeline still does not work, even though I return .gpu() as you said.

Thank you for explaining the serialization. So, as far as I understand, it is necessary to serialize the pipeline first before using sess.run([image, label]), isn't it?

jxmelody on 4 Jul 2018

Hi,
Yes, indeed you need to serialize the pipeline first, then use it to initialize daliop. Please follow the TensorFlow-ResNet50 example.
TFRecordPipeline works for me. You need to use image/class/label as labels; image/class/text is a human-readable representation that is not meant for training.

class TFRecordPipeline(Pipeline):
    """TFRecord-based pipeline returning 224x224 float crops and int64 labels.

    Reads the ``image/encoded`` and ``image/class/label`` features from the
    module-level ``tfrecord`` / ``tfrecord_idx`` files.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super(TFRecordPipeline, self).__init__(batch_size, num_threads, device_id)

        # Encoded image bytes plus a fixed-length ([1]) int64 label.
        record_features = {
            "image/encoded": tfrec.FixedLenFeature((), tfrec.string, ""),
            'image/class/label': tfrec.FixedLenFeature([1], tfrec.int64, -1),
        }
        self.input = ops.TFRecordReader(
            path=tfrecord,
            index_path=tfrecord_idx,
            features=record_features,
        )
        self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
        self.resize = ops.Resize(device="gpu", resize_a=256, resize_b=256)
        self.cmnp = ops.CropMirrorNormalize(
            device="gpu",
            output_dtype=types.FLOAT,
            crop=(224, 224),
            image_type=types.RGB,
            mean=[0., 0., 0.],
            std=[1., 1., 1.],
        )
        # Supplies the crop_pos_x / crop_pos_y arguments, drawn from (0.0, 1.0).
        self.uniform = ops.Uniform(range=(0.0, 1.0))

    def define_graph(self):
        """Return ``(output, labels)`` with the labels moved to the GPU."""
        inputs = self.input()
        decoded = self.decode(inputs["image/encoded"])
        resized = self.resize(decoded)
        # Draw the two crop coordinates independently, x first and then y.
        crop_x = self.uniform()
        crop_y = self.uniform()
        output = self.cmnp(resized, crop_pos_x=crop_x, crop_pos_y=crop_y)
        labels = inputs["image/class/label"].gpu()
        return (output, labels)

JanuszL on 6 Jul 2018

What's the next step to improve this one? Do we need to document the example better, improve it generally, ... ?