Keras-retinanet: FailedPreconditionError: Attempting to use uninitialized value training_...

Created on 26 Aug 2018 · 2 Comments · Source: fizyr/keras-retinanet

I am using custom code based on the train.py script. I am attempting to create the resnet50_retinanet model, load the pretrained COCO weights, freeze certain layers, and then compile and train using a custom CSV dataset. I'm completely lost by this error message.

My code is here:
...

from keras_retinanet import models
from keras_retinanet.models import resnet

num_classes = 5
base_model = resnet.resnet50_retinanet(num_classes , inputs = None )
base_model.load_weights(r"resnet50_coco_best_v2.1.0.h5" , by_name = True , skip_mismatch=True)

for layer in base_model.layers[:]:
    layer.trainable = False    
#make the last 9 layers trainable
for layer in base_model.layers[len(base_model.layers)-9:]:
    layer.trainable = True

from keras.optimizers import SGD #using adam here also causes same error
from keras_retinanet import losses
from keras_retinanet.models.retinanet import retinanet_bbox
#training is same ss base because single GPU
training_model = base_model
training_model.compile(
    loss = {
        'regression'  : losses.smooth_l1(),
        'classification': losses.focal()
    },
    optimizer = SGD(lr=1e-5, clipnorm=0.001)
)
pred_model = retinanet_bbox(model=base_model)
#After creating the callbacks and training and evaluation generators...
steps_epochs =  1250
num_epochs = 10
training_model.fit_generator(
    generator = train_gen,
    steps_per_epoch = steps_epochs,
    epochs = num_epochs,
    verbose = 1, 
    callbacks = callbacks_dat
)

And the error message is:

Epoch 1/10

FailedPreconditionError Traceback (most recent call last)
in ()
7 epochs = num_epochs,
8 verbose = 1,
----> 9 callbacks = callbacks_dat
10 )

c:\users\user\appdata\local\programs\python\python36\lib\site-packages\keras\legacy\interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your ' + object_name + 90 ' call to the Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper

c:\users\user\appdata\local\programs\python\python36\lib\site-packages\keras\engine\training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1424 use_multiprocessing=use_multiprocessing,
1425 shuffle=shuffle,
-> 1426 initial_epoch=initial_epoch)
1427
1428 @interfaces.legacy_generator_methods_support

c:\users\user\appdata\local\programs\python\python36\lib\site-packages\keras\engine\training_generator.py in fit_generator(model, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
189 outs = model.train_on_batch(x, y,
190 sample_weight=sample_weight,
--> 191 class_weight=class_weight)
192
193 if not isinstance(outs, list):

c:\users\user\appdata\local\programs\python\python36\lib\site-packages\keras\engine\training.py in train_on_batch(self, x, y, sample_weight, class_weight)
1218 ins = x + y + sample_weights
1219 self._make_train_function()
-> 1220 outputs = self.train_function(ins)
1221 if len(outputs) == 1:
1222 return outputs[0]

c:\users\user\appdata\local\programs\python\python36\lib\site-packages\keras\backend\tensorflow_backend.py in __call__(self, inputs)
2659 return self._legacy_call(inputs)
2660
-> 2661 return self._call(inputs)
2662 else:
2663 if py_any(is_tensor(x) for x in inputs):

c:\users\user\appdata\local\programs\python\python36\lib\site-packages\keras\backend\tensorflow_backend.py in _call(self, inputs)
2629 symbol_vals,
2630 session)
-> 2631 fetched = self._callable_fn(*array_vals)
2632 return fetched[:len(self.outputs)]
2633

c:\users\user\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\client\session.py in __call__(self, *args)
1452 else:
1453 return tf_session.TF_DeprecatedSessionRunCallable(
-> 1454 self._session._session, self._handle, args, status, None)
1455
1456 def __del__(self):

c:\users\user\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
517 None, None,
518 compat.as_text(c_api.TF_Message(self.status.status)),
--> 519 c_api.TF_GetCode(self.status.status))
520 # Delete the underlying status object from memory otherwise it stays alive
521 # as there is a reference to status from this from the traceback due to

FailedPreconditionError: Attempting to use uninitialized value training_2/SGD/Variable_43
[[Node: training_2/SGD/Variable_43/read = Identity[T=DT_FLOAT, _class=["loc:@training_2/SGD/Assign_86"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](training_2/SGD/Variable_43)]]
[[Node: loss_2/regression_loss/Mean_2/_5469 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_7035_loss_2/regression_loss/Mean_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

I'm still new to TensorFlow and Keras and have no idea what exactly is going on. The same error occurs regardless of the optimizer I choose:

FailedPreconditionError: Attempting to use uninitialized value training_...

I have a Windows machine and have installed the Windows fork of the cocoapi. I also have TensorFlow 1.9.0 and am sure that both keras-retinanet and TensorFlow have been installed correctly.

Source

Miguel-P

Most helpful comment

@Miguel-P did you manage to figure it out? I have the same problem but don't know how to solve it. The problem is probably that AdamOptimizer and some other optimizers create additional variables internally, but these don't get initialized before the model is trained. See this. When creating a model manually, tf.global_variables_initializer should work. But here, I don't know how to initialize the variables.

Update

The problem was solved when I added `keras.backend.get_session().run(tf.global_variables_initializer())` before the call to `fit_generator`.
Here is my code:
keras.backend.get_session().run(tf.global_variables_initializer())
steps = 100 # 35000
epochs = 50
import retinanet
from retinanet.train import create_generators

training_model.fit_generator(
generator=train_generator,
steps_per_epoch=steps,
epochs=epochs,
verbose=1,
callbacks=callbacks,
workers=1)

avaghefi on 28 Aug 2018

👍7 🎉2

All 2 comments

Update

training_model.fit_generator(
generator=train_generator,
steps_per_epoch=steps,
epochs=epochs,
verbose=1,
callbacks=callbacks,
workers=1)