Using:
TensorFlow 2.1.0
TensorFlow Addons 0.9.1
Describe the bug
The TQDMProgressBar callback fails when the model is trained on a tf.data dataset.
Code to reproduce the issue
import tensorflow as tf
print(f"tf.__version__: {tf.__version__}")
import tensorflow_addons as tfa
from tensorflow.keras import backend as K
from tensorflow.keras.datasets import mnist
import tensorflow_datasets as tfds
import os
# Resolve the Colab TPU, connect to it, and build the distribution strategy
# that the model is later created under.
try:
    # A missing COLAB_TPU_ADDR raises KeyError from os.environ; the resolver
    # itself may raise ValueError (presumably for an unusable address --
    # confirm against the TPUClusterResolver docs). Both are treated as
    # "no TPU runtime attached".
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver(
        tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])  # TPU detection
    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
except (KeyError, ValueError) as err:
    # RuntimeError rather than a bare BaseException, so ordinary
    # `except Exception` handlers still see the failure; the original
    # cause is chained for debugging.
    raise RuntimeError('ERROR: Not connected to a TPU runtime.') from err

tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)
print("REPLICAS: ", tpu_strategy.num_replicas_in_sync)
def get_dataset(batch_size=200):
    """Load MNIST via TFDS and return ``(train_dataset, test_dataset)``.

    Images in both splits are cast to float32 and divided by 255.0. The
    training split is shuffled with a 10000-element buffer before being
    batched; the test split is only batched.

    Args:
        batch_size: Number of examples per batch for both splits.

    Returns:
        A tuple ``(train_dataset, test_dataset)`` of batched datasets.
    """
    def scale(image, label):
        # Cast to float32 and divide by 255.0; the label is passed through.
        return tf.cast(image, tf.float32) / 255.0, label

    # The dataset info returned by with_info=True is not used here.
    splits, _info = tfds.load(
        name='mnist',
        with_info=True,
        as_supervised=True,
        try_gcs=True,
    )

    scaled_train = splits['train'].map(scale)
    scaled_test = splits['test'].map(scale)

    train_dataset = scaled_train.shuffle(10000).batch(batch_size)
    test_dataset = scaled_test.batch(batch_size)
    return train_dataset, test_dataset
def create_model():
    """Build the Keras Sequential model used by the reproducer.

    The stack is Conv2D(32 filters, kernel size 3, ReLU) on 28x28x1 inputs,
    then Flatten, Dense(128, ReLU), and a final Dense(10) with no activation.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    stack = [
        layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(10),
    ]
    return tf.keras.Sequential(stack)
# Build the input pipelines with the default batch size.
train_dataset, test_dataset = get_dataset()

# Create and compile the model under the TPU distribution strategy.
with tpu_strategy.scope():
    model = create_model()
    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['sparse_categorical_accuracy'],
    )

# Train with the TQDM progress-bar callback attached; this is the call that
# triggers the reported TypeError. verbose=0 is presumably there so the
# built-in Keras progress output does not compete with the TQDM bar.
model.fit(
    train_dataset,
    epochs=10,
    validation_data=test_dataset,
    callbacks=[tfa.callbacks.TQDMProgressBar()],
    verbose=0,
)
Error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_v2.py in on_epoch(self, epoch, mode)
766 try:
--> 767 yield epoch_logs
768 finally:
13 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
341 training_context=training_context,
--> 342 total_epochs=epochs)
343 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
180 cbks.make_logs(model, batch_logs, batch_outs, mode)
--> 181 step += 1
182
/usr/lib/python3.6/contextlib.py in __exit__(self, type, value, traceback)
87 try:
---> 88 next(self.gen)
89 except StopIteration:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_v2.py in on_batch(self, step, mode, size)
787 self.callbacks._call_batch_hook(
--> 788 mode, 'end', step, batch_logs)
789 self.progbar.on_batch_end(step, batch_logs)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py in _call_batch_hook(self, mode, hook, batch, logs)
238 batch_hook = getattr(callback, hook_name)
--> 239 batch_hook(batch, logs)
240 self._delta_ts[hook_name].append(time.time() - t_before_callbacks)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py in on_train_batch_end(self, batch, logs)
527 # For backwards compatibility.
--> 528 self.on_batch_end(batch, logs=logs)
529
/usr/local/lib/python3.6/dist-packages/tensorflow_addons/callbacks/tqdm_progress_bar.py in on_batch_end(self, batch, logs)
169
--> 170 if self.steps_so_far < self.total_steps:
171
TypeError: '<' not supported between instances of 'int' and 'NoneType'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-21-0096553acbe6> in <module>()
51 metrics=['sparse_categorical_accuracy'])
52
---> 53 model.fit(train_dataset,epochs=10,validation_data=test_dataset,callbacks=[tfa.callbacks.TQDMProgressBar()])
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
395 total_epochs=1)
396 cbks.make_logs(model, epoch_logs, eval_result, ModeKeys.TEST,
--> 397 prefix='val_')
398
399 return model.history
/usr/lib/python3.6/contextlib.py in __exit__(self, type, value, traceback)
97 value = type()
98 try:
---> 99 self.gen.throw(type, value, traceback)
100 except StopIteration as exc:
101 # Suppress StopIteration *unless* it's the same exception that
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_v2.py in on_epoch(self, epoch, mode)
769 if mode == ModeKeys.TRAIN:
770 # Epochs only apply to `fit`.
--> 771 self.callbacks.on_epoch_end(epoch, epoch_logs)
772 self.progbar.on_epoch_end(epoch, epoch_logs)
773
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py in on_epoch_end(self, epoch, logs)
300 logs = logs or {}
301 for callback in self.callbacks:
--> 302 callback.on_epoch_end(epoch, logs)
303
304 def on_train_batch_begin(self, batch, logs=None):
/usr/local/lib/python3.6/dist-packages/tensorflow_addons/callbacks/tqdm_progress_bar.py in on_epoch_end(self, epoch, logs)
151 # update the rest of the steps in epoch progress bar
152 self.epoch_progress_tqdm.update(
--> 153 self.total_steps - self.epoch_progress_tqdm.n
154 )
155 self.epoch_progress_tqdm.close()
TypeError: unsupported operand type(s) for -: 'NoneType' and 'int'
@shun-lin
I will take a look, thanks @sourcecode369
Yes, @shun-lin thank you so much. Hope you find a fix soon.
@sourcecode369 please be careful with your phrasing, as it sounds a bit like an order (which I know it isn't, of course). Something like "I need it for my current project, it would be great if we could find a fix soon" would sound much better :)
Hi @sourcecode369, can you update your tensorflow package to 2.2.0-rc3? This issue is fixed in 2.2.0-rc3; I just checked in Colab, and it should work without any error.
Hi @shun-lin , thanks for your quick response. Updating to tf 2.2.0-rc3 fixed the issue for me.
sounds good, will close the issue then :)
Most helpful comment
@sourcecode369 please be careful with your phrasing, as it sounds a bit like an order (which I know it isn't, of course). Something like "I need it for my current project, it would be great if we could find a fix soon" would sound much better :)