Tensorboard: [TF2.0] Error from Tensorboard with keras callback

Created on 19 Mar 2019  ·  7 Comments  ·  Source: tensorflow/tensorboard

Consider Stack Overflow for getting support using TensorBoard - they have a larger community with better searchability:

https://stackoverflow.com/questions/tagged/tensorboard

For bug reports, please include the following:

  • TensorBoard version (from pip package, also printed out when running tensorboard)
    tb-nightly=1.14.0a20190301
  • TensorFlow version if different from TensorBoard
    tensorflow-gpu=2.0.0a0 & tb-nightly=1.14.0a20190301
  • OS Platform and version
    win10
  • Python version
    3.6.8
  • For browser-related issues:

I want to try TensorBoard from Keras. It works with tensorflow-gpu=1.13.1 & tensorboard=1.13.1,
but I get the error below with tensorflow-gpu=2.0.0a0 & tf-nightly-gpu=1.14.1.dev20190310 & tb-nightly=1.14.0a20190301:

Epoch 1/50
   32/60000 [..............................] - ETA: 11:31 - loss: 2.3852 - acc: 0.1562
---------------------------------------------------------------------------
NotFoundError                             Traceback (most recent call last)
<ipython-input-4-aadf56b04ffa> in <module>
----> 1 model.fit(x_train, y_train, epochs=50, callbacks=[tensorboard_callback])
      2 
      3 model.evaluate(x_test, y_test)

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    863           validation_steps=validation_steps,
    864           validation_freq=validation_freq,
--> 865           steps_name='steps_per_epoch')
    866 
    867   def evaluate(self,

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs)
    361         # Callbacks batch end.
    362         batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
--> 363         callbacks._call_batch_hook(mode, 'end', batch_index, batch_logs)
    364         progbar.on_batch_end(batch_index, batch_logs)
    365 

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in _call_batch_hook(self, mode, hook, batch, logs)
    225     for callback in self.callbacks:
    226       batch_hook = getattr(callback, hook_name)
--> 227       batch_hook(batch, logs)
    228     self._delta_ts[hook_name].append(time.time() - t_before_callbacks)
    229 

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in on_train_batch_end(self, batch, logs)
    507     """
    508     # For backwards compatibility.
--> 509     self.on_batch_end(batch, logs=logs)
    510 
    511   def on_test_batch_begin(self, batch, logs=None):

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks_v1.py in on_batch_end(self, batch, logs)
    360     self._total_batches_seen += 1
    361     if self._is_profiling:
--> 362       profiler.save(self.log_dir, profiler.stop())
    363       self._is_profiling = False
    364     elif (not self._is_profiling and

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\eager\profiler.py in save(logdir, result)
    141       logdir, 'plugins', 'profile',
    142       datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
--> 143   gfile.MakeDirs(plugin_dir)
    144   maybe_create_event_file(logdir)
    145   with gfile.Open(os.path.join(plugin_dir, 'local.trace'), 'wb') as f:

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\lib\io\file_io.py in recursive_create_dir(dirname)
    446     errors.OpError: If the operation fails.
    447   """
--> 448   recursive_create_dir_v2(dirname)
    449 
    450 

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\lib\io\file_io.py in recursive_create_dir_v2(path)
    462   """
    463   with errors.raise_exception_on_not_ok_status() as status:
--> 464     pywrap_tensorflow.RecursivelyCreateDir(compat.as_bytes(path), status)
    465 
    466 

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
    546             None, None,
    547             compat.as_text(c_api.TF_Message(self.status.status)),
--> 548             c_api.TF_GetCode(self.status.status))
    549     # Delete the underlying status object from memory otherwise it stays alive
    550     # as there is a reference to status from this from the traceback due to

NotFoundError: Failed to create a directory: logs/fit/20190315-164851\plugins\profile\2019-03-15_16-48-53; No such file or directory

If I reinstall tensorflow=2.0.0-alpha0 and tf-nightly-gpu=1.14.1.dev20190310, I get a different error:

NotFoundError                             Traceback (most recent call last)
<ipython-input-6-aadf56b04ffa> in <module>
----> 1 model.fit(x_train, y_train, epochs=50, callbacks=[tensorboard_callback])
      2 
      3 model.evaluate(x_test, y_test)

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    871           validation_steps=validation_steps,
    872           validation_freq=validation_freq,
--> 873           steps_name='steps_per_epoch')
    874 
    875   def evaluate(self,

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs)
    202       samples=num_samples_or_steps,
    203       verbose=0,  # Handle ProgBarLogger separately in this loop.
--> 204       mode=mode)
    205   # TODO(omalleyt): Handle ProgBar as part of Callbacks once hooks are ready.
    206   progbar = training_utils.get_progbar(model, count_mode)

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in configure_callbacks(callbacks, model, do_validation, batch_size, epochs, steps_per_epoch, samples, verbose, count_mode, mode)
     94   # Set callback model
     95   callback_model = model._get_callback_model()  # pylint: disable=protected-access
---> 96   callback_list.set_model(callback_model)
     97 
     98   set_callback_parameters(

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in set_model(self, model)
    208     self.model = model
    209     for callback in self.callbacks:
--> 210       callback.set_model(model)
    211 
    212   def _call_batch_hook(self, mode, hook, batch, logs=None):

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in set_model(self, model)
   1213     self.model = model
   1214     with context.eager_mode():
-> 1215       self._initialize_writers()
   1216       if self.write_graph:
   1217         if model.run_eagerly:

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in _initialize_writers(self)
   1251       return summary_ops_v2.create_file_writer_v2(path)
   1252 
-> 1253     self._train_writer = create_writer('train')
   1254     self._writers.append(self._train_writer)
   1255     self._validation_writer = create_writer('validation')

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in create_writer(subdir)
   1249     def create_writer(subdir):
   1250       path = os.path.join(self.log_dir, subdir)
-> 1251       return summary_ops_v2.create_file_writer_v2(path)
   1252 
   1253     self._train_writer = create_writer('train')

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\ops\summary_ops_v2.py in create_file_writer_v2(logdir, max_queue, flush_millis, filename_suffix, name)
    377               filename_suffix=filename_suffix),
    378           name=name,
--> 379           v2=True)
    380 
    381 

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\ops\summary_ops_v2.py in __init__(self, shared_name, init_op_fn, name, v2)
    197     # TODO(nickfelt): cache other constructed ops in graph mode
    198     self._init_op_fn = init_op_fn
--> 199     self._init_op = init_op_fn(self._resource)
    200     self._v2 = v2
    201     self._closed = False

~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\ops\gen_summary_ops.py in create_summary_file_writer(writer, logdir, max_queue, flush_millis, filename_suffix, name)
    190       else:
    191         message = e.message
--> 192       _six.raise_from(_core._status_to_exception(e.code, message), None)
    193   # Add nodes to the TensorFlow graph.
    194   _, _, _op = _op_def_lib._apply_op_helper(

~\Anaconda3\envs\lab\lib\site-packages\six.py in raise_from(value, from_value)

NotFoundError: Failed to create a directory: logs/fit/20190315-171835\train; No such file or directory [Op:CreateSummaryFileWriter]

Code to reproduce the issue

from __future__ import absolute_import, division, print_function
import tensorflow as tf
import datetime
from tensorflow.keras.callbacks import TensorBoard

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10, activation='softmax')
])

log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir)

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=50, callbacks=[tensorboard_callback])
model.evaluate(x_test, y_test)

Any suggestion to fix? Thanks.

awaiting response support

Most helpful comment

You're hitting https://github.com/tensorflow/tensorflow/issues/26021,
a Windows-specific bug in TensorFlow.

The fix is to use the platform-appropriate path separators in log_dir
rather than hard-coding forward slashes:

log_dir = os.path.join(
    "logs",
    "fit",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
)

Can you try this and see if it works for you?

All 7 comments

You're hitting https://github.com/tensorflow/tensorflow/issues/26021,
a Windows-specific bug in TensorFlow.

The fix is to use the platform-appropriate path separators in log_dir
rather than hard-coding forward slashes:

log_dir = os.path.join(
    "logs",
    "fit",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
)

Can you try this and see if it works for you?

Hi, @wchargin
This is very helpful, thanks a lot!

Great; glad to hear it!

@wchargin thanks a lot!

Any idea why TensorFlow might be raising this error? I am following their hyperparameter tuning tutorial in Colab with Google Drive.

UnimplementedError                        Traceback (most recent call last)
<ipython-input-8-42471fa05132> in <module>()
     13 METRIC_ACCURACY = 'accuracy'
     14 
---> 15 with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
     16   hp.hparams_config(
     17     hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER],

4 frames
/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)

UnimplementedError: logs; Operation not supported [Op:CreateSummaryFileWriter]

My chunk of code throwing the error:

x, y = images_data, integer_labels
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=0.2, stratify=y)
x_train, x_test = x_train.reshape(len(x_train), -1), x_test.reshape(len(x_test), -1)
x_train, x_test = x_train/255, x_test/255
y_train, y_test = to_categorical(y_train, num_classes=6), to_categorical(y_test, num_classes=6)



HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([200, 200, 150, 100], [200, 150, 100]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.1, 0.2))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam']))

METRIC_ACCURACY = 'accuracy'

with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
  hp.hparams_config(
    hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER], 
    metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
  )

@wchargin Your suggestion worked perfectly for me.

My initial code:
folder_name = f'{model_name} at {strftime("%H:%M")}'
directory = os.path.join(LOGGING_PATH, folder_name)

Suggested code:
directory = os.path.join( "logs", "fit", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"), )

Thank you, you're a rockstar.

You're hitting tensorflow/tensorflow#26021,
a Windows-specific bug in TensorFlow.

The fix is to use the platform-appropriate path separators in log_dir
rather than hard-coding forward slashes:

log_dir = os.path.join(
    "logs",
    "fit",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
)

Can you try this and see if it works for you?

This could not fix the issue for me.
OS SYSTEM INFORMATION:

  • windows 10
  • conda 4.7.12
  • python 3.7
  • tensorflow-gpu : 2.1.0
    I used the approach you suggested, but I still have the same issue.
logdir = os.path.join(
        "../../data/keras_model/",
        datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
)
# logdir = "../../data/keras_model/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

Error Information:

NotFoundError: Failed to create a directory: ../../data/keras_model/20200407-183028\train; No such file or directory [Op:CreateSummaryFileWriter]

Information:

Train for 100 steps, validate for 20 steps
---------------------------------------------------------------------------
NotFoundError                             Traceback (most recent call last)
e:\aboutme\github_code\eat_tensorflow2_in_30_days\code\1\2.py in 
     86 
     87 history = model.fit(ds_train, epochs=10, validation_data=ds_test,
---> 88             callbacks = [tensorboard_callback], workers = 4)

D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    817         max_queue_size=max_queue_size,
    818         workers=workers,
--> 819         use_multiprocessing=use_multiprocessing)
    820 
    821   def evaluate(self,

D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    305           count_mode='samples' if use_sample else 'steps',
    306           verbose=0,  # Handle ProgBarLogger separately in this loop.
--> 307           mode=ModeKeys.TRAIN)
    308 
    309       with training_context.on_start(model, training_callbacks, use_sample,

D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\callbacks.py in configure_callbacks(callbacks, model, do_validation, batch_size, epochs, steps_per_epoch, samples, verbose, count_mode, mode)
    105   # Set callback model
    106   callback_model = model._get_callback_model()  # pylint: disable=protected-access
--> 107   callback_list.set_model(callback_model)
    108 
    109   set_callback_parameters(

D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\callbacks.py in set_model(self, model)
    220     self.model = model
    221     for callback in self.callbacks:
--> 222       callback.set_model(model)
    223 
    224   def _call_batch_hook(self, mode, hook, batch, logs=None):

D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\callbacks.py in set_model(self, model)
   1535       self._close_writers()
   1536       if self.write_graph:
-> 1537         with self._get_writer(self._train_run_name).as_default():
   1538           with summary_ops_v2.always_record_summaries():
   1539             if not model.run_eagerly:

D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\callbacks.py in _get_writer(self, writer_name)
   1618     if writer_name not in self._writers:
   1619       path = os.path.join(self._log_write_dir, writer_name)
-> 1620       writer = summary_ops_v2.create_file_writer_v2(path)
   1621       self._writers[writer_name] = writer
   1622     return self._writers[writer_name]

D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\ops\summary_ops_v2.py in create_file_writer_v2(logdir, max_queue, flush_millis, filename_suffix, name)
    407               filename_suffix=filename_suffix),
    408           name=name,
--> 409           v2=True)
    410 
    411 

D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\ops\summary_ops_v2.py in __init__(self, shared_name, init_op_fn, name, v2)
    229     # TODO(nickfelt): cache other constructed ops in graph mode
    230     self._init_op_fn = init_op_fn
--> 231     self._init_op = init_op_fn(self._resource)
    232     self._v2 = v2
    233     self._closed = False

D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\ops\gen_summary_ops.py in create_summary_file_writer(writer, logdir, max_queue, flush_millis, filename_suffix, name)
    151         pass  # Add nodes to the TensorFlow graph.
    152     except _core._NotOkStatusException as e:
--> 153       _ops.raise_from_not_ok_status(e, name)
    154   # Add nodes to the TensorFlow graph.
    155   _, _, _op, _outputs = _op_def_library._apply_op_helper(

D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\framework\ops.py in raise_from_not_ok_status(e, name)
   6604   message = e.message + (" name: " + name if name is not None else "")
   6605   # pylint: disable=protected-access
-> 6606   six.raise_from(core._status_to_exception(e.code, message), None)
   6607   # pylint: enable=protected-access
   6608 

D:\Anaconda3\envs\tf-gpu\lib\site-packages\six.py in raise_from(value, from_value)
Was this page helpful?
0 / 5 - 0 ratings