Consider Stack Overflow for getting support using TensorBoard - they have a larger community with better searchability:
https://stackoverflow.com/questions/tagged/tensorboard
For bug reports, please include the following:
I want to try TensorBoard from Keras. It works with tensorflow-gpu=1.13.1 & tensorboard=1.13.1,
but I get the error below with tensorflow-gpu=2.0.0a0 & tf-nightly-gpu=1.14.1.dev20190310 & tb-nightly=1.14.0a20190301:
Epoch 1/50
32/60000 [..............................] - ETA: 11:31 - loss: 2.3852 - acc: 0.1562
---------------------------------------------------------------------------
NotFoundError Traceback (most recent call last)
<ipython-input-4-aadf56b04ffa> in <module>
----> 1 model.fit(x_train, y_train, epochs=50, callbacks=[tensorboard_callback])
2
3 model.evaluate(x_test, y_test)
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
863 validation_steps=validation_steps,
864 validation_freq=validation_freq,
--> 865 steps_name='steps_per_epoch')
866
867 def evaluate(self,
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs)
361 # Callbacks batch end.
362 batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
--> 363 callbacks._call_batch_hook(mode, 'end', batch_index, batch_logs)
364 progbar.on_batch_end(batch_index, batch_logs)
365
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in _call_batch_hook(self, mode, hook, batch, logs)
225 for callback in self.callbacks:
226 batch_hook = getattr(callback, hook_name)
--> 227 batch_hook(batch, logs)
228 self._delta_ts[hook_name].append(time.time() - t_before_callbacks)
229
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in on_train_batch_end(self, batch, logs)
507 """
508 # For backwards compatibility.
--> 509 self.on_batch_end(batch, logs=logs)
510
511 def on_test_batch_begin(self, batch, logs=None):
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks_v1.py in on_batch_end(self, batch, logs)
360 self._total_batches_seen += 1
361 if self._is_profiling:
--> 362 profiler.save(self.log_dir, profiler.stop())
363 self._is_profiling = False
364 elif (not self._is_profiling and
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\eager\profiler.py in save(logdir, result)
141 logdir, 'plugins', 'profile',
142 datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
--> 143 gfile.MakeDirs(plugin_dir)
144 maybe_create_event_file(logdir)
145 with gfile.Open(os.path.join(plugin_dir, 'local.trace'), 'wb') as f:
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\lib\io\file_io.py in recursive_create_dir(dirname)
446 errors.OpError: If the operation fails.
447 """
--> 448 recursive_create_dir_v2(dirname)
449
450
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\lib\io\file_io.py in recursive_create_dir_v2(path)
462 """
463 with errors.raise_exception_on_not_ok_status() as status:
--> 464 pywrap_tensorflow.RecursivelyCreateDir(compat.as_bytes(path), status)
465
466
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
546 None, None,
547 compat.as_text(c_api.TF_Message(self.status.status)),
--> 548 c_api.TF_GetCode(self.status.status))
549 # Delete the underlying status object from memory otherwise it stays alive
550 # as there is a reference to status from this from the traceback due to
NotFoundError: Failed to create a directory: logs/fit/20190315-164851\plugins\profile\2019-03-15_16-48-53; No such file or directory
If I reinstall tensorflow=2.0.0-alpha0 and tf-nightly-gpu=1.14.1.dev20190310, I get another error:
NotFoundError Traceback (most recent call last)
<ipython-input-6-aadf56b04ffa> in <module>
----> 1 model.fit(x_train, y_train, epochs=50, callbacks=[tensorboard_callback])
2
3 model.evaluate(x_test, y_test)
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
871 validation_steps=validation_steps,
872 validation_freq=validation_freq,
--> 873 steps_name='steps_per_epoch')
874
875 def evaluate(self,
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs)
202 samples=num_samples_or_steps,
203 verbose=0, # Handle ProgBarLogger separately in this loop.
--> 204 mode=mode)
205 # TODO(omalleyt): Handle ProgBar as part of Callbacks once hooks are ready.
206 progbar = training_utils.get_progbar(model, count_mode)
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in configure_callbacks(callbacks, model, do_validation, batch_size, epochs, steps_per_epoch, samples, verbose, count_mode, mode)
94 # Set callback model
95 callback_model = model._get_callback_model() # pylint: disable=protected-access
---> 96 callback_list.set_model(callback_model)
97
98 set_callback_parameters(
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in set_model(self, model)
208 self.model = model
209 for callback in self.callbacks:
--> 210 callback.set_model(model)
211
212 def _call_batch_hook(self, mode, hook, batch, logs=None):
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in set_model(self, model)
1213 self.model = model
1214 with context.eager_mode():
-> 1215 self._initialize_writers()
1216 if self.write_graph:
1217 if model.run_eagerly:
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in _initialize_writers(self)
1251 return summary_ops_v2.create_file_writer_v2(path)
1252
-> 1253 self._train_writer = create_writer('train')
1254 self._writers.append(self._train_writer)
1255 self._validation_writer = create_writer('validation')
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\keras\callbacks.py in create_writer(subdir)
1249 def create_writer(subdir):
1250 path = os.path.join(self.log_dir, subdir)
-> 1251 return summary_ops_v2.create_file_writer_v2(path)
1252
1253 self._train_writer = create_writer('train')
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\ops\summary_ops_v2.py in create_file_writer_v2(logdir, max_queue, flush_millis, filename_suffix, name)
377 filename_suffix=filename_suffix),
378 name=name,
--> 379 v2=True)
380
381
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\ops\summary_ops_v2.py in __init__(self, shared_name, init_op_fn, name, v2)
197 # TODO(nickfelt): cache other constructed ops in graph mode
198 self._init_op_fn = init_op_fn
--> 199 self._init_op = init_op_fn(self._resource)
200 self._v2 = v2
201 self._closed = False
~\Anaconda3\envs\lab\lib\site-packages\tensorflow\python\ops\gen_summary_ops.py in create_summary_file_writer(writer, logdir, max_queue, flush_millis, filename_suffix, name)
190 else:
191 message = e.message
--> 192 _six.raise_from(_core._status_to_exception(e.code, message), None)
193 # Add nodes to the TensorFlow graph.
194 _, _, _op = _op_def_lib._apply_op_helper(
~\Anaconda3\envs\lab\lib\site-packages\six.py in raise_from(value, from_value)
NotFoundError: Failed to create a directory: logs/fit/20190315-171835\train; No such file or directory [Op:CreateSummaryFileWriter]
Code to reproduce the issue
from __future__ import absolute_import, division, print_function
import tensorflow as tf
import datetime
from tensorflow.keras.callbacks import TensorBoard
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation='softmax')
])
log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir)
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(x_train, y_train, epochs=50, callbacks=[tensorboard_callback])
model.evaluate(x_test, y_test)
Any suggestion to fix? Thanks.
You're hitting https://github.com/tensorflow/tensorflow/issues/26021,
a Windows-specific bug in TensorFlow.
The fix is to use the platform-appropriate path separators in log_dir
rather than hard-coding forward slashes:
log_dir = os.path.join(
"logs",
"fit",
datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
)
Can you try this and see if it works for you?
Hi, @wchargin
This is very helpful, thanks a lot!
Great; glad to hear it!
@wchargin thanks a lot!
Any idea why TensorFlow might be raising this issue? I am following their hyperparameter tutorial for gdrive Colab.
UnimplementedError Traceback (most recent call last)
<ipython-input-8-42471fa05132> in <module>()
13 METRIC_ACCURACY = 'accuracy'
14
---> 15 with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
16 hp.hparams_config(
17 hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER],
4 frames
/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)
UnimplementedError: logs; Operation not supported [Op:CreateSummaryFileWriter]
My chunk of code throwing the error:
x, y = images_data, integer_labels
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=0.2, stratify=y)
x_train, x_test = x_train.reshape(len(x_train), -1), x_test.reshape(len(x_test), -1)
x_train, x_test = x_train/255, x_test/255
y_train, y_test = to_categorical(y_train, num_classes=6), to_categorical(y_test, num_classes=6)
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([200, 200, 150, 100], [200, 150, 100]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.1, 0.2))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam']))
METRIC_ACCURACY = 'accuracy'
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
hp.hparams_config(
hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER],
metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
)
@wchargin Your suggestion worked perfectly for me.
My initial code:
folder_name = f'{model_name} at {strftime("%H:%M")}'
directory = os.path.join(LOGGING_PATH, folder_name)
Suggested code:
directory = os.path.join(
"logs",
"fit",
datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
)
Thank you, you're a rockstar.
You're hitting tensorflow/tensorflow#26021,
a Windows-specific bug in TensorFlow.
The fix is to use the platform-appropriate path separators in log_dir
rather than hard-coding forward slashes:
log_dir = os.path.join(
"logs",
"fit",
datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
)
Can you try this and see if it works for you?
This could not fix the issue for me.
OS SYSTEM INFORMATION:
logdir = os.path.join(
"../../data/keras_model/",
datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
)
# logdir = "../../data/keras_model/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
Error Information:
NotFoundError: Failed to create a directory: ../../data/keras_model/20200407-183028\train; No such file or directory [Op:CreateSummaryFileWriter]
Information:
Train for 100 steps, validate for 20 steps
---------------------------------------------------------------------------
NotFoundError Traceback (most recent call last)
e:\aboutme\github_code\eat_tensorflow2_in_30_days\code\1\2.py in
86
87 history = model.fit(ds_train, epochs=10, validation_data=ds_test,
---> 88 callbacks = [tensorboard_callback], workers = 4)
D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
305 count_mode='samples' if use_sample else 'steps',
306 verbose=0, # Handle ProgBarLogger separately in this loop.
--> 307 mode=ModeKeys.TRAIN)
308
309 with training_context.on_start(model, training_callbacks, use_sample,
D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\callbacks.py in configure_callbacks(callbacks, model, do_validation, batch_size, epochs, steps_per_epoch, samples, verbose, count_mode, mode)
105 # Set callback model
106 callback_model = model._get_callback_model() # pylint: disable=protected-access
--> 107 callback_list.set_model(callback_model)
108
109 set_callback_parameters(
D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\callbacks.py in set_model(self, model)
220 self.model = model
221 for callback in self.callbacks:
--> 222 callback.set_model(model)
223
224 def _call_batch_hook(self, mode, hook, batch, logs=None):
D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\callbacks.py in set_model(self, model)
1535 self._close_writers()
1536 if self.write_graph:
-> 1537 with self._get_writer(self._train_run_name).as_default():
1538 with summary_ops_v2.always_record_summaries():
1539 if not model.run_eagerly:
D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\callbacks.py in _get_writer(self, writer_name)
1618 if writer_name not in self._writers:
1619 path = os.path.join(self._log_write_dir, writer_name)
-> 1620 writer = summary_ops_v2.create_file_writer_v2(path)
1621 self._writers[writer_name] = writer
1622 return self._writers[writer_name]
D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\ops\summary_ops_v2.py in create_file_writer_v2(logdir, max_queue, flush_millis, filename_suffix, name)
407 filename_suffix=filename_suffix),
408 name=name,
--> 409 v2=True)
410
411
D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\ops\summary_ops_v2.py in __init__(self, shared_name, init_op_fn, name, v2)
229 # TODO(nickfelt): cache other constructed ops in graph mode
230 self._init_op_fn = init_op_fn
--> 231 self._init_op = init_op_fn(self._resource)
232 self._v2 = v2
233 self._closed = False
D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\ops\gen_summary_ops.py in create_summary_file_writer(writer, logdir, max_queue, flush_millis, filename_suffix, name)
151 pass # Add nodes to the TensorFlow graph.
152 except _core._NotOkStatusException as e:
--> 153 _ops.raise_from_not_ok_status(e, name)
154 # Add nodes to the TensorFlow graph.
155 _, _, _op, _outputs = _op_def_library._apply_op_helper(
D:\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\framework\ops.py in raise_from_not_ok_status(e, name)
6604 message = e.message + (" name: " + name if name is not None else "")
6605 # pylint: disable=protected-access
-> 6606 six.raise_from(core._status_to_exception(e.code, message), None)
6607 # pylint: enable=protected-access
6608
D:\Anaconda3\envs\tf-gpu\lib\site-packages\six.py in raise_from(value, from_value)
Most helpful comment
You're hitting https://github.com/tensorflow/tensorflow/issues/26021,
a Windows-specific bug in TensorFlow.
The fix is to use the platform-appropriate path separators in
log_dir rather than hard-coding forward slashes:
log_dir = os.path.join(
"logs",
"fit",
datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
)
Can you try this and see if it works for you?