System information
Describe the bug
decoder_cell.get_initial_state throws an exception when called.
Code to reproduce the issue
Colab notebook: https://colab.research.google.com/drive/1iwLGirQ9_avql9sKsGY4Gc5JF1rpwYrO
# Input Layer
enc_input_layer = tf.keras.Input(shape = [None], dtype = tf.int32)
# Embedding Layer
EMBEDDING_SIZE = 12
VOCAB_SIZE = 34
enc_embed_layer = tf.keras.layers.Embedding(VOCAB_SIZE, EMBEDDING_SIZE)(enc_input_layer)
# Encoder
LSTM_SIZE_ENC = 512 # num_units
encoder = tf.keras.layers.LSTM(LSTM_SIZE_ENC, return_state=True)
encoder_outputs, state_h, state_c = encoder(enc_embed_layer)
encoder_state = [state_h, state_c]
# Decoder
LSTM_SIZE_DEC = 512
ATTENTION_UNITS = 2
decoder_cell = tfa.seq2seq.AttentionWrapper(
tf.keras.layers.LSTMCell(LSTM_SIZE_DEC),
attention_mechanism = tfa.seq2seq.LuongAttention(
ATTENTION_UNITS,
#memory_sequence_length = 32
memory = encoder_state),
attention_layer_size = ATTENTION_UNITS)
# get the initial_state to call decoder
BATCH_SIZE = 4
initial_state = decoder_cell.get_initial_state(dtype=tf.float32, batch_size=BATCH_SIZE)
Other info / logs
OperatorNotAllowedInGraphError Traceback (most recent call last)
<ipython-input-14-70f3e9fceb40> in <module>()
2
3 if with_attn:
----> 4 initial_state = decoder_cell.get_initial_state(dtype=tf.float32, batch_size=BATCH_SIZE) # Crashes(!)
5 initial_state = initial_state.clone(cell_state=encoder_state)
6 else:
8 frames
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py in get_initial_state(self, inputs, batch_size, dtype)
1960 )
1961 with tf.control_dependencies(
-> 1962 self._batch_size_checks(batch_size, error_message)
1963 ): # pylint: disable=bad-continuation
1964 cell_state = tf.nest.map_structure(
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py in _batch_size_checks(self, batch_size, error_message)
1839 batch_size, attention_mechanism.batch_size, message=error_message
1840 )
-> 1841 for attention_mechanism in self._attention_mechanisms
1842 ]
1843
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py in <listcomp>(.0)
1839 batch_size, attention_mechanism.batch_size, message=error_message
1840 )
-> 1841 for attention_mechanism in self._attention_mechanisms
1842 ]
1843
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/check_ops.py in assert_equal_v2(x, y, message, summarize, name)
645 execution or if `x` and `y` are statically known.
646 """
--> 647 return assert_equal(x=x, y=y, summarize=summarize, message=message, name=name)
648
649
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/check_ops.py in assert_equal(x, y, data, summarize, message, name)
656 return None if context.executing_eagerly() else control_flow_ops.no_op()
657 return _binary_assert('==', 'assert_equal', math_ops.equal, np.equal, x, y,
--> 658 data, summarize, message, name)
659
660
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/check_ops.py in _binary_assert(sym, opname, op_func, static_func, x, y, data, summarize, message, name)
331 test_op = op_func(x, y)
332 condition = math_ops.reduce_all(test_op)
--> 333 if condition:
334 return
335
/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/ops.py in __bool__(self)
755 `TypeError`.
756 """
--> 757 self._disallow_bool_casting()
758
759 def __nonzero__(self):
/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/ops.py in _disallow_bool_casting(self)
524 else:
525 # Default: V1-style Graph execution.
--> 526 self._disallow_in_graph_mode("using a `tf.Tensor` as a Python `bool`")
527
528 def _disallow_iteration(self):
/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/ops.py in _disallow_in_graph_mode(self, task)
513 raise errors.OperatorNotAllowedInGraphError(
514 "{} is not allowed in Graph execution. Use Eager execution or decorate"
--> 515 " this function with @tf.function.".format(task))
516
517 def _disallow_bool_casting(self):
OperatorNotAllowedInGraphError: using a `tf.Tensor` as a Python `bool` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
When eager execution is disabled by calling
tf.compat.v1.disable_eager_execution()
then decoder_cell.get_initial_state works.
@guillaumekln Do you know if it's a known problem?
When I run the full code with eager mode disabled, it fails with a different error:
... [see code above] ...
# get the initial_state to call decoder
BATCH_SIZE = 4
initial_state = decoder_cell.get_initial_state(dtype=tf.float32, batch_size=BATCH_SIZE)
decoder = tfa.seq2seq.BasicDecoder(
cell = decoder_cell,
sampler = tfa.seq2seq.TrainingSampler(),
output_layer = tf.keras.layers.Dense(VOCAB_SIZE) # the projection layer with num_outputs
)
dec_input_layer = tf.keras.Input(shape = [None], dtype = tf.int32)
dec_embed_layer = tf.keras.layers.Embedding(VOCAB_SIZE, EMBEDDING_SIZE)(dec_input_layer)
outputs, _, _ = decoder(
dec_embed_layer,
initial_state=initial_state
)
New error
ValueError Traceback (most recent call last)
<ipython-input-7-9588bb9b4f9e> in <module>()
43 outputs, _, _ = decoder(
44 dec_embed_layer,
---> 45 initial_state=initial_state
46 )
1 frames
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
776 outputs = base_layer_utils.mark_as_return(outputs, acd)
777 else:
--> 778 outputs = call_fn(cast_inputs, *args, **kwargs)
779
780 except errors.OperatorNotAllowedInGraphError as e:
/tensorflow-2.1.0/python3.6/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
235 except Exception as e: # pylint:disable=broad-except
236 if hasattr(e, 'ag_error_metadata'):
--> 237 raise e.ag_error_metadata.to_exception(e)
238 else:
239 raise
ValueError: in converted code:
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/decoder.py:162 call *
return dynamic_decode(
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/decoder.py:405 body *
(next_outputs, decoder_state, next_inputs, decoder_finished) = decoder.step(
/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/control_flow_ops.py:2478 while_loop_v2
return_same_structure=True)
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/basic_decoder.py:134 step *
cell_outputs, cell_state = self.cell(inputs, state, training=training)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:778 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py:2065 call *
attention, alignments, next_attention_state = self._attention_fn(
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py:1572 _compute_attention *
alignments, next_attention_state = attention_mechanism(
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py:201 __call__ *
return super().__call__(inputs, **kwargs)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:778 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py:265 call *
return self._calculate_attention(query, state)
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py:613 _calculate_attention *
score = _luong_score(query, self.keys, self.scale_weight)
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py:483 _luong_score *
raise ValueError(
ValueError: Incompatible or unknown inner dimensions between query and keys. Query (Tensor("attention_wrapper_2/checked_cell_output:0", shape=(4, 512), dtype=float32)) has units: 512. Keys (Tensor("LuongAttention_2/LuongAttention/memory_layer/Tensordot:0", shape=(None, 512, 2), dtype=float32)) have units: 2. Perhaps you need to set num_units to the keys' dimension (2)?
The initial error is the same issue as #673, which was closed without a complete resolution. It occurs because BATCH_SIZE is set to a plain Python value, whereas it should depend on your input, e.g. tf.shape(enc_embed_layer)[0].
But after this change, another error appears (as reported in #673). Not being a Keras expert, I'm still trying to figure this out. When running Keras in eager mode, there is a strange mix of eager and graph tensors.
@ulf1, please find the corrected code below. It still has to be executed in graph mode because of improper management of graph tensors in eager mode deep inside TensorFlow core.
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_addons as tfa
print(tf.__version__)
# Disable eager mode
tf.compat.v1.disable_eager_execution()
# Input Layer
enc_input_layer = tf.keras.Input(shape = [None], dtype = tf.int32)
# Embedding Layer
EMBEDDING_SIZE = 12
VOCAB_SIZE = 34
enc_embed_layer = tf.keras.layers.Embedding(VOCAB_SIZE, EMBEDDING_SIZE)(enc_input_layer)
# Encoder
LSTM_SIZE_ENC = 512 # num_units
encoder = tf.keras.layers.LSTM(LSTM_SIZE_ENC, return_sequences=True,return_state=True)
encoder_outputs, state_h, state_c = encoder(enc_embed_layer)
encoder_state = [state_h, state_c]
# Decoder
LSTM_SIZE_DEC = 512
ATTENTION_UNITS = 2
decoder_cell = tfa.seq2seq.AttentionWrapper(
tf.keras.layers.LSTMCell(ATTENTION_UNITS),
attention_mechanism = tfa.seq2seq.LuongAttention(
ATTENTION_UNITS,
#memory_sequence_length = 32
memory = encoder_outputs),
)#attention_layer_size = ATTENTION_UNITS)
# get the initial_state to call decoder
BATCH_SIZE = tf.shape(enc_embed_layer)[0]
initial_state = decoder_cell.get_initial_state(dtype=tf.float32, batch_size=BATCH_SIZE)
# Instantiate Decoder
decoder = tfa.seq2seq.BasicDecoder(
cell = decoder_cell,
sampler = tfa.seq2seq.TrainingSampler(),
output_layer = tf.keras.layers.Dense(VOCAB_SIZE) # the projection layer with num_outputs
)
# Input Layer
dec_input_layer = tf.keras.Input(shape = [None], dtype = tf.int32)
# Embedding Layer
dec_embed_layer = tf.keras.layers.Embedding(VOCAB_SIZE, EMBEDDING_SIZE)(dec_input_layer)
outputs, _, _ = decoder(
dec_embed_layer,
initial_state=initial_state
)
# logits = outputs.rnn_output
@failure-to-thrive Thank you.
So we need to wait for tensorflow core to change.
So we need to wait for tensorflow core to change.
So they claim they fixed it: https://github.com/tensorflow/tensorflow/issues/37264#issuecomment-596834233
Most helpful comment
@ulf1, please find the corrected code below. It still has to be executed in graph mode because of improper management of graph tensors in eager mode deep inside TensorFlow core.