Addons: decoder_cell.get_initial_state fails in eager mode

Created on 25 Feb 2020 · 5Comments · Source: tensorflow/addons

System information

OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Online, colab
TensorFlow version and how it was installed (source or binary): 2.1.0
TensorFlow-Addons version and how it was installed (source or binary): 0.8.1
Python version: 3.6
Is GPU used? (yes/no): both throw an exception

Describe the bug

decoder_cell.get_initial_state throws an exception when called.

Code to reproduce the issue
Colab notebook: https://colab.research.google.com/drive/1iwLGirQ9_avql9sKsGY4Gc5JF1rpwYrO

# Input Layer
enc_input_layer = tf.keras.Input(shape = [None], dtype = tf.int32)

# Embedding Layer
EMBEDDING_SIZE = 12
VOCAB_SIZE = 34
enc_embed_layer = tf.keras.layers.Embedding(VOCAB_SIZE, EMBEDDING_SIZE)(enc_input_layer)

# Encoder
LSTM_SIZE_ENC = 512  # num_units

encoder = tf.keras.layers.LSTM(LSTM_SIZE_ENC, return_state=True)
encoder_outputs, state_h, state_c = encoder(enc_embed_layer) 
encoder_state = [state_h, state_c]

# Decoder
LSTM_SIZE_DEC = 512
ATTENTION_UNITS = 2

decoder_cell = tfa.seq2seq.AttentionWrapper(
        tf.keras.layers.LSTMCell(LSTM_SIZE_DEC), 
        attention_mechanism = tfa.seq2seq.LuongAttention(
            ATTENTION_UNITS, 
            #memory_sequence_length = 32
            memory = encoder_state), 
        attention_layer_size = ATTENTION_UNITS)

# get the initial_state to call decoder
BATCH_SIZE = 4
initial_state = decoder_cell.get_initial_state(dtype=tf.float32, batch_size=BATCH_SIZE)

Other info / logs

OperatorNotAllowedInGraphError            Traceback (most recent call last)
<ipython-input-14-70f3e9fceb40> in <module>()
      2 
      3 if with_attn:
----> 4     initial_state = decoder_cell.get_initial_state(dtype=tf.float32, batch_size=BATCH_SIZE)  # Crashes(!)
      5     initial_state = initial_state.clone(cell_state=encoder_state)
      6 else:

8 frames
/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py in get_initial_state(self, inputs, batch_size, dtype)
   1960             )
   1961             with tf.control_dependencies(
-> 1962                 self._batch_size_checks(batch_size, error_message)
   1963             ):  # pylint: disable=bad-continuation
   1964                 cell_state = tf.nest.map_structure(

/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py in _batch_size_checks(self, batch_size, error_message)
   1839                 batch_size, attention_mechanism.batch_size, message=error_message
   1840             )
-> 1841             for attention_mechanism in self._attention_mechanisms
   1842         ]
   1843 

/tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py in <listcomp>(.0)
   1839                 batch_size, attention_mechanism.batch_size, message=error_message
   1840             )
-> 1841             for attention_mechanism in self._attention_mechanisms
   1842         ]
   1843 

/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/check_ops.py in assert_equal_v2(x, y, message, summarize, name)
    645       execution or if `x` and `y` are statically known.
    646   """
--> 647   return assert_equal(x=x, y=y, summarize=summarize, message=message, name=name)
    648 
    649 

/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/check_ops.py in assert_equal(x, y, data, summarize, message, name)
    656       return None if context.executing_eagerly() else control_flow_ops.no_op()
    657   return _binary_assert('==', 'assert_equal', math_ops.equal, np.equal, x, y,
--> 658                         data, summarize, message, name)
    659 
    660 

/tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/check_ops.py in _binary_assert(sym, opname, op_func, static_func, x, y, data, summarize, message, name)
    331       test_op = op_func(x, y)
    332       condition = math_ops.reduce_all(test_op)
--> 333       if condition:
    334         return
    335 

/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/ops.py in __bool__(self)
    755       `TypeError`.
    756     """
--> 757     self._disallow_bool_casting()
    758 
    759   def __nonzero__(self):

/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/ops.py in _disallow_bool_casting(self)
    524     else:
    525       # Default: V1-style Graph execution.
--> 526       self._disallow_in_graph_mode("using a `tf.Tensor` as a Python `bool`")
    527 
    528   def _disallow_iteration(self):

/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/ops.py in _disallow_in_graph_mode(self, task)
    513     raise errors.OperatorNotAllowedInGraphError(
    514         "{} is not allowed in Graph execution. Use Eager execution or decorate"
--> 515         " this function with @tf.function.".format(task))
    516 
    517   def _disallow_bool_casting(self):

OperatorNotAllowedInGraphError: using a `tf.Tensor` as a Python `bool` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.

disable eager

When the following is set

tf.compat.v1.disable_eager_execution()

then decoder_cell.get_initial_state works.

seq2seq

Source

ulf1

Most helpful comment

@ulf1, please find the corrected code below. It still has to be executed in graph mode, because graph tensors are not managed properly in eager mode deep inside TensorFlow core.

import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_addons as tfa
print(tf.__version__)

# Disable eager mode
tf.compat.v1.disable_eager_execution()

# Input Layer
enc_input_layer = tf.keras.Input(shape = [None], dtype = tf.int32)

# Embedding Layer
EMBEDDING_SIZE = 12
VOCAB_SIZE = 34
enc_embed_layer = tf.keras.layers.Embedding(VOCAB_SIZE, EMBEDDING_SIZE)(enc_input_layer)

# Encoder
LSTM_SIZE_ENC = 512  # num_units

encoder = tf.keras.layers.LSTM(LSTM_SIZE_ENC, return_sequences=True,return_state=True)
encoder_outputs, state_h, state_c = encoder(enc_embed_layer)
encoder_state = [state_h, state_c]

# Decoder
LSTM_SIZE_DEC = 512
ATTENTION_UNITS = 2

decoder_cell = tfa.seq2seq.AttentionWrapper(
        tf.keras.layers.LSTMCell(ATTENTION_UNITS),
        attention_mechanism = tfa.seq2seq.LuongAttention(
            ATTENTION_UNITS,
            #memory_sequence_length = 32
            memory = encoder_outputs),
        )#attention_layer_size = ATTENTION_UNITS)

# get the initial_state to call decoder
BATCH_SIZE = tf.shape(enc_embed_layer)[0]
initial_state = decoder_cell.get_initial_state(dtype=tf.float32, batch_size=BATCH_SIZE)


# Instantiate Decoder
decoder = tfa.seq2seq.BasicDecoder(
    cell = decoder_cell,
    sampler = tfa.seq2seq.TrainingSampler(),
    output_layer = tf.keras.layers.Dense(VOCAB_SIZE)  # the projection layer with num_outputs
)

# Input Layer
dec_input_layer = tf.keras.Input(shape = [None], dtype = tf.int32)
# Embedding Layer
dec_embed_layer = tf.keras.layers.Embedding(VOCAB_SIZE, EMBEDDING_SIZE)(dec_input_layer)

outputs, _, _ = decoder(
    dec_embed_layer,
    initial_state=initial_state
)

# logits = outputs.rnn_output

failure-to-thrive on 1 Mar 2020

👍2

All 5 comments

@guillaumekln Do you know if it's a known problem?

When I run the full code with eager mode disabled, it fails with a different error:

... [see code above] ...
# get the initial_state to call decoder
BATCH_SIZE = 4
initial_state = decoder_cell.get_initial_state(dtype=tf.float32, batch_size=BATCH_SIZE)

decoder = tfa.seq2seq.BasicDecoder(
    cell = decoder_cell, 
    sampler = tfa.seq2seq.TrainingSampler(),
    output_layer = tf.keras.layers.Dense(VOCAB_SIZE)  # the projection layer with num_outputs
)

dec_input_layer = tf.keras.Input(shape = [None], dtype = tf.int32)
dec_embed_layer = tf.keras.layers.Embedding(VOCAB_SIZE, EMBEDDING_SIZE)(dec_input_layer)

outputs, _, _ = decoder(
    dec_embed_layer,
    initial_state=initial_state
)

New error

ValueError                                Traceback (most recent call last)
<ipython-input-7-9588bb9b4f9e> in <module>()
     43 outputs, _, _ = decoder(
     44     dec_embed_layer,
---> 45     initial_state=initial_state
     46 )

1 frames
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
    776                     outputs = base_layer_utils.mark_as_return(outputs, acd)
    777                 else:
--> 778                   outputs = call_fn(cast_inputs, *args, **kwargs)
    779 
    780             except errors.OperatorNotAllowedInGraphError as e:

/tensorflow-2.1.0/python3.6/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
    235       except Exception as e:  # pylint:disable=broad-except
    236         if hasattr(e, 'ag_error_metadata'):
--> 237           raise e.ag_error_metadata.to_exception(e)
    238         else:
    239           raise

ValueError: in converted code:

    /tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/decoder.py:162 call  *
        return dynamic_decode(
    /tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/decoder.py:405 body  *
        (next_outputs, decoder_state, next_inputs, decoder_finished) = decoder.step(
    /tensorflow-2.1.0/python3.6/tensorflow_core/python/ops/control_flow_ops.py:2478 while_loop_v2
        return_same_structure=True)
    /tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/basic_decoder.py:134 step  *
        cell_outputs, cell_state = self.cell(inputs, state, training=training)
    /tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:778 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py:2065 call  *
        attention, alignments, next_attention_state = self._attention_fn(
    /tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py:1572 _compute_attention  *
        alignments, next_attention_state = attention_mechanism(
    /tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py:201 __call__  *
        return super().__call__(inputs, **kwargs)
    /tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/base_layer.py:778 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py:265 call  *
        return self._calculate_attention(query, state)
    /tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py:613 _calculate_attention  *
        score = _luong_score(query, self.keys, self.scale_weight)
    /tensorflow-2.1.0/python3.6/tensorflow_addons/seq2seq/attention_wrapper.py:483 _luong_score  *
        raise ValueError(

    ValueError: Incompatible or unknown inner dimensions between query and keys. Query (Tensor("attention_wrapper_2/checked_cell_output:0", shape=(4, 512), dtype=float32)) has units: 512.  Keys (Tensor("LuongAttention_2/LuongAttention/memory_layer/Tensordot:0", shape=(None, 512, 2), dtype=float32)) have units: 2.  Perhaps you need to set num_units to the keys' dimension (2)?

ulf1 on 26 Feb 2020

The initial error is the same issue as #673, but it was closed without a complete resolution. It is because you set the BATCH_SIZE to a Python value, while it should depend on your input, e.g. tf.shape(enc_embed_layer)[0].

But after this change, there is another error (also reported in #673). As a non-Keras expert, I'm still trying to figure this out. When running Keras in eager mode, there is a strange mix of eager and graph tensors.

guillaumekln on 26 Feb 2020

@ulf1, please find the corrected code below. It still has to be executed in graph mode, because graph tensors are not managed properly in eager mode deep inside TensorFlow core.

import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_addons as tfa
print(tf.__version__)

# Disable eager mode
tf.compat.v1.disable_eager_execution()

# Input Layer
enc_input_layer = tf.keras.Input(shape = [None], dtype = tf.int32)

# Embedding Layer
EMBEDDING_SIZE = 12
VOCAB_SIZE = 34
enc_embed_layer = tf.keras.layers.Embedding(VOCAB_SIZE, EMBEDDING_SIZE)(enc_input_layer)

# Encoder
LSTM_SIZE_ENC = 512  # num_units

encoder = tf.keras.layers.LSTM(LSTM_SIZE_ENC, return_sequences=True,return_state=True)
encoder_outputs, state_h, state_c = encoder(enc_embed_layer)
encoder_state = [state_h, state_c]

# Decoder
LSTM_SIZE_DEC = 512
ATTENTION_UNITS = 2

decoder_cell = tfa.seq2seq.AttentionWrapper(
        tf.keras.layers.LSTMCell(ATTENTION_UNITS),
        attention_mechanism = tfa.seq2seq.LuongAttention(
            ATTENTION_UNITS,
            #memory_sequence_length = 32
            memory = encoder_outputs),
        )#attention_layer_size = ATTENTION_UNITS)

# get the initial_state to call decoder
BATCH_SIZE = tf.shape(enc_embed_layer)[0]
initial_state = decoder_cell.get_initial_state(dtype=tf.float32, batch_size=BATCH_SIZE)


# Instantiate Decoder
decoder = tfa.seq2seq.BasicDecoder(
    cell = decoder_cell,
    sampler = tfa.seq2seq.TrainingSampler(),
    output_layer = tf.keras.layers.Dense(VOCAB_SIZE)  # the projection layer with num_outputs
)

# Input Layer
dec_input_layer = tf.keras.Input(shape = [None], dtype = tf.int32)
# Embedding Layer
dec_embed_layer = tf.keras.layers.Embedding(VOCAB_SIZE, EMBEDDING_SIZE)(dec_input_layer)

outputs, _, _ = decoder(
    dec_embed_layer,
    initial_state=initial_state
)

# logits = outputs.rnn_output

failure-to-thrive on 1 Mar 2020

👍2

@failure-to-thrive Thank you.
So we need to wait for tensorflow core to change.

ulf1 on 2 Mar 2020