I'm building a sequential variational autoencoder, but I get an error if I use a value > 1 as batch_size:
InvalidArgumentError (see above for traceback): Incompatible shapes: [2,5] vs. [2]
[[Node: training/Adam/gradients/loss/td_loss/sub_3_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@training/Adam/gradients/loss/td_loss/sub_3_grad/Reshape_1"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training/Adam/gradients/loss/td_loss/sub_3_grad/Shape, training/Adam/gradients/loss/td_loss/sub_3_grad/Shape_1)]]
But if I change the batch size to 1, I'm able to train the network:
1/4 [======>.......................] - ETA: 6s - loss: 43080.3672
2/4 [==============>...............] - ETA: 2s - loss: 42773.6367
3/4 [=====================>........] - ETA: 0s - loss: 42599.2396
4/4 [==============================] - 2s 591ms/step - loss: 42735.5234
Here's my code, a standalone script that you can run as-is. I copied the Keras VAE example and changed the last layer to a TimeDistributed layer to reproduce the error:
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras import backend as K
from keras import metrics
from keras.datasets import mnist
batch_size = 100
original_dim = 784
latent_dim = 2
intermediate_dim = 256
epochs = 50
epsilon_std = 1.0
x = Input(shape=(original_dim,))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0.,
                              stddev=epsilon_std)
    return z_mean + K.exp(z_log_var / 2) * epsilon
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
# we instantiate these layers separately so as to reuse them later
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)
# vae = Model(x, x_decoded_mean)
from keras.layers import RepeatVector, TimeDistributed
time_step = 3
rv = RepeatVector(time_step)
repeated_decoded_mean = rv(x_decoded_mean)
decoder_dense = Dense(5)
decoded_sequence = TimeDistributed(decoder_dense)(repeated_decoded_mean)
# instantiate VAE model
vae = Model(x, decoded_sequence)
vae.summary()
def vae_loss_func(y_true, y_pred):
    # Compute VAE loss
    # xent_loss = original_dim * metrics.binary_crossentropy(y_true, y_pred)
    xent_loss = original_dim * metrics.categorical_crossentropy(y_true, y_pred)
    kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    loss = K.mean(xent_loss + kl_loss)
    return loss
vae.compile(loss=vae_loss_func, optimizer='rmsprop')
# train the VAE on MNIST digits
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_train = x_train[:2]
# create dummy labels
# time step == 3 and softmax dimension == 5
y_train = [[[1],[3], [4]], [[1],[3], [4]]]
y_train = np.array(y_train)
vae.fit(x_train,
        y_train,
        shuffle=True,
        epochs=epochs,
        batch_size=batch_size,)
full error dump:
Using TensorFlow backend.
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) (None, 784) 0
__________________________________________________________________________________________________
dense_1 (Dense) (None, 256) 200960 input_1[0][0]
__________________________________________________________________________________________________
dense_2 (Dense) (None, 2) 514 dense_1[0][0]
__________________________________________________________________________________________________
dense_3 (Dense) (None, 2) 514 dense_1[0][0]
__________________________________________________________________________________________________
lambda_1 (Lambda) (None, 2) 0 dense_2[0][0]
dense_3[0][0]
__________________________________________________________________________________________________
dense_4 (Dense) (None, 256) 768 lambda_1[0][0]
__________________________________________________________________________________________________
dense_5 (Dense) (None, 784) 201488 dense_4[0][0]
__________________________________________________________________________________________________
repeat_vector_1 (RepeatVector) (None, 3, 784) 0 dense_5[0][0]
__________________________________________________________________________________________________
time_distributed_1 (TimeDistrib (None, 3, 5) 3925 repeat_vector_1[0][0]
==================================================================================================
Total params: 408,169
Trainable params: 408,169
Non-trainable params: 0
__________________________________________________________________________________________________
Epoch 1/50
2018-05-09 16:13:24.724251: I tensorflow/core/platform/cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
Traceback (most recent call last):
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1322, in _do_call
return fn(*args)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1307, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1409, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [2,3] vs. [2]
[[Node: training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/Reshape"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/Shape, training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/Shape_1)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/charleschou/PycharmProjects/bigdata/prototypes/nlp/language_model/variational_asae/vae_example.py", line 94, in <module>
batch_size=batch_size,)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/keras/engine/training.py", line 1705, in fit
validation_steps=validation_steps)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/keras/engine/training.py", line 1236, in _fit_loop
outs = f(ins_batch)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2482, in __call__
**self.session_kwargs)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run
run_metadata_ptr)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run
run_metadata)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [2,3] vs. [2]
[[Node: training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/Reshape"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/Shape, training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/Shape_1)]]
Caused by op 'training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/BroadcastGradientArgs', defined at:
File "/Users/charleschou/PycharmProjects/bigdata/prototypes/nlp/language_model/variational_asae/vae_example.py", line 94, in <module>
batch_size=batch_size,)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/keras/engine/training.py", line 1682, in fit
self._make_train_function()
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/keras/engine/training.py", line 992, in _make_train_function
loss=self.total_loss)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/keras/optimizers.py", line 244, in get_updates
grads = self.get_gradients(loss, params)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/keras/optimizers.py", line 78, in get_gradients
grads = K.gradients(loss, params)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2519, in gradients
return tf.gradients(loss, variables, colocate_gradients_with_ops=True)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 494, in gradients
gate_gradients, aggregation_method, stop_gradients)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 636, in _GradientsHelper
lambda: grad_fn(op, *out_grads))
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 385, in _MaybeCompile
return grad_fn() # Exit early
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 636, in <lambda>
lambda: grad_fn(op, *out_grads))
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py", line 842, in _AddGrad
rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 674, in broadcast_gradient_args
"BroadcastGradientArgs", s0=s0, s1=s1, name=name)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
op_def=op_def)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
...which was originally created as op 'loss/time_distributed_1_loss/add_1', defined at:
File "/Users/charleschou/PycharmProjects/bigdata/prototypes/nlp/language_model/variational_asae/vae_example.py", line 74, in <module>
vae.compile(loss=vae_loss_func, optimizer='rmsprop')
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/keras/engine/training.py", line 830, in compile
sample_weight, mask)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/keras/engine/training.py", line 429, in weighted
score_array = fn(y_true, y_pred)
File "/Users/charleschou/PycharmProjects/bigdata/prototypes/nlp/language_model/variational_asae/vae_example.py", line 66, in vae_loss_func
loss = K.mean(xent_loss + kl_loss)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 979, in binary_op_wrapper
return func(x, y, name=name)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 297, in add
"Add", x=x, y=y, name=name)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
op_def=op_def)
File "/Users/charleschou/anaconda/envs/ML-dev/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Incompatible shapes: [2,3] vs. [2]
[[Node: training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/Reshape"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/Shape, training/RMSprop/gradients/loss/time_distributed_1_loss/add_1_grad/Shape_1)]]
The problem is here:
def vae_loss_func(y_true, y_pred):
    # Compute VAE loss
    # xent_loss = original_dim * metrics.binary_crossentropy(y_true, y_pred)
    xent_loss = original_dim * metrics.categorical_crossentropy(y_true, y_pred)
    kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    loss = K.mean(xent_loss + kl_loss)
    return loss
The shape of xent_loss is (?, 3), where 3 is the time_step of the TimeDistributed layer. The kl_loss shape is (?,).
How should I reshape the xent_loss?
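To see why it fails only when batch_size > 1, here's a minimal NumPy sketch of the same broadcast (the shape numbers are taken straight from the traceback):

import numpy as np

# Per-timestep cross-entropy: shape (batch_size, time_step)
xent_loss = np.zeros((2, 3))
# Per-sample KL term: shape (batch_size,)
kl_loss = np.zeros(2)

# Broadcasting aligns trailing axes, so (2, 3) + (2,) compares 3 vs 2
# and fails -- the same "Incompatible shapes: [2,3] vs. [2]" as above.
try:
    xent_loss + kl_loss
except ValueError as e:
    print(e)

# With batch_size == 1 the shapes are (1, 3) + (1,); the size-1 axis
# broadcasts, which is why training happens to work in that case only.
print((np.zeros((1, 3)) + np.zeros(1)).shape)  # (1, 3)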
As described above, the xent_loss and kl_loss shapes don't match. I added this line to solve the problem:
xent_loss = K.mean(xent_loss)
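For context, a sketch of the whole loss function with that one-line fix applied. Note that K.mean without an axis argument collapses xent_loss to a scalar, which then broadcasts cleanly against the (?,) kl_loss; K.mean(xent_loss, axis=-1) would keep a per-sample value instead:

def vae_loss_func(y_true, y_pred):
    # Per-timestep cross-entropy: shape (batch_size, time_step)
    xent_loss = original_dim * metrics.categorical_crossentropy(y_true, y_pred)
    # Collapse to a scalar so the addition below can broadcast
    xent_loss = K.mean(xent_loss)
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return K.mean(xent_loss + kl_loss)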
Had a similar issue, but with 2D convolutions within an autoencoder. Changed my loss function in a similar manner and now I can train on batches.
Or you can just flatten your inputs, y_true and y_pred:
xent_loss = original_dim * metrics.categorical_crossentropy(K.flatten(y_true), K.flatten(y_pred))
ref. https://keras.io/examples/variational_autoencoder_deconv/
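For completeness, a sketch of the full loss with that flatten applied (the linked deconv example does this with binary_crossentropy; the categorical version here just follows the line above):

def vae_loss_func(y_true, y_pred):
    # Flattening both tensors to 1-D makes the cross-entropy a single value,
    # so there is no (batch, time_step) shape left to clash with kl_loss
    xent_loss = original_dim * metrics.categorical_crossentropy(
        K.flatten(y_true), K.flatten(y_pred))
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return K.mean(xent_loss + kl_loss)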