I'm pretty new to machine learning and Keras, and running into a model/data configuration issue.
Let's say my data is structured like so:
[len(sentences), maxlen, len(chars)] as the input
And I want
[len(sentences), maxlen, len(chars)] as the output
What do I need to do to the input for the LSTM model to accept it? And what changes would I need to make to input_shape=(maxlen, len(chars)) in the first LSTM layer of the model?
I keep getting this error: Exception: Error when checking model target: expected activation_1 to have 2 dimensions, but got array with shape (100L, 9L, 15L)
Thank you for your time.
Could you show us your network code? To keep the 3D shape of your input, you have to set return_sequences=True in the LSTM layers. I think the error you got is because your activation expects a 2D matrix but received a 3D one instead.
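For instance, a minimal sketch (using the 9-timestep, 15-char shapes from your error message, with a made-up layer size) showing the shape difference:

from keras.models import Sequential
from keras.layers import LSTM

m = Sequential()
m.add(LSTM(16, input_shape=(9, 15)))  # default return_sequences=False
print(m.output_shape)                 # (None, 16) -- only the last timestep, 2D

m = Sequential()
m.add(LSTM(16, return_sequences=True, input_shape=(9, 15)))
print(m.output_shape)                 # (None, 9, 16) -- the full sequence, 3D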
Based on lstm_text_generation.py in Examples/
text = "0123456789+-/* "
print('corpus length:', len(text))
ops = {'+': operator.add, '-':operator.sub, '/':operator.div, '*':operator.mul}
chars = set(text)
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))
#create math statements
maxlen=9
n_neurons = maxlen**3
sentences = []
results = []
for i in range(0, 100):
x_val = np.random.randint(1, 9999)
op = np.random.choice(ops.keys())
y_val = np.random.randint(1, 9999)
sentences.append(str(x_val)+op+str(y_val))
results.append(str(ops[op](x_val, y_val)))
print('nb sequences:', len(sentences))
print('Vectorization...')
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
for t, char in enumerate(sentence):
X[i, t, char_indices[char]] = 1
for u, char in enumerate(results[i]):
y[i, u, char_indices[char]] = 1
# build the model: 2 stacked LSTM
print('Build model...')
model = Sequential()
model.add(LSTM(n_neurons, return_sequences=True, input_shape=(maxlen, len(chars))))
model.add(Dropout(0.2))
model.add(LSTM(n_neurons, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
def sample(a, temperature=1.0):
# helper function to sample an index from a probability array
a = np.log(a) / temperature
a = np.exp(a) / np.sum(np.exp(a))
return np.argmax(np.random.multinomial(1, a, 1))
# train the model, output generated text after each iteration
for iteration in range(1, 60):
print()
print('-' * 50)
print('Iteration', iteration)
model.fit(X, y, batch_size=1, nb_epoch=1)
break
If y is 3D (sequences, timesteps, dimensions), you have to use TimeDistributed with a Dense layer at the end. Check out addition_rnn for details. In the text_generation example, y has only two dimensions; that's why a plain Dense layer at the end was acceptable.
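Applied to the model above, that means returning sequences from both LSTM layers and wrapping the output layer, roughly like this (a sketch, untested, reusing n_neurons, maxlen, and chars from the code above):

from keras.layers import TimeDistributed  # plus the imports from the code above

model = Sequential()
model.add(LSTM(n_neurons, return_sequences=True, input_shape=(maxlen, len(chars))))
model.add(Dropout(0.2))
model.add(LSTM(n_neurons, return_sequences=True))  # keep the sequence: (batch, maxlen, n_neurons)
model.add(Dropout(0.2))
model.add(TimeDistributed(Dense(len(chars), activation='softmax')))  # one softmax over chars per timestep
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

The model now outputs (batch, maxlen, len(chars)), which matches the 3D y.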
Thank you! Much appreciated.
Newbie here!! Adding the TimeDistributed wrapper gives me an assertion error.
(Please ignore the variable names; they've just been copied off a seq2seq model.)
from __future__ import print_function
from keras.models import Model, Sequential
from keras.layers import Input, LSTM, Dense, Bidirectional, Reshape, Flatten, TimeDistributed
import numpy as np
import random

data_path = 'test-1.txt'
input_texts = []
target_texts = []
input_characters = ["a", "-", "b", "c"]
target_characters = ["-", "A", "B", "C"]
target_characters.append("-")
with open(data_path, 'r', encoding='utf8') as f:
    lines = f.read().split('\n')
random.seed(9)
random.shuffle(lines)
for line in lines:
    input_text, target_text = line.split("\t")
    input_text = input_text.lower()
    target_text = '\t' + target_text + '\n'
    input_texts.append(input_text)
    target_texts.append(target_text)
input_characters = sorted(list(input_characters))
target_characters = sorted(list(target_characters))
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = 36
max_decoder_seq_length = 36

print('Number of samples:', len(input_texts))
print('Number of unique input tokens:', num_encoder_tokens)
print('Number of unique output tokens:', num_decoder_tokens)
print('Max sequence length for inputs:', max_encoder_seq_length)
print('Max sequence length for outputs:', max_decoder_seq_length)

input_token_index = dict(
    [(char, i) for i, char in enumerate(input_characters)])
target_token_index = dict(
    [(char, i) for i, char in enumerate(target_characters)])

encoder_input_data = np.zeros(
    (len(input_texts), 36, num_encoder_tokens),
    dtype='float32')
decoder_input_data = np.zeros(
    (len(input_texts), 36, num_decoder_tokens),
    dtype='float32')
for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token_index[char]] = 1.
    for t, char in enumerate(target_text):
        decoder_input_data[i, t, target_token_index[char]] = 1.

print('Build model...')
model = Sequential()
model.add(Bidirectional(LSTM(128), input_shape=(36, num_encoder_tokens)))
model.add(TimeDistributed(Dense(128, activation='relu')))
model.add(Dense(num_decoder_tokens, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
model.fit(encoder_input_data, decoder_input_data, epochs=10, batch_size=128, validation_split=0.2)
Any help would be greatly appreciated!
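Following the advice above: the assertion most likely comes from TimeDistributed receiving a 2D tensor, since Bidirectional(LSTM(128)) without return_sequences=True returns only the last timestep. A sketch of how the model could look if you want one prediction per timestep (untested, and assuming your targets really are (samples, 36, num_decoder_tokens)):

model = Sequential()
model.add(Bidirectional(LSTM(128, return_sequences=True),
                        input_shape=(36, num_encoder_tokens)))     # output: (batch, 36, 256)
model.add(TimeDistributed(Dense(128, activation='relu')))          # now sees the 3D input it requires
model.add(TimeDistributed(Dense(num_decoder_tokens, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')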