Loading and testing a saved model does not initialize the recurrent state of the LSTM model, which throws an error when running the restored agent:
ValueError: Must pass in RNN state batches for placeholders [<tf.Tensor 'default_policy/Placeholder:0' shape=(?, 16) dtype=float32>, <tf.Tensor 'default_policy/Placeholder_1:0' shape=(?, 16) dtype=float32>], got []
With no LSTM (use_lstm=False), the algorithm works fine (see the example script below).
I couldn't find ANY way to initialize the state or pass in initial values, so I'm not sure whether this is a bug or a wrong approach.
Ray version and other system information (Python version, TensorFlow version, OS):
Ray: 0.9.0.dev0
Python: 3.7.7
TF: 1.15
Please provide a script that can be run to reproduce the issue. The script should have no external library dependencies (i.e., use fake or mock data / environments):
If we cannot run your script, we cannot fix your issue.
Original code from @stefanbschneider:
https://github.com/ray-project/ray/issues/9123#issuecomment-649543419
A simple class that trains, saves, loads, and tests an agent on CartPole:
import gym
import ray
from ray import tune
from ray.rllib.agents import impala


class customExperimentClass():
    def __init__(self):
        ray.shutdown()
        ray.init(num_cpus=2, num_gpus=0)
        self.env = gym.make("CartPole-v0")
        self.config = dict(**{
            "env": "CartPole-v0",
            # "callbacks": MyCallbacks,
            "num_workers": 0,
            "num_gpus": 0,
            "evaluation_num_workers": 1,
            # Custom eval function
            # "custom_eval_function": custom_eval_function,
            # Enable evaluation, once per training iteration.
            "evaluation_interval": 1,
            # Run 1 episode each time evaluation runs.
            "evaluation_num_episodes": 1,
            "model": {
                # Nonlinearity for fully connected net (tanh, relu).
                "fcnet_activation": "tanh",
                # Number of hidden layers for fully connected net.
                "fcnet_hiddens": [16, 8],
                # Whether to wrap the model with an LSTM.
                # Needs LSTMWrapper.get_initial_state()?
                "use_lstm": True,  # <--- Change this to False to see it working.
                # Max seq len for training the LSTM, defaults to 20.
                "max_seq_len": 20,
                # Size of the LSTM cell.
                "lstm_cell_size": 16,
                # Whether to feed a_{t-1}, r_{t-1} to LSTM.
                "lstm_use_prev_action_reward": False,
            },
            "framework": "tf",
            "ignore_worker_failures": True,
        })
    def train(self):
        """
        Train an RLlib IMPALA agent using tune until any of the configured
        stopping criteria is met.
        See https://docs.ray.io/en/latest/tune/api_docs/execution.html#tune-run
        :return: Path to the saved agent (checkpoint) and tune's
            ExperimentAnalysis object.
        See https://docs.ray.io/en/latest/tune/api_docs/analysis.html#experimentanalysis-tune-experimentanalysis
        """
        # analysis = ray.tune.run(ppo.PPOTrainer, config=self.config,
        #                         local_dir=self.save_dir, stop=stop_criteria,
        #                         checkpoint_at_end=True)
        results = tune.run("IMPALA",
                           verbose=1,
                           config=self.config,
                           stop={"training_iteration": 2},
                           checkpoint_freq=1,
                           keep_checkpoints_num=1,
                           checkpoint_score_attr="training_iteration")
        # List of lists: one list per checkpoint; each checkpoint list contains
        # the path first and the metric value second.
        checkpoints = results.get_trial_checkpoints_paths(
            trial=results.get_best_trial("episode_reward_mean"),
            metric="episode_reward_mean")
        # Retrieve the checkpoint path; we only have a single checkpoint,
        # so take the first one.
        checkpoint_path = checkpoints[0][0]
        print("Checkpoint path:", checkpoint_path)
        return checkpoint_path, results
    def load(self, path):
        """
        Load a trained RLlib agent from the specified path.
        Call this before testing a trained agent.
        :param path: Path pointing to the agent's saved checkpoint
            (only used for RLlib agents).
        """
        self.agent = impala.ImpalaTrainer(config=self.config)
        self.agent.restore(path)
    def test(self):
        """Test the trained agent for a single episode. Return the episode reward."""
        env = self.env
        episode_reward = 0
        done = False
        obs = env.reset()
        # Run until the episode ends.
        while not done:
            action = self.agent.compute_action(obs)
            obs, reward, done, info = env.step(action)
            episode_reward += reward
        return episode_reward
# Class instance
exper = customExperimentClass()
# Train and save for 2 iterations
checkpoint_path, results = exper.train()
# Load the saved agent
exper.load(checkpoint_path)
# Test the loaded agent
exper.test()
If use_lstm=True, it throws this error:
ValueError: Must pass in RNN state batches for placeholders [<tf.Tensor 'default_policy/Placeholder:0' shape=(?, 16) dtype=float32>, <tf.Tensor 'default_policy/Placeholder_1:0' shape=(?, 16) dtype=float32>], got []
If use_lstm=False, it works fine and returns the episode reward.
Full traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-74-e113b831200e> in <module>
----> 1 exper.test()
<ipython-input-70-4c6c0df09014> in test(self)
82 obs = env.reset()
83 while not done:
---> 84 action = self.agent.compute_action(obs)
85 obs, reward, done, info = env.step(action)
86 episode_reward += reward
~/anaconda3/envs/ray2/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in compute_action(self, observation, state, prev_action, prev_reward, info, policy_id, full_fetch, explore)
813 clip_actions=self.config["clip_actions"],
814 explore=explore,
--> 815 timestep=self.global_vars["timestep"])
816
817 if state or full_fetch:
~/anaconda3/envs/ray2/lib/python3.7/site-packages/ray/rllib/policy/policy.py in compute_single_action(self, obs, state, prev_action, prev_reward, info, episode, clip_actions, explore, timestep, **kwargs)
174 episodes=episodes,
175 explore=explore,
--> 176 timestep=timestep)
177
178 single_action = unbatch(batched_action)
~/anaconda3/envs/ray2/lib/python3.7/site-packages/ray/rllib/policy/tf_policy.py in compute_actions(self, obs_batch, state_batches, prev_action_batch, prev_reward_batch, info_batch, episodes, explore, timestep, **kwargs)
292 prev_reward_batch=prev_reward_batch,
293 explore=explore,
--> 294 timestep=timestep)
295
296 # Execute session run to get action (and other fetches).
~/anaconda3/envs/ray2/lib/python3.7/site-packages/ray/rllib/policy/tf_policy.py in _build_compute_actions(self, builder, obs_batch, state_batches, prev_action_batch, prev_reward_batch, episodes, explore, timestep)
598 raise ValueError(
599 "Must pass in RNN state batches for placeholders {}, got {}".
--> 600 format(self._state_inputs, state_batches))
601
602 builder.add_feed_dict(self.extra_compute_action_feed_dict())
ValueError: Must pass in RNN state batches for placeholders [<tf.Tensor 'default_policy/Placeholder:0' shape=(?, 16) dtype=float32>, <tf.Tensor 'default_policy/Placeholder_1:0' shape=(?, 16) dtype=float32>], got []
I'm a freaking idiot. I should have read the documentation more carefully:
https://docs.ray.io/en/master/rllib-training.html#common-parameters
I just needed to pass the state to compute_action:
import numpy as np

def test(self):
    """Test the trained agent for a single episode. Return the episode reward."""
    env = self.env
    episode_reward = 0
    done = False
    obs = env.reset()
    # Initialize the recurrent state with zeros; the size must match
    # "lstm_cell_size" in the model config.
    cell_size = 16
    state = [np.zeros(cell_size, np.float32),
             np.zeros(cell_size, np.float32)]
    # Run until the episode ends.
    while not done:
        # Passing the state makes compute_action return the updated state
        # (plus logits), which we feed back in on the next step.
        action, state, logits = self.agent.compute_action(obs, state)
        obs, reward, done, info = env.step(action)
        episode_reward += reward
    return episode_reward
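To avoid hardcoding the cell size, you can also ask the policy for its zero-filled initial state. A minimal sketch of the same method, assuming the default single-policy setup (get_policy().get_initial_state() should return the same list of zero arrays built above):

def test(self):
    """Variant of test() that queries the policy for its initial RNN state."""
    env = self.env
    episode_reward = 0
    done = False
    obs = env.reset()
    # Fetch the initial recurrent state from the (default) policy, so this
    # code stays in sync with whatever "lstm_cell_size" is configured.
    state = self.agent.get_policy().get_initial_state()
    while not done:
        action, state, logits = self.agent.compute_action(obs, state)
        obs, reward, done, info = env.step(action)
        episode_reward += reward
    return episode_reward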
Thanks for posting the solution, I was having the same problem.
Would you proceed similarly with prev_action and prev_reward if config['model']['lstm_use_prev_action_reward'] = True?
I.e., initialize them with vectors of zeros? Or what are the previous action and reward in the first step? There are none... A sketch of what I mean is below.
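Untested, but I believe zeros are the right choice for the first step, since no previous transition exists at t=0. A minimal sketch of the loop, assuming lstm_use_prev_action_reward=True and CartPole's Discrete action space (the zero initializations are my assumption, not a documented API):

# Assumption: no previous transition exists at t=0, so feed neutral zeros.
prev_action = 0      # placeholder "no-op" for a Discrete action space
prev_reward = 0.0
state = self.agent.get_policy().get_initial_state()
while not done:
    action, state, logits = self.agent.compute_action(
        obs, state, prev_action=prev_action, prev_reward=prev_reward)
    obs, reward, done, info = env.step(action)
    # Carry the real action/reward forward for the next step.
    prev_action, prev_reward = action, reward
    episode_reward += reward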