I have been running the IMPALA algorithm and trying to use a Tuple action space in my custom env, Tuple(Discrete(9), Box(1,))... When training, at some point all my trials end up failing with the following error trace..
(pid=3731) Traceback (most recent call last):
(pid=3731) File "/home/svc-tai-dev/virt/algo_36/lib64/python3.6/site-packages/tensorflow/python/client/session.py", line 1356, in _d
o_call
(pid=3731) return fn(*args)
(pid=3731) File "/home/svc-tai-dev/virt/algo_36/lib64/python3.6/site-packages/tensorflow/python/client/session.py", line 1341, in _r
un_fn
(pid=3731) options, feed_dict, fetch_list, target_list, run_metadata)
(pid=3731) File "/home/svc-tai-dev/virt/algo_36/lib64/python3.6/site-packages/tensorflow/python/client/session.py", line 1429, in _c
all_tf_sessionrun
(pid=3731) run_metadata)
(pid=3731) tensorflow.python.framework.errors_impl.InvalidArgumentError: Received a label value of 9 which is outside the valid range
of [0, 9). Label values: 9
(pid=3731) [[{{node default_policy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]]
It is not specific to my env, as I am able to reproduce the error using a toy problem with the same action space, such as below..
import gym
import numpy as np
from gym import spaces
from ray import tune
from ray.tune.registry import register_env

_name = "impala_multi_action_env"  # experiment name for Tune (placeholder)


class MultiActionEnv(gym.Env):
    def __init__(self):
        super().__init__()
        self.action_space = spaces.Tuple((
            spaces.Discrete(9),
            spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)))
        self.observation_space = spaces.Box(
            low=-1.0, high=1.0, shape=(1,), dtype=np.float32)

    def reset(self):
        obs = self.observation_space.sample()
        self.timestep = 0
        return obs

    def step(self, action):
        # print(action)
        obs = self.observation_space.sample()
        reward = np.random.randn()
        done = False
        self.timestep += 1
        if self.timestep > 1000:
            done = True
        return obs, reward, done, {}


register_env("MultiActionEnv-v0", lambda _: MultiActionEnv())

tune.run("IMPALA", name=_name, stop={"time_total_s": 10000000},
         config={"num_workers": 2, "env": "MultiActionEnv-v0"},
         checkpoint_at_end=False)
The full trace in the error file...
Traceback (most recent call last):
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 515, in _process_
trial
result = self.trial_executor.fetch_result(trial)
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 351, in fet
ch_result
result = ray.get(trial_future[0])
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/worker.py", line 2121, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ValueError): ray_IMPALA:train() (pid=8331, host=ip-172-31-5-40)
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 418, in train
raise e
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 407, in train
result = Trainable.train(self)
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/tune/trainable.py", line 176, in train
result = self._train()
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/agents/trainer_template.py", line 129,
in _train
fetches = self.optimizer.step()
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/optimizers/async_samples_optimizer.py",
line 136, in step
sample_timesteps, train_timesteps = self._step()
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/optimizers/async_samples_optimizer.py",
line 178, in _step
for train_batch in self.aggregator.iter_train_batches():
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/optimizers/aso_aggregator.py", line 117
, in iter_train_batches
blocking_wait=True, max_yield=max_yield)):
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/optimizers/aso_aggregator.py", line 170
, in _augment_with_replay sample_batch = ray_get_and_free(sample_batch)
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/utils/memory.py", line 33, in ray_get_a
nd_free result = ray.get(object_ids)
ray.exceptions.RayTaskError(ValueError): ray_RolloutWorker:sample() (pid=8070, host=ip-172-31-8-59)
File "/home/ubuntu/algo/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1356, in _do_call
return fn(*args)
File "/home/ubuntu/algo/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1341, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/home/ubuntu/algo/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1429, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Received a label value of 9 which is outside the valid range of [0, 9). Label values: 9
[[{{node default_policy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogit
s}}]] During handling of the above exception, another exception occurred: ray_RolloutWorker:sample() (pid=8070, host=ip-172-31-8-59) File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/utils/tf_run_builder.py", line 48, in g
et
self.feed_dict, os.environ.get("TF_TIMELINE_DIR"))
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/utils/tf_run_builder.py", line 94, in r
un_timeline
fetches = sess.run(ops, feed_dict=feed_dict)
File "/home/ubuntu/algo/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 950, i
n run
run_metadata_ptr)
File "/home/ubuntu/algo/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1173,
in _run feed_dict_tensor, options, run_metadata) File "/home/ubuntu/algo/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1350, in _do_run run_metadata) File "/home/ubuntu/algo/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1370, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Received a label value of 9 which is outside the valid range of [0, 9). Label values: 9
  [[node default_policy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits (defined at /ray/rllib/models/tf/tf_action_dist.py:54) ]]
Errors may have originated from an input operation.
Input Source operations connected to node default_policy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits:
default_policy/split_1 (defined at /ray/rllib/models/tf/tf_action_dist.py:214)
Original stack trace for 'default_policy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits':
File "/ray/workers/default_worker.py", line 98, in <module>
ray.worker.global_worker.main_loop()
File "/ray/rllib/evaluation/rollout_worker.py", line 348, in __init__
self._build_policy_map(policy_dict, policy_config)
File "/ray/rllib/evaluation/rollout_worker.py", line 764, in _build_policy_map
policy_map[name] = cls(obs_space, act_space, merged_conf)
File "/ray/rllib/policy/tf_policy_template.py", line 143, in __init__
obs_include_prev_action_reward=obs_include_prev_action_reward)
File "/ray/rllib/policy/dynamic_tf_policy.py", line 170, in __init__
action_logp = action_dist.sampled_action_logp()
File "/ray/rllib/models/tf/tf_action_dist.py", line 261, in sampled_action_logp
p = self.child_distributions[0].sampled_action_logp()
File "/ray/rllib/models/tf/tf_action_dist.py", line 41, in sampled_action_logp
return self.logp(self.sample_op)
File "/ray/rllib/models/tf/tf_action_dist.py", line 54, in logp
logits=self.inputs, labels=tf.cast(x, tf.int32))
File "/tensorflow/python/ops/nn_ops.py", line 3342, in sparse_softmax_cross_entropy_with_logits
precise_logits, labels, name=name)
File "/tensorflow/python/ops/gen_nn_ops.py", line 11350, in sparse_softmax_cross_entropy_with_logits
labels=labels, name=name)
File "/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
op_def=op_def)
File "/tensorflow/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/tensorflow/python/framework/ops.py", line 3616, in create_op
op_def=op_def)
File "/tensorflow/python/framework/ops.py", line 2005, in __init__
self._traceback = tf_stack.extract_stack()
During handling of the above exception, another exception occurred:
ray_RolloutWorker:sample() (pid=8070, host=ip-172-31-8-59)
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py", line 469, in sample
batches = [self.input_reader.next()]
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/evaluation/sampler.py", line 56, in next
batches = [self.get_data()]
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/evaluation/sampler.py", line 99, in get_data
item = next(self.rollout_provider)
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/evaluation/sampler.py", line 327, in _env_runner
active_episodes)
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/evaluation/sampler.py", line 551, in _do_policy_eval
eval_results[k] = builder.get(v)
File "/home/ubuntu/algo/lib/python3.6/site-packages/ray/rllib/utils/tf_run_builder.py", line 53, in get
self.fetches, self.feed_dict))
ValueError: Error fetching: [TupleActions(batches=[<tf.Tensor 'default_policy/Squeeze_2:0' shape=(?,) dtype=int64>, <tf.Tensor 'default_policy/add_1:0' shape=(?, 1) dtype=float32>]), {'action_prob': <tf.Tensor 'default_policy/Exp_1:0' shape=(?,) dtype=float32>, 'action_logp': <tf.Tensor 'default_policy/add_2:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/concat:0' shape=(?, 11) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 54) dtype=float32>: [array([ 1. , 1. , 1. , 1. , 1. ,
0. , 0. , 0. , 0. , 0.70131896,
0.11328208, 0.11328208, -0.76101432, 0.36510177, 0.36162691,
0.03620103, 0.17106617, 0.08926075, 0.38048075, 0.36241551,
0.36211438, 0.34613316, 0.39477825, -0.12288058, 0.27199868,
-1.46890378, -1.40642859, 0.6146765 , 0.64622823, 0.56964214,
1.36563875, 1.09488068, 1.52385215, 1.94669157, 2.40748066,
2.10075465, -1.17808927, -1. , -1. , -1. ,
-1. , -1. , -1. , 0. , 0. ,
0. , -0.05753944, -0.05854531, -0.05952603, 0.19277988,
0.32501844, 0.44820571, 0.00793453, -0.50608655])], <tf.Tensor 'default_policy/action:0' shape=(?, 2) dtype=float32>: [array([ 0., -5441968.])], <tf.Tensor 'default_policy/prev_reward:0' shape=(?,) dtype=float32>: [0.0], <tf.Tensor 'default_policy/PlaceholderWithDefault:0' shape=() dtype=bool
Could you make sure the attached code is a runnable script? (please include the imports, etc needed).
FWIW, I think such issues can happen if NaNs appear in the policy output. When that happens, you can get out of range errors.
Usually it's due to the observation or reward somehow becoming NaN, though it could be the policy diverging as well.
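A quick way to rule that out is to wrap the env and fail fast as soon as an observation or reward stops being finite. A minimal sketch (the NaNGuard wrapper below is purely illustrative, not an RLlib utility):

import gym
import numpy as np

class NaNGuard(gym.Wrapper):
    """Illustrative wrapper: raise immediately on non-finite obs or reward."""

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        if not np.all(np.isfinite(obs)):
            raise ValueError("Non-finite observation: {}".format(obs))
        if not np.isfinite(reward):
            raise ValueError("Non-finite reward: {}".format(reward))
        return obs, reward, done, info

# e.g. register_env("MultiActionEnv-v0", lambda _: NaNGuard(MultiActionEnv()))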
I'm putting it in a script, but I'm having trouble reproducing it outside of my full custom environment now... so I'm trying to find a toy example that triggers within a reasonable amount of time. To be precise, the model being used is similar to the parametric-actions example, in that there is masking going on in the forward step, and thus tf.float32.min is being introduced... so it's very possible that is the root cause, but it certainly only happens occasionally. I could simply raise max_failures and increase the checkpoint frequency so that it effectively doesn't lose much of the training progress, but I'd like to see if there is a way to condition the inputs to avoid this first (see the sketch below). As soon as I have something that triggers reliably I'll follow up.
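The masking I mean is along the lines of RLlib's parametric-actions example; a rough sketch (simplified, names like action_mask and action_logits are illustrative), where swapping tf.float32.min for a large but finite penalty is one possible way to condition the inputs:

import tensorflow as tf

def mask_logits(action_logits, action_mask, penalty=-1e9):
    # Parametric-actions style masking: drive invalid actions toward zero
    # probability. tf.float32.min can turn into -inf/NaN after further
    # arithmetic, so a large but finite penalty may be safer.
    inf_mask = tf.maximum(tf.math.log(action_mask), penalty)
    return action_logits + inf_mask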
@waldroje @ericl
You can easily reproduce this error with the following snippet:
In [0]: import numpy as np
In [1]: import tensorflow as tf # tf 2.x
In [2]: my_inputs = tf.convert_to_tensor([[np.nan, np.nan, np.nan], [.3, .3, .4]])
In [3]: tf.compat.v1.multinomial(my_inputs, 1)
Out[3]:
<tf.Tensor: shape=(2, 1), dtype=int64, numpy=
array([[3],
[0]])>
As you can see, the first input row contains all NaNs, and the distribution sampled 3, which is outside the valid range of [0, 3).
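If you want to catch this at the point of sampling rather than getting a cryptic out-of-range label later, one option is to assert the logits are finite first; a minimal sketch (illustrative only, not something RLlib does by default):

import numpy as np
import tensorflow as tf  # tf 2.x

my_inputs = tf.convert_to_tensor([[np.nan, np.nan, np.nan], [.3, .3, .4]])

# check_numerics raises InvalidArgumentError here (the logits contain NaN),
# instead of letting multinomial silently return an out-of-range index.
checked = tf.debugging.check_numerics(my_inputs, "policy logits")
samples = tf.compat.v1.multinomial(checked, 1)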
Coming back to this as I started playing around with Tuple spaces, trying to condense some action spaces which I had previously been handling with very large discrete spaces... I keep running into the same kind of error, and am wondering if you could provide some insight. I'm attaching a simple env/model/script which, when I ran it as is on my local cluster, produced a similar error to the one initially reported:
(pid=27057, ip=10.32.46.148) 2020-04-11 18:56:43,095 ERROR tf_run_builder.py:47 -- Error fetching: [TupleActions(batches=[<tf.Tensor 'default_policy/cond/Merge:0' shape=(?,) dtype=int64>, <tf.Tensor 'default_policy/cond/Merge_1:0' shape=(?,) dtype=int64>, <tf.Tensor 'default_policy/cond/Merge_2:0' shape=(?, 1) dtype=float32>]), {'action_prob': <tf.Tensor 'default_policy/Exp_2:0' shape=(?,) dtype=float32>, 'action_logp': <tf.Tensor 'default_policy/cond_1/Merge:0' shape=(?,) dtype=float32>, 'behaviour_logits': <tf.Tensor 'default_policy/model/my_out/BiasAdd:0' shape=(?, 14) dtype=float32>}], feed_dict={<tf.Tensor 'default_policy/observation:0' shape=(?, 1) dtype=float32>: [array([0.3794551], dtype=float32)], <tf.Tensor 'default_policy/prev_action:0' shape=(?, 3) dtype=float32>: [array([ 1.00000000e+00, 8.00000000e+00, -1.81494215e+16])], <tf.Tensor 'default_policy/prev_reward:0' shape=(?,) dtype=float32>: [0.9880228042090168], <tf.Tensor 'default_policy/is_training:0' shape=() dtype=bool>: False, <tf.Tensor 'default_policy/is_exploring:0' shape=() dtype=bool>: True, <tf.Tensor 'default_policy/timestep:0' shape=() dtype=int32>: 10036500}
(pid=27057, ip=10.32.46.148) Traceback (most recent call last):
(pid=27057, ip=10.32.46.148) File "/home/svc-tai-dev/algo_36/lib64/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1367, in _do_call
(pid=27057, ip=10.32.46.148) return fn(*args)
(pid=27057, ip=10.32.46.148) File "/home/svc-tai-dev/algo_36/lib64/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1352, in _run_fn
(pid=27057, ip=10.32.46.148) target_list, run_metadata)
(pid=27057, ip=10.32.46.148) File "/home/svc-tai-dev/algo_36/lib64/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1445, in _call_tf_sessionrun
(pid=27057, ip=10.32.46.148) run_metadata)
(pid=27057, ip=10.32.46.148) tensorflow.python.framework.errors_impl.InvalidArgumentError: Received a label value of 3 which is outside the valid range of [0, 3). Label values: 3
Hi, I'm a bot from the Ray team :)
To help human contributors to focus on more relevant issues, I will automatically add the stale label to issues that have had no activity for more than 4 months.
If there is no further activity in the 14 days, the issue will be closed!
You can always ask for help on our discussion forum or Ray's public slack channel.