I run Ray on two nodes. On one of the nodes I get "No module named 'mlp_policy'". 'mlp_policy' is my own custom module.
code
main.py
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import argparse
import random
import json
import os
import time

import ray
from ray.tune import Trainable, run
from ray.tune.schedulers import PopulationBasedTraining
import gym
import _pickle as pickle
from baselines.common import set_global_seeds, tf_util as U

import mlp_policy
from behavior_clone import BC


class MyTrainableClass(Trainable):
    def _setup(self, config):
        # self.timestep = 0
        env_id = 'HalfCheetah-v2'
        U.make_session(num_cpu=1).__enter__()
        set_global_seeds(0)
        self.env = gym.make(env_id)
        ob_space = self.env.observation_space
        ac_space = self.env.action_space
        expert_path = '/home/kangkang/PycharmProjects/bc/demonstrator_trajectories/HalfCheetah.pkl'
        with open(expert_path, "rb") as f:
            data_stored = pickle.load(f)
        dataset = data_stored['demonstrator_trajectory_key']
        n = config['n']

        def policy_fn(name, reuse=False):
            return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space, reuse=reuse,
                                        hid_dims_p=n)

        self.lr = config["lr"]
        self.algo1 = BC(self.env, policy_fn, dataset, max_iters=10, adam_epsilon=self.lr, verbose=True)

    def _train(self):
        # self.timestep += 1
        self.algo1.train1()
        ob = self.env.reset()
        ret = 0
        step = 0
        while True:
            # env.render()
            ac = self.algo1.pi.act(ob)
            ob, rew, done, _ = self.env.step(ac)
            ret += rew
            step += 1
            if done: break
        self.episode_reward_mean = ret
        return {"episode_reward_mean": ret}

    def _save(self, checkpoint_dir):
        return {
            "episode_reward_mean": self.episode_reward_mean,
            "lr": self.lr,
        }

    def _restore(self, checkpoint):
        self.episode_reward_mean = checkpoint["episode_reward_mean"]

    def reset_config(self, new_config):
        self.lr = new_config["lr"]
        return True


if __name__ == "__main__":
    time_start = time.time()
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()

    ray.init(redis_address="172.17.0.2:6379")

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        mode="max",
        perturbation_interval=20,
        hyperparam_mutations={
            # distribution for resampling
            "lr": lambda: random.uniform(0.0001, 0.02),
            # allow perturbations within this set of categorical values
            "n": [[100, 100], [150, 160], [180, 200], [150, 150]],
        })

    run(
        MyTrainableClass,
        name="pbt",
        scheduler=pbt,
        # reuse_actors=True,
        # verbose=False,
        **{
            "stop": {
                "training_iteration": 100,
            },
            "num_samples": 50,
            "config": {
                "lr": 0.0001,
                # note: this parameter is perturbed but has no effect on
                # the model training in this example
                "n": [100, 150],
            },
        })

    time_total = time.time() - time_start
    print(time_total, "s")
log
2019-07-20 14:07:29,432 WARNING asy_pbt.py:199 -- `reward_attr` is deprecated and will be removed in a future version of Tune. Setting `metric=episode_reward_mean` and `mode=max`.
2019-07-20 14:07:29,441 INFO tune.py:61 -- Tip: to resume incomplete experiments, pass resume='prompt' or resume=True to run()
2019-07-20 14:07:29,441 INFO tune.py:233 -- Starting a new experiment.
== Status ==
Asy_PopulationBasedTraining: 0 perturbs
Resources requested: 0/16 CPUs, 0/0 GPUs
Memory usage on this node: 4.5/16.8 GB
WARNING: Logging before flag parsing goes to stderr.
W0720 14:07:29.496500 140510616524544 deprecation_wrapper.py:119] From /home/kangkang/anaconda3/envs/kk/lib/python3.6/site-packages/ray/tune/logger.py:136: The name tf.VERSION is deprecated. Please use tf.version.VERSION instead.
W0720 14:07:29.496733 140510616524544 deprecation_wrapper.py:119] From /home/kangkang/anaconda3/envs/kk/lib/python3.6/site-packages/ray/tune/logger.py:141: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.
== Status ==
Asy_PopulationBasedTraining: 0 perturbs
Resources requested: 1/16 CPUs, 0/0 GPUs
Memory usage on this node: 4.5/16.8 GB
Result logdir: /home/kangkang/ray_results/pbt
Number of trials: 50 ({'RUNNING': 1, 'PENDING': 49})
PENDING trials:
- MyTrainableClass_1: PENDING
- MyTrainableClass_2: PENDING
- MyTrainableClass_3: PENDING
- MyTrainableClass_4: PENDING
- MyTrainableClass_5: PENDING
- MyTrainableClass_6: PENDING
- MyTrainableClass_7: PENDING
- MyTrainableClass_8: PENDING
- MyTrainableClass_9: PENDING
... 31 not shown
- MyTrainableClass_41: PENDING
- MyTrainableClass_42: PENDING
- MyTrainableClass_43: PENDING
- MyTrainableClass_44: PENDING
- MyTrainableClass_45: PENDING
- MyTrainableClass_46: PENDING
- MyTrainableClass_47: PENDING
- MyTrainableClass_48: PENDING
- MyTrainableClass_49: PENDING
RUNNING trials:
- MyTrainableClass_0: RUNNING
2019-07-20 14:07:29,903 ERROR worker.py:1672 -- Failed to unpickle actor class 'MyTrainableClass' for actor ID d1f9da12bbde7eb0e41d05c6e823658022877b00. Traceback:
Traceback (most recent call last):
File "/home/cjj/anaconda3/envs/py3/lib/python3.6/site-packages/ray/function_manager.py", line 728, in _load_actor_class_from_gcs
actor_class = pickle.loads(pickled_class)
File "/home/cjj/anaconda3/envs/py3/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 965, in subimport
__import__(name)
ModuleNotFoundError: No module named 'mlp_policy'
(pid=24549, ip=192.168.4.100) 2019-07-20 14:07:29,862 ERROR function_manager.py:731 -- Failed to load actor class %s.
(pid=24549, ip=192.168.4.100) Traceback (most recent call last):
(pid=24549, ip=192.168.4.100) File "/home/cjj/anaconda3/envs/py3/lib/python3.6/site-packages/ray/function_manager.py", line 728, in _load_actor_class_from_gcs
(pid=24549, ip=192.168.4.100) actor_class = pickle.loads(pickled_class)
(pid=24549, ip=192.168.4.100) File "/home/cjj/anaconda3/envs/py3/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 965, in subimport
(pid=24549, ip=192.168.4.100) __import__(name)
(pid=24549, ip=192.168.4.100) ModuleNotFoundError: No module named 'mlp_policy'
2019-07-20 14:07:30,181 ERROR worker.py:1672 -- Failed to unpickle actor class 'MyTrainableClass' for actor ID a3a647bb4ab2ef738fd607f81aaf59e63c7469d9. Traceback:
Traceback (most recent call last):
File "/home/cjj/anaconda3/envs/py3/lib/python3.6/site-packages/ray/function_manager.py", line 728, in _load_actor_class_from_gcs
actor_class = pickle.loads(pickled_class)
File "/home/cjj/anaconda3/envs/py3/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 965, in subimport
__import__(name)
ModuleNotFoundError: No module named 'mlp_policy'
(pid=24533, ip=192.168.4.100) 2019-07-20 14:07:30,112 ERROR function_manager.py:731 -- Failed to load actor class %s.
(pid=24533, ip=192.168.4.100) Traceback (most recent call last):
(pid=24533, ip=192.168.4.100) File "/home/cjj/anaconda3/envs/py3/lib/python3.6/site-packages/ray/function_manager.py", line 728, in _load_actor_class_from_gcs
(pid=24533, ip=192.168.4.100) actor_class = pickle.loads(pickled_class)
(pid=24533, ip=192.168.4.100) File "/home/cjj/anaconda3/envs/py3/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 965, in subimport
(pid=24533, ip=192.168.4.100) __import__(name)
(pid=24533, ip=192.168.4.100) ModuleNotFoundError: No module named 'mlp_policy'
2019-07-20 14:07:30,207 ERROR trial_runner.py:487 -- Error processing event.
Traceback (most recent call last):
File "/home/kangkang/anaconda3/envs/kk/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 436, in _process_trial
result = self.trial_executor.fetch_result(trial)
File "/home/kangkang/anaconda3/envs/kk/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 323, in fetch_result
result = ray.get(trial_future[0])
File "/home/kangkang/anaconda3/envs/kk/lib/python3.6/site-packages/ray/worker.py", line 2195, in get
raise value
ray.exceptions.RayTaskError: ray_TemporaryActor:train() (pid=24549, host=cjj-dm)
Exception: The actor with name MyTrainableClass failed to be imported, and so cannot execute this method.
2019-07-20 14:07:30,208 INFO ray_trial_executor.py:187 -- Destroying actor for trial MyTrainableClass_1. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2019-07-20 14:07:30,359 ERROR worker.py:1672 -- Failed to unpickle actor class 'MyTrainableClass' for actor ID 0ba9cdced1ad3c516ec2128e1b2a8c72c40644cc. Traceback:
Traceback (most recent call last):
File "/home/cjj/anaconda3/envs/py3/lib/python3.6/site-packages/ray/function_manager.py", line 728, in _load_actor_class_from_gcs
actor_class = pickle.loads(pickled_class)
File "/home/cjj/anaconda3/envs/py3/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 965, in subimport
__import__(name)
ModuleNotFoundError: No module named 'mlp_policy'
......
......
......
Result for MyTrainableClass_11:
date: 2019-07-20_14-07-36
done: false
episode_reward_mean: -606.9989077134618
experiment_id: 53cd9e3d270e4685ad967faa808ff510
hostname: kangkang-Vostro-3900
iterations_since_restore: 1
node_ip: 192.168.4.102
pid: 23578
time_since_restore: 1.7248926162719727
time_this_iter_s: 1.7248926162719727
time_total_s: 1.7248926162719727
timestamp: 1563602856
timesteps_since_restore: 0
training_iteration: 1
W0720 14:07:36.209538 140510616524544 deprecation_wrapper.py:119] From /home/kangkang/anaconda3/envs/kk/lib/python3.6/site-packages/ray/tune/logger.py:119: The name tf.Summary is deprecated. Please use tf.compat.v1.Summary instead.
(pid=23633) W0720 14:07:36.167340 140348898191104 deprecation.py:323] From /home/kangkang/anaconda3/envs/kk/lib/python3.6/site-packages/baselines/common/mpi_running_mean_std.py:25: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
(pid=23633) Instructions for updating:
(pid=23633) Use `tf.cast` instead.
(pid=23645) 2019-07-20 14:07:36.182084: W tensorflow/compiler/jit/mark_for_compilation_pass.cc:1412] (One-time warning): Not using XLA:CPU for cluster because envvar TF_XLA_FLAGS=--tf_xla_cpu_global_jit was not set. If you want XLA:CPU, either set that envvar, or use experimental_jit_scope to enable XLA:CPU. To confirm that XLA is active, pass --vmodule=xla_compilation_cache=1 (as a proper command-line flag, not via TF_XLA_FLAGS) or set the envvar XLA_FLAGS=--xla_hlo_profile.
== Status ==
Asy_PopulationBasedTraining: 0 perturbs
Resources requested: 16/16 CPUs, 0/0 GPUs
Memory usage on this node: 5.9/16.8 GB
Result logdir: /home/kangkang/ray_results/pbt
Number of trials: 50 ({'ERROR': 8, 'RUNNING': 16, 'PENDING': 26})
ERROR trials:
- MyTrainableClass_0: ERROR, 1 failures: /home/kangkang/ray_results/pbt/MyTrainableClass_0_2019-07-20_14-07-29djs5wwmw/error_2019-07-20_14-07-31.txt
- MyTrainableClass_1: ERROR, 1 failures: /home/kangkang/ray_results/pbt/MyTrainableClass_1_2019-07-20_14-07-291w5ye8bz/error_2019-07-20_14-07-30.txt
- MyTrainableClass_2: ERROR, 1 failures: /home/kangkang/ray_results/pbt/MyTrainableClass_2_2019-07-20_14-07-29829qk1_h/error_2019-07-20_14-07-30.txt
... 2 not shown
- MyTrainableClass_5: ERROR, 1 failures: /home/kangkang/ray_results/pbt/MyTrainableClass_5_2019-07-20_14-07-29wmsvoekc/error_2019-07-20_14-07-31.txt
- MyTrainableClass_9: ERROR, 1 failures: /home/kangkang/ray_results/pbt/MyTrainableClass_9_2019-07-20_14-07-29b7eoqp0d/error_2019-07-20_14-07-31.txt
- MyTrainableClass_10: ERROR, 1 failures: /home/kangkang/ray_results/pbt/MyTrainableClass_10_2019-07-20_14-07-29gfqwlqbl/error_2019-07-20_14-07-31.txt
PENDING trials:
- MyTrainableClass_24: PENDING
- MyTrainableClass_25: PENDING
- MyTrainableClass_26: PENDING
... 20 not shown
- MyTrainableClass_47: PENDING
- MyTrainableClass_48: PENDING
- MyTrainableClass_49: PENDING
RUNNING trials:
- MyTrainableClass_6: RUNNING
- MyTrainableClass_7: RUNNING
- MyTrainableClass_8: RUNNING
... 10 not shown
- MyTrainableClass_21: RUNNING
- MyTrainableClass_22: RUNNING
- MyTrainableClass_23: RUNNING
Result for MyTrainableClass_7:
date: 2019-07-20_14-07-36
done: false
episode_reward_mean: -627.9952494509495
experiment_id: 90c41aab57bf42fe9685ca6a7fd31af2
hostname: kangkang-Vostro-3900
iterations_since_restore: 1
node_ip: 192.168.4.102
pid: 23603
time_since_restore: 1.8066432476043701
time_this_iter_s: 1.8066432476043701
time_total_s: 1.8066432476043701
timestamp: 1563602856
timesteps_since_restore: 0
training_iteration: 1
(pid=23633) 2019-07-20 14:07:36.512050: W tensorflow/compiler/jit/mark_for_compilation_pass.cc:1412] (One-time warning): Not using XLA:CPU for cluster because envvar TF_XLA_FLAGS=--tf_xla_cpu_global_jit was not set. If you want XLA:CPU, either set that envvar, or use experimental_jit_scope to enable XLA:CPU. To confirm that XLA is active, pass --vmodule=xla_compilation_cache=1 (as a proper command-line flag, not via TF_XLA_FLAGS) or set the envvar XLA_FLAGS=--xla_hlo_profile.
(pid=23676) Training loss: 1.0883210897445679
Result for MyTrainableClass_13:
date: 2019-07-20_14-07-36
done: false
episode_reward_mean: -561.1634001966081
experiment_id: 474778e9deb8468f850414510ab0b3c7
hostname: kangkang-Vostro-3900
iterations_since_restore: 1
node_ip: 192.168.4.102
pid: 23589
time_since_restore: 1.8654873371124268
time_this_iter_s: 1.8654873371124268
time_total_s: 1.8654873371124268
timestamp: 1563602856
timesteps_since_restore: 0
training_iteration: 1
......
......
......
You have to make sure the other module is also accessible and on the default python path of the other machine. If you are using the autoscaler (which supports private machines), you can specify this as a file_mount.
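To spell that out: the Trainable actor is pickled on the driver and unpickled inside a Ray worker process on the other node, and the __import__('mlp_policy') in the traceback runs with that machine's Python path. So mlp_policy.py (and behavior_clone.py) have to physically exist on the second node and be importable by the Ray worker processes there, for example by copying them over and exporting PYTHONPATH to their directory before running ray start on that node, by installing them as a package, or, with the autoscaler, by listing them under file_mounts in the cluster YAML. A quick way to see which nodes can resolve the import is a throwaway remote task; this is only a sketch, not part of the original script, and it assumes ray.init(...) has already connected to the cluster as in main.py:

import socket
import ray

@ray.remote
def check_import():
    # Runs inside a Ray worker process; reports whether that worker's
    # Python can import the custom module.
    try:
        import mlp_policy  # noqa: F401
        return socket.gethostname(), "ok"
    except ImportError as exc:
        return socket.gethostname(), repr(exc)

# Launch more tasks than one node has CPUs so some of them are scheduled
# on the second node as well.
results = ray.get([check_import.remote() for _ in range(32)])
for hostname, status in sorted(set(results)):
    print(hostname, status)

Any host that reports an ImportError here is a host whose worker processes cannot see mlp_policy, which is exactly what the Tune actors run into.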
OK! Thanks!
Hi,
I have the same issue. @richardliaw Could you explain what to do in a bit more detail?
That would be so kind!
Thx a lot.