I cannot use ray with Tensorflow 2.0 (stable). Even using the documentation code at https://ray.readthedocs.io/en/latest/using-ray-with-tensorflow.html throws the same pickling error as my own code:
Here for quick copying is the code:
#!/usr/bin/env python
"""TODO Module Docstring."""
import numpy as np
import ray
import tensorflow as tf
from tensorflow.keras import layers
def create_keras_model():
    """Build and compile a small dense classifier.

    The network takes inputs with 32 features, applies two 64-unit ReLU
    layers and finishes with a 10-unit softmax layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential()
    # Adds a densely-connected layer with 64 units to the model:
    model.add(layers.Dense(64, activation="relu", input_shape=(32,)))
    # Add another:
    model.add(layers.Dense(64, activation="relu"))
    # Add a softmax layer with 10 output units:
    model.add(layers.Dense(10, activation="softmax"))
    model.compile(
        # ``tf.train.RMSPropOptimizer`` is not part of the TensorFlow 2.0
        # namespace; the Keras optimizer exists in both 1.x and 2.x.
        optimizer=tf.keras.optimizers.RMSprop(0.01),
        loss=tf.keras.losses.categorical_crossentropy,
        metrics=[tf.keras.metrics.categorical_accuracy])
    return model
def random_one_hot_labels(shape):
    """Return a random one-hot label matrix.

    Args:
        shape: ``(n, n_class)`` pair giving the number of rows and the
            number of classes.

    Returns:
        A float array of the given shape in which every row holds a single
        1 at a randomly drawn class index and 0 elsewhere.
    """
    num_rows, num_classes = shape
    picked = np.random.randint(0, num_classes, num_rows)
    # Indexing the identity matrix by class id selects the one-hot rows.
    return np.eye(num_classes)[picked]
# Initialise Ray; no arguments are passed, so Ray's own defaults apply.
ray.init()
@ray.remote
class Network(object):
    """Ray actor holding a Keras model plus randomly generated training data."""

    def __init__(self):
        """Create the model and a 1000-sample random dataset with labels."""
        self.model = create_keras_model()
        self.dataset = np.random.random((1000, 32))
        self.labels = random_one_hot_labels((1000, 10))

    def train(self):
        """Fit the model on the stored data and return ``history.history``."""
        return self.model.fit(
            self.dataset, self.labels, verbose=False).history

    def get_weights(self):
        """Return the model's current weights."""
        current = self.model.get_weights()
        return current

    def set_weights(self, weights):
        """Load ``weights`` into the model.

        Note that for simplicity this does not handle the optimizer state.
        """
        self.model.set_weights(weights)
# Create the actor through Ray's remote constructor. In the reported run the
# TypeError is raised on this line, while the class is being pickled.
NetworkActor = Network.remote()
# Submit train() to the actor; the value kept here is later handed to ray.get.
result_object_id = NetworkActor.train.remote()
# Fetch the result of the submitted train() call.
ray.get(result_object_id)
This gives a TypeError:
Traceback (most recent call last):
File "/home/***/test.py", line 56, in <module>
NetworkActor = Network.remote()
File "/home/***/venv/lib/python3.6/site-packages/ray/actor.py", line 322, in remote
return self._remote(args=args, kwargs=kwargs)
File "/home/***/venv/lib/python3.6/site-packages/ray/actor.py", line 405, in _remote
self._modified_class, self._actor_method_names)
File "/home/***/venv/lib/python3.6/site-packages/ray/function_manager.py", line 578, in export_actor_class
"class": pickle.dumps(Class),
File "/home/***/venv/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 1123, in dumps
cp.dump(obj)
File "/home/***/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 482, in dump
return Pickler.dump(self, obj)
File "/usr/lib/python3.6/pickle.py", line 409, in dump
self.save(obj)
File "/usr/lib/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "/home/***/venv/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 875, in save_global
self.save_dynamic_class(obj)
File "/home/***/venv/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 682, in save_dynamic_class
obj=obj)
File "/usr/lib/python3.6/pickle.py", line 610, in save_reduce
save(args)
File "/usr/lib/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.6/pickle.py", line 751, in save_tuple
save(element)
File "/usr/lib/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.6/pickle.py", line 736, in save_tuple
save(element)
File "/usr/lib/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "/home/***/venv/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 875, in save_global
self.save_dynamic_class(obj)
File "/home/***/venv/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 686, in save_dynamic_class
save(clsdict)
File "/usr/lib/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.6/pickle.py", line 821, in save_dict
self._batch_setitems(obj.items())
File "/usr/lib/python3.6/pickle.py", line 847, in _batch_setitems
save(v)
File "/usr/lib/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "/home/***/venv/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 556, in save_function
return self.save_function_tuple(obj)
File "/home/***/venv/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 756, in save_function_tuple
save(state)
File "/usr/lib/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.6/pickle.py", line 821, in save_dict
self._batch_setitems(obj.items())
File "/usr/lib/python3.6/pickle.py", line 847, in _batch_setitems
save(v)
File "/usr/lib/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.6/pickle.py", line 821, in save_dict
self._batch_setitems(obj.items())
File "/usr/lib/python3.6/pickle.py", line 847, in _batch_setitems
save(v)
File "/usr/lib/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "/home/***/venv/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 556, in save_function
return self.save_function_tuple(obj)
File "/home/***/venv/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 756, in save_function_tuple
save(state)
File "/usr/lib/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.6/pickle.py", line 821, in save_dict
self._batch_setitems(obj.items())
File "/usr/lib/python3.6/pickle.py", line 847, in _batch_setitems
save(v)
File "/usr/lib/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.6/pickle.py", line 781, in save_list
self._batch_appends(obj)
File "/usr/lib/python3.6/pickle.py", line 808, in _batch_appends
save(tmp[0])
File "/usr/lib/python3.6/pickle.py", line 496, in save
rv = reduce(self.proto)
TypeError: can't pickle _LazyLoader objects
Try this?
import numpy as np
import ray
from tensorflow.keras import layers
def create_keras_model():
    """Build and compile a small dense classifier.

    The network takes inputs with 32 features, applies two 64-unit ReLU
    layers and finishes with a 10-unit softmax layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    # Imported here rather than at module level on purpose: the thread
    # reports that this avoids the "can't pickle _LazyLoader objects" error
    # raised when Ray pickles the actor class.
    import tensorflow as tf
    model = tf.keras.Sequential()
    # Adds a densely-connected layer with 64 units to the model:
    model.add(layers.Dense(64, activation="relu", input_shape=(32,)))
    # Add another:
    model.add(layers.Dense(64, activation="relu"))
    # Add a softmax layer with 10 output units:
    model.add(layers.Dense(10, activation="softmax"))
    model.compile(
        # ``tf.train.RMSPropOptimizer`` is not part of the TensorFlow 2.0
        # namespace; the Keras optimizer exists in both 1.x and 2.x.
        optimizer=tf.keras.optimizers.RMSprop(0.01),
        loss=tf.keras.losses.categorical_crossentropy,
        metrics=[tf.keras.metrics.categorical_accuracy])
    return model
def random_one_hot_labels(shape):
    """Generate random one-hot encoded labels.

    Args:
        shape: ``(n, n_class)`` pair: number of label rows and number of
            classes.

    Returns:
        A float array of that shape with exactly one 1 per row, placed at a
        randomly drawn class index.
    """
    row_count, class_count = shape
    drawn = np.random.randint(0, class_count, row_count)
    # Compare each drawn class id against every column index; the single
    # match per row becomes the 1.
    hits = np.arange(class_count) == drawn[:, None]
    return hits.astype(float)
# Initialise Ray; no arguments are passed, so Ray's own defaults apply.
ray.init()
@ray.remote
class Network(object):
    """Ray actor wrapping a Keras model and a random in-memory dataset."""

    def __init__(self):
        """Build the model, then draw 1000 random samples and labels."""
        self.model = create_keras_model()
        self.dataset = np.random.random((1000, 32))
        self.labels = random_one_hot_labels((1000, 10))

    def train(self):
        """Run ``fit`` on the stored data and return its ``history`` dict."""
        fit_result = self.model.fit(
            self.dataset,
            self.labels,
            verbose=False,
        )
        return fit_result.history

    def get_weights(self):
        """Return the weights currently held by the model."""
        return self.model.get_weights()

    def set_weights(self, weights):
        """Replace the model's weights with ``weights``.

        Note that for simplicity this does not handle the optimizer state.
        """
        self.model.set_weights(weights)
# Create the actor through Ray's remote constructor.
NetworkActor = Network.remote()
# Submit train() to the actor; the value kept here is later handed to ray.get.
result_object_id = NetworkActor.train.remote()
# Fetch the result of the submitted train() call.
ray.get(result_object_id)
Works! Interesting, given that with python's multiprocessing the local import is not necessary. Thanks for the help!
Can we get more detail on why it works when the Python statement `import tensorflow as tf` is moved inside the function?
I struggle with
File "/home/xx/.local/python/lib/python3.7/pickle.py", line 524, in save
rv = reduce(self.proto)
File "/home/xx/.local/python/lib/python3.7/socket.py", line 192, in __getstate__
raise TypeError("Cannot serialize socket object")
TypeError: Cannot serialize socket object
Maybe I missed it, but is Ray trying to serialize even the import context, not just what we pass with ray.put?
serialize
same error here, have you solved it? :)
@pengyuan-zhou can you post a stacktrace? (and reopen this issue when you do?)
@pengyuan-zhou can you post a stacktrace? (and reopen this issue when you do?)
Sure, it happens when I'm calling ray from another project, flow.
The error appears like this,
2020-06-22 10:52:00,288 INFO services.py:1170 -- View the Ray dashboard at localhost:8265
Traceback (most recent call last):
File "examples/train.py", line 404, in <module>
main(sys.argv[1:])
File "examples/train.py", line 397, in main
train_h_baselines(flow_params, args, multiagent)
File "examples/train.py", line 304, in train_h_baselines
**hp
File "/home/pengzhou/git/h-baselines/hbaselines/algorithms/off_policy.py", line 375, in __init__
for env_num in range(num_envs)
File "/home/pengzhou/git/h-baselines/hbaselines/algorithms/off_policy.py", line 375, in <listcomp>
for env_num in range(num_envs)
File "/home/pengzhou/miniconda3/envs/flow/lib/python3.7/site-packages/ray/actor.py", line 378, in remote
return self._remote(args=args, kwargs=kwargs)
File "/home/pengzhou/miniconda3/envs/flow/lib/python3.7/site-packages/ray/actor.py", line 556, in _remote
extension_data=str(actor_method_cpu))
File "python/ray/_raylet.pyx", line 918, in ray._raylet.CoreWorker.create_actor
File "python/ray/_raylet.pyx", line 923, in ray._raylet.CoreWorker.create_actor
File "python/ray/_raylet.pyx", line 280, in ray._raylet.prepare_args
File "/home/pengzhou/miniconda3/envs/flow/lib/python3.7/site-packages/ray/serialization.py", line 401, in serialize
return self._serialize_to_msgpack(metadata, value)
File "/home/pengzhou/miniconda3/envs/flow/lib/python3.7/site-packages/ray/serialization.py", line 373, in _serialize_to_msgpack
self._serialize_to_pickle5(metadata, python_objects)
File "/home/pengzhou/miniconda3/envs/flow/lib/python3.7/site-packages/ray/serialization.py", line 353, in _serialize_to_pickle5
raise e
File "/home/pengzhou/miniconda3/envs/flow/lib/python3.7/site-packages/ray/serialization.py", line 350, in _serialize_to_pickle5
value, protocol=5, buffer_callback=writer.buffer_callback)
File "/home/pengzhou/miniconda3/envs/flow/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 72, in dumps
cp.dump(obj)
File "/home/pengzhou/miniconda3/envs/flow/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py", line 617, in dump
return Pickler.dump(self, obj)
File "/home/pengzhou/miniconda3/envs/flow/lib/python3.7/socket.py", line 192, in __getstate__
raise TypeError("Cannot serialize socket object")
TypeError: Cannot serialize socket object
Relevant libs are:
ray==0.8.5
numpy==1.18.4
tensorflow==1.15.2
tensorflow-probability==0.8.0
gym==0.14.0
pygame
opencv-python
dm-tree
ray[tune]
scipy==1.1.0
lxml==4.4.1
pyprind==2.11.2
nose2==0.8.0
six==1.11.0
path.py
joblib==0.10.3
python-dateutil==2.7.3
cached_property
cloudpickle==1.2.0
pyglet==1.3.2
matplotlib==3.1.0
imutils==0.5.1
numpydoc
dill
lz4
setproctitle
psutil
boto3==1.4.8
redis~=2.10.6
pandas==0.24.2
plotly==2.4.0
I also tried ray==0.8.5 and redis==3.4.1, same error.
Thanks.
BR.
@pengyuan-zhou can you post a stacktrace? (and reopen this issue when you do?)
Yeah, I can't reopen an issue opened by someone else in this project.
Does this help?
Side note:
It is enough to move the definitions of the functions that need TensorFlow to a separate module. The important part seems to be that TensorFlow is not imported in the module where tune.run() is called.
Most helpful comment
Try this?