Running the exact multi_processing example given here does not work for me on Windows 10 with Python 3.6 or 3.7.
PS C:\Python\projects\spacy-multithread> pipenv run python .\main.py .\data\output\ en_core_web_sm -n 4
Loaded model 'en_core_web_sm'
Loading IMDB data...
Processing texts...
joblib.externals.loky.process_executor._RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\Bram\.virtualenvs\spacy-multithread-0ZRRAX-8\lib\site-packages\joblib\externals\loky\backend\queues.py", line 150, in _feed
obj_ = dumps(obj, reducers=reducers)
File "C:\Users\Bram\.virtualenvs\spacy-multithread-0ZRRAX-8\lib\site-packages\joblib\externals\loky\backend\reduction.py", line 243, in dumps
dump(obj, buf, reducers=reducers, protocol=protocol)
File "C:\Users\Bram\.virtualenvs\spacy-multithread-0ZRRAX-8\lib\site-packages\joblib\externals\loky\backend\reduction.py", line 236, in dump
_LokyPickler(file, reducers=reducers, protocol=protocol).dump(obj)
File "C:\Users\Bram\.virtualenvs\spacy-multithread-0ZRRAX-8\lib\site-packages\joblib\externals\cloudpickle\cloudpickle.py", line 284, in dump
return Pickler.dump(self, obj)
File "c:\python\python37\Lib\pickle.py", line 437, in dump
self.save(obj)
File "c:\python\python37\Lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "c:\python\python37\Lib\pickle.py", line 662, in save_reduce
save(state)
File "c:\python\python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "c:\python\python37\Lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "c:\python\python37\Lib\pickle.py", line 882, in _batch_setitems
save(v)
File "c:\python\python37\Lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "c:\python\python37\Lib\pickle.py", line 662, in save_reduce
save(state)
File "c:\python\python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "c:\python\python37\Lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "c:\python\python37\Lib\pickle.py", line 887, in _batch_setitems
save(v)
File "c:\python\python37\Lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "c:\python\python37\Lib\pickle.py", line 662, in save_reduce
save(state)
File "c:\python\python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "c:\python\python37\Lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "c:\python\python37\Lib\pickle.py", line 882, in _batch_setitems
save(v)
File "c:\python\python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "c:\python\python37\Lib\pickle.py", line 816, in save_list
self._batch_appends(obj)
File "c:\python\python37\Lib\pickle.py", line 843, in _batch_appends
save(tmp[0])
File "c:\python\python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "c:\python\python37\Lib\pickle.py", line 771, in save_tuple
save(element)
File "c:\python\python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "c:\python\python37\Lib\pickle.py", line 786, in save_tuple
save(element)
File "c:\python\python37\Lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "c:\python\python37\Lib\pickle.py", line 662, in save_reduce
save(state)
File "c:\python\python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "c:\python\python37\Lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "c:\python\python37\Lib\pickle.py", line 882, in _batch_setitems
save(v)
File "c:\python\python37\Lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "c:\python\python37\Lib\pickle.py", line 662, in save_reduce
save(state)
File "c:\python\python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "c:\python\python37\Lib\pickle.py", line 786, in save_tuple
save(element)
File "c:\python\python37\Lib\pickle.py", line 524, in save
rv = reduce(self.proto)
File "stringsource", line 2, in preshed.maps.PreshMap.__reduce_cython__
TypeError: self.c_map cannot be converted to a Python object for pickling
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File ".\main.py", line 77, in <module>
plac.call(main)
File "C:\Users\Bram\.virtualenvs\spacy-multithread-0ZRRAX-8\lib\site-packages\plac_core.py", line 328, in call
cmd, result = parser.consume(arglist)
File "C:\Users\Bram\.virtualenvs\spacy-multithread-0ZRRAX-8\lib\site-packages\plac_core.py", line 207, in consume
return cmd, self.func(*(args + varargs + extraopts), **kwargs)
File ".\main.py", line 41, in main
executor(tasks)
File "C:\Users\Bram\.virtualenvs\spacy-multithread-0ZRRAX-8\lib\site-packages\joblib\parallel.py", line 934, in __call__
self.retrieve()
File "C:\Users\Bram\.virtualenvs\spacy-multithread-0ZRRAX-8\lib\site-packages\joblib\parallel.py", line 833, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "C:\Users\Bram\.virtualenvs\spacy-multithread-0ZRRAX-8\lib\site-packages\joblib\_parallel_backends.py", line 521, in wrap_future_result
return future.result(timeout=timeout)
File "c:\python\python37\Lib\concurrent\futures\_base.py", line 432, in result
return self.__get_result()
File "c:\python\python37\Lib\concurrent\futures\_base.py", line 384, in __get_result
raise self._exception
_pickle.PicklingError: Could not pickle the task to send it to the workers.
I tried the nightly as suggested here https://github.com/explosion/spaCy/issues/1669 but the issue remains:
PS C:\Python\projects\spacy-multithread> pipenv run python .\main.py .\data\output\ en_core_web_sm -n 4
Loaded model 'en_core_web_sm'
Loading IMDB data...
Processing texts...
[The traceback that follows is identical to the one above, again ending in:]
TypeError: self.c_map cannot be converted to a Python object for pickling
_pickle.PicklingError: Could not pickle the task to send it to the workers.
I'm running into the same issue.
Hmm. Could you just do a quick test for me?
>>> import spacy
>>> nlp = spacy.load('en_core_web_sm')
>>> import pickle
>>> b = pickle.dumps(nlp)
This is working for me, so I'm not sure what's going wrong. I'd be interested to see whether this is failing for you.
The pickling of the model itself seems to go fine. I can do this without issue:
import spacy
import pickle
nlp = spacy.load('en_core_web_sm')
b = pickle.dumps(nlp)
c = pickle.loads(b)
But something else causes an issue, it seems:
Loaded model 'en_core_web_sm'
Loading IMDB data...
Downloading data from http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
84123648/84125825 [============================>.] - ETA: 0s Untaring file...
Processing texts...
joblib.externals.loky.process_executor._RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\bmvroy\.virtualenvs\spacy_mp_test-4MLbpdkD\lib\site-packages\joblib\externals\loky\backend\queues.py", line 150, in _feed
obj_ = dumps(obj, reducers=reducers)
File "C:\Users\bmvroy\.virtualenvs\spacy_mp_test-4MLbpdkD\lib\site-packages\joblib\externals\loky\backend\reduction.py", line 243, in dumps
dump(obj, buf, reducers=reducers, protocol=protocol)
File "C:\Users\bmvroy\.virtualenvs\spacy_mp_test-4MLbpdkD\lib\site-packages\joblib\externals\loky\backend\reduction.py", line 236, in dump
_LokyPickler(file, reducers=reducers, protocol=protocol).dump(obj)
File "C:\Users\bmvroy\.virtualenvs\spacy_mp_test-4MLbpdkD\lib\site-packages\joblib\externals\cloudpickle\cloudpickle.py", line 284, in dump
return Pickler.dump(self, obj)
File "C:\Python\Python37\Lib\pickle.py", line 437, in dump
self.save(obj)
File "C:\Python\Python37\Lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\Python\Python37\Lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\Python\Python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python\Python37\Lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\Python\Python37\Lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\Python\Python37\Lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\Python\Python37\Lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\Python\Python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python\Python37\Lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\Python\Python37\Lib\pickle.py", line 887, in _batch_setitems
save(v)
File "C:\Python\Python37\Lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\Python\Python37\Lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\Python\Python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python\Python37\Lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\Python\Python37\Lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\Python\Python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python\Python37\Lib\pickle.py", line 816, in save_list
self._batch_appends(obj)
File "C:\Python\Python37\Lib\pickle.py", line 843, in _batch_appends
save(tmp[0])
File "C:\Python\Python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python\Python37\Lib\pickle.py", line 771, in save_tuple
save(element)
File "C:\Python\Python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python\Python37\Lib\pickle.py", line 786, in save_tuple
save(element)
File "C:\Python\Python37\Lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\Python\Python37\Lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\Python\Python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python\Python37\Lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\Python\Python37\Lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\Python\Python37\Lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\Python\Python37\Lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\Python\Python37\Lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python\Python37\Lib\pickle.py", line 786, in save_tuple
save(element)
File "C:\Python\Python37\Lib\pickle.py", line 524, in save
rv = reduce(self.proto)
File "stringsource", line 2, in preshed.maps.PreshMap.__reduce_cython__
TypeError: self.c_map cannot be converted to a Python object for pickling
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/Python/projects/spacy_mp_test/main.py", line 77, in <module>
plac.call(main)
File "C:\Users\bmvroy\.virtualenvs\spacy_mp_test-4MLbpdkD\lib\site-packages\plac_core.py", line 328, in call
cmd, result = parser.consume(arglist)
File "C:\Users\bmvroy\.virtualenvs\spacy_mp_test-4MLbpdkD\lib\site-packages\plac_core.py", line 207, in consume
return cmd, self.func(*(args + varargs + extraopts), **kwargs)
File "C:/Python/projects/spacy_mp_test/main.py", line 41, in main
executor(tasks)
File "C:\Users\bmvroy\.virtualenvs\spacy_mp_test-4MLbpdkD\lib\site-packages\joblib\parallel.py", line 934, in __call__
self.retrieve()
File "C:\Users\bmvroy\.virtualenvs\spacy_mp_test-4MLbpdkD\lib\site-packages\joblib\parallel.py", line 833, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "C:\Users\bmvroy\.virtualenvs\spacy_mp_test-4MLbpdkD\lib\site-packages\joblib\_parallel_backends.py", line 521, in wrap_future_result
return future.result(timeout=timeout)
File "C:\Python\Python37\Lib\concurrent\futures\_base.py", line 432, in result
return self.__get_result()
File "C:\Python\Python37\Lib\concurrent\futures\_base.py", line 384, in __get_result
raise self._exception
_pickle.PicklingError: Could not pickle the task to send it to the workers.
The sample example works only with the latest nightly, not with 2.0.18. However, I got the same error as in the comment above when running multiprocess.py with the nightly version.
I'm not positive, but I think this is related to a change in joblib. On my machine it works if I specify the "multiprocessing" backend in joblib.Parallel instead of using their new loky library. Change here: 582be874
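For reference, here is a minimal sketch of that workaround, not the spaCy example itself; the count_tokens helper, the batch contents, and the n_jobs value are made up for illustration, and the relevant part is passing backend="multiprocessing" to joblib.Parallel:
import spacy
from joblib import Parallel, delayed

def count_tokens(nlp, texts):
    # The nlp object sent to each worker is serialized with plain pickle,
    # which works (see the pickle.dumps(nlp) test above).
    return [len(doc) for doc in nlp.pipe(texts)]

if __name__ == "__main__":
    nlp = spacy.load("en_core_web_sm")
    batches = [["This is a test."], ["Another batch of text."]]
    # Force the older multiprocessing backend instead of the default loky one.
    executor = Parallel(n_jobs=2, backend="multiprocessing")
    tasks = (delayed(count_tokens)(nlp, batch) for batch in batches)
    print(executor(tasks))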
Tested and approved. Works as expected now.
This thread has been automatically locked since there has not been any recent activity after it was closed. Please open a new issue for related bugs.