When I try to run the example from https://docs.pymc.io/notebooks/PyMC3_tips_and_heuristic.html in Jupyter, I get an error.
The code is:
class CAR(distribution.Continuous):
    """
    Conditional Autoregressive (CAR) distribution.

    Parameters
    ----------
    w : list of weight information
    a : list of adjacency information
    tau : precision at each location (multiplied internally by the
        row sums of ``w``)
    """

    def __init__(self, w, a, tau, *args, **kwargs):
        super(CAR, self).__init__(*args, **kwargs)
        adjacency = tt.as_tensor_variable(a)
        weights = tt.as_tensor_variable(w)
        self.a = adjacency
        self.w = weights
        # Scale the supplied precision by the total weight of each row.
        self.tau = tau * tt.sum(weights, axis=1)
        self.mode = 0.

    def get_mu(self, x):
        """Return, for each row of ``w``/``a``, the weighted mean of ``x``
        taken at the indices listed in that row of ``a``."""

        def weighted_mean(row_w, row_a):
            # Adjacency entries are used as indices into x, so cast to int.
            idx = tt.cast(row_a, 'int32')
            return tt.sum(row_w * x[idx]) / tt.sum(row_w)

        means, _ = scan(fn=weighted_mean, sequences=[self.w, self.a])
        return means

    def logp(self, x):
        """Return the summed Normal log-density of ``x`` around the
        weighted means from ``get_mu``, using precision ``self.tau``."""
        return tt.sum(
            continuous.Normal.dist(mu=self.get_mu(x), tau=self.tau).logp(x)
        )
with pm.Model() as model1:
    # Intercept: vague Normal prior
    beta0 = pm.Normal('beta0', mu=0.0, tau=1e-5)
    # Covariate effect: vague Normal prior
    beta1 = pm.Normal('beta1', mu=0.0, tau=1e-5)

    # Precision of the random (hierarchical) effects
    tau_h = pm.Gamma('tau_h', alpha=3.2761, beta=1.81)
    # Precision of the spatial clustering effects
    tau_c = pm.Gamma('tau_c', alpha=1.0, beta=1.0)

    # Regional random effects
    theta = pm.Normal('theta', mu=0.0, tau=tau_h, shape=N)
    mu_phi = CAR('mu_phi', w=wmat, a=amat, tau=tau_c, shape=N)

    # Zero-centre phi
    phi = pm.Deterministic('phi', mu_phi - tt.mean(mu_phi))

    # Mean model
    mu = pm.Deterministic(
        'mu',
        tt.exp(logE + beta0 + beta1 * aff + theta + phi),
    )

    # Likelihood
    Yi = pm.Poisson('Yi', mu=mu, observed=O)

    # Marginal SD of heterogeneity effects
    sd_h = pm.Deterministic('sd_h', tt.std(theta))
    # Marginal SD of clustering (spatial) effects
    sd_c = pm.Deterministic('sd_c', tt.std(phi))
    # Proportion of spatial variance
    alpha = pm.Deterministic('alpha', sd_c / (sd_h + sd_c))

    trace1 = pm.sample(
        3000,
        cores=2,
        tune=1000,
        nuts_kwargs={'max_treedepth': 15},
    )
And the error
BrokenPipeError Traceback (most recent call last)
<ipython-input-7-088cf3f1b726> in <module>
30 alpha = pm.Deterministic('alpha', sd_c/(sd_h+sd_c))
31
---> 32 trace1 = pm.sample(int(3e3), cores=2, tune=1000, nuts_kwargs={'max_treedepth': 15})
C:\Program\lib\site-packages\pymc3\sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, nuts_kwargs, step_kwargs, progressbar, model, random_seed, live_plot, discard_tuned_samples, live_plot_kwargs, compute_convergence_checks, use_mmap, **kwargs)
437 _print_step_hierarchy(step)
438 try:
--> 439 trace = _mp_sample(**sample_args)
440 except pickle.PickleError:
441 _log.warning("Could not pickle model, sampling singlethreaded.")
C:\Program\lib\site-packages\pymc3\sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, use_mmap, **kwargs)
984 sampler = ps.ParallelSampler(
985 draws, tune, chains, cores, random_seed, start, step,
--> 986 chain, progressbar)
987 try:
988 try:
C:\Program\lib\site-packages\pymc3\parallel_sampling.py in __init__(self, draws, tune, chains, cores, seeds, start_points, step_method, start_chain_num, progressbar)
311 draws, tune, step_method, chain + start_chain_num, seed, start
312 )
--> 313 for chain, seed, start in zip(range(chains), seeds, start_points)
314 ]
315
C:\Program\lib\site-packages\pymc3\parallel_sampling.py in <listcomp>(.0)
311 draws, tune, step_method, chain + start_chain_num, seed, start
312 )
--> 313 for chain, seed, start in zip(range(chains), seeds, start_points)
314 ]
315
C:\Program\lib\site-packages\pymc3\parallel_sampling.py in __init__(self, draws, tune, step_method, chain, seed, start)
202 )
203 # We fork right away, so that the main process can start tqdm threads
--> 204 self._process.start()
205
206 @property
C:\Program\lib\multiprocessing\process.py in start(self)
110 'daemonic processes are not allowed to have children'
111 _cleanup()
--> 112 self._popen = self._Popen(self)
113 self._sentinel = self._popen.sentinel
114 # Avoid a refcycle if the target function holds an indirect
C:\Program\lib\multiprocessing\context.py in _Popen(process_obj)
221 @staticmethod
222 def _Popen(process_obj):
--> 223 return _default_context.get_context().Process._Popen(process_obj)
224
225 class DefaultContext(BaseContext):
C:\Program\lib\multiprocessing\context.py in _Popen(process_obj)
320 def _Popen(process_obj):
321 from .popen_spawn_win32 import Popen
--> 322 return Popen(process_obj)
323
324 class SpawnContext(BaseContext):
C:\Program\lib\multiprocessing\popen_spawn_win32.py in __init__(self, process_obj)
63 try:
64 reduction.dump(prep_data, to_child)
---> 65 reduction.dump(process_obj, to_child)
66 finally:
67 set_spawning_popen(None)
C:\Program\lib\multiprocessing\reduction.py in dump(obj, file, protocol)
58 def dump(obj, file, protocol=None):
59 '''Replacement for pickle.dump() using ForkingPickler.'''
---> 60 ForkingPickler(file, protocol).dump(obj)
61
62 #
BrokenPipeError: [Errno 32] Broken pipe
The Jupyter console also printed a traceback:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\Program\lib\multiprocessing\spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "C:\Program\lib\multiprocessing\spawn.py", line 115, in _main
self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'CAR' on <module '__main__' (built-in)>
This is related to https://github.com/pymc-devs/pymc3/issues/3140. For now, try
trace = pm.sample(1000, tune=1000, cores=1)
@wangtong321, this is a typical error of the jupyter notebook with multiprocessing.
pymc3 uses multiprocessing to sample many chains in parallel, hence @junpenglao's suggestion of running sample with cores=1, in order to avoid multiprocessing entirely.
The problem is that, when multiprocessing runs with the spawn (default on Windows) or forkserver start methods, it starts a completely new process that does not have the class definitions that were run in other cells of the notebook. When the new processes spawn, they are sent through a communication pipe 1) the task that they must do and 2) the data to use. This data contains pickled instances of several classes, in particular some instance of the CAR class that you defined in a previous cell of the jupyter notebook. The thing is that the new process is unaware of the CAR class definition because it was defined in a completely independent process. What ends up happening is:
1. The main process pickles the CAR instance and sends it through the pipe to the new process.
2. The new process tries to unpickle it, looks for the CAR class in its own __main__ module and fails.
3. This raises the AttributeError you see in the end.

Regrettably, this is a problem that goes beyond pymc3, and the only workaround that you can try is detailed here. What you have to do is write the CAR class definition in a script outside the jupyter notebook (e.g. car_distribution.py), and in the notebook you import the CAR class with `from car_distribution import CAR`.
This should allow you to run the notebook with multiple cores.
I'm hesitant on what we can do about this problem at the pymc3 level. Maybe we could add some lines explaining the potential issue of running the notebook on windows, and how to work around it. What do you think @junpenglao, @aseyboldt?
Thanks very much. @lucianopaz @junpenglao
My PC now has another bug : ( , I'll try your suggestion when I fix the bug.
OK, it worked!
Any news on this?
I am intermittently getting the broken pipe error when sampling within a Spyder cell, on macOS.
Most helpful comment
This is related to https://github.com/pymc-devs/pymc3/issues/3140. For now, try