Pymc3: GP-Introduction notebook fails on sample_gp

Created on 10 Apr 2017 · 5Comments · Source: pymc-devs/pymc3

Running the GP-introduction notebook from master currently fails when sample_gp runs:

with model:
    gp_samples = pm.gp.sample_gp(trace[1000:], y_obs, Z, samples=50, random_seed=42)
  0%|          | 0/50 [00:00<?, ?it/s]
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-7-5bd56de45f47> in <module>()
      1 with model:
----> 2     gp_samples = pm.gp.sample_gp(trace[1000:], y_obs, Z, samples=50, random_seed=42)

/Users/fonnescj/Repos/pymc3/pymc3/gp/gp.py in sample_gp(trace, gp, X_values, samples, obs_noise, model, random_seed, progressbar)
    137     S_post = S_zz - tt.dot(tt.dot(S_xz.T, S_inv), S_xz)
    138 
--> 139     gp_post = MvNormal.dist(m_post, S_post, shape=Z.shape[0])
    140 
    141     samples = [gp_post.random(point=trace[idx]) for idx in indices]

/Users/fonnescj/Repos/pymc3/pymc3/distributions/distribution.py in dist(cls, *args, **kwargs)
     46     def dist(cls, *args, **kwargs):
     47         dist = object.__new__(cls)
---> 48         dist.__init__(*args, **kwargs)
     49         return dist
     50 

/Users/fonnescj/Repos/pymc3/pymc3/distributions/multivariate.py in __init__(self, mu, cov, tau, chol, *args, **kwargs)
     68         self.has_tau = tau is not None
     69         if cov is not None:
---> 70             self.chol_cov = tt.slinalg.cholesky(tt.as_tensor_variable(cov))
     71         elif tau is not None:
     72             self.chol_tau = tt.slinalg.cholesky(tt.as_tensor_variable(tau))

/Users/fonnescj/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
    672                 thunk.outputs = [storage_map[v] for v in node.outputs]
    673 
--> 674                 required = thunk()
    675                 assert not required  # We provided all inputs
    676 

/Users/fonnescj/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in rval(p, i, o, n)
    870             # default arguments are stored in the closure of `rval`
    871             def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 872                 r = p(n, [x[0] for x in i], o)
    873                 for o in node.outputs:
    874                     compute_map[o][0] = True

/Users/fonnescj/anaconda3/lib/python3.6/site-packages/theano/tensor/slinalg.py in perform(self, node, inputs, outputs)
     61         x = inputs[0]
     62         z = outputs[0]
---> 63         z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
     64 
     65     def grad(self, inputs, gradients):

/Users/fonnescj/anaconda3/lib/python3.6/site-packages/scipy/linalg/decomp_cholesky.py in cholesky(a, lower, overwrite_a, check_finite)
     79     """
     80     c, lower = _cholesky(a, lower=lower, overwrite_a=overwrite_a, clean=True,
---> 81                             check_finite=check_finite)
     82     return c
     83 

/Users/fonnescj/anaconda3/lib/python3.6/site-packages/scipy/linalg/decomp_cholesky.py in _cholesky(a, lower, overwrite_a, clean, check_finite)
     28     c, info = potrf(a1, lower=lower, overwrite_a=overwrite_a, clean=clean)
     29     if info > 0:
---> 30         raise LinAlgError("%d-th leading minor not positive definite" % info)
     31     if info < 0:
     32         raise ValueError('illegal value in %d-th argument of internal potrf'

LinAlgError: 6-th leading minor not positive definite

Source

fonnesbeck

Most helpful comment

A bit off topic, but while working on this I noticed that sampling in the examples in this notebook is much faster if we change the priors a bit:

with pm.Model() as model:
    # priors on the covariance function hyperparameters
    #l = pm.Uniform('l', 0, 10)  # old
    l = pm.HalfCauchy('l', beta=3)  # new

    # uninformative prior on the function variance
    #log_s2_f = pm.Uniform('log_s2_f', lower=-10, upper=5)
    #s2_f = pm.Deterministic('s2_f', tt.exp(log_s2_f))  # old
    s2_f = pm.HalfCauchy('s2_f', beta=2)  # new

    # uninformative prior on the noise variance
    #log_s2_n = pm.Uniform('log_s2_n', lower=-10, upper=5)
    #s2_n = pm.Deterministic('s2_n', tt.exp(log_s2_n))  # old
    s2_n = pm.HalfCauchy('s2_n', beta=2)  # new

    # covariance functions for the function f and the noise 
    f_cov = s2_f * pm.gp.cov.ExpQuad(1, l)

    y_obs = pm.gp.GP('y_obs', cov_func=f_cov, sigma=s2_n, observed={'X':X, 'Y':y})

To me, they sound like more realistic priors as well.

aseyboldt on 14 Apr 2017

👍2

All 5 comments

I think this was introduced with #1875. The problem is that the theano cholesky op raises an error if it can't decompose the matrix. We can't catch that error and convert it to a -inf logp value. The best way to fix this might be to add an option nofail or similar to the op. If this flag is set, it should return nans if the matrix does not seem to be pos definite.

aseyboldt on 11 Apr 2017

A bit off topic, but while working on this I noticed that sampling in the examples in this notebook is much faster if we change the priors a bit:

with pm.Model() as model:
    # priors on the covariance function hyperparameters
    #l = pm.Uniform('l', 0, 10)  # old
    l = pm.HalfCauchy('l', beta=3)  # new

    # uninformative prior on the function variance
    #log_s2_f = pm.Uniform('log_s2_f', lower=-10, upper=5)
    #s2_f = pm.Deterministic('s2_f', tt.exp(log_s2_f))  # old
    s2_f = pm.HalfCauchy('s2_f', beta=2)  # new

    # uninformative prior on the noise variance
    #log_s2_n = pm.Uniform('log_s2_n', lower=-10, upper=5)
    #s2_n = pm.Deterministic('s2_n', tt.exp(log_s2_n))  # old
    s2_n = pm.HalfCauchy('s2_n', beta=2)  # new

    # covariance functions for the function f and the noise 
    f_cov = s2_f * pm.gp.cov.ExpQuad(1, l)

    y_obs = pm.gp.GP('y_obs', cov_func=f_cov, sigma=s2_n, observed={'X':X, 'Y':y})

To me, they sound like more realistic priors as well.

aseyboldt on 14 Apr 2017

👍2

@aseyboldt Agree on the priors, should change those as well.

twiecki on 14 Apr 2017

I think that's a good idea too. Since making that example I've found this resource by the stan folks. I agree that the GP examples need a once over, I'll open a PR.