Statsmodels: BUG : fail with either MLEResults.extend or MLEResults.apply

Created on 23 Apr 2020 · 3Comments · Source: statsmodels/statsmodels

Expected Output

I want to get the out-of-sample predict from the state space model, however, it gives me a error below...

Describe the bug

statsmodels.tsa.statespace.mlemodel.MLEResults.extend, statsmodels.tsa.statespace.mlemodel.MLEResults.apply
fail with either res.extend or res.apply
1.

model_pre = res.apply(mdf2)
Traceback (most recent call last):

  File "<ipython-input-84-48626ef2ae71>", line 1, in <module>
    model_pre = res.apply(mdf2)

  File "C:\Users\ChingChih\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\mlemodel.py", line 3859, in apply
    mod = self.model.clone(endog, exog=exog, **kwargs)

  File "C:\Users\ChingChih\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\mlemodel.py", line 253, in clone
    raise NotImplementedError

NotImplementedError

model_pre = res.extend(mdf2)
Traceback (most recent call last):

File "", line 1, in
model_pre = res.extend(mdf2)

File "C:\Users\ChingChih\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\mlemodel.py", line 3772, in extend
mod = self.model.clone(endog, exog=exog, **kwargs)

File "C:\Users\ChingChih\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\mlemodel.py", line 253, in clone
raise NotImplementedError

NotImplementedError

data I use

data

Code Sample, a copy-pastable example if possible

# -*- coding: utf-8 -*-

# =============================================================================
# data
# =============================================================================
import pandas as pd
import numpy as np
from torch.nn.parameter import Parameter

import pandas_datareader.data as web
import datetime
rng = pd.date_range(start='2/1/1959', end='12/31/2002', freq='M')

fredmd = pd.read_excel('bciq1m4.xls', header=0, index_col = 0) 
fredmd .index = rng

# =============================================================================
# model construction
# =============================================================================
from abc import ABC
import statsmodels.api as sm
import numpy as np

np.random.seed(42)

class MFVAR(sm.tsa.statespace.MLEModel, ABC):
    def __init__(self, endog):
        n1 = 1   # number of quarterly variables (y)
        n2 = 4   # number of monthly variables (x)
        ns = (n1+n2)*5  # number of state variables
        tlen, nv = endog.shape   # number of observed v. & time length
        # actually, nv = n1+n2
        self.n1, self.n2, self.ns, self.nv = n1, n2, ns, nv

        super().__init__(endog, k_states=ns, k_posdef=nv, #k_posdef外生衝擊個數(echelon)
                         initialization='stationary') #從stationary改成diffuse (by Chad Fulton)

        # 1. time-varying design matrix Z[t]
        z0 = block_diag(1.0/3.0, 1.0, n1, n2)
        z1 = block_diag(2.0/3.0, 0.0, n1, n2)
        z2 = block_diag(1.0, 0.0, n1, n2)
        z3 = block_diag(2.0/3.0, 0.0, n1, n2)
        z4 = block_diag(1.0/3.0, 0.0, n1, n2)
        z = np.column_stack((z0, z1, z2, z3, z4))
        self['design',: ,:] = z

        # # 2. time-varying observation disturbance H[t]
        # when y is observable
        h = np.zeros((nv, nv))
        self['obs_cov',:,:] = h
        # self['obs_cov'] = np.eye(nv)

        # 3. time-invariant selection matrix R
        self['selection'] = np.eye(ns, nv)

    def update(self, params, transformed=True, **kwargs):
        n1, n2, ns, nv = self.n1, self.n2, self.ns, self.nv
        # nv = n1 + n2
        # ns = 5 * nv

        # 4. time-invariant transition matrix T
        var_coef = params[:nv**2].reshape(nv, nv)
        var_coef = np.column_stack((var_coef, np.zeros((nv, nv*4))))
        self['transition'] = np.row_stack((var_coef, np.eye(nv*4, ns)))

        # 5. time-invariant state covariance Q
        tril_state_cov = vec_tril(params[nv**2:])   # lower-triangular matrix
        self['state_cov'] = tril_state_cov @ tril_state_cov.transpose() #為何有@:矩陣相乘
# =============================================================================
#     mdf1
# =============================================================================
    @property
    def start_params(self):
        phi_11, phi_12, phi_13, phi_14, phi_15 = 0.9, 0., 0., 0., 0.   # initial guess
        phi_21, phi_22, phi_23, phi_24, phi_25 = 0., 0.5, 0., 0., 0.  # initial guess
        phi_31, phi_32, phi_33, phi_34, phi_35 = 0., 0., 0.2, 0., 0.  # initial guess
        phi_41, phi_42, phi_43, phi_44, phi_45 = 0., 0., 0., 0.8, 0.  # initial guess
        phi_51, phi_52, phi_53, phi_54, phi_55 = 0.8, 0., 0., 0., 0.7  # initial guess
        sig_11 =  1.0
        sig_21, sig_22 =  0., 0.3
        sig_31, sig_32, sig_33 =  0., 0., 0.7
        sig_41, sig_42, sig_43, sig_44 =  0., 0., 0., 0.3
        sig_51, sig_52, sig_53, sig_54, sig_55 =  0., 0., 0., 0., 0.9
        return np.array([phi_11, phi_12, phi_13, phi_14, phi_15,
                         phi_21, phi_22, phi_23, phi_24, phi_25,
                         phi_31, phi_32, phi_33, phi_34, phi_35,
                         phi_41, phi_42, phi_43, phi_44, phi_45,
                         phi_51, phi_52, phi_53, phi_54, phi_55,
                        sig_11, 
                        sig_21, sig_22, 
                        sig_31, sig_32, sig_33, 
                        sig_41, sig_42, sig_43, sig_44, 
                        sig_51, sig_52, sig_53, sig_54, sig_55])



def block_diag(a, b, n1, n2):
    """
    Create block diagonal matrix of this form:
     a*I |  0
    -----+-----
     0   |  b*I
     The size of the upper left corner is (n1, n1)
     The size of the lower right corner is (n2, n2)
    """
    upper_left = np.identity(n1)*a
    lower_right = np.identity(n2)*b
    upper_right = np.zeros((n1, n2))
    lower_left = np.zeros((n2, n1))
    upper = np.column_stack((upper_left, upper_right))
    lower = np.column_stack((lower_left, lower_right))
    z0 = np.row_stack((upper, lower))
    return z0


def vec_tril(x, order='C'):
    """
    Reverse half-vectorization into a triangular matrix in the order of C or F(ortran)
    If x is complex, ivech(x) is conjugate symmetric.
    That is, ivech(x) = ivech(x).transpose().conj()
    """
    r = len(x)
    n = np.int((np.sqrt(1+8*r)-1)/2)
    # if all(np.isreal(x)):
    #     a_mat = np.zeros((n, n))
    # else:
    #     a_mat = np.zeros((n, n), 'complex')
    #although you want the dtype to be complex. i.e. np.isreal(np.array([1, 2, 3]) + 0j) returns True.
    a_mat = np.zeros((n, n), dtype=x.dtype)

    if order == 'C':
        d = np.tril_indices(n)
        a_mat[d] = x
    else:
        a_mat[:, 0] = x[0:n]
        oldindex = n
        for ii in np.arange(1, n):
            newindex = oldindex + n - ii
            a_mat[ii:n, ii] = x[oldindex:newindex]
            oldindex = newindex
    return a_mat



# =============================================================================
# SS fit
# =============================================================================
#train
mdf0 = fredmd
# model = MFVAR(mdf0)
mdf1 = mdf0 [:int(mdf0.shape[0]*0.8)]
model = MFVAR(mdf1)
res = model.fit()
print(res.summary())


# #test
mdf2 = mdf0 [int(mdf0.shape[0]*0.8):]
model_pre = res.extend(mdf2)
print(model_pre.summary())


mphi = res.params[0:25].to_numpy().reshape(5,5)
a = np.zeros((5, 5), float)
np.fill_diagonal(a, vec_tril(res.params[25:]).diagonal())
msigma = vec_tril(res.params[25:]) + vec_tril(res.params[25:]).T - a

# pd.DataFrame(mphi).to_csv("mphi _py.csv")
# pd.DataFrame(msigma).to_csv("msigma_py.csv")
fcast_res2 = res.get_forecast(steps=5)
# Note: since we did not specify the alpha parameter, the
# confidence level is at the default, 95%
print(fcast_res2.summary_frame())

Output of `import statsmodels.api as sm; sm.show_versions()`

INSTALLED VERSIONS
------------------
Python: 3.7.4.final.0

statsmodels
===========

Installed: 0.12.0.dev0+195.g1ccd5cdba (C:\Users\ChingChih\Anaconda3\lib\site-packages\statsmodels)

Required Dependencies
=====================

cython: 0.29.15 (C:\Users\ChingChih\Anaconda3\lib\site-packages\Cython)
numpy: 1.18.1 (C:\Users\ChingChih\Anaconda3\lib\site-packages\numpy)
scipy: 1.4.1 (C:\Users\ChingChih\Anaconda3\lib\site-packages\scipy)
pandas: 1.0.3 (C:\Users\ChingChih\Anaconda3\lib\site-packages\pandas)
    dateutil: 2.8.1 (C:\Users\ChingChih\Anaconda3\lib\site-packages\dateutil)
patsy: 0.5.1 (C:\Users\ChingChih\Anaconda3\lib\site-packages\patsy)

Optional Dependencies
=====================

matplotlib: 3.2.1 (C:\Users\ChingChih\Anaconda3\lib\site-packages\matplotlib)
    backend: module://ipykernel.pylab.backend_inline 
cvxopt: Not installed
joblib: 0.14.1 (C:\Users\ChingChih\Anaconda3\lib\site-packages\joblib)

Developer Tools
================

IPython: 7.13.0 (C:\Users\ChingChih\Anaconda3\lib\site-packages\IPython)
    jinja2: 2.11.1 (C:\Users\ChingChih\Anaconda3\lib\site-packages\jinja2)
sphinx: 2.4.4 (C:\Users\ChingChih\Anaconda3\lib\site-packages\sphinx)
    pygments: 2.6.1 (C:\Users\ChingChih\Anaconda3\lib\site-packages\pygments)
pytest: Not installed
virtualenv: Not installed

comp-tsa-statespace question

Source

ChenChingChih

All 3 comments

The issue here is that we don't have a generic way to clone models, because in general, subclasses might have a bunch of positional and keyword arguments that need to be passed to the cloning class constructor. As a result, the base class clone method raises a NotImplementedError.

In your case, this looks like it will be an easy fix, since you don't have any arguments other than endog. You can try adding the following method to your class:

    def clone(self, endog, exog=None, **kwargs):
        return self._clone_from_init_kwds(endog, **kwargs)

ChadFulton on 24 Apr 2020

🎉1

Thanks a lot @ChadFulton !!! You save me so much time~
It is so happy to see the summary table pops up~

    mdf2 = mdf0 [int(mdf0.shape[0]*0.8):]
    model_pre = res.extend(mdf2)
    print(model_pre.summary())
                                                 Statespace Model Results                                            
    =================================================================================================================
    Dep. Variable:     ['mPAYEMS', 'mINC', 'mCMRMTSPL', 'mINDPRO', 'mGDPC1']   No. Observations:                  141
    Model:                                                             MFVAR   Log Likelihood                1312.343
    Date:                                                   Fri, 24 Apr 2020   AIC                          -2544.687
    Time:                                                           09:37:59   BIC                          -2426.736
    Sample:                                                       04-30-2006   HQIC                         -2496.756
                                                                - 12-31-2017                                         
    Covariance Type:                                                     opg                                         
    ==============================================================================
                     coef    std err          z      P>|z|      [0.025      0.975]
    ------------------------------------------------------------------------------
    param.0       -0.4394      0.226     -1.943      0.052      -0.883       0.004
    param.1         5e-05     43.685   1.14e-06      1.000     -85.622      85.622
    param.2        0.0004     21.577   1.88e-05      1.000     -42.290      42.291
    param.3        0.0004     28.772   1.22e-05      1.000     -56.392      56.392
    param.4        0.1941      0.349      0.556      0.578      -0.490       0.878
    param.5       -0.0001      0.000     -0.469      0.639      -0.001       0.000
    param.6       -0.0379      0.039     -0.969      0.332      -0.115       0.039
    param.7       -0.0061      0.028     -0.220      0.826      -0.060       0.048
    param.8        0.1093      0.040      2.726      0.006       0.031       0.188
    param.9       -0.0004      0.001     -0.461      0.645      -0.002       0.001
    param.10       0.0007      0.001      1.082      0.279      -0.001       0.002
    param.11       0.0737      0.144      0.512      0.608      -0.208       0.356
    param.12      -0.2126      0.044     -4.885      0.000      -0.298      -0.127
    param.13       0.2035      0.064      3.159      0.002       0.077       0.330
    param.14       0.0015      0.002      0.683      0.494      -0.003       0.006
    param.15      -0.0007      0.000     -1.419      0.156      -0.002       0.000
    param.16      -0.0125      0.106     -0.119      0.906      -0.219       0.194
    param.17      -0.1573      0.027     -5.824      0.000      -0.210      -0.104
    param.18       0.2206      0.045      4.886      0.000       0.132       0.309
    param.19      -0.0009      0.002     -0.589      0.556      -0.004       0.002
    param.20       0.6464      0.591      1.094      0.274      -0.512       1.804
    param.21    2.691e-05     11.618   2.32e-06      1.000     -22.772      22.772
    param.22       0.0001      5.299   2.34e-05      1.000     -10.385      10.385
    param.23      -0.0001      6.315  -1.71e-05      1.000     -12.378      12.378
    param.24       0.6768      0.384      1.761      0.078      -0.077       1.430
    param.25       2.0440      0.158     12.967      0.000       1.735       2.353
    param.26      -0.0005      0.001     -0.795      0.427      -0.002       0.001
    param.27      -0.0048      0.000    -43.991      0.000      -0.005      -0.005
    param.28       0.0015      0.002      0.988      0.323      -0.002       0.005
    param.29      -0.0020      0.001     -3.071      0.002      -0.003      -0.001
    param.30      -0.0110      0.000    -29.167      0.000      -0.012      -0.010
    param.31      -0.0003      0.001     -0.263      0.793      -0.003       0.002
    param.32      -0.0020      0.001     -3.615      0.000      -0.003      -0.001
    param.33      -0.0030      0.000     -9.120      0.000      -0.004      -0.002
    param.34      -0.0072      0.000    -36.624      0.000      -0.008      -0.007
    param.35       0.0222      0.442      0.050      0.960      -0.845       0.889
    param.36      -0.0012      0.064     -0.019      0.985      -0.128       0.125
    param.37      -0.0063      0.065     -0.097      0.923      -0.133       0.120
    param.38      -0.0065      0.070     -0.094      0.925      -0.143       0.130
    param.39       0.3110      0.103      3.008      0.003       0.108       0.514
    ========================================================================================================================
    Ljung-Box (Q):          2436.83, 28.41, 44.45, 54.13, 1907.05   Jarque-Bera (JB):   11.03, 2320.36, 72.37, 263.34, 26.53
    Prob(Q):                         0.00, 0.91, 0.29, 0.07, 0.00   Prob(JB):                   0.00, 0.00, 0.00, 0.00, 0.00
    Heteroskedasticity (H):          0.48, 0.05, 0.14, 0.29, 0.50   Skew:                  -0.02, -1.43, -0.86, -1.38, -1.05
    Prob(H) (two-sided):             0.01, 0.00, 0.00, 0.00, 0.02   Kurtosis:                  1.63, 22.67, 6.06, 9.10, 2.63
    ========================================================================================================================

    Warnings:
    [1] Parameters and standard errors were estimated using a different dataset and were then applied to this dataset. Covariance matrix calculated using the outer product of gradients (complex-step).calculated using the outer product of gradients (complex-step).

ChenChingChih on 24 Apr 2020

Glad to hear that it helped!

ChadFulton on 24 Apr 2020

🎉1

Was this page helpful?

0 / 5 - 0 ratings