From c1250617b9f2960a0e556118027ced0c1cbd8cff Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Mon, 31 Oct 2022 22:15:22 -0400 Subject: [PATCH 1/3] doc work --- .github/workflows/unittests.yml | 5 +- setup.cfg | 15 + spreg/sur_error.py | 851 +++++++++++++++----------------- 3 files changed, 427 insertions(+), 444 deletions(-) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 07f7f311..3fdff083 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -13,10 +13,7 @@ jobs: unittests: env: - RUN_TEST: pytest -v spreg -n auto --cov=spreg --cov-config=.coveragerc --cov-report=xml --color yes --cov-append --cov-report term-missing - ############################################## replace above with this chunk when docstring testing gets worked out - #RUN_TEST: pytest -v spreg -n auto --cov=spreg --doctest-modules --cov-config=.coveragerc --cov-report=xml --color yes --cov-append --cov-report term-missing - ############################################## + RUN_TEST: pytest -v spreg -n auto --cov=spreg --doctest-modules --cov-config=.coveragerc --cov-report=xml --color yes --cov-append --cov-report term-missing name: ${{ matrix.os }}, ${{ matrix.environment-file }} runs-on: ${{ matrix.os }} timeout-minutes: 30 diff --git a/setup.cfg b/setup.cfg index 60b4c73e..8a4775e3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,3 +5,18 @@ versionfile_source = spreg/_version.py versionfile_build = spreg/_version.py tag_prefix = v parentdir_prefix = spreg- + +[flake8] +max_line_length = 88 +exclude = *__init__.py, docs/conf.py, versioneer.py, spreg/_version.py +ignore = + E741, + F403, + F405, + W503 + +[black] +line-length = 88 + +[isort] +profile = black diff --git a/spreg/sur_error.py b/spreg/sur_error.py index 37f2d6d1..4e2ed4ff 100644 --- a/spreg/sur_error.py +++ b/spreg/sur_error.py @@ -6,80 +6,77 @@ Pedro V. Amaral pedrovma@gmail.com" -import numpy as np +import numpy import numpy.linalg as la +from scipy import sparse as sp from scipy import stats +from scipy.optimize import minimize, minimize_scalar +from scipy.sparse.linalg import splu as SuperLU -stats.chisqprob = stats.chi2.sf +from . import regimes as REGI from . import summary_output as SUMMARY from . import user_output as USER -from . import regimes as REGI -from scipy.sparse.linalg import splu as SuperLU -from scipy.optimize import minimize_scalar, minimize -from scipy import sparse as sp - +from .diagnostics_sur import lam_setp, sur_chow, sur_setp from .ml_error import err_c_loglik_sp -from .utils import optim_moments +from .sur import BaseSUR, _sur_ols from .sur_utils import ( - sur_dictxy, + check_k, + filter_dict, sur_corr, - sur_dict2mat, sur_crossprod, sur_est, sur_resids, - filter_dict, - check_k, ) -from .sur import BaseSUR, _sur_ols -from .diagnostics_sur import sur_setp, lam_setp, sur_chow -from .regimes import buildR, wald_test +from .utils import optim_moments + +stats.chisqprob = stats.chi2.sf + __all__ = ["BaseSURerrorGM", "SURerrorGM", "BaseSURerrorML", "SURerrorML"] class BaseSURerrorGM: - """Base class for SUR Error estimation by Generalized Moment Estimation + """Base class for SUR Error estimation by Generalized Moment Estimation. 
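    Estimation proceeds equation by equation: the residuals of a classic
    (OLS) SUR fit feed Kelejian-Prucha-type moment conditions (built by
    ``_momentsGM_sur_Error`` below), ``optim_moments`` returns one spatial
    parameter per equation, and the coefficients are then re-estimated by
    feasible GLS on spatially filtered variables. A minimal, illustrative
    sketch of those two steps for a single equation ``r``, where ``W``,
    ``bigy``, ``bigX``, ``moments`` and ``lam`` are stand-in names rather
    than a runnable recipe:

    >>> lam[r] = optim_moments(moments[r])  # doctest: +SKIP
    >>> ys = bigy[r] - lam[r] * (W @ bigy[r])  # doctest: +SKIP
    >>> Xs = bigX[r] - lam[r] * (W @ bigX[r])  # doctest: +SKIP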
Parameters ---------- - bigy : dictionary - with vector for dependent variable by equation - bigX : dictionary - with matrix of explanatory variables by equation - (note, already includes constant term) - w : spatial weights object + + bigy : dict + with vector for dependent variable by equation + bigX : dict + with matrix of explanatory variables by equation + (note, already includes constant term) + w : libpysal.weights.W Attributes ---------- - n_eq : int - number of equations - n : int - number of observations in each cross-section - bigy : dictionary - with vectors of dependent variable, one for - each equation - bigX : dictionary - with matrices of explanatory variables, - one for each equation - bigK : array - n_eq x 1 array with number of explanatory variables - by equation - bigylag : dictionary - spatially lagged dependent variable - bigXlag : dictionary - spatially lagged explanatory variable - lamsur : float - spatial autoregressive coefficient in GM SUR Error - bSUR : array - beta coefficients in GM SUR Error - varb : array - variance of beta coefficients in GM SUR Error - sig : array - error variance-covariance matrix in GM SUR Error - corr : array - error correlation matrix - bigE : array - n by n_eq matrix of vectors of residuals for each equation + + n_eq : int + number of equations + n : int + number of observations in each cross-section + bigy : dict + with vectors of dependent variable, one for each equation + bigX : dict + with matrices of explanatory variables, one for each equation + bigK : numpy.array + n_eq x 1 array with number of explanatory variables by equation + bigylag : dict + spatially lagged dependent variable + bigXlag : dict + spatially lagged explanatory variable + lamsur : float + spatial autoregressive coefficient in GM SUR Error + bSUR : numpy.array + beta coefficients in GM SUR Error + varb : numpy.array + variance of beta coefficients in GM SUR Error + sig : numpy.array + error variance-covariance matrix in GM SUR Error + corr : numpy.array + error correlation matrix + bigE : numpy.array + n by n_eq matrix of vectors of residuals for each equation """ @@ -87,12 +84,11 @@ def __init__(self, bigy, bigX, w): self.n = w.n self.n_eq = len(bigy.keys()) WS = w.sparse - I = sp.identity(self.n) # variables self.bigy = bigy self.bigX = bigX # number of variables by equation - self.bigK = np.zeros((self.n_eq, 1), dtype=np.int_) + self.bigK = numpy.zeros((self.n_eq, 1), dtype=numpy.int_) for r in range(self.n_eq): self.bigK[r] = self.bigX[r].shape[1] @@ -102,7 +98,7 @@ def __init__(self, bigy, bigX, w): # Moments moments = _momentsGM_sur_Error(WS, reg0.olsE) - lam = np.zeros((self.n_eq, 1)) + lam = numpy.zeros((self.n_eq, 1)) for r in range(self.n_eq): lam[r] = optim_moments(moments[r]) @@ -145,9 +141,9 @@ def _momentsGM_sur_Error(w, u): trWtW = w.multiply(w).sum() moments = {} for r in range(u.shape[1]): - g = np.array([[u2[r], wu2[r], uwu[r]]]).T / n + g = numpy.array([[u2[r], wu2[r], uwu[r]]]).T / n G = ( - np.array( + numpy.array( [ [2 * uwu[r], -wu2[r], n], [2 * wwuwu[r], -wwu2[r], trWtW], @@ -162,88 +158,81 @@ def _momentsGM_sur_Error(w, u): class SURerrorGM(BaseSURerrorGM, REGI.Regimes_Frame): """ - User class for SUR Error estimation by Maximum Likelihood + User class for SUR Error estimation by Maximum Likelihood. 
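    Estimation itself is delegated to ``BaseSURerrorGM``; this user class
    adds input checking, variable names, optional regimes, and the formatted
    summary output. With regimes the call is unchanged apart from the extra
    argument; a hedged sketch reusing the NAT example further below (the
    ``'SOUTH'`` column is only an assumed illustration):

    >>> regimes = db.by_col('SOUTH')  # doctest: +SKIP
    >>> reg = spreg.SURerrorGM(bigy, bigX, w=w, regimes=regimes)  # doctest: +SKIP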
Parameters ---------- - bigy : dictionary - with vectors of dependent variable, one for - each equation - bigX : dictionary - with matrices of explanatory variables, - one for each equation - w : spatial weights object - regimes : list - List of n values with the mapping of each - observation to a regime. Assumed to be aligned with 'x'. - nonspat_diag : boolean - flag for non-spatial diagnostics, default = False - spat_diag : boolean - flag for spatial diagnostics, default = False (to be implemented) - vm : boolean - flag for asymptotic variance for lambda and Sigma, - default = False (to be implemented) - name_bigy : dictionary - with name of dependent variable for each equation. - default = None, but should be specified is done when - sur_stackxy is used - name_bigX : dictionary - with names of explanatory variables for each equation. - default = None, but should be specified is done when - sur_stackxy is used - name_ds : string - name for the data set - name_w : string - name for the weights file - name_regimes : string - name of regime variable for use in the output + + bigy : dict + with vectors of dependent variable, one for each equation + bigX : dict + with matrices of explanatory variables, one for each equation + w : libpysal.weights.W + regimes : list + List of n values with the mapping of each + observation to a regime. Assumed to be aligned with 'x'. + nonspat_diag : bool + flag for non-spatial diagnostics, default = False + spat_diag : bool + flag for spatial diagnostics, default = False (to be implemented) + vm : bool + flag for asymptotic variance for lambda and Sigma, + default = False (to be implemented) + name_bigy : dict + with name of dependent variable for each equation. + default = None, but should be specified is done when + sur_stackxy is used + name_bigX : dict + with names of explanatory variables for each equation. + default = None, but should be specified is done when sur_stackxy is used + name_ds : str + name for the data set + name_w : str + name for the weights file + name_regimes : str + name of regime variable for use in the output Attributes ---------- - n : int - number of observations in each cross-section - n_eq : int - number of equations - bigy : dictionary - with vectors of dependent variable, one for - each equation - bigX : dictionary - with matrices of explanatory variables, - one for each equation - bigK : array - n_eq x 1 array with number of explanatory variables - by equation - bigylag : dictionary - spatially lagged dependent variable - bigXlag : dictionary - spatially lagged explanatory variable - lamsur : float - spatial autoregressive coefficient in ML SUR Error - bSUR : array - beta coefficients in ML SUR Error - varb : array - variance of beta coefficients in ML SUR Error - sig : array - error variance-covariance matrix in ML SUR Error - bigE : array - n by n_eq matrix of vectors of residuals for each equation - sur_inf : array - inference for regression coefficients, stand. error, t, p - surchow : array - list with tuples for Chow test on regression coefficients. 
- each tuple contains test value, degrees of freedom, p-value - name_bigy : dictionary - with name of dependent variable for each equation - name_bigX : dictionary - with names of explanatory variables for each - equation - name_ds : string - name for the data set - name_w : string - name for the weights file - name_regimes : string - name of regime variable for use in the output - + n : int + number of observations in each cross-section + n_eq : int + number of equations + bigy : dict + with vectors of dependent variable, one for each equation + bigX : dict + with matrices of explanatory variables, one for each equation + bigK : numpy.array + n_eq x 1 array with number of explanatory variables by equation + bigylag : dict + spatially lagged dependent variable + bigXlag : dict + spatially lagged explanatory variable + lamsur : float + spatial autoregressive coefficient in ML SUR Error + bSUR : numpy.array + beta coefficients in ML SUR Error + varb : numpy.array + variance of beta coefficients in ML SUR Error + sig : numpy.array + error variance-covariance matrix in ML SUR Error + bigE : numpy.array + n by n_eq matrix of vectors of residuals for each equation + sur_inf : numpy.array + inference for regression coefficients, stand. error, t, p + surchow : numpy.array + list with tuples for Chow test on regression coefficients. + each tuple contains test value, degrees of freedom, p-value + name_bigy : dict + with name of dependent variable for each equation + name_bigX : dict + with names of explanatory variables for each equation + name_ds : str + name for the data set + name_w : str + name for the weights file + name_regimes : str + name of regime variable for use in the output Examples -------- @@ -253,14 +242,19 @@ class SURerrorGM(BaseSURerrorGM, REGI.Regimes_Frame): >>> import libpysal >>> from libpysal.examples import load_example >>> from libpysal.weights import Queen + >>> import numpy >>> import spreg - >>> np.set_printoptions(suppress=True) #prevent scientific format + >>> numpy.set_printoptions(suppress=True) #prevent scientific format Open data on NCOVR US County Homicides (3085 areas) using pysal.open(). This is the DBF associated with the NAT shapefile. Note that pysal.open() also reads data in CSV format. >>> nat = load_example('Natregimes') + Example not available: Natregimes + Example not downloaded: Chicago parcels + Example not downloaded: Chile Migration + Example not downloaded: Spirals >>> db = libpysal.io.open(nat.get_path('natregimes.dbf'), 'r') The specification of the model to be estimated can be provided as lists. @@ -269,10 +263,10 @@ class SURerrorGM(BaseSURerrorGM, REGI.Regimes_Frame): For equation 2, HR90 is the dependent variable, and PS90 and UE90 the exogenous regressors. - >>> y_var = ['HR80','HR90'] - >>> x_var = [['PS80','UE80'],['PS90','UE90']] - >>> yend_var = [['RD80'],['RD90']] - >>> q_var = [['FP79'],['FP89']] + >>> y_var = ['HR80', 'HR90'] + >>> x_var = [['PS80', 'UE80'],['PS90', 'UE90']] + >>> yend_var = [['RD80'], ['RD90']] + >>> q_var = [['FP79'], ['FP89']] The SUR method requires data to be provided as dictionaries. 
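    Each dictionary maps an equation index (0, 1, ...) to a numpy array: the
    dependent variable as an n by 1 vector and the regressors (constant
    included) as an n by k matrix. A sketch of that layout with hypothetical
    arrays ``y80``, ``y90``, ``X80`` and ``X90``:

    >>> bigy = {0: y80, 1: y90}  # doctest: +SKIP
    >>> bigX = {0: X80, 1: X90}  # doctest: +SKIP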
PySAL provides the tool sur_dictxy to create these dictionaries from the @@ -298,19 +292,32 @@ class SURerrorGM(BaseSURerrorGM, REGI.Regimes_Frame): Alternatively, we can just check the betas and standard errors, asymptotic t and p-value of the parameters: - >>> reg = spreg.SURerrorGM(bigy,bigX,w=w,name_bigy=bigyvars,name_bigX=bigXvars,name_ds="NAT",name_w="nat_queen") - >>> reg.bSUR - {0: array([[3.97746866], - [0.89021219], - [0.43050363]]), 1: array([[2.93679119], - [1.11002826], - [0.48761542]])} - >>> reg.sur_inf - {0: array([[ 0.37251476, 10.67734504, 0. ], - [ 0.14224297, 6.25839153, 0. ], - [ 0.04322388, 9.95985608, 0. ]]), 1: array([[ 0.33694902, 8.71583245, 0. ], - [ 0.13413626, 8.27537783, 0. ], - [ 0.04033105, 12.09032288, 0. ]])} + >>> reg = spreg.SURerrorGM( + ... bigy, + ... bigX, + ... w=w, + ... name_bigy=bigyvars, + ... name_bigX=bigXvars, + ... name_ds="NAT", + ... name_w="nat_queen" + ... ) + >>> reg.bSUR[0].round(5) + array([[3.97744], + [0.89022], + [0.43051]]) + >>> reg.bSUR[1].round(5) + array([[2.93678], + [1.11003], + [0.48762]]) + + >>> reg.sur_inf[0].round(5) + array([[ 0.37252, 10.6772 , 0. ], + [ 0.14224, 6.25841, 0. ], + [ 0.04322, 9.95991, 0. ]]) + >>> reg.sur_inf[1].round(5) + array([[ 0.33695, 8.71582, 0. ], + [ 0.13414, 8.27539, 0. ], + [ 0.04033, 12.09035, 0. ]]) """ def __init__( @@ -426,62 +433,58 @@ class BaseSURerrorML: Parameters ---------- - bigy : dictionary - with vectors of dependent variable, one for - each equation - bigX : dictionary - with matrices of explanatory variables, - one for each equation - w : spatial weights object - epsilon : float - convergence criterion for ML iterations - default 0.0000001 + + bigy : dict + with vectors of dependent variable, one for each equation + bigX : dict + with matrices of explanatory variables, one for each equation + w : libpysal.weights.W + epsilon : float + convergence criterion for ML iterations. Default is ``0.0000001``. 
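    The spatial parameters are obtained by minimizing the negative
    concentrated log-likelihood ``clik`` (defined at the end of this module)
    jointly over all equations with SciPy's L-BFGS-B, iterating between that
    step and feasible GLS updates of the coefficients until the change in the
    concentrated log-likelihood falls below ``epsilon``. A hedged sketch of
    the core optimization call, with ``lam``, ``bigE``, ``I``, ``WS`` and
    ``lambdabounds`` as set up in ``__init__``:

    >>> from scipy.optimize import minimize  # doctest: +SKIP
    >>> res = minimize(  # doctest: +SKIP
    ...     clik, lam.flatten(), args=(n, n2, n_eq, bigE, I, WS),
    ...     method="L-BFGS-B", bounds=lambdabounds,
    ... )
    >>> lamsur = res.x  # doctest: +SKIP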
Attributes ---------- - n : int - number of observations in each cross-section - n2 : int - n/2 - n_eq : int - number of equations - bigy : dictionary - with vectors of dependent variable, one for - each equation - bigX : dictionary - with matrices of explanatory variables, - one for each equation - bigK : array - n_eq x 1 array with number of explanatory variables - by equation - bigylag : dictionary - spatially lagged dependent variable - bigXlag : dictionary - spatially lagged explanatory variable - lamols : array - spatial autoregressive coefficients from equation by - equation ML-Error estimation - clikerr : float - concentrated log-likelihood from equation by equation - ML-Error estimation (no constant) - bSUR0 : array - SUR estimation for betas without spatial autocorrelation - llik : float - log-likelihood for classic SUR estimation (includes constant) - lamsur : float - spatial autoregressive coefficient in ML SUR Error - bSUR : array - beta coefficients in ML SUR Error - varb : array - variance of beta coefficients in ML SUR Error - sig : array - error variance-covariance matrix in ML SUR Error - corr : array - error correlation matrix - bigE : array - n by n_eq matrix of vectors of residuals for each equation - cliksurerr : float - concentrated log-likelihood from ML SUR Error (no constant) + + n : int + number of observations in each cross-section + n2 : int + n/2 + n_eq : int + number of equations + bigy : dict + with vectors of dependent variable, one for each equation + bigX : dict + with matrices of explanatory variables, one for each equation + bigK : numpy.array + n_eq x 1 array with number of explanatory variables by equation + bigylag : dict + spatially lagged dependent variable + bigXlag : dict + spatially lagged explanatory variable + lamols : numpy.array + spatial autoregressive coefficients from equation by + equation ML-Error estimation + clikerr : float + concentrated log-likelihood from equation by equation + ML-Error estimation (no constant) + bSUR0 : numpy.array + SUR estimation for betas without spatial autocorrelation + llik : float + log-likelihood for classic SUR estimation (includes constant) + lamsur : float + spatial autoregressive coefficient in ML SUR Error + bSUR : numpy.array + beta coefficients in ML SUR Error + varb : numpy.array + variance of beta coefficients in ML SUR Error + sig : numpy.array + error variance-covariance matrix in ML SUR Error + corr : numpy.array + error correlation matrix + bigE : numpy.array + n by n_eq matrix of vectors of residuals for each equation + clikserr : float + concentrated log-likelihood from ML SUR Error (no constant) """ @@ -496,7 +499,7 @@ def __init__(self, bigy, bigX, w, epsilon=0.0000001): self.bigy = bigy self.bigX = bigX # number of variables by equation - self.bigK = np.zeros((self.n_eq, 1), dtype=np.int_) + self.bigK = numpy.zeros((self.n_eq, 1), dtype=numpy.int_) for r in range(self.n_eq): self.bigK[r] = self.bigX[r].shape[1] # spatially lagged variables @@ -509,7 +512,7 @@ def __init__(self, bigy, bigX, w, epsilon=0.0000001): self.bigXlag[r] = WS * self.bigX[r] # spatial parameter starting values - lam = np.zeros((self.n_eq, 1)) # initialize as an array + lam = numpy.zeros((self.n_eq, 1)) # initialize as an array fun0 = 0.0 fun1 = 0.0 for r in range(self.n_eq): @@ -576,120 +579,111 @@ class SURerrorML(BaseSURerrorML, REGI.Regimes_Frame): Parameters ---------- - bigy : dictionary - with vectors of dependent variable, one for - each equation - bigX : dictionary - with matrices of explanatory variables, - 
one for each equation - w : spatial weights object - regimes : list - default = None. - List of n values with the mapping of each - observation to a regime. Assumed to be aligned with 'x'. - epsilon : float - convergence criterion for ML iterations. - default 0.0000001 - nonspat_diag : boolean - flag for non-spatial diagnostics, default = True - spat_diag : boolean - flag for spatial diagnostics, default = False - vm : boolean - flag for asymptotic variance for lambda and Sigma, - default = False - name_bigy : dictionary - with name of dependent variable for each equation. - default = None, but should be specified is done when - sur_stackxy is used - name_bigX : dictionary - with names of explanatory variables for each equation. - default = None, but should be specified is done when - sur_stackxy is used - name_ds : string - name for the data set - name_w : string - name for the weights file - name_regimes : string - name of regime variable for use in the output + + bigy : dict + with vectors of dependent variable, one for each equation + bigX : dict + with matrices of explanatory variables, one for each equation + w : libpysal.weights.W + regimes : list + default = None. List of n values with the mapping of each + observation to a regime. Assumed to be aligned with 'x'. + epsilon : float + convergence criterion for ML iterations. default 0.0000001 + nonspat_diag : bool + flag for non-spatial diagnostics, default = True + spat_diag : bool + flag for spatial diagnostics, default = False + vm : bool + flag for asymptotic variance for lambda and Sigma, default = False + name_bigy : dict + with name of dependent variable for each equation. default = None, but + should be specified is done when sur_stackxy is used + name_bigX : dict + with names of explanatory variables for each equation. 
+ default = None, but should be specified is done when + sur_stackxy is used + name_ds : str + name for the data set + name_w : str + name for the weights file + name_regimes : str + name of regime variable for use in the output Attributes ---------- - n : int - number of observations in each cross-section - n2 : int - n/2 - n_eq : int - number of equations - bigy : dictionary - with vectors of dependent variable, one for - each equation - bigX : dictionary - with matrices of explanatory variables, - one for each equation - bigK : array - n_eq x 1 array with number of explanatory variables - by equation - bigylag : dictionary - spatially lagged dependent variable - bigXlag : dictionary - spatially lagged explanatory variable - lamols : array - spatial autoregressive coefficients from equation by - equation ML-Error estimation - clikerr : float - concentrated log-likelihood from equation by equation - ML-Error estimation (no constant) - bSUR0 : array - SUR estimation for betas without spatial autocorrelation - llik : float - log-likelihood for classic SUR estimation (includes constant) - lamsur : float - spatial autoregressive coefficient in ML SUR Error - bSUR : array - beta coefficients in ML SUR Error - varb : array - variance of beta coefficients in ML SUR Error - sig : array - error variance-covariance matrix in ML SUR Error - bigE : array - n by n_eq matrix of vectors of residuals for each equation + + n : int + number of observations in each cross-section + n2 : int + n/2 + n_eq : int + number of equations + bigy : dict + with vectors of dependent variable, one for each equation + bigX : dict + with matrices of explanatory variables, one for each equation + bigK : numpy.array + n_eq x 1 array with number of explanatory variables by equation + bigylag : dict + spatially lagged dependent variable + bigXlag : dict + spatially lagged explanatory variable + lamols : numpy.array + spatial autoregressive coefficients from equation by + equation ML-Error estimation + clikerr : float + concentrated log-likelihood from equation by equation + ML-Error estimation (no constant) + bSUR0 : numpy.array + SUR estimation for betas without spatial autocorrelation + llik : float + log-likelihood for classic SUR estimation (includes constant) + lamsur : float + spatial autoregressive coefficient in ML SUR Error + bSUR : numpy.array + beta coefficients in ML SUR Error + varb : numpy.array + variance of beta coefficients in ML SUR Error + sig : numpy.array + error variance-covariance matrix in ML SUR Error + bigE : numpy.array + n by n_eq matrix of vectors of residuals for each equation cliksurerr : float - concentrated log-likelihood from ML SUR Error (no constant) - sur_inf : array - inference for regression coefficients, stand. error, t, p - errllik : float - log-likelihood for error model without SUR (with constant) + concentrated log-likelihood from ML SUR Error (no constant) + sur_inf : numpy.array + inference for regression coefficients, stand. 
error, t, p + errllik : float + log-likelihood for error model without SUR (with constant) surerrllik : float - log-likelihood for SUR error model (with constant) - lrtest : tuple - likelihood ratio test for off-diagonal Sigma elements + log-likelihood for SUR error model (with constant) + lrtest : tuple + likelihood ratio test for off-diagonal Sigma elements likrlambda : tuple - likelihood ratio test on spatial autoregressive coefficients - vm : array - asymptotic variance matrix for lambda and Sigma (only for vm=True) - lamsetp : array - inference for lambda, stand. error, t, p (only for vm=True) - lamtest : tuple - with test for constancy of lambda across equations - (test value, degrees of freedom, p-value) - joinlam : tuple - with test for joint significance of lambda across - equations (test value, degrees of freedom, p-value) - surchow : list - with tuples for Chow test on regression coefficients. - each tuple contains test value, degrees of freedom, p-value - name_bigy : dictionary - with name of dependent variable for each equation - name_bigX : dictionary - with names of explanatory variables for each - equation - name_ds : string - name for the data set - name_w : string - name for the weights file - name_regimes : string - name of regime variable for use in the output - + likelihood ratio test on spatial autoregressive coefficients + vm : numpy.array + asymptotic variance matrix for lambda and Sigma (only for vm=True) + lamsetp : numpy.array + inference for lambda, stand. error, t, p (only for vm=True) + lamtest : tuple + with test for constancy of lambda across equations + (test value, degrees of freedom, p-value) + joinlam : tuple + with test for joint significance of lambda across + equations (test value, degrees of freedom, p-value) + surchow : list + with tuples for Chow test on regression coefficients. + each tuple contains test value, degrees of freedom, p-value + name_bigy : dict + with name of dependent variable for each equation + name_bigX : dict + with names of explanatory variables for each equation + name_ds : str + name for the data set + name_w : str + name for the weights file + name_regimes : str + name of regime variable for use in the output Examples -------- @@ -699,14 +693,19 @@ class SURerrorML(BaseSURerrorML, REGI.Regimes_Frame): >>> import libpysal >>> from libpysal.examples import load_example >>> from libpysal.weights import Queen + >>> import numpy >>> import spreg - >>> np.set_printoptions(suppress=True) #prevent scientific format + >>> numpy.set_printoptions(suppress=True) #prevent scientific format Open data on NCOVR US County Homicides (3085 areas) using libpysal.io.open(). This is the DBF associated with the NAT shapefile. Note that libpysal.io.open() also reads data in CSV format. >>> nat = load_example('Natregimes') + Example not available: Natregimes + Example not downloaded: Chicago parcels + Example not downloaded: Chile Migration + Example not downloaded: Spirals >>> db = libpysal.io.open(nat.get_path('natregimes.dbf'), 'r') The specification of the model to be estimated can be provided as lists. @@ -715,10 +714,10 @@ class SURerrorML(BaseSURerrorML, REGI.Regimes_Frame): For equation 2, HR90 is the dependent variable, and PS90 and UE90 the exogenous regressors. 
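    The lambda inference (``lamsetp``) and the tests on lambda listed under
    Attributes are only computed when ``vm=True`` is passed (and, for the
    joint significance test, ``spat_diag=True``); a hedged sketch reusing the
    objects built just below:

    >>> reg_d = spreg.SURerrorML(bigy, bigX, w=w, vm=True, spat_diag=True)  # doctest: +SKIP
    >>> reg_d.lamsetp, reg_d.lamtest, reg_d.joinlam  # doctest: +SKIP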
- >>> y_var = ['HR80','HR90'] - >>> x_var = [['PS80','UE80'],['PS90','UE90']] - >>> yend_var = [['RD80'],['RD90']] - >>> q_var = [['FP79'],['FP89']] + >>> y_var = ['HR80', 'HR90'] + >>> x_var = [['PS80', 'UE80'], ['PS90', 'UE90']] + >>> yend_var = [['RD80'], ['RD90']] + >>> q_var = [['FP79'], ['FP89']] The SUR method requires data to be provided as dictionaries. PySAL provides the tool sur_dictxy to create these dictionaries from the @@ -728,7 +727,7 @@ class SURerrorML(BaseSURerrorML, REGI.Regimes_Frame): (bigXvars). All these will be created from th database (db) and lists of variables (y_var and x_var) created above. - >>> bigy,bigX,bigyvars,bigXvars = spreg.sur_dictxy(db,y_var,x_var) + >>> bigy, bigX, bigyvars, bigXvars = spreg.sur_dictxy(db,y_var, x_var) To run a spatial error model, we need to specify the spatial weights matrix. To do that, we can open an already existing gal file or create a new one. @@ -744,20 +743,32 @@ class SURerrorML(BaseSURerrorML, REGI.Regimes_Frame): Alternatively, we can just check the betas and standard errors, asymptotic t and p-value of the parameters: - >>> reg = spreg.SURerrorML(bigy,bigX,w=w,name_bigy=bigyvars,name_bigX=bigXvars,name_ds="NAT",name_w="nat_queen") - >>> reg.bSUR - {0: array([[4.02228606], - [0.88489637], - [0.42402845]]), 1: array([[3.04923031], - [1.10972632], - [0.47075678]])} - - >>> reg.sur_inf - {0: array([[ 0.36692175, 10.96224484, 0. ], - [ 0.14129077, 6.26294545, 0. ], - [ 0.04267954, 9.93516909, 0. ]]), 1: array([[ 0.33139967, 9.20106629, 0. ], - [ 0.13352591, 8.31094381, 0. ], - [ 0.04004097, 11.75687747, 0. ]])} + >>> reg = spreg.SURerrorML( + ... bigy, + ... bigX, + ... w=w, + ... name_bigy=bigyvars, + ... name_bigX=bigXvars, + ... name_ds="NAT", + ... name_w="nat_queen" + ... ) + >>> reg.bSUR[0].round(5) + array([[4.02229], + [0.8849 ], + [0.42403]]) + >>> reg.bSUR[1].round(5) + array([[3.04923], + [1.10973], + [0.47076]]) + + >>> reg.sur_inf[0].round(5) + array([[ 0.36692, 10.96224, 0. ], + [ 0.14129, 6.26295, 0. ], + [ 0.04268, 9.93517, 0. ]]) + >>> reg.sur_inf[1].round(5) + array([[ 0.3314 , 9.20107, 0. ], + [ 0.13353, 8.31094, 0. ], + [ 0.04004, 11.75688, 0. 
]]) """ @@ -833,7 +844,7 @@ def __init__( self.sur_inf = sur_setp(self.bSUR, self.varb) # adjust concentrated log lik for constant - const = -self.n2 * (self.n_eq * (1.0 + np.log(2.0 * np.pi))) + const = -self.n2 * (self.n_eq * (1.0 + numpy.log(2.0 * numpy.pi))) self.errllik = const + self.clikerr self.surerrllik = const + self.cliksurerr @@ -861,12 +872,12 @@ def __init__( self.lamsetp = lam_setp(self.lamsur, vlam) # test on constancy of lambdas R = REGI.buildR(kr=1, kf=0, nr=self.n_eq) - w, p = REGI.wald_test(self.lamsur, R, np.zeros((R.shape[0], 1)), vlam) + w, p = REGI.wald_test(self.lamsur, R, numpy.zeros((R.shape[0], 1)), vlam) self.lamtest = (w, R.shape[0], p) if spat_diag: # test on joint significance of lambdas - Rj = np.identity(self.n_eq) + Rj = numpy.identity(self.n_eq) wj, pj = REGI.wald_test( - self.lamsur, Rj, np.zeros((Rj.shape[0], 1)), vlam + self.lamsur, Rj, numpy.zeros((Rj.shape[0], 1)), vlam ) self.joinlam = (wj, Rj.shape[0], pj) else: @@ -916,19 +927,21 @@ def jacob(lam, n_eq, I, WS): Parameters ---------- - lam : array - n_eq by 1 array of spatial autoregressive parameters - n_eq : int - number of equations - I : sparse matrix - sparse Identity matrix - WS : sparse matrix - sparse spatial weights matrix + + lam : array + ``n_eq`` by 1 array of spatial autoregressive parameters + n_eq : int + number of equations + I : scipy.sparse.csr_matrix + sparse Identity matrix + WS : scipy.sparse.csr_matrix + sparse spatial weights matrix Returns ------- - logjac : float - the log Jacobian + + logjac : float + the log Jacobian """ logjac = 0.0 @@ -937,7 +950,7 @@ def jacob(lam, n_eq, I, WS): lamWS = WS.multiply(lami) B = (I - lamWS).tocsc() LU = SuperLU(B) - jj = np.sum(np.log(np.abs(LU.U.diagonal()))) + jj = numpy.sum(numpy.log(numpy.abs(LU.U.diagonal()))) logjac += jj return logjac @@ -948,30 +961,30 @@ def clik(lam, n, n2, n_eq, bigE, I, WS): Parameters ---------- - lam : array - n_eq x 1 array of spatial autoregressive parameters - n : int - number of observations in each cross-section - n2 : int - n/2 - n_eq : int - number of equations - bigE : array - n by n_eq matrix with vectors of residuals for - each equation - I : sparse Identity matrix - WS : sparse spatial weights matrix + + lam : numpy.array + n_eq x 1 array of spatial autoregressive parameters + n : int + number of observations in each cross-section + n2 : int + n/2 + n_eq : int + number of equations + bigE : numpy.array + n by n_eq matrix with vectors of residuals for each equation + I : sparse Identity matrix + WS : sparse spatial weights matrix Returns ------- - -clik : float - negative (for minimize) of the concentrated - log-likelihood function + + -clik : float + negative (for minimize) of the concentrated log-likelihood function """ WbigE = WS * bigE spfbigE = bigE - WbigE * lam.T - sig = np.dot(spfbigE.T, spfbigE) / n + sig = numpy.dot(spfbigE.T, spfbigE) / n ldet = la.slogdet(sig)[1] logjac = jacob(lam, n_eq, I, WS) clik = -n2 * ldet + logjac @@ -987,22 +1000,24 @@ def surerrvm(n, n_eq, w, lam, sig): Parameters ---------- - n : int - number of cross-sectional observations - n_eq : int - number of equations - w : spatial weights object - lam : array - n_eq by 1 vector with spatial autoregressive coefficients - sig : array - n_eq by n_eq matrix with cross-equation error covariances + + n : int + number of cross-sectional observations + n_eq : int + number of equations + w : libpysal.weights.W + lam : numpy.array + n_eq by 1 vector with spatial autoregressive coefficients + sig : numpy.array + n_eq by n_eq 
matrix with cross-equation error covariances Returns ------- - vm : array - asymptotic variance-covariance matrix for spatial autoregressive - coefficients and the upper triangular elements of Sigma - n_eq + n_eq x (n_eq + 1) / 2 coefficients + + vm : numpy.array + asymptotic variance-covariance matrix for spatial autoregressive + coefficients and the upper triangular elements of Sigma + n_eq + n_eq x (n_eq + 1) / 2 coefficients """ @@ -1011,10 +1026,10 @@ def surerrvm(n, n_eq, w, lam, sig): sisi = sigi * sig # elements of Psi_lam,lam # trace terms - trDi = np.zeros((n_eq, 1)) - trDDi = np.zeros((n_eq, 1)) - trDTDi = np.zeros((n_eq, 1)) - trDTiDj = np.zeros((n_eq, n_eq)) + trDi = numpy.zeros((n_eq, 1)) + trDDi = numpy.zeros((n_eq, 1)) + trDTDi = numpy.zeros((n_eq, 1)) + trDTiDj = numpy.zeros((n_eq, n_eq)) WS = w.sparse I = sp.identity(n) for i in range(n_eq): @@ -1024,11 +1039,11 @@ def surerrvm(n, n_eq, w, lam, sig): bb = B.todense() Bi = la.inv(bb) D = WS * Bi - trDi[i] = np.trace(D) - DD = np.dot(D, D) - trDDi[i] = np.trace(DD) - DD = np.dot(D.T, D) - trDTDi[i] = np.trace(DD) + trDi[i] = numpy.trace(D) + DD = numpy.dot(D, D) + trDDi[i] = numpy.trace(DD) + DD = numpy.dot(D.T, D) + trDTDi[i] = numpy.trace(DD) for j in range(i + 1, n_eq): lamj = lam[j][0] lamWS = WS.multiply(lamj) @@ -1036,19 +1051,19 @@ def surerrvm(n, n_eq, w, lam, sig): bb = B.todense() Bi = la.inv(bb) Dj = WS * Bi - DD = np.dot(D.T, Dj) - trDTiDj[i, j] = np.trace(DD) + DD = numpy.dot(D.T, Dj) + trDTiDj[i, j] = numpy.trace(DD) trDTiDj[j, i] = trDTiDj[i, j] - np.fill_diagonal(trDTiDj, trDTDi) + numpy.fill_diagonal(trDTiDj, trDTDi) sisjT = sisi * trDTiDj - Vll = np.diagflat(trDDi) + sisjT + Vll = numpy.diagflat(trDDi) + sisjT # elements of Psi_lam_sig P = int(n_eq * (n_eq + 1) / 2) # force ints to be ints tlist = [(i, j) for i in range(n_eq) for j in range(i, n_eq)] zog = sigi * trDi - Vlsig = np.zeros((n_eq, P)) + Vlsig = numpy.zeros((n_eq, P)) for i in range(n_eq): for j in range(n_eq): if i > j: @@ -1058,11 +1073,11 @@ def surerrvm(n, n_eq, w, lam, sig): Vlsig[i, jj] = zog[i, j] # top of Psi - vtop = np.hstack((Vll, Vlsig)) + vtop = numpy.hstack((Vll, Vlsig)) # elements of Psi_sig_sig - Vsig = np.zeros((P, P)) + Vsig = numpy.zeros((P, P)) for ij in range(P): i, j = tlist[ij] for hk in range(P): @@ -1080,56 +1095,12 @@ def surerrvm(n, n_eq, w, lam, sig): Vsig = n * Vsig # bottom of Psi - vbottom = np.hstack((Vlsig.T, Vsig)) + vbottom = numpy.hstack((Vlsig.T, Vsig)) # all of Psi - vbig = np.vstack((vtop, vbottom)) + vbig = numpy.vstack((vtop, vbottom)) # inverse of Psi vm = la.inv(vbig) return vm - - -def _test(): - import doctest - - start_suppress = np.get_printoptions()["suppress"] - np.set_printoptions(suppress=True) - doctest.testmod() - np.set_printoptions(suppress=start_suppress) - - -if __name__ == "__main__": - _test() - import numpy as np - import libpysal - from .sur_utils import sur_dictxy, sur_dictZ - from libpysal.examples import load_example - from libpysal.weights import Queen - - nat = load_example("Natregimes") - db = libpysal.io.open(nat.get_path("natregimes.dbf"), "r") - y_var = ["HR80", "HR90"] - x_var = [["PS80", "UE80"], ["PS90", "UE90"]] - w = Queen.from_shapefile(nat.get_path("natregimes.shp")) - w.transform = "r" - bigy0, bigX0, bigyvars0, bigXvars0 = sur_dictxy(db, y_var, x_var) - reg0 = SURerrorML( - bigy0, - bigX0, - w, - regimes=regimes, - name_bigy=bigyvars0, - name_bigX=bigXvars0, - name_w="natqueen", - name_ds="natregimes", - vm=True, - nonspat_diag=True, - spat_diag=True, - ) - - # reg0 
= SURerrorGM(bigy0,bigX0,w,regimes=regimes,name_bigy=bigyvars0,name_bigX=bigXvars0,\ - # name_w="natqueen",name_ds="natregimes",vm=False,nonspat_diag=True,spat_diag=False) - - print(reg0.summary) From 8a5a8a125e2b6aaeb9e4e90a46e0ed1bef7c5a18 Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Mon, 31 Oct 2022 22:22:22 -0400 Subject: [PATCH 2/3] don't run doctets yet --- .github/workflows/unittests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 3fdff083..dcfc27e5 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -13,7 +13,7 @@ jobs: unittests: env: - RUN_TEST: pytest -v spreg -n auto --cov=spreg --doctest-modules --cov-config=.coveragerc --cov-report=xml --color yes --cov-append --cov-report term-missing + RUN_TEST: pytest -v spreg -n auto --cov=spreg --cov-config=.coveragerc --cov-report=xml --color yes --cov-append --cov-report term-missing name: ${{ matrix.os }}, ${{ matrix.environment-file }} runs-on: ${{ matrix.os }} timeout-minutes: 30 From ab617f51c7526dff903809b601548cbc2a155659 Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Sat, 7 Oct 2023 18:15:48 -0400 Subject: [PATCH 3/3] np --> numpy --- spreg/sur_error.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/spreg/sur_error.py b/spreg/sur_error.py index ddcb6164..35a452f2 100644 --- a/spreg/sur_error.py +++ b/spreg/sur_error.py @@ -2,8 +2,10 @@ Spatial Error SUR estimation """ -__author__ = "Luc Anselin lanselin@gmail.com, \ - Pedro V. Amaral pedrovma@gmail.com" +__author__ = """ +Luc Anselin [lanselin@gmail.com], +Pedro V. Amaral [pedrovma@gmail.com] +""" import numpy @@ -557,7 +559,7 @@ def __init__(self, bigy, bigX, w, epsilon=0.0000001): bigE = sur_resids(self.bigy, self.bigX, b1) res = minimize( clik, - np.array(lam).flatten(), + numpy.array(lam).flatten(), args=(self.n, self.n2, self.n_eq, bigE, I, WS), method="L-BFGS-B", bounds=lambdabounds, @@ -1110,15 +1112,15 @@ def surerrvm(n, n_eq, w, lam, sig): def _test(): import doctest - start_suppress = np.get_printoptions()["suppress"] - np.set_printoptions(suppress=True) + start_suppress = numpy.get_printoptions()["suppress"] + numpy.set_printoptions(suppress=True) doctest.testmod() - np.set_printoptions(suppress=start_suppress) + numpy.set_printoptions(suppress=start_suppress) if __name__ == "__main__": _test() - import numpy as np + import numpy import libpysal from .sur_utils import sur_dictxy, sur_dictZ from libpysal.examples import load_example