diff --git a/spreg/diagnostics.py b/spreg/diagnostics.py old mode 100644 new mode 100755 index b67da250..817fc565 --- a/spreg/diagnostics.py +++ b/spreg/diagnostics.py @@ -13,7 +13,7 @@ import numpy.linalg as la import scipy.sparse as SP from scipy import stats - +from .ols import BaseOLS from .utils import spmultiply, sphstack, spmin, spmax @@ -34,19 +34,23 @@ "vif", "likratiotest", "constant_check", + "dwh", ] -def f_stat(reg): +def f_stat(reg,df=0): """ - Calculates the f-statistic and associated p-value of the - regression. :cite:`Greene2003`. + Calculates the f-statistic and associated p-value for multiple + coefficient constraints :cite:`Greene2003`. (For two stage least squares see f_stat_tsls) + (default is F statistic for regression) Parameters ---------- reg : regression object output instance from a regression model + df : number of coefficient constraints + (zero constraint for last df coefficients in betas) Returns ---------- @@ -94,12 +98,21 @@ def f_stat(reg): k = reg.k # (scalar) number of ind. vars (includes constant) n = reg.n # (scalar) number of observations utu = reg.utu # (scalar) residual sum of squares - predy = reg.predy # (array) vector of predicted values (n x 1) - mean_y = reg.mean_y # (scalar) mean of dependent observations - Q = utu - U = np.sum((predy - mean_y) ** 2) - fStat = (U / (k - 1)) / (Q / (n - k)) - pValue = stats.f.sf(fStat, k - 1, n - k) + # default case, all coefficients + if df == 0: + r = k-1 + predy = reg.predy # (array) vector of predicted values (n x 1) + mean_y = reg.mean_y # (scalar) mean of dependent observations + U = np.sum((predy - mean_y) ** 2) + else: # F test on last df coefficients + y = reg.y + r = df + x0 = reg.x[:,:-r] + olsr = BaseOLS(y,x0) # constrained regression + rtr = olsr.utu + U = rtr - utu + fStat = (U / r) / (utu / (n - k)) + pValue = stats.f.sf(fStat, r, n - k) fs_result = (fStat, pValue) return fs_result @@ -1376,6 +1389,40 @@ def likratiotest(reg0, reg1): likratio = {"likr": likr, "df": 1, "p-value": pvalue} return likratio +def dwh(reg): + """ + Durbin-Wu-Hausman test on endogeneity of variables + + A significant test statistic points to endogeneity + + Parameters + ---------- + reg : regression object + output instance from a regression model + + Returns + ------- + dwh : tuple with value of F-statistic in augmented regression + and associated p-value + + """ + n = reg.n + ny = reg.yend.shape[1] # number of endogenous variables + qq = reg.h # all exogenous and instruments + xx = reg.z # all exogenous and endogenous + # get predicted values for endogenous variables on all instruments + py = np.zeros((n,ny)) + for i in range(ny): + yy = reg.yend[:, i].reshape(n,1) + ols1 = BaseOLS(y=yy,x=qq) + yp = ols1.predy + py[0:n,i] = yp.flatten() + nxq = sphstack(xx, py) + # F-test in augmented regression + ols2 = BaseOLS(y=reg.y, x=nxq) + dwh = f_stat(ols2, df=ny) + return dwh + def _test(): import doctest diff --git a/spreg/error_sp.py b/spreg/error_sp.py index cdbf6275..566147a4 100644 --- a/spreg/error_sp.py +++ b/spreg/error_sp.py @@ -16,6 +16,7 @@ from .output import output, _spat_pseudo_r2 from .error_sp_het import GM_Error_Het, GM_Endog_Error_Het, GM_Combo_Het from .error_sp_hom import GM_Error_Hom, GM_Endog_Error_Hom, GM_Combo_Hom +from itertools import compress __all__ = ["GMM_Error", "GM_Error", "GM_Endog_Error", "GM_Combo"] @@ -126,9 +127,9 @@ class GM_Error(BaseGM_Error): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas 
object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant w : pysal W object @@ -136,6 +137,8 @@ class GM_Error(BaseGM_Error): slx_lags : integer Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the SLX-Error type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged vm : boolean If True, include variance-covariance matrix in summary results @@ -291,22 +294,26 @@ class GM_Error(BaseGM_Error): """ def __init__( - self, y, x, w, slx_lags=0, vm=False, name_y=None, name_x=None, name_w=None, name_ds=None, latex=False, + self, y, x, w, slx_lags=0, slx_vars="All",vm=False, name_y=None, name_x=None, name_w=None, name_ds=None, latex=False, hard_bound=False): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) x_constant, name_x, warn = USER.check_constant(x, name_x) name_x = USER.set_name_x(name_x, x_constant) # intialize in case of None, contains constant set_warn(self, warn) self.title = "GM SPATIALLY WEIGHTED LEAST SQUARES" if slx_lags >0: - lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - x_constant = np.hstack((x_constant, lag_x)) + #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + #x_constant = np.hstack((x_constant, lag_x)) # name_x += USER.set_name_spatial_lags(name_x, slx_lags) - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + + x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + self.title += " WITH SLX (SLX-Error)" BaseGM_Error.__init__(self, y=y, x=x_constant, w=w.sparse, hard_bound=hard_bound) @@ -443,15 +450,15 @@ class GM_Endog_Error(BaseGM_Endog_Error): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) @@ -460,6 +467,8 @@ class GM_Endog_Error(BaseGM_Endog_Error): slx_lags : integer Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the SLX-Error type. 
+ slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged vm : boolean If True, include variance-covariance matrix in summary results @@ -648,6 +657,7 @@ def __init__( q, w, slx_lags=0, + slx_vars="All", vm=False, name_y=None, name_x=None, @@ -660,17 +670,22 @@ def __init__( ): n = USER.check_arrays(y, x, yend, q) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x) name_x = USER.set_name_x(name_x, x_constant) # initialize for None, includes constant set_warn(self, warn) self.title = "GM SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES" if slx_lags >0: - lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - x_constant = np.hstack((x_constant, lag_x)) + #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + #x_constant = np.hstack((x_constant, lag_x)) # name_x += USER.set_name_spatial_lags(name_x, slx_lags) - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + + x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + self.title += " WITH SLX (SLX-Error)" BaseGM_Endog_Error.__init__(self, y=y, x=x_constant, w=w.sparse, yend=yend, q=q, hard_bound=hard_bound) self.name_ds = USER.set_name_ds(name_ds) @@ -699,15 +714,15 @@ class GM_Combo(BaseGM_Endog_Error): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) @@ -721,6 +736,8 @@ class GM_Combo(BaseGM_Endog_Error): Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the General Nesting Spatial Model (GNSM) type. 
+ slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged lag_q : boolean If True, then include spatial lags of the additional instruments (q) @@ -942,6 +959,7 @@ def __init__( w=None, w_lags=1, slx_lags=0, + slx_vars="All", lag_q=True, vm=False, name_y=None, @@ -955,16 +973,27 @@ def __init__( ): n = USER.check_arrays(y, x, yend, q) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x) name_x = USER.set_name_x(name_x, x_constant) - set_warn(self, warn) - if slx_lags == 0: - yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) - else: - yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + + if slx_lags > 0: + yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags,slx_vars) x_constant = np.hstack((x_constant, wx)) + else: + yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + + + + set_warn(self, warn) + # OLD + #if slx_lags == 0: + #yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + #else: + #yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + #x_constant = np.hstack((x_constant, wx)) BaseGM_Endog_Error.__init__(self, y=y, x=x_constant, w=w.sparse, yend=yend2, q=q2, hard_bound=hard_bound) @@ -974,25 +1003,75 @@ def __init__( ) set_warn(self, warn) self.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO MODEL" - if slx_lags > 0: + # OLD + #if slx_lags > 0: # name_x += USER.set_name_spatial_lags(name_x, slx_lags) - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + #self.title += " WITH SLX (GNSM)" + + # kx and wkx are used to replace complex calculation for output + if slx_lags > 0: # adjust for flexwx + if (isinstance(slx_vars,list)): # slx_vars has True,False + if len(slx_vars) != x.shape[1] : + raise Exception("slx_vars incompatible with x column dimensions") + else: # use slx_vars to extract proper columns + workname = name_x[1:] + kx = len(workname) + vv = list(compress(workname,slx_vars)) + name_x += USER.set_name_spatial_lags(vv, slx_lags) + wkx = slx_vars.count(True) + else: + kx = len(name_x) - 1 + wkx = kx + name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant self.title += " WITH SLX (GNSM)" + self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) # self.name_x = USER.set_name_x(name_x, x_constant) self.name_x = name_x # constant already in list self.name_yend = USER.set_name_yend(name_yend, yend) self.name_yend.append(USER.set_name_yend_sp(self.name_y)) + + self.name_z = self.name_x + self.name_yend self.name_z.append("lambda") self.name_q = USER.set_name_q(name_q, q) - self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + + if slx_lags > 0: # need to remove all but last SLX variables from name_x + self.name_x0 = [] + self.name_x0.append(self.name_x[0]) # constant + if (isinstance(slx_vars,list)): # boolean list passed + # x variables that were not lagged + self.name_x0.extend(list(compress(self.name_x[1:],[not i for i in slx_vars]))) + # last wkx variables + self.name_x0.extend(self.name_x[-wkx:]) + + + else: + okx = int((self.k - self.yend.shape[1] - 1) / 
(slx_lags + 1)) # number of original exogenous vars + + self.name_x0.extend(self.name_x[-okx:]) + + self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) + + #var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + var_types = ['x'] * (kx + 1) + ['wx'] * wkx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] + else: + self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + var_types = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] + + + #self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) self.output = pd.DataFrame(self.name_z, columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho', 'lambda'] + + + #self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho', 'lambda'] + self.output['var_type'] = var_types + self.output['regime'], self.output['equation'] = (0, 0) self.other_top = _spat_pseudo_r2(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) @@ -1006,17 +1085,17 @@ class GMM_Error(GM_Error, GM_Endog_Error, GM_Combo, GM_Error_Het, GM_Endog_Error Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant w : pysal W object Spatial weights object (always needed) - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable (if any) - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (if any) (note: this should not contain any variables from x) @@ -1029,7 +1108,9 @@ class GMM_Error(GM_Error, GM_Endog_Error, GM_Combo, GM_Error_Het, GM_Endog_Error If True, then a spatial lag of the dependent variable is included. slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the SLX-Error or GNSM type. + If slx_lags>0, the specification becomes of the SLX-Error or GNSM type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged vm : boolean If True, include variance-covariance matrix in summary results @@ -1050,6 +1131,7 @@ class GMM_Error(GM_Error, GM_Endog_Error, GM_Combo, GM_Error_Het, GM_Endog_Error hard_bound : boolean If true, raises an exception if the estimated spatial autoregressive parameter is outside the maximum/minimum bounds. + spat_diag : boolean, ignored, included for compatibility with other models **kwargs : keywords Additional arguments to pass on to the estimators. See the specific functions for details on what can be used. 
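# Illustrative sketch of the slx_vars bookkeeping added in the GM_Combo hunk above:
# when slx_vars is a boolean list, itertools.compress picks the X columns that get
# spatial lags, wkx counts them, and var_types labels the output rows; when every
# column is lagged, okx = (k - n_yend - 1) / (slx_lags + 1) recovers the number of
# original exogenous variables. The variable names below are hypothetical, and the
# actual lag prefixes come from USER.set_name_spatial_lags.
from itertools import compress

name_x = ["CONSTANT", "inc", "crime", "hoval"]
slx_vars = [True, False, True]                 # lag 'inc' and 'hoval', skip 'crime'
slx_lags = 1

workname = name_x[1:]                          # exclude the constant
kx = len(workname)                             # 3 original exogenous variables
vv = list(compress(workname, slx_vars))        # ['inc', 'hoval']
wkx = slx_vars.count(True)                     # 2 lagged columns per lag order
name_x = name_x + ["W_" + v for v in vv]       # illustrative names only
n_yend = 2                                     # e.g. one endogenous variable plus W_y
var_types = ["x"] * (kx + 1) + ["wx"] * wkx * slx_lags + ["yend"] * (n_yend - 1) + ["rho", "lambda"]
print(name_x)      # ['CONSTANT', 'inc', 'crime', 'hoval', 'W_inc', 'W_hoval']
print(var_types)   # ['x', 'x', 'x', 'x', 'wx', 'wx', 'yend', 'rho', 'lambda']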
@@ -1232,50 +1314,50 @@ class GMM_Error(GM_Error, GM_Endog_Error, GM_Combo, GM_Error_Het, GM_Endog_Error """ def __init__( - self, y, x, w, yend=None, q=None, estimator='het', add_wy=False, slx_lags=0, vm=False, name_y=None, name_x=None, name_w=None, name_yend=None, - name_q=None, name_ds=None, latex=False, hard_bound=False, **kwargs): + self, y, x, w, yend=None, q=None, estimator='het', add_wy=False, slx_lags=0, slx_vars="All",vm=False, name_y=None, name_x=None, name_w=None, name_yend=None, + name_q=None, name_ds=None, latex=False, hard_bound=False,spat_diag=False, **kwargs): if estimator == 'het': if yend is None and not add_wy: - GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) elif yend is not None and not add_wy: - GM_Endog_Error_Het.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + GM_Endog_Error_Het.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) elif add_wy: - GM_Combo_Het.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + GM_Combo_Het.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) else: set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.') - GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound) elif estimator == 'hom': if yend is None and not add_wy: - GM_Error_Hom.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + GM_Error_Hom.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars,vm=vm, name_y=name_y, name_x=name_x, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) elif yend is not None and not add_wy: - GM_Endog_Error_Hom.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + GM_Endog_Error_Hom.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) elif add_wy: - GM_Combo_Hom.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + GM_Combo_Hom.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) else: set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. 
Using default arguments instead.') - GM_Error_Hom.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + GM_Error_Hom.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars,vm=vm, name_y=name_y, name_x=name_x, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound) elif estimator == 'kp98': if yend is None and not add_wy: - GM_Error.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + GM_Error.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) elif yend is not None and not add_wy: - GM_Endog_Error.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + GM_Endog_Error.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) elif add_wy: - GM_Combo.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + GM_Combo.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) else: set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.') - GM_Error.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + GM_Error.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound) else: set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.') diff --git a/spreg/error_sp_het.py b/spreg/error_sp_het.py index 24c9adcb..e688ee19 100755 --- a/spreg/error_sp_het.py +++ b/spreg/error_sp_het.py @@ -19,6 +19,7 @@ from libpysal.weights.spatial_lag import lag_spatial import pandas as pd from .output import output, _summary_iteration, _spat_pseudo_r2 +from itertools import compress __all__ = ["GM_Error_Het", "GM_Endog_Error_Het", "GM_Combo_Het"] @@ -173,9 +174,9 @@ class GM_Error_Het(BaseGM_Error_Het): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant w : pysal W object @@ -183,6 +184,8 @@ class GM_Error_Het(BaseGM_Error_Het): slx_lags : integer Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the SLX-Error type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. 
Note: epsilon provides an additional @@ -356,6 +359,7 @@ def __init__( x, w, slx_lags=0, + slx_vars="All", max_iter=1, epsilon=0.00001, step1c=False, @@ -369,28 +373,44 @@ def __init__( ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) x_constant, name_x, warn = USER.check_constant(x, name_x) name_x = USER.set_name_x(name_x, x_constant) # initialize in case None, includes constant set_warn(self, warn) self.title = "GM SPATIALLY WEIGHTED LEAST SQUARES (HET)" + if slx_lags >0: - lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - x_constant = np.hstack((x_constant, lag_x)) + #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + #x_constant = np.hstack((x_constant, lag_x)) # name_x += USER.set_name_spatial_lags(name_x, slx_lags) - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # no constant + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + + x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + self.title += " WITH SLX (SLX-Error)" + + # OLD + #if slx_lags >0: + #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + #x_constant = np.hstack((x_constant, lag_x)) +# name_x += USER.set_name_spatial_lags(name_x, slx_lags) + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # no constant + #self.title += " WITH SLX (SLX-Error)" + BaseGM_Error_Het.__init__( self, - y, - x_constant, - w.sparse, + y=y, + x=x_constant, + w=w.sparse, max_iter=max_iter, step1c=step1c, epsilon=epsilon, hard_bound = hard_bound ) + + self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) # self.name_x = USER.set_name_x(name_x, x_constant) @@ -651,15 +671,15 @@ class GM_Endog_Error_Het(BaseGM_Endog_Error_Het): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) @@ -668,6 +688,8 @@ class GM_Endog_Error_Het(BaseGM_Endog_Error_Het): slx_lags : integer Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the SLX-Error type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. 
Note: epsilon provides an additional @@ -881,6 +903,7 @@ def __init__( q, w, slx_lags=0, + slx_vars="All", max_iter=1, epsilon=0.00001, step1c=False, @@ -897,17 +920,29 @@ def __init__( ): n = USER.check_arrays(y, x, yend, q) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x) name_x = USER.set_name_x(name_x, x_constant) # initialize in case None, includes constant set_warn(self, warn) self.title = "GM SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HET)" - if slx_lags > 0: - lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - x_constant = np.hstack((x_constant, lag_x)) - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # no constant - self.title += " WITH SLX (SLX-Error)" + + if slx_lags >0: + + x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + + self.title += " WITH SLX (SLX-Error)" + + # OLD + #if slx_lags > 0: + #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + #x_constant = np.hstack((x_constant, lag_x)) + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # no constant + #self.title += " WITH SLX (SLX-Error)" + + BaseGM_Endog_Error_Het.__init__( self, y=y, @@ -1120,15 +1155,15 @@ class GM_Combo_Het(BaseGM_Combo_Het): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) @@ -1142,6 +1177,8 @@ class GM_Combo_Het(BaseGM_Combo_Het): Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the General Nesting Spatial Model (GNSM) type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged lag_q : boolean If True, then include spatial lags of the additional instruments (q). 
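# Possible usage sketch for the new slx_vars argument on the heteroskedastic error
# estimators, assuming the columbus sample data shipped with libpysal. The column
# choices and the boolean pattern are illustrative only: with slx_lags=1,
# slx_vars=[True, False] lags INC but leaves CRIME unlagged.
import numpy as np
import libpysal
from spreg import GM_Error_Het

db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r")
y = np.array(db.by_col("HOVAL")).reshape(-1, 1)
x = np.array([db.by_col("INC"), db.by_col("CRIME")]).T
w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path("columbus.shp"))
w.transform = "r"

model = GM_Error_Het(
    y, x, w=w, slx_lags=1, slx_vars=[True, False],
    name_y="HOVAL", name_x=["INC", "CRIME"], name_ds="columbus",
)
print(model.summary)   # the SLX-Error title and the lagged INC term appear among the regressors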
@@ -1378,6 +1415,7 @@ def __init__( w=None, w_lags=1, slx_lags=0, + slx_vars="All", lag_q=True, max_iter=1, epsilon=0.00001, @@ -1395,17 +1433,25 @@ def __init__( ): n = USER.check_arrays(y, x, yend, q) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x) name_x = USER.set_name_x(name_x, x_constant) # initialize in case None, includes constant set_warn(self, warn) - if slx_lags == 0: - yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) - else: - yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + if slx_lags > 0: + yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags,slx_vars) x_constant = np.hstack((x_constant, wx)) + else: + yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + + # OLS + #if slx_lags == 0: + #yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + #else: + #yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + #x_constant = np.hstack((x_constant, wx)) BaseGM_Combo_Het.__init__( self, @@ -1429,10 +1475,28 @@ def __init__( UTILS.set_warn(self, warn) self.title = "SPATIALLY WEIGHTED 2SLS- GM-COMBO MODEL (HET)" - if slx_lags > 0: -# name_x += USER.set_name_spatial_lags(name_x, slx_lags) - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # no constant + if slx_lags > 0: # adjust for flexwx + if (isinstance(slx_vars,list)): # slx_vars has True,False + if len(slx_vars) != x.shape[1] : + raise Exception("slx_vars incompatible with x column dimensions") + else: # use slx_vars to extract proper columns + workname = name_x[1:] + kx = len(workname) + vv = list(compress(workname,slx_vars)) + name_x += USER.set_name_spatial_lags(vv, slx_lags) + wkx = slx_vars.count(True) + else: + kx = len(name_x) - 1 + wkx = kx + name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant self.title += " WITH SLX (GNSM)" + + # OLD + #if slx_lags > 0: +# name_x += USER.set_name_spatial_lags(name_x, slx_lags) + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # no constant + #self.title += " WITH SLX (GNSM)" + self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) # self.name_x = USER.set_name_x(name_x, x_constant) @@ -1442,12 +1506,42 @@ def __init__( self.name_z = self.name_x + self.name_yend self.name_z.append("lambda") # listing lambda last self.name_q = USER.set_name_q(name_q, q) - self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + + if slx_lags > 0: # need to remove all but last SLX variables from name_x + self.name_x0 = [] + self.name_x0.append(self.name_x[0]) # constant + if (isinstance(slx_vars,list)): # boolean list passed + # x variables that were not lagged + self.name_x0.extend(list(compress(self.name_x[1:],[not i for i in slx_vars]))) + # last wkx variables + self.name_x0.extend(self.name_x[-wkx:]) + + + else: + okx = int((self.k - self.yend.shape[1] - 1) / (slx_lags + 1)) # number of original exogenous vars + + self.name_x0.extend(self.name_x[-okx:]) + + self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) + + #var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + var_types = ['x'] * (kx + 1) + ['wx'] * wkx * 
slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] + else: + self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + var_types = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] + + + + #self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) self.output = pd.DataFrame(self.name_z, columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend)-1) + ['rho', 'lambda'] + + self.output['var_type'] = var_types + + #self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend)-1) + ['rho', 'lambda'] + self.output['regime'], self.output['equation'] = (0, 0) self.other_top = _spat_pseudo_r2(self) self.other_top += _summary_iteration(self) diff --git a/spreg/error_sp_het_regimes.py b/spreg/error_sp_het_regimes.py index 9ba5b68e..413f5642 100644 --- a/spreg/error_sp_het_regimes.py +++ b/spreg/error_sp_het_regimes.py @@ -37,12 +37,12 @@ class GM_Error_Het_Regimes(RegressionPropsY, REGI.Regimes_Frame): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. w : pysal W object @@ -327,14 +327,15 @@ def __init__( ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) self.constant_regi = constant_regi self.cols2regi = cols2regi self.regime_err_sep = regime_err_sep self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_w = USER.set_name_w(name_w, w) + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) self.name_regimes = USER.set_name_ds(name_regimes) self.n, self.step1c = n, step1c self.y = y @@ -589,19 +590,19 @@ class GM_Endog_Error_Het_Regimes(RegressionPropsY, REGI.Regimes_Frame): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. 
w : pysal W object @@ -939,11 +940,13 @@ def __init__( ): n = USER.check_arrays(y, x, yend, q) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) self.constant_regi = constant_regi self.cols2regi = cols2regi self.name_ds = USER.set_name_ds(name_ds) + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) self.name_regimes = USER.set_name_ds(name_regimes) self.name_w = USER.set_name_w(name_w, w) self.n, self.step1c = n, step1c @@ -1318,19 +1321,19 @@ class GM_Combo_Het_Regimes(GM_Endog_Error_Het_Regimes): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. w : pysal W object @@ -1716,8 +1719,9 @@ def __init__( regime_err_sep = False n = USER.check_arrays(y, x) self.step1c = step1c - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) @@ -1725,6 +1729,7 @@ def __init__( self.name_y = USER.set_name_y(name_y) name_yend = USER.set_name_yend(name_yend, yend) name_q = USER.set_name_q(name_q, q) + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) if regime_err_sep and any(col != True for col in cols2regi): set_warn(self, "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.") diff --git a/spreg/error_sp_hom.py b/spreg/error_sp_hom.py index cfa78852..2a8c4358 100644 --- a/spreg/error_sp_hom.py +++ b/spreg/error_sp_hom.py @@ -18,6 +18,7 @@ from . import user_output as USER import pandas as pd from .output import output, _spat_pseudo_r2, _summary_iteration +from itertools import compress __all__ = ["GM_Error_Hom", "GM_Endog_Error_Hom", "GM_Combo_Hom"] @@ -173,9 +174,9 @@ class GM_Error_Hom(BaseGM_Error_Hom): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant w : pysal W object @@ -183,6 +184,8 @@ class GM_Error_Hom(BaseGM_Error_Hom): slx_lags : integer Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the SLX-Error type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. 
Note: epsilon provides an additional stop condition. @@ -352,6 +355,7 @@ def __init__( x, w, slx_lags=0, + slx_vars="All", max_iter=1, epsilon=0.00001, A1="hom_sc", @@ -365,18 +369,29 @@ def __init__( ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) x_constant, name_x, warn = USER.check_constant(x, name_x) name_x = USER.set_name_x(name_x, x_constant) # initialize in case None, includes constant set_warn(self, warn) self.title = "GM SPATIALLY WEIGHTED LEAST SQUARES (HOM)" + if slx_lags >0: - lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - x_constant = np.hstack((x_constant, lag_x)) + + x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + self.title += " WITH SLX (SLX-Error)" + + # OLD + #if slx_lags >0: + #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + #x_constant = np.hstack((x_constant, lag_x)) # name_x += USER.set_name_spatial_lags(name_x, slx_lags) - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - self.title += " WITH SLX (SLX-Error)" + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + #self.title += " WITH SLX (SLX-Error)" + + + BaseGM_Error_Hom.__init__( self, y=y, @@ -579,15 +594,15 @@ class GM_Endog_Error_Hom(BaseGM_Endog_Error_Hom): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) @@ -595,7 +610,9 @@ class GM_Endog_Error_Hom(BaseGM_Endog_Error_Hom): Spatial weights object slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the SLX-Error type. + If slx_lags>0, the specification becomes of the SLX-Error type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. Note: epsilon provides an additional stop condition. 
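# Conceptual sketch of what the USER.flex_wx(...) calls in the hunks above do:
# append spatial lags of the X columns selected by slx_vars to the design matrix.
# This is inferred from the call sites in this patch; the real helper also handles
# the variable names and input checking, and its column ordering may differ.
# The data below are synthetic.
import numpy as np
from libpysal.weights import lat2W
from libpysal.weights.spatial_lag import lag_spatial

w = lat2W(5, 5, rook=True)
w.transform = "r"
rng = np.random.default_rng(1)
x = rng.normal(size=(w.n, 3))
slx_vars = [True, False, True]      # lag columns 0 and 2 only
slx_lags = 2

lagged_cols = []
for j, lag_it in enumerate(slx_vars):
    if lag_it:
        col = x[:, j]
        for _ in range(slx_lags):
            col = lag_spatial(w, col)           # W x, then W(W x), ...
            lagged_cols.append(col.reshape(-1, 1))
x_slx = np.hstack([x] + lagged_cols)
print(x_slx.shape)                              # (25, 3 + 2 * 2)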
@@ -808,6 +825,7 @@ def __init__( q, w, slx_lags=0, + slx_vars="All", max_iter=1, epsilon=0.00001, A1="hom_sc", @@ -823,18 +841,30 @@ def __init__( ): n = USER.check_arrays(y, x, yend, q) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x) name_x = USER.set_name_x(name_x, x_constant) # initialize in case None, includes constant set_warn(self, warn) self.title = "GM SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HOM)" - if slx_lags > 0: - lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - x_constant = np.hstack((x_constant, lag_x)) + + if slx_lags >0: + + x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + + self.title += " WITH SLX (SLX-Error)" + + # OLD + #if slx_lags > 0: + #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + #x_constant = np.hstack((x_constant, lag_x)) # name_x += USER.set_name_spatial_lags(name_x, slx_lags) - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - self.title += " WITH SLX (SLX-Error)" + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + #self.title += " WITH SLX (SLX-Error)" + + BaseGM_Endog_Error_Hom.__init__( self, y=y, @@ -1049,15 +1079,15 @@ class GM_Combo_Hom(BaseGM_Combo_Hom): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) @@ -1071,6 +1101,8 @@ class GM_Combo_Hom(BaseGM_Combo_Hom): Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the General Nesting Spatial Model (GNSM) type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged lag_q : boolean If True, then include spatial lags of the additional instruments (q). 
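# Sketch of the augmented-regression logic behind the new dwh() diagnostic added in
# spreg/diagnostics.py at the top of this patch, written with plain numpy/scipy on
# simulated data; spreg's dwh() applies the same idea to a fitted two-stage least
# squares object (reg.yend, reg.h, reg.z).
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
n = 200
z = rng.normal(size=(n, 2))                      # external instruments
x_ex = rng.normal(size=(n, 1))                   # exogenous regressor
u = rng.normal(size=(n, 1))
yend = z @ np.array([[1.0], [0.5]]) + 0.8 * u    # endogenous: correlated with u
y = 1.0 + 2.0 * x_ex + 1.5 * yend + u

def ols_beta(yv, X):
    beta, *_ = np.linalg.lstsq(X, yv, rcond=None)
    return beta

ones = np.ones((n, 1))
H = np.hstack((ones, x_ex, z))                   # all exogenous variables and instruments
yend_hat = H @ ols_beta(yend, H)                 # step 1: predict yend from H
X_aug = np.hstack((ones, x_ex, yend, yend_hat))  # step 2: augmented regression
e = y - X_aug @ ols_beta(y, X_aug)
utu = float(e.T @ e)
X_con = X_aug[:, :-1]                            # constrained model drops yend_hat
e0 = y - X_con @ ols_beta(y, X_con)
rtr = float(e0.T @ e0)
r, k = 1, X_aug.shape[1]                         # one endogenous variable, so one constraint
F = ((rtr - utu) / r) / (utu / (n - k))          # step 3: F test on the yend_hat coefficient
p = stats.f.sf(F, r, n - k)
print(F, p)                                      # a significant F points to endogeneity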
@@ -1303,6 +1335,7 @@ def __init__( w=None, w_lags=1, slx_lags=0, + slx_vars="All", lag_q=True, max_iter=1, epsilon=0.00001, @@ -1319,16 +1352,27 @@ def __init__( ): n = USER.check_arrays(y, x, yend, q) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x) name_x = USER.set_name_x(name_x, x_constant) # initialize in case None, includes constant set_warn(self, warn) - if slx_lags == 0: - yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) - else: - yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + + if slx_lags > 0: + yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags,slx_vars) x_constant = np.hstack((x_constant, wx)) + else: + yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + + + # OLD + #if slx_lags == 0: + #yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + #else: + #yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + #x_constant = np.hstack((x_constant, wx)) + BaseGM_Combo_Hom.__init__( self, y=y, @@ -1349,10 +1393,31 @@ def __init__( ) set_warn(self, warn) self.title = "SPATIALLY WEIGHTED 2SLS- GM-COMBO MODEL (HOM)" - if slx_lags > 0: -# name_x += USER.set_name_spatial_lags(name_x, slx_lags) - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + + if slx_lags > 0: # adjust for flexwx + if (isinstance(slx_vars,list)): # slx_vars has True,False + if len(slx_vars) != x.shape[1] : + raise Exception("slx_vars incompatible with x column dimensions") + else: # use slx_vars to extract proper columns + workname = name_x[1:] + kx = len(workname) + vv = list(compress(workname,slx_vars)) + name_x += USER.set_name_spatial_lags(vv, slx_lags) + wkx = slx_vars.count(True) + else: + kx = len(name_x) - 1 + wkx = kx + name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant self.title += " WITH SLX (GNSM)" + + + # OLD + #if slx_lags > 0: +# name_x += USER.set_name_spatial_lags(name_x, slx_lags) + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + #self.title += " WITH SLX (GNSM)" + + self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) # self.name_x = USER.set_name_x(name_x, x_constant) @@ -1362,13 +1427,41 @@ def __init__( self.name_z = self.name_x + self.name_yend self.name_z.append("lambda") # listing lambda last self.name_q = USER.set_name_q(name_q, q) - self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + + if slx_lags > 0: # need to remove all but last SLX variables from name_x + self.name_x0 = [] + self.name_x0.append(self.name_x[0]) # constant + if (isinstance(slx_vars,list)): # boolean list passed + # x variables that were not lagged + self.name_x0.extend(list(compress(self.name_x[1:],[not i for i in slx_vars]))) + # last wkx variables + self.name_x0.extend(self.name_x[-wkx:]) + + + else: + okx = int((self.k - self.yend.shape[1] - 1) / (slx_lags + 1)) # number of original exogenous vars + + self.name_x0.extend(self.name_x[-okx:]) + + self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) + + #var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + var_types = ['x'] * 
(kx + 1) + ['wx'] * wkx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] + else: + self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + var_types = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] + + + #self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) self.A1 = A1 self.output = pd.DataFrame(self.name_z, columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho', 'lambda'] + + self.output['var_type'] = var_types + + #self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho', 'lambda'] self.output['regime'], self.output['equation'] = (0, 0) self.other_top = _spat_pseudo_r2(self) self.other_top += _summary_iteration(self) diff --git a/spreg/error_sp_hom_regimes.py b/spreg/error_sp_hom_regimes.py index 0853e2db..f4a88044 100644 --- a/spreg/error_sp_hom_regimes.py +++ b/spreg/error_sp_hom_regimes.py @@ -39,12 +39,12 @@ class GM_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. w : pysal W object @@ -338,14 +338,15 @@ def __init__( ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) self.constant_regi = constant_regi self.cols2regi = cols2regi self.regime_err_sep = regime_err_sep self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_w = USER.set_name_w(name_w, w) + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) self.name_regimes = USER.set_name_ds(name_regimes) self.n = n self.y = y @@ -601,19 +602,19 @@ class GM_Endog_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. 
w : pysal W object @@ -955,11 +956,13 @@ def __init__( ): n = USER.check_arrays(y, x, yend, q) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) self.constant_regi = constant_regi self.cols2regi = cols2regi self.name_ds = USER.set_name_ds(name_ds) + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) self.name_regimes = USER.set_name_ds(name_regimes) self.name_w = USER.set_name_w(name_w, w) self.n = n @@ -1312,19 +1315,19 @@ class GM_Combo_Hom_Regimes(GM_Endog_Error_Hom_Regimes): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. w : pysal W object @@ -1714,8 +1717,9 @@ def __init__( set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") regime_err_sep = False n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) @@ -1723,6 +1727,7 @@ def __init__( self.name_y = USER.set_name_y(name_y) name_yend = USER.set_name_yend(name_yend, yend) name_q = USER.set_name_q(name_q, q) + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) if regime_err_sep and any(col != True for col in cols2regi): set_warn(self, "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.") diff --git a/spreg/error_sp_regimes.py b/spreg/error_sp_regimes.py index 5c9ddd5d..a50543d5 100644 --- a/spreg/error_sp_regimes.py +++ b/spreg/error_sp_regimes.py @@ -30,12 +30,12 @@ class GM_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. 
w : pysal W object @@ -295,8 +295,8 @@ def __init__( ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) x_constant, name_x, warn = USER.check_constant( x, name_x, just_rem=True) set_warn(self, warn) @@ -314,6 +314,7 @@ def __init__( self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_w = USER.set_name_w(name_w, w) + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) self.name_regimes = USER.set_name_ds(name_regimes) self.n = n self.y = y @@ -503,19 +504,19 @@ class GM_Endog_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. w : pysal W object @@ -817,8 +818,9 @@ def __init__( ): n = USER.check_arrays(y, x, yend, q) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant( x, name_x, just_rem=True) set_warn(self, warn) @@ -832,6 +834,7 @@ def __init__( self.constant_regi = constant_regi self.cols2regi = cols2regi self.name_ds = USER.set_name_ds(name_ds) + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) self.name_regimes = USER.set_name_ds(name_regimes) self.name_w = USER.set_name_w(name_w, w) self.n = n @@ -1134,12 +1137,12 @@ class GM_Combo_Regimes(GM_Endog_Error_Regimes, REGI.Regimes_Frame): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. 
yend : array @@ -1497,8 +1500,9 @@ def __init__( set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") regime_err_sep = False n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant( x, name_x, just_rem=True) set_warn(self, warn) @@ -1506,6 +1510,7 @@ def __init__( self.name_y = USER.set_name_y(name_y) name_yend = USER.set_name_yend(name_yend, yend) name_q = USER.set_name_q(name_q, q) + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) if regime_err_sep and any(col != True for col in cols2regi): set_warn(self, "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.") @@ -1596,12 +1601,12 @@ class GMM_Error_Regimes(GM_Error_Regimes, GM_Combo_Regimes, GM_Endog_Error_Regim Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. w : pysal W object diff --git a/spreg/ml_error.py b/spreg/ml_error.py index aad98fab..363ec5d2 100644 --- a/spreg/ml_error.py +++ b/spreg/ml_error.py @@ -304,16 +304,18 @@ class ML_Error(BaseML_Error): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant w : Sparse matrix Spatial weights sparse matrix slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the SLX-Error type. + If slx_lags>0, the specification becomes of the SLX-Error type. 
+ slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged method : string if 'full', brute force calculation (full matrix expressions) if 'ord', Ord eigenvalue method @@ -472,6 +474,7 @@ def __init__( x, w, slx_lags=0, + slx_vars="All", method="full", epsilon=0.0000001, vm=False, @@ -482,17 +485,19 @@ def __init__( latex=False, ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) x_constant, name_x, warn = USER.check_constant(x, name_x) name_x = USER.set_name_x(name_x, x_constant) # initialize in case None includes constant set_warn(self, warn) self.title = "ML SPATIAL ERROR" if slx_lags >0: - lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - x_constant = np.hstack((x_constant, lag_x)) + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) # name_x += USER.set_name_spatial_lags(name_x, slx_lags) - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant from name_x + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant from name_x + x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) self.title += " WITH SLX (SLX-Error)" self.title += " (METHOD = " + method + ")" diff --git a/spreg/ml_error_regimes.py b/spreg/ml_error_regimes.py index 9793b9df..cf3a0f00 100644 --- a/spreg/ml_error_regimes.py +++ b/spreg/ml_error_regimes.py @@ -28,12 +28,12 @@ class ML_Error_Regimes(BaseML_Error, REGI.Regimes_Frame): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. constant_regi: string @@ -302,12 +302,13 @@ def __init__( ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) self.constant_regi = constant_regi self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_w = USER.set_name_w(name_w, w) + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) self.name_regimes = USER.set_name_ds(name_regimes) self.n = n self.y = y diff --git a/spreg/ml_lag.py b/spreg/ml_lag.py index 32287e76..00a9c793 100755 --- a/spreg/ml_lag.py +++ b/spreg/ml_lag.py @@ -12,7 +12,7 @@ from scipy import sparse as sp from scipy.sparse.linalg import splu as SuperLU from .utils import RegressionPropsY, RegressionPropsVM, inverse_prod, set_warn, get_lags -from .sputils import spdot, spfill_diagonal, spinv, _spmultiplier +from .sputils import spdot, spfill_diagonal, spinv from . import diagnostics as DIAG from . 
import user_output as USER import pandas as pd @@ -195,8 +195,9 @@ def __init__(self, y, x, w, slx_lags=0, method="full", epsilon=0.0000001): ylag = weights.lag_spatial(w, y) # b0, b1, e0 and e1 - if slx_lags>0: - self.x = np.hstack((self.x, get_lags(w, self.x[:, 1:], slx_lags))) +# now set in ML_Lag +# if slx_lags>0: +# self.x = np.hstack((self.x, get_lags(w, self.x[:, 1:], slx_lags))) self.n, self.k = self.x.shape xtx = spdot(self.x.T, self.x) @@ -323,9 +324,9 @@ class ML_Lag(BaseML_Lag): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant w : pysal W object @@ -333,6 +334,8 @@ class ML_Lag(BaseML_Lag): slx_lags : integer Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the Spatial Durbin type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged method : string if 'full', brute force calculation (full matrix expressions) if 'ord', Ord eigenvalue method @@ -340,11 +343,11 @@ class ML_Lag(BaseML_Lag): tolerance criterion in mimimize_scalar function and inverse_product spat_diag : boolean If True, then compute Common Factor Hypothesis test when applicable - spat_impacts : string + spat_impacts : string or list Include average direct impact (ADI), average indirect impact (AII), and average total impact (ATI) in summary results. - Options are 'simple', 'full', 'power', or None. - See sputils.spmultiplier for more information. + Options are 'simple', 'full', 'power', 'all' or None. + See sputils._spmultiplier for more information. vm : boolean if True, include variance-covariance matrix in summary results @@ -421,6 +424,8 @@ class ML_Lag(BaseML_Lag): z_stat : list of tuples z statistic; each tuple contains the pair (statistic, p-value), where each is a float + sp_multipliers: dict + Dictionary of spatial multipliers (if spat_impacts is not None) name_y : string Name of dependent variable for use in output name_x : list of strings @@ -594,6 +599,7 @@ def __init__( x, w, slx_lags=0, + slx_vars="All", method="full", epsilon=0.0000001, spat_impacts="simple", @@ -606,12 +612,25 @@ def __init__( latex=False, ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) x_constant, name_x, warn = USER.check_constant(x, name_x) name_x = USER.set_name_x(name_x, x_constant) # needs to be initialized for none, now with constant set_warn(self, warn) method = method.upper() + # using flex_wx + kx = len(name_x) + if slx_lags > 0: + x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + if isinstance(slx_vars,list): + kw = slx_vars.count(True) + if kw < kx - 1: + spat_diag = False # no common factor test + else: + kw = kx-1 + + BaseML_Lag.__init__( self, y=y, x=x_constant, w=w, slx_lags=slx_lags, method=method, epsilon=epsilon ) @@ -619,14 +638,17 @@ def __init__( self.k += 1 if slx_lags>0: - kx = len(name_x) - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + # kx = len(name_x) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG WITH SLX - SPATIAL DURBIN MODEL" + " (METHOD = " + 
method + ")" - var_types = ['x'] * kx + ['wx'] * (kx-1) * slx_lags + ['rho'] +# var_types = ['x'] * kx + ['wx'] * (kx-1) * slx_lags + ['rho'] + var_types = ['x'] * kx + ['wx'] * (kw) * slx_lags + ['rho'] else: self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG" + " (METHOD = " + method + ")" var_types = ['x'] * len(name_x) + ['rho'] self.slx_lags = slx_lags + self.slx_vars = slx_vars self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_x = name_x # already has constant @@ -643,12 +665,12 @@ def __init__( diag_out = None if spat_diag and slx_lags==1: diag_out = _spat_diag_out(self, w, 'yend', ml=True) - if spat_impacts and slx_lags == 0: - impacts = _summary_impacts(self, _spmultiplier(w, self.rho, method=spat_impacts), spat_impacts) + if spat_impacts: + self.sp_multipliers, impacts_str = _summary_impacts(self, w, spat_impacts, slx_lags,slx_vars) try: - diag_out += impacts + diag_out += impacts_str except TypeError: - diag_out = impacts + diag_out = impacts_str output(reg=self, vm=vm, robust=False, other_end=diag_out, latex=latex) def lag_c_loglik(rho, n, e0, e1, W): diff --git a/spreg/ml_lag_regimes.py b/spreg/ml_lag_regimes.py index 0eb990ca..e5af2d38 100644 --- a/spreg/ml_lag_regimes.py +++ b/spreg/ml_lag_regimes.py @@ -11,7 +11,6 @@ import multiprocessing as mp from .ml_lag import BaseML_Lag from .utils import set_warn, get_lags -from .sputils import _spmultiplier import pandas as pd from .output import output, _nonspat_top, _spat_diag_out, _spat_pseudo_r2, _summary_impacts @@ -27,12 +26,12 @@ class ML_Lag_Regimes(BaseML_Lag, REGI.Regimes_Frame): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. constant_regi: string @@ -66,11 +65,11 @@ class ML_Lag_Regimes(BaseML_Lag, REGI.Regimes_Frame): the spatial parameter is fixed accross regimes. spat_diag : boolean If True, then compute Common Factor Hypothesis test when applicable - spat_impacts : string + spat_impacts : string or list Include average direct impact (ADI), average indirect impact (AII), and average total impact (ATI) in summary results. - Options are 'simple', 'full', 'power', or None. - See sputils.spmultiplier for more information. + Options are 'simple', 'full', 'power', 'all' or None. + See sputils._spmultiplier for more information. 
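To illustrate the new slx_vars and spat_impacts options documented above, a sketch on the columbus sample; the boolean list below lags only INC and is purely illustrative:

>>> import numpy as np
>>> import libpysal
>>> from spreg import ML_Lag
>>> db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r")
>>> y = np.array(db.by_col("HOVAL")).reshape(-1, 1)
>>> x = np.array([db.by_col("INC"), db.by_col("CRIME")]).T
>>> w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp"))
>>> w.transform = "r"
>>> durbin = ML_Lag(y, x, w=w, slx_lags=1, slx_vars=[True, False],
...                 spat_impacts="all", name_y="HOVAL", name_x=["INC", "CRIME"])
>>> durbin.sp_multipliers   # one (ADI, AII, ATI) tuple per requested method

With a partial slx_vars list the Common Factor Hypothesis test is skipped, matching the spat_diag handling above.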
cores : boolean Specifies if multiprocessing is to be used Default: no multiprocessing, cores = False @@ -180,6 +179,10 @@ class ML_Lag_Regimes(BaseML_Lag, REGI.Regimes_Frame): p-value), where each is a float Only available in dictionary 'multi' when multiple regressions (see 'multi' below for details) + sp_multipliers: dict + Dictionary of spatial multipliers (if spat_impacts is not None) + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) name_y : string Name of dependent variable for use in output name_x : list of strings @@ -331,8 +334,8 @@ def __init__( ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) name_y = USER.set_name_y(name_y) self.name_y = name_y @@ -349,6 +352,7 @@ def __init__( self.name_x_r = USER.set_name_x(name_x, x_constant) + [USER.set_name_yend_sp(name_y)] self.method = method self.epsilon = epsilon + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) self.name_regimes = USER.set_name_ds(name_regimes) self.constant_regi = constant_regi self.n = n @@ -451,12 +455,12 @@ def __init__( else: self.title = ("MAXIMUM LIKELIHOOD SPATIAL LAG - REGIMES"+ " (METHOD = "+ method+ ")") - if spat_impacts and slx_lags == 0: - impacts = _summary_impacts(self, _spmultiplier(w, self.rho, method=spat_impacts), spat_impacts, regimes=True) + if spat_impacts: + self.sp_multipliers, impacts_str = _summary_impacts(self, w, spat_impacts, slx_lags, regimes=True) try: - diag_out += impacts + diag_out += impacts_str except TypeError: - diag_out = impacts + diag_out = impacts_str self.other_top = _spat_pseudo_r2(self) self.other_top += _nonspat_top(self, ml=True) output(reg=self, vm=vm, robust=False, other_end=diag_out, latex=latex) @@ -607,8 +611,9 @@ def ML_Lag_Regimes_Multi( self.output = pd.concat([self.output, results[r].output], ignore_index=True) if spat_diag and slx_lags == 1: results[r].other_mid += _spat_diag_out(results[r], None, 'yend', ml=True) - if spat_impacts and slx_lags == 0: - results[r].other_mid += _summary_impacts(results[r], _spmultiplier(results[r].w, results[r].rho, method=spat_impacts), spat_impacts) + if spat_impacts: + results[r].sp_multipliers, impacts_str = _summary_impacts(results[r], results[r].w, spat_impacts, slx_lags) + results[r].other_mid += impacts_str counter += 1 self.multi = results self.chow = REGI.Chow(self) diff --git a/spreg/ols.py b/spreg/ols.py index 8efb9c70..f5320395 100644 --- a/spreg/ols.py +++ b/spreg/ols.py @@ -4,10 +4,11 @@ import numpy as np import numpy.linalg as la from . import user_output as USER -from .output import output, _spat_diag_out, _nonspat_mid, _nonspat_top +from .output import output, _spat_diag_out, _nonspat_mid, _nonspat_top, _summary_vif from . import robust as ROBUST from .utils import spdot, RegressionPropsY, RegressionPropsVM, set_warn, get_lags import pandas as pd +from libpysal import weights # needed for check on kernel weights in slx __all__ = ["OLS"] @@ -128,9 +129,9 @@ class OLS(BaseOLS): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant w : pysal W object @@ -147,6 +148,8 @@ class OLS(BaseOLS): slx_lags : integer Number of spatial lags of X to include in the model specification. 
If slx_lags>0, the specification becomes of the SLX type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged sig2n_k : boolean If True, then use n-k to estimate sigma^2. If False, use n. nonspat_diag : boolean @@ -161,6 +164,8 @@ class OLS(BaseOLS): white_test : boolean If True, compute White's specification robust test. (requires nonspat_diag=True) + vif : boolean + If True, compute variance inflation factor. vm : boolean If True, include variance-covariance matrix in summary results @@ -434,11 +439,13 @@ def __init__( robust=None, gwk=None, slx_lags = 0, + slx_vars = "All", sig2n_k=True, nonspat_diag=True, spat_diag=False, moran=False, white_test=False, + vif=False, vm=False, name_y=None, name_x=None, @@ -449,7 +456,7 @@ def __init__( ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) + y, name_y = USER.check_y(y, n, name_y) USER.check_robust(robust, gwk) if robust == "hac" and spat_diag: set_warn( @@ -465,15 +472,17 @@ def __init__( white_test = False USER.check_spat_diag(spat_diag, w) x_constant, name_x, warn = USER.check_constant(x, name_x) + set_warn(self, warn) self.name_x = USER.set_name_x(name_x, x_constant) + + w = USER.check_weights(w, y, slx_lags=slx_lags) if slx_lags >0: - USER.check_weights(w, y, w_required=True) - lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - x_constant = np.hstack((x_constant, lag_x)) - self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) - else: - USER.check_weights(w, y, w_required=False) - set_warn(self, warn) +# lag_x = get_lags(w, x_constant[:, 1:], slx_lags) +# x_constant = np.hstack((x_constant, lag_x)) +# self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) + x_constant,self.name_x = USER.flex_wx(w,x=x_constant,name_x=self.name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + BaseOLS.__init__( self, y=y, x=x_constant, robust=robust, gwk=gwk, sig2n_k=sig2n_k ) @@ -493,6 +502,8 @@ def __init__( if nonspat_diag: self.other_mid += _nonspat_mid(self, white_test=white_test) self.other_top += _nonspat_top(self) + if vif: + self.other_mid += _summary_vif(self) if spat_diag: other_end += _spat_diag_out(self, w, 'ols', moran=moran) output(reg=self, vm=vm, robust=robust, other_end=other_end, latex=latex) diff --git a/spreg/ols_regimes.py b/spreg/ols_regimes.py index 66ba3d8e..cc9eac95 100755 --- a/spreg/ols_regimes.py +++ b/spreg/ols_regimes.py @@ -21,12 +21,12 @@ class OLS_Regimes(BaseOLS, REGI.Regimes_Frame, RegressionPropsY): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. 
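A sketch of the new vif and slx_vars options in OLS, again on the columbus sample; parameter values are illustrative:

>>> import numpy as np
>>> import libpysal
>>> from spreg import OLS
>>> db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r")
>>> y = np.array(db.by_col("HOVAL")).reshape(-1, 1)
>>> x = np.array([db.by_col("INC"), db.by_col("CRIME")]).T
>>> w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp"))
>>> w.transform = "r"
>>> ols = OLS(y, x, w=w, slx_lags=1, slx_vars=[True, False], vif=True,
...           name_y="HOVAL", name_x=["INC", "CRIME"])
>>> print(ols.summary)

The VIF table is appended to the summary via _summary_vif, and w is needed here only because slx_lags > 0.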
w : pysal W object @@ -415,7 +415,7 @@ def __init__( ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) + y, name_y = USER.check_y(y, n, name_y) USER.check_robust(robust, gwk) if robust == "hac": if regime_err_sep: @@ -439,13 +439,12 @@ def __init__( USER.check_spat_diag(spat_diag, w) x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) name_x = USER.set_name_x(name_x, x_constant, constant=True) + w = USER.check_weights(w, y, slx_lags=slx_lags) if slx_lags > 0: - USER.check_weights(w, y, w_required=True) lag_x = get_lags(w, x_constant, slx_lags) x_constant = np.hstack((x_constant, lag_x)) name_x += USER.set_name_spatial_lags(name_x, slx_lags) - else: - USER.check_weights(w, y, w_required=False) + set_warn(self, warn) self.slx_lags = slx_lags self.name_x_r = USER.set_name_x(name_x, x_constant) @@ -455,6 +454,8 @@ def __init__( self.name_gwk = USER.set_name_w(name_gwk, gwk) self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) + + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) self.name_regimes = USER.set_name_ds(name_regimes) self.n = n cols2regi = REGI.check_cols2regi( @@ -712,8 +713,8 @@ def __init__( self, y, x, w, n_clusters=None, quorum=-np.inf, trace=True, **kwargs): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True) # Standardize the variables x_std = (x - np.mean(x, axis=0)) / np.std(x, axis=0) @@ -757,7 +758,7 @@ def _test(): _test() import numpy as np import libpysal - import pysal + from spreg import OLS_Regimes db = libpysal.io.open(libpysal.examples.get_path("NAT.dbf"), "r") y_var = "HR90" @@ -768,7 +769,6 @@ def _test(): regimes = db.by_col(r_var) w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("NAT.shp")) w.transform = "r" - #olsr = pysal.model.spreg.OLS_Regimes( olsr = OLS_Regimes( y, x, diff --git a/spreg/output.py b/spreg/output.py old mode 100644 new mode 100755 index 5c98f586..fbebe65e --- a/spreg/output.py +++ b/spreg/output.py @@ -8,7 +8,7 @@ from . import diagnostics as diagnostics from . import diagnostics_tsls as diagnostics_tsls from . 
import diagnostics_sp as diagnostics_sp -from .sputils import _sp_effects +from .sputils import _sp_effects, _spmultiplier __all__ = [] @@ -269,26 +269,28 @@ def _spat_diag_out(reg, w, type, moran=False, ml=False): reg.ak_test[1], ) if any(reg.output['var_type'] == 'rho'): + # no common factor test if slx_vars is not "All" if reg.slx_lags == 1 and not any(reg.output['var_type'] == 'yend'): - wx_indices = reg.output[(reg.output['var_type'] == 'wx') & (reg.output['regime'] != '_Global')].index - x_indices = [] - for m in reg.output['regime'].unique(): - x_indices.extend(reg.output[(reg.output['regime'] == m) & (reg.output['var_type'] == 'x')].index[1:]) - vm_indices = x_indices + wx_indices.tolist() + reg.output[reg.output['var_type'] == 'rho'].index.tolist() - cft, cft_p = diagnostics_sp.comfac_test(reg.rho, + if not hasattr(reg, 'slx_vars') or not isinstance(reg.slx_vars, list): + wx_indices = reg.output[(reg.output['var_type'] == 'wx') & (reg.output['regime'] != '_Global')].index + x_indices = [] + for m in reg.output['regime'].unique(): + x_indices.extend(reg.output[(reg.output['regime'] == m) & (reg.output['var_type'] == 'x')].index[1:]) + vm_indices = x_indices + wx_indices.tolist() + reg.output[reg.output['var_type'] == 'rho'].index.tolist() + cft, cft_p = diagnostics_sp.comfac_test(reg.rho, reg.betas[x_indices], reg.betas[wx_indices], reg.vm[vm_indices, :][:, vm_indices]) - reg.cfh_test = cft, cft_p - strSummary += "%-27s %2d %12.3f %9.4f\n" % ( - "Common Factor Hypothesis Test", - len(wx_indices), - reg.cfh_test[0], - reg.cfh_test[1], - ) + reg.cfh_test = cft, cft_p + strSummary += "%-27s %2d %12.3f %9.4f\n" % ( + "Common Factor Hypothesis Test", + len(wx_indices), + reg.cfh_test[0], + reg.cfh_test[1], + ) elif type == "ols": - strSummary += "- SARMA -\n" + strSummary += "- SARERR -\n" if not moran: strSummary += ( "TEST DF VALUE PROB\n" @@ -568,7 +570,7 @@ def _summary_iteration(reg): return txt -def _summary_impacts(reg, spmult, spat_impacts, slx_lags=0, regimes=False): +def _summary_impacts(reg, w, spat_impacts, slx_lags=0, slx_vars="All",regimes=False): """ Spatial direct, indirect and total effects in spatial lag model. Uses multipliers computed by sputils._spmultipliers. 
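For a full spatial Durbin fit (slx_vars left at "All"), the Common Factor Hypothesis test above is still computed and stored on the regression object; a sketch, assuming y, x and w built from the columbus sample as in the earlier sketches:

>>> from spreg import ML_Lag
>>> durbin = ML_Lag(y, x, w=w, slx_lags=1, spat_diag=True,
...                 name_y="HOVAL", name_x=["INC", "CRIME"])
>>> durbin.cfh_test   # (statistic, p-value); omitted when slx_vars is a boolean list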
@@ -576,27 +578,97 @@ def _summary_impacts(reg, spmult, spat_impacts, slx_lags=0, regimes=False): Attributes ---------- reg: spreg regression object - spmult: spatial multipliers as a dictionary - spat_impacts: spatial impacts method as string + w: spatial weights object + spat_impacts: spatial impacts method as string or list with strings slx_lags: int, number of spatial lags of X in the model + slx_vars : either "All" (default) for all variables lagged, or a list + of booleans matching the columns of x that will be lagged or not regimes: boolean, True if regimes model Returns ------- - strings with direct, indirect and total effects + sp_multipliers: dict with direct, indirect and total multipliers + strSummary: strings with direct, indirect and total effects """ - variables = reg.output.query("var_type in ['x', 'yend'] and index != 0") + try: + spat_impacts = [spat_impacts.lower()] + except AttributeError: + spat_impacts = [x.lower() for x in spat_impacts] + + #variables = reg.output.query("var_type in ['x', 'yend'] and index != 0") # excludes constant + variables = reg.output.query("var_type == 'x' and index != 0") # excludes constant and endogenous variables + if regimes: variables = variables[~variables['var_names'].str.endswith('_CONSTANT')] variables_index = variables.index - btot, bdir, bind = _sp_effects(reg, variables, spmult, slx_lags) - - strSummary = "\nSPATIAL LAG MODEL IMPACTS\n" - strSummary += "Impacts computed using the '" + spat_impacts + "' method.\n" - strSummary += " Variable Direct Indirect Total\n" - for i in range(len(variables)): - strSummary += "%20s %12.4f %12.4f %12.4f\n" % ( - variables['var_names'][variables_index[i]], bdir[i][0], bind[i][0], btot[i][0]) + if slx_lags==0: + strSummary = "\nSPATIAL LAG MODEL IMPACTS\n" + else: + strSummary = "\nSPATIAL DURBIN MODEL IMPACTS\n" + + if abs(reg.rho) >= 1: + strSummary += "Omitted since spatial autoregressive parameter is outside the boundary (-1, 1).\n" + return None, strSummary + + if "all" in spat_impacts: + spat_impacts = ["simple", "full", "power"] + + sp_multipliers = {} + for i in spat_impacts: + spmult = _spmultiplier(w, reg.rho, method=i) # computes the multipliers, slx_lags not needed + + strSummary += spmult["warn"] + btot, bdir, bind = _sp_effects(reg, variables, spmult, slx_lags,slx_vars) # computes the impacts, needs slx_lags + sp_multipliers[spmult["method"]] = spmult['adi'], spmult['aii'].item(), spmult['ati'].item() + + strSummary += "Impacts computed using the '" + spmult["method"] + "' method.\n" + strSummary += " Variable Direct Indirect Total\n" + for i in range(len(variables)): + strSummary += "%20s %12.4f %12.4f %12.4f\n" % ( + variables['var_names'][variables_index[i]], bdir[i][0], bind[i][0], btot[i][0]) + + return sp_multipliers, strSummary + +def _summary_vif(reg): + """ + Summary of variance inflation factors for the model. + + Parameters + ---------- + reg: spreg regression object + + Returns + ------- + strSummary: string with variance inflation factors + + """ + vif = diagnostics.vif(reg) + strSummary = "\nVARIANCE INFLATION FACTOR\n" + strSummary += " Variable VIF Tolerance\n" + for i in range(len(reg.name_x)-1): + i += 1 + strSummary += "%20s %12.4f %12.4f\n" % ( + reg.name_x[i], vif[i][0], vif[i][1]) + return strSummary + +def _summary_dwh(reg): + """ + Summary of Durbin-Wu-Hausman test on endogeneity of variables. 
+ + Parameters + ---------- + reg: spreg regression object + + Returns + ------- + strSummary: string with Durbin-Wu-Hausman test results + + """ + strSummary = "\nREGRESSION DIAGNOSTICS\n" + strSummary += ( + "TEST DF VALUE PROB\n") + strSummary += "%-27s %2d %12.3f %9.4f\n" % ( + "Durbin-Wu-Hausman test",reg.yend.shape[1],reg.dwh[0],reg.dwh[1]) return strSummary \ No newline at end of file diff --git a/spreg/panel_fe.py b/spreg/panel_fe.py index f8ac9491..786e4d2b 100644 --- a/spreg/panel_fe.py +++ b/spreg/panel_fe.py @@ -201,9 +201,9 @@ class Panel_FE_Lag(BasePanel_FE_Lag): Parameters ---------- - y : array + y : numpy.ndarray or pandas object nxt or (nxt)x1 array for dependent variable - x : array + x : numpy.ndarray or pandas object nx(txk) or (nxt)xk array for independent (exogenous) variables, no constant w : pysal W object @@ -328,7 +328,7 @@ def __init__( set_warn(self, warn) bigy, bigx, name_y, name_x, warn = check_panel(y, x_constant, w, name_y, name_x) set_warn(self, warn) - USER.check_weights(w, bigy, w_required=True, time=True) + w = USER.check_weights(w, bigy, w_required=True, time=True) BasePanel_FE_Lag.__init__(self, bigy, bigx, w, epsilon=epsilon) # increase by 1 to have correct aic and sc, include rho in count @@ -495,9 +495,9 @@ class Panel_FE_Error(BasePanel_FE_Error): Parameters ---------- - y : array + y : numpy.ndarray or pandas object nxt or (nxt)x1 array for dependent variable - x : array + x : numpy.ndarray or pandas object nx(txk) or (nxt)xk array for independent (exogenous) variables, no constant w : pysal W object @@ -617,7 +617,7 @@ def __init__( set_warn(self, warn) bigy, bigx, name_y, name_x, warn = check_panel(y, x_constant, w, name_y, name_x) set_warn(self, warn) - USER.check_weights(w, bigy, w_required=True, time=True) + w = USER.check_weights(w, bigy, w_required=True, time=True) BasePanel_FE_Error.__init__(self, bigy, bigx, w, epsilon=epsilon) self.title = "MAXIMUM LIKELIHOOD SPATIAL ERROR PANEL" + " - FIXED EFFECTS" diff --git a/spreg/panel_re.py b/spreg/panel_re.py index b53e01aa..6752eff9 100644 --- a/spreg/panel_re.py +++ b/spreg/panel_re.py @@ -244,9 +244,9 @@ class Panel_RE_Lag(BasePanel_RE_Lag): Parameters ---------- - y : array + y : numpy.ndarray or pandas object nxt or (nxt)x1 array for dependent variable - x : array + x : numpy.ndarray or pandas object nx(txk) or (nxt)xk array for independent (exogenous) variables, excluding the constant w : pysal W object @@ -382,7 +382,7 @@ def __init__( set_warn(self, warn) bigx, name_x, warn = USER.check_constant(bigx, name_x) set_warn(self, warn) - USER.check_weights(w, bigy, w_required=True, time=True) + w = USER.check_weights(w, bigy, w_required=True, time=True) BasePanel_RE_Lag.__init__(self, bigy, bigx, w, epsilon=epsilon) # increase by 1 to have correct aic and sc, include rho in count @@ -613,9 +613,9 @@ class Panel_RE_Error(BasePanel_RE_Error): Parameters ---------- - y : array + y : numpy.ndarray or pandas object nxt or (nxt)x1 array for dependent variable - x : array + x : numpy.ndarray or pandas object nx(txk) or (nxt)xk array for independent (exogenous) variables, no constant w : pysal W object @@ -749,7 +749,7 @@ def __init__( set_warn(self, warn) bigx, name_x, warn = USER.check_constant(bigx, name_x) set_warn(self, warn) - USER.check_weights(w, bigy, w_required=True, time=True) + w = USER.check_weights(w, bigy, w_required=True, time=True) BasePanel_RE_Error.__init__(self, bigy, bigx, w, epsilon=epsilon) self.title = "MAXIMUM LIKELIHOOD SPATIAL ERROR PANEL" + " - RANDOM EFFECTS" diff --git 
a/spreg/panel_utils.py b/spreg/panel_utils.py index ba2ac47d..d4f96af1 100644 --- a/spreg/panel_utils.py +++ b/spreg/panel_utils.py @@ -7,6 +7,7 @@ Pablo Estrada pabloestradace@gmail.com" import numpy as np +import pandas as pd from scipy import sparse as sp from .sputils import spdot @@ -31,6 +32,22 @@ def check_panel(y, x, w, name_y, name_x): Names of independent variables for use in output """ + if isinstance(y, (pd.Series, pd.DataFrame)): + if name_y is None: + try: + name_y = y.columns.to_list() + except AttributeError: + name_y = y.name + y = y.to_numpy() + + if isinstance(x, (pd.Series, pd.DataFrame)): + if name_x is None: + try: + name_x = x.columns.to_list() + except AttributeError: + name_x = x.name + x = x.to_numpy() + # Check if 'y' is a balanced panel with respect to 'W' if y.shape[0] / w.n != y.shape[0] // w.n: raise Exception("y must be ntx1 or nxt, and w must be an nxn PySAL W" "object.") diff --git a/spreg/probit.py b/spreg/probit.py index e744251a..e644787f 100644 --- a/spreg/probit.py +++ b/spreg/probit.py @@ -6,12 +6,12 @@ import numpy.linalg as la import scipy.optimize as op from scipy.stats import norm, chi2 - +from libpysal import weights chisqprob = chi2.sf import scipy.sparse as SP from . import user_output as USER from . import summary_output as SUMMARY -from .utils import spdot, spbroadcast, set_warn +from .utils import spdot, spbroadcast, set_warn, get_lags __all__ = ["Probit"] @@ -651,12 +651,15 @@ class Probit(BaseProbit): Parameters ---------- - x : array + x : numpy.ndarray or pandas object nxk array of independent variables (assumed to be aligned with y) - y : array + y : numpy.ndarray or pandas.Series nx1 array of dependent binary variable w : W PySAL weights instance aligned with y + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SLX type. optim : string Optimization method. Default: 'newton' (Newton-Raphson). 
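A sketch of the new SLX option for Probit, using the columbus sample with an illustrative binary response built from CRIME (the 40 cutoff is arbitrary):

>>> import numpy as np
>>> import libpysal
>>> from spreg import Probit
>>> db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r")
>>> yb = (np.array(db.by_col("CRIME")).reshape(-1, 1) > 40).astype(float)
>>> x = np.array([db.by_col("INC"), db.by_col("HOVAL")]).T
>>> w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp"))
>>> w.transform = "r"
>>> probit = Probit(yb, x, w=w, slx_lags=1,
...                 name_y="CRIME_DUMMY", name_x=["INC", "HOVAL"])
>>> print(probit.summary)   # title notes the spatially lagged X terms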
@@ -837,6 +840,7 @@ def __init__( y, x, w=None, + slx_lags=0, optim="newton", scalem="phimean", maxiter=100, @@ -849,23 +853,31 @@ def __init__( ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - if w != None: - USER.check_weights(w, y) + y, name_y = USER.check_y(y, n, name_y) + x_constant, name_x, warn = USER.check_constant(x, name_x) + set_warn(self, warn) + self.name_x = USER.set_name_x(name_x, x_constant) + + if w != None or slx_lags > 0: + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) spat_diag = True ws = w.sparse + if slx_lags > 0: + lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + x_constant = np.hstack((x_constant, lag_x)) + self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) else: ws = None - x_constant, name_x, warn = USER.check_constant(x, name_x) - set_warn(self, warn) BaseProbit.__init__( self, y=y, x=x_constant, w=ws, optim=optim, scalem=scalem, maxiter=maxiter ) self.title = "CLASSIC PROBIT ESTIMATOR" + if slx_lags > 0: + self.title += " WITH SPATIALLY LAGGED X (SLX)" + self.slx_lags = slx_lags self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) - self.name_x = USER.set_name_x(name_x, x) self.name_w = USER.set_name_w(name_w, w) SUMMARY.Probit(reg=self, w=w, vm=vm, spat_diag=spat_diag) diff --git a/spreg/sp_panels.py b/spreg/sp_panels.py index 68d170a2..e14f3b0e 100644 --- a/spreg/sp_panels.py +++ b/spreg/sp_panels.py @@ -308,7 +308,7 @@ def __init__( ): n_rows = USER.check_arrays(y, x) bigy, bigx, name_y, name_x = _get_panel_data(y, x, w, name_y, name_x) - USER.check_weights(w, bigy, w_required=True, time=True) + w = USER.check_weights(w, bigy, w_required=True, time=True) x_constant, name_x, warn = USER.check_constant(bigx, name_x) set_warn(self, warn) self.title = "GM SPATIAL ERROR PANEL MODEL - RANDOM EFFECTS (KKP)" diff --git a/spreg/sputils.py b/spreg/sputils.py old mode 100644 new mode 100755 index e0e7cece..80f7a2df --- a/spreg/sputils.py +++ b/spreg/sputils.py @@ -1,8 +1,9 @@ import numpy as np import numpy.linalg as la import scipy.sparse as SP -import scipy +import pandas as pd from scipy.sparse import linalg as SPla +from itertools import compress def spdot(a, b, array_out=True): @@ -281,7 +282,7 @@ def _spmultiplier(w, rho, method="simple", mtol=0.00000001): ---------- w : PySAL format spatial weights matrix rho : spatial autoregressive coefficient - method : one of "simple" (default), full" or "power" + method : one of "simple" (default), "full" or "power" mtol : tolerance for power iteration (default=0.00000001) Returns @@ -293,7 +294,7 @@ def _spmultiplier(w, rho, method="simple", mtol=0.00000001): pow = powers used in power approximation (otherwise 0) """ - multipliers = {"ati": 1.0, "adi": 1.0, "aii": 1.0} + multipliers = {"ati": 1.0, "adi": 1.0, "aii": 1.0, "method": method, "warn": ''} multipliers["pow"] = 0 multipliers["ati"] = 1.0 / (1.0 - rho) n = w.n @@ -321,14 +322,15 @@ def _spmultiplier(w, rho, method="simple", mtol=0.00000001): rhop = rhop * rho adidiff = rhop * trw / n adi = adi + adidiff - multipliers["adi"] = adi + multipliers["adi"] = adi.item() multipliers["pow"] = pow else: - print("Method not supported") + multipliers["warn"] = "Method '"+method+"' not supported for spatial impacts.\n" + multipliers["method"] ='simple' multipliers["aii"] = multipliers["ati"] - multipliers["adi"] return (multipliers) -def _sp_effects(reg, variables, spmult, slx_lags=0): +def _sp_effects(reg, variables, spmult, slx_lags=0,slx_vars="All"): """ Calculate spatial lag, direct and indirect 
effects @@ -338,6 +340,8 @@ def _sp_effects(reg, variables, spmult, slx_lags=0): variables : chunk of self.output with variables to calculate effects spmult : dictionary with spatial multipliers slx_lags : number of SLX lags + slx_vars : either "All" (default) for all variables lagged, or a list + of booleans matching the columns of x that will be lagged or not Returns ------- @@ -345,27 +349,46 @@ def _sp_effects(reg, variables, spmult, slx_lags=0): bdir : direct effects bind : indirect effects """ - variables_index = variables.index + + variables_x_index = variables.index + m1 = spmult['ati'] - btot = m1 * reg.betas[variables_index] + btot = m1 * reg.betas[variables_x_index] m2 = spmult['adi'] - bdir = m2 * reg.betas[variables_index] + bdir = m2 * reg.betas[variables_x_index] - # Assumes all SLX effects are indirect effects. Needs revision by LA. + # Assumes all SLX effects are indirect effects. if slx_lags > 0: - variables_wx = reg.output.query("var_type == 'wx'") - variables_wx_index = variables_wx.index - - chunk_size = len(variables) - for i in range(slx_lags): - start_idx = i * chunk_size - end_idx = start_idx + chunk_size - chunk_indices = variables_wx_index[start_idx:end_idx] - btot += m1 * reg.betas[chunk_indices] + if reg.output.regime.nunique() > 1: + btot_idx = pd.Series(btot.flatten(), index=variables_x_index) + wchunk_size = len(variables.query("regime == @reg.output.regime.iloc[0]")) #Number of exogenous variables in each regime + for i in range(slx_lags): + chunk_indices = variables_x_index + (i+1) * wchunk_size + bmult = m1 * reg.betas[chunk_indices] + btot_idx[variables_x_index] += bmult.flatten() + btot = btot_idx.to_numpy().reshape(btot.shape) + + else: + variables_wx = reg.output.query("var_type == 'wx'") + variables_wx_index = variables_wx.index + if hasattr(reg, 'slx_vars') and isinstance(slx_vars,list): + flexwx_indices = list(compress(variables_x_index,slx_vars)) # indices of x variables in wx + else: + flexwx_indices = variables_x_index # all x variables + xind = [h - 1 for h in flexwx_indices] + wchunk_size = len(variables_wx_index)//slx_lags + for i in range(slx_lags): + start_idx = i * wchunk_size + end_idx = start_idx + wchunk_size + chunk_indices = variables_wx_index[start_idx:end_idx] + bmult = m1 * reg.betas[chunk_indices] + btot[xind] = btot[xind] + bmult + bind = btot - bdir else: m3 = spmult['aii'] - bind = m3 * reg.betas[variables_index] + bind = m3 * reg.betas[variables_x_index] + return btot, bdir, bind def _test(): diff --git a/spreg/sur.py b/spreg/sur.py index f2fa6868..b8c33f09 100644 --- a/spreg/sur.py +++ b/spreg/sur.py @@ -996,14 +996,13 @@ def _test(): _test() import numpy as np import libpysal - from .sur_utils import sur_dictxy, sur_dictZ + from .sur_utils import sur_dictxy from libpysal.examples import load_example nat = load_example("Natregimes") db = libpysal.io.open(nat.get_path("NAT.dbf"), "r") y_var = ["HR80", "HR90"] x_var = [["PS80", "UE80"], ["PS90", "UE90"]] - regimes = db.by_col("SOUTH") # Example SUR # """ diff --git a/spreg/sur_utils.py b/spreg/sur_utils.py index bb78d3c8..bf2912f6 100644 --- a/spreg/sur_utils.py +++ b/spreg/sur_utils.py @@ -7,6 +7,7 @@ import numpy as np import numpy.linalg as la +import pandas as pd from .utils import spdot __all__ = [ @@ -30,11 +31,11 @@ def sur_dictxy(db, y_vars, x_vars, space_id=None, time_id=None): Parameters ---------- db : data object - created by libpysal.io.open - y_vars : array + created by libpysal.io.open or pandas.DataFrame + y_vars : list list of lists with variable name(s) for 
dependent var (Note must be a list of lists, even in splm case) - x_vars : array + x_vars : list list of lists with variable names for explanatory vars space_id : variable with space ID used for splm format @@ -62,8 +63,8 @@ def sur_dictxy(db, y_vars, x_vars, space_id=None, time_id=None): bigy = {} bigy_vars = dict((r, y_vars[r]) for r in range(n_eq)) bigy = dict((r, np.resize(y[:, r], (n, 1))) for r in range(n_eq)) - if not (len(x_vars) == n_eq): # CHANGE into exception - print("Error: mismatch variable lists") + if not (len(x_vars) == n_eq): + raise Exception("Error: mismatch variable lists") bigX = {} bigX_vars = {} for r in range(n_eq): @@ -79,8 +80,8 @@ def sur_dictxy(db, y_vars, x_vars, space_id=None, time_id=None): k = litxc.shape[1] return (bigy, bigX, bigy_vars, bigX_vars) elif len(y_vars) == 1: # splm format - if not (time_id): # CHANGE into exception - print("Error: time id must be specified") + if not (time_id): + raise Exception("Error: time id must be specified") try: y = np.array([db[name] for name in y_vars]).T except: @@ -137,7 +138,7 @@ def sur_dictxy(db, y_vars, x_vars, space_id=None, time_id=None): bigX_vars[r] = [i + "_" + tt3[r] for i in xvars] return (bigy, bigX, bigy_vars, bigX_vars) else: - print("error message, but should never be here") + raise Exception("error message, but should never be here") def sur_dictZ(db, z_vars, form="spreg", const=False, space_id=None, time_id=None): @@ -147,7 +148,7 @@ def sur_dictZ(db, z_vars, form="spreg", const=False, space_id=None, time_id=None Parameters ---------- db : data object - created by libpysal.io.open + created by libpysal.io.open or pandas.DataFrame varnames : array list of lists with variable name(s) (Note must be a list of lists, even in splm case) diff --git a/spreg/tests/test_error_sp.py b/spreg/tests/test_error_sp.py index c87999af..8738ccab 100644 --- a/spreg/tests/test_error_sp.py +++ b/spreg/tests/test_error_sp.py @@ -2,9 +2,10 @@ import libpysal from libpysal import weights import numpy as np +import spreg from spreg import error_sp as SP -from spreg import utils from libpysal.common import RTOL +import pandas as pd class TestBaseGMError(unittest.TestCase): def setUp(self): @@ -264,5 +265,159 @@ def test_model(self): z_stat = np.array([[ 2.52051597e+00, 1.17182922e-02], [ 1.50535954e+00, 1.32231664e-01], [ -3.31909311e+00, 9.03103123e-04], [ -4.68530506e-01, 6.39405261e-01]]) np.testing.assert_allclose(reg.z_stat,z_stat,RTOL) +class TestGMMError(unittest.TestCase): + def setUp(self): + try: + self.db = pd.read_csv(libpysal.examples.get_path('columbus.csv')) + except ValueError: + import geopandas as gpd + self.db = gpd.read_file(libpysal.examples.get_path('columbus.dbf')) + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' + + def test_model(self): + reg = SP.GMM_Error(self.db[["HOVAL"]], self.db[["INC", "CRIME"]], w=self.w, estimator='kp98') #GM_Error + betas = np.array([[ 47.94371455], [ 0.70598088], [ -0.55571746], [ 0.37230161]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + vm = np.array([[ 1.51884943e+02, -5.37622793e+00, -1.86970286e+00], [ -5.37622793e+00, 2.48972661e-01, 5.26564244e-02], [ -1.86970286e+00, 5.26564244e-02, 3.18930650e-02]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) + + reg = SP.GMM_Error(self.db[["HOVAL"]], self.db[["INC"]], yend=self.db[["CRIME"]], q=self.db[["DISCBD"]], w=self.w, estimator='kp98') #GM_Endog_Error + betas = np.array([[ 55.36095292], [ 0.46411479], [ -0.66883535], [ 0.38989939]]) + 
np.testing.assert_allclose(reg.betas,betas,RTOL) + vm = np.array([[ 5.29158422e+02, -1.57833675e+01, -8.38021080e+00], + [ -1.57833675e+01, 5.40235041e-01, 2.31120327e-01], + [ -8.38021080e+00, 2.31120327e-01, 1.44977385e-01]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) + + reg = SP.GMM_Error(self.db[["HOVAL"]], self.db[["INC", "CRIME"]], w=self.w, estimator='kp98', add_wy=True) #GM_Combo + betas = np.array([[ 57.61123515],[ 0.73441313], [ -0.59459416], [ -0.21762921], [ 0.54732051]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + vm = np.array([ 5.22438333e+02, 2.38012875e-01, 3.20924173e-02, + 2.15753579e-01]) + np.testing.assert_allclose(np.diag(reg.vm),vm,RTOL) + + reg = SP.GMM_Error(self.db[["HOVAL"]], self.db[["INC", "CRIME"]], w=self.w, estimator='kp98', slx_lags=1) #SLX_error + betas = np.array([[29.46053861], + [ 0.82091985], + [-0.57543046], + [ 0.47808558], + [ 0.30069346], + [ 0.35833037]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + vm = np.array([[ 1.01097138e+03, -9.41982352e+00, -2.05551111e+00, + -2.90663546e+01, -1.04492128e+01], + [-9.41982352e+00, 2.70128986e-01, 4.98208129e-02, + 1.18829756e-01, 5.97170349e-02], + [-2.05551111e+00, 4.98208129e-02, 3.45149007e-02, + 2.31378455e-02, -6.15257324e-03], + [-2.90663546e+01, 1.18829756e-01, 2.31378455e-02, + 1.06830398e+00, 3.06585506e-01], + [-1.04492128e+01, 5.97170349e-02, -6.15257324e-03, + 3.06585506e-01, 1.51494962e-01]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) + + reg = SP.GMM_Error(self.db[["HOVAL"]], self.db[["INC", "CRIME"]], w=self.w, estimator='hom', A1='hom_sc') #GM_Error_Hom + betas = np.array([[ 47.9478524 ], [ 0.70633223], [ -0.55595633], [ 0.41288558]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + vm = np.array([[ 1.51340717e+02, -5.29057506e+00, -1.85654540e+00, -2.39139054e-03], [ -5.29057506e+00, 2.46669610e-01, 5.14259101e-02, 3.19241302e-04], [ -1.85654540e+00, 5.14259101e-02, 3.20510550e-02, -5.95640240e-05], [ -2.39139054e-03, 3.19241302e-04, -5.95640240e-05, 3.36690159e-02]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) + + reg = SP.GMM_Error(self.db[["HOVAL"]], self.db[["INC"]], yend=self.db[["CRIME"]], q=self.db[["DISCBD"]], w=self.w, estimator='hom', A1='hom_sc') #GM_Endog_Error_Hom + betas = np.array([[ 55.36575166], [ 0.46432416], [ -0.66904404], [ 0.43205526]]) + vm = np.array([[ 5.52064057e+02, -1.61264555e+01, -8.86360735e+00, 1.04251912e+00], [ -1.61264555e+01, 5.44898242e-01, 2.39518645e-01, -1.88092950e-02], [ -8.86360735e+00, 2.39518645e-01, 1.55501840e-01, -2.18638648e-02], [ 1.04251912e+00, -1.88092950e-02, -2.18638648e-02, 3.71222222e-02]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) + + reg = SP.GMM_Error(self.db[["HOVAL"]], self.db[["INC"]], w=self.w, estimator='hom', add_wy=True, A1='hom_sc') #GM_Combo_Hom + betas = np.array([[ 10.12541428], [ 1.56832263], [ 0.15132076], [ 0.21033397]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + vm = np.array([[ 2.33694742e+02, -6.66856869e-01, -5.58304254e+00, 4.85488380e+00], [ -6.66856869e-01, 1.94241504e-01, -5.42327138e-02, 5.37225570e-02], [ -5.58304254e+00, -5.42327138e-02, 1.63860721e-01, -1.44425498e-01], [ 4.85488380e+00, 5.37225570e-02, -1.44425498e-01, 1.78622255e-01]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) + + reg = SP.GMM_Error(self.db[["HOVAL"]], self.db[["INC", "CRIME"]], w=self.w, estimator='hom', slx_lags=1) #SLX_error_Hom + betas = np.array([[29.45631607], + [ 0.82147165], + [-0.57539916], + [ 0.47867457], + [ 0.30033727], + [ 0.40129812]]) + 
np.testing.assert_allclose(reg.betas,betas,RTOL) + vm = np.array([[ 1.06448125e+03, -1.00662342e+01, -2.27271870e+00, + -3.04900044e+01, -1.08579426e+01, -1.04655994e-02], + [-1.00662342e+01, 2.72517164e-01, 5.08167383e-02, + 1.44806006e-01, 6.54774010e-02, -2.75387247e-04], + [-2.27271870e+00, 5.08167383e-02, 3.44272896e-02, + 2.96786313e-02, -3.01082586e-03, 5.42396309e-05], + [-3.04900044e+01, 1.44806006e-01, 2.96786313e-02, + 1.10589204e+00, 3.13859279e-01, 3.30913956e-04], + [-1.08579426e+01, 6.54774010e-02, -3.01082586e-03, + 3.13859279e-01, 1.54648513e-01, 2.16465817e-04], + [-1.04655994e-02, -2.75387247e-04, 5.42396309e-05, + 3.30913956e-04, 2.16465817e-04, 3.91478879e-02]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) + + reg = SP.GMM_Error(self.db[["HOVAL"]], self.db[["INC", "CRIME"]], w=self.w, step1c=True) #GM_Error_Het + betas = np.array([[ 47.99626638], [ 0.71048989], [ -0.55876126], [ 0.41178776]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + vm = np.array([[ 1.31767529e+02, -3.58368748e+00, -1.65090647e+00, + 0.00000000e+00], + [ -3.58368748e+00, 1.35513711e-01, 3.77539055e-02, + 0.00000000e+00], + [ -1.65090647e+00, 3.77539055e-02, 2.61042702e-02, + 0.00000000e+00], + [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 2.82398517e-02]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) + + reg = SP.GMM_Error(self.db[["HOVAL"]], self.db[["INC"]], yend=self.db[["CRIME"]], q=self.db[["DISCBD"]], w=self.w, step1c=True) #GM_Endog_Error_Het + betas = np.array([[ 55.39707924], [ 0.46563046], [ -0.67038326], [ 0.41135023]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + vm = np.array([[ 8.34637805e+02, -2.16932259e+01, -1.33327894e+01, + 1.65840848e+00], + [ -2.16932259e+01, 5.97683070e-01, 3.39503523e-01, + -3.90111107e-02], + [ -1.33327894e+01, 3.39503523e-01, 2.19008080e-01, + -2.81929695e-02], + [ 1.65840848e+00, -3.90111107e-02, -2.81929695e-02, + 3.15686105e-02]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) + + reg = SP.GMM_Error(self.db[["HOVAL"]], self.db[["INC", "CRIME"]], w=self.w, add_wy=True, step1c=True) #GM_Combo_Het + betas = np.array([[ 57.7778574 ], [ 0.73034922], [ -0.59257362], [ -0.2230231 ], [ 0.56636724]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + vm = np.array([[ 4.86218274e+02, -2.77268729e+00, -1.59987770e+00, + -1.01969471e+01, 2.74302006e+00], + [ -2.77268729e+00, 1.04680972e-01, 2.51172238e-02, + 1.95136385e-03, 3.70052723e-03], + [ -1.59987770e+00, 2.51172238e-02, 2.15655720e-02, + 7.65868344e-03, -7.30173070e-03], + [ -1.01969471e+01, 1.95136385e-03, 7.65868344e-03, + 2.78273684e-01, -6.89402590e-02], + [ 2.74302006e+00, 3.70052723e-03, -7.30173070e-03, + -6.89402590e-02, 7.12034037e-02]]) + np.testing.assert_allclose(reg.vm,vm,RTOL*10) + + reg = SP.GMM_Error(self.db[["HOVAL"]], self.db[["INC", "CRIME"]], w=self.w, slx_lags=1) #SLX_error_Het + betas = np.array([[29.38238574], + [ 0.82921502], + [-0.57499819], + [ 0.48748671], + [ 0.29556428], + [ 0.39636619]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + vm = np.array([[ 5.97495766e+02, -5.55463269e+00, -6.81492201e-01, + -1.53802421e+01, -7.21116007e+00, 0.00000000e+00], + [-5.55463269e+00, 1.34368489e-01, 2.10846256e-02, + 3.83654841e-02, 5.54102390e-02, 0.00000000e+00], + [-6.81492201e-01, 2.10846256e-02, 2.56742519e-02, + 2.40939941e-03, -1.84082192e-02, 0.00000000e+00], + [-1.53802421e+01, 3.83654841e-02, 2.40939941e-03, + 6.69703341e-01, 1.58284990e-01, 0.00000000e+00], + [-7.21116007e+00, 5.54102390e-02, -1.84082192e-02, + 1.58284990e-01, 1.30057684e-01, 
0.00000000e+00], + [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, + 0.00000000e+00, 0.00000000e+00, 2.67800026e-02]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) + if __name__ == '__main__': unittest.main() diff --git a/spreg/tests/test_twosls_sp.py b/spreg/tests/test_twosls_sp.py index 8ecd865c..9e52a118 100755 --- a/spreg/tests/test_twosls_sp.py +++ b/spreg/tests/test_twosls_sp.py @@ -4,149 +4,79 @@ import spreg.diagnostics as D from spreg.twosls_sp import BaseGM_Lag, GM_Lag from libpysal.common import RTOL -import spreg - class TestBaseGMLag(unittest.TestCase): def setUp(self): - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" - self.db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' + self.db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), 'r') y = np.array(self.db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) - + self.y = np.reshape(y, (49,1)) + def test___init__(self): w_lags = 2 X = [] X.append(self.db.by_col("INC")) X.append(self.db.by_col("CRIME")) self.X = np.array(X).T - # yd2, q2 = spreg.utils.set_endog(self.y, self.X, self.w, None, None, w_lags, True) - self.X = np.hstack((np.ones(self.y.shape), self.X)) + #yd2, q2 = spreg.utils.set_endog(self.y, self.X, self.w, None, None, w_lags, True) + self.X = np.hstack((np.ones(self.y.shape),self.X)) reg = BaseGM_Lag(self.y, self.X, w=self.w, w_lags=w_lags) - betas = np.array( - [[4.53017056e01], [6.20888617e-01], [-4.80723451e-01], [2.83622122e-02]] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - h_0 = np.array( - [1.0, 19.531, 15.72598, 18.594, 24.7142675, 13.72216667, 27.82929567] - ) + betas = np.array([[ 4.53017056e+01], [ 6.20888617e-01], [ -4.80723451e-01], [ 2.83622122e-02]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + h_0 = np.array([ 1. , 19.531 , 15.72598 , 18.594 , + 24.7142675 , 13.72216667, 27.82929567]) np.testing.assert_allclose(reg.h[0], h_0) - hth = np.array( - [ - 49.0, - 704.371999, - 1721.312371, - 724.7435916, - 1707.35412945, - 711.31248483, - 1729.63201243, - ] - ) - np.testing.assert_allclose(reg.hth[0], hth, RTOL) - hthi = np.array( - [ - 7.33701328e00, - 2.27764882e-02, - 2.18153588e-02, - -5.11035447e-02, - 1.22515181e-03, - -2.38079378e-01, - -1.20149133e-01, - ] - ) - np.testing.assert_allclose(reg.hthi[0], hthi, RTOL) + hth = np. array([ 49. 
, 704.371999 , 1721.312371 , 724.7435916 , + 1707.35412945, 711.31248483, 1729.63201243]) + np.testing.assert_allclose(reg.hth[0], hth,RTOL) + hthi = np.array([ 7.33701328e+00, 2.27764882e-02, 2.18153588e-02, + -5.11035447e-02, 1.22515181e-03, -2.38079378e-01, + -1.20149133e-01]) + np.testing.assert_allclose(reg.hthi[0], hthi,RTOL) self.assertEqual(reg.k, 4) self.assertEqual(reg.kstar, 1) - np.testing.assert_allclose(reg.mean_y, 38.436224469387746, RTOL) + np.testing.assert_allclose(reg.mean_y, 38.436224469387746,RTOL) self.assertEqual(reg.n, 49) - pfora1a2 = np.array([80.5588479, -1.06625281, -0.61703759, -1.10071931]) - np.testing.assert_allclose(reg.pfora1a2[0], pfora1a2, RTOL) - predy_5 = np.array( - [[50.87411532], [50.76969931], [41.77223722], [33.44262382], [28.77418036]] - ) - np.testing.assert_allclose(reg.predy[0:5], predy_5, RTOL) - q_5 = np.array([18.594, 24.7142675, 13.72216667, 27.82929567]) + pfora1a2 = np.array([ 80.5588479 , -1.06625281, -0.61703759, -1.10071931]) + np.testing.assert_allclose(reg.pfora1a2[0], pfora1a2,RTOL) + predy_5 = np.array([[ 50.87411532],[ 50.76969931],[ 41.77223722],[ 33.44262382],[ 28.77418036]]) + np.testing.assert_allclose(reg.predy[0:5], predy_5,RTOL) + q_5 = np.array([ 18.594 , 24.7142675 , 13.72216667, 27.82929567]) np.testing.assert_allclose(reg.q[0], q_5) - np.testing.assert_allclose(reg.sig2n_k, 234.54258763039289, RTOL) - np.testing.assert_allclose(reg.sig2n, 215.39625394627919, RTOL) - np.testing.assert_allclose(reg.sig2, 215.39625394627919, RTOL) - np.testing.assert_allclose(reg.std_y, 18.466069465206047, RTOL) - u_5 = np.array( - [[29.59288768], [-6.20269831], [-15.42223722], [-0.24262282], [-5.54918036]] - ) - np.testing.assert_allclose(reg.u[0:5], u_5, RTOL) - np.testing.assert_allclose(reg.utu, 10554.41644336768, RTOL) - varb = np.array( - [ - [1.48966377e00, -2.28698061e-02, -1.20217386e-02, -1.85763498e-02], - [-2.28698061e-02, 1.27893998e-03, 2.74600023e-04, -1.33497705e-04], - [-1.20217386e-02, 2.74600023e-04, 1.54257766e-04, 6.86851184e-05], - [-1.85763498e-02, -1.33497705e-04, 6.86851184e-05, 4.67711582e-04], - ] - ) - np.testing.assert_allclose(reg.varb, varb, RTOL) - vm = np.array( - [ - [3.20867996e02, -4.92607057e00, -2.58943746e00, -4.00127615e00], - [-4.92607057e00, 2.75478880e-01, 5.91478163e-02, -2.87549056e-02], - [-2.58943746e00, 5.91478163e-02, 3.32265449e-02, 1.47945172e-02], - [-4.00127615e00, -2.87549056e-02, 1.47945172e-02, 1.00743323e-01], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - x_0 = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.x[0], x_0, RTOL) - y_5 = np.array([[80.467003], [44.567001], [26.35], [33.200001], [23.225]]) - np.testing.assert_allclose(reg.y[0:5], y_5, RTOL) - yend_5 = np.array( - [[35.4585005], [46.67233467], [45.36475125], [32.81675025], [30.81785714]] - ) - np.testing.assert_allclose(reg.yend[0:5], yend_5, RTOL) - z_0 = np.array([1.0, 19.531, 15.72598, 35.4585005]) - np.testing.assert_allclose(reg.z[0], z_0, RTOL) - zthhthi = np.array( - [ - [ - 1.00000000e00, - -2.22044605e-16, - -2.22044605e-16, - 2.22044605e-16, - 4.44089210e-16, - 0.00000000e00, - -8.88178420e-16, - ], - [ - 0.00000000e00, - 1.00000000e00, - -3.55271368e-15, - 3.55271368e-15, - -7.10542736e-15, - 7.10542736e-14, - 0.00000000e00, - ], - [ - 1.81898940e-12, - 2.84217094e-14, - 1.00000000e00, - 0.00000000e00, - -2.84217094e-14, - 5.68434189e-14, - 5.68434189e-14, - ], - [ - -8.31133940e00, - -3.76104678e-01, - -2.07028208e-01, - 1.32618931e00, - -8.04284562e-01, - 1.30527047e00, - 
1.39136816e00, - ], - ] - ) + np.testing.assert_allclose(reg.sig2n_k, 234.54258763039289,RTOL) + np.testing.assert_allclose(reg.sig2n, 215.39625394627919,RTOL) + np.testing.assert_allclose(reg.sig2, 215.39625394627919,RTOL) + np.testing.assert_allclose(reg.std_y, 18.466069465206047,RTOL) + u_5 = np.array( [[ 29.59288768], [ -6.20269831], [-15.42223722], [ -0.24262282], [ -5.54918036]]) + np.testing.assert_allclose(reg.u[0:5], u_5,RTOL) + np.testing.assert_allclose(reg.utu, 10554.41644336768,RTOL) + varb = np.array( [[ 1.48966377e+00, -2.28698061e-02, -1.20217386e-02, -1.85763498e-02], + [ -2.28698061e-02, 1.27893998e-03, 2.74600023e-04, -1.33497705e-04], + [ -1.20217386e-02, 2.74600023e-04, 1.54257766e-04, 6.86851184e-05], + [ -1.85763498e-02, -1.33497705e-04, 6.86851184e-05, 4.67711582e-04]]) + np.testing.assert_allclose(reg.varb, varb,RTOL) + vm = np.array([[ 3.20867996e+02, -4.92607057e+00, -2.58943746e+00, -4.00127615e+00], + [ -4.92607057e+00, 2.75478880e-01, 5.91478163e-02, -2.87549056e-02], + [ -2.58943746e+00, 5.91478163e-02, 3.32265449e-02, 1.47945172e-02], + [ -4.00127615e+00, -2.87549056e-02, 1.47945172e-02, 1.00743323e-01]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + x_0 = np.array([ 1. , 19.531 , 15.72598]) + np.testing.assert_allclose(reg.x[0], x_0,RTOL) + y_5 = np.array( [[ 80.467003], [ 44.567001], [ 26.35 ], [ 33.200001], [ 23.225 ]]) + np.testing.assert_allclose(reg.y[0:5], y_5,RTOL) + yend_5 = np.array( [[ 35.4585005 ], [ 46.67233467], [ 45.36475125], [ 32.81675025], [ 30.81785714]]) + np.testing.assert_allclose(reg.yend[0:5], yend_5,RTOL) + z_0 = np.array([ 1. , 19.531 , 15.72598 , 35.4585005]) + np.testing.assert_allclose(reg.z[0], z_0,RTOL) + zthhthi = np.array( [[ 1.00000000e+00, -2.22044605e-16, -2.22044605e-16 , 2.22044605e-16, + 4.44089210e-16, 0.00000000e+00, -8.88178420e-16], + [ 0.00000000e+00, 1.00000000e+00, -3.55271368e-15 , 3.55271368e-15, + -7.10542736e-15, 7.10542736e-14, 0.00000000e+00], + [ 1.81898940e-12, 2.84217094e-14, 1.00000000e+00 , 0.00000000e+00, + -2.84217094e-14, 5.68434189e-14, 5.68434189e-14], + [ -8.31133940e+00, -3.76104678e-01, -2.07028208e-01 , 1.32618931e+00, + -8.04284562e-01, 1.30527047e+00, 1.39136816e+00]]) # np.testing.assert_allclose(reg.zthhthi, zthhthi, RTOL) WHYYYY np.testing.assert_array_almost_equal(reg.zthhthi, zthhthi, 7) @@ -156,17 +86,13 @@ def test_init_white_(self): X.append(self.db.by_col("INC")) X.append(self.db.by_col("CRIME")) self.X = np.array(X).T - # yd2, q2 = spreg.utils.set_endog(self.y, self.X, self.w, None, None, w_lags, True) - self.X = np.hstack((np.ones(self.y.shape), self.X)) - base_gm_lag = BaseGM_Lag( - self.y, self.X, w=self.w, w_lags=w_lags, robust="white" - ) - tbetas = np.array( - [[4.53017056e01], [6.20888617e-01], [-4.80723451e-01], [2.83622122e-02]] - ) - np.testing.assert_allclose(base_gm_lag.betas, tbetas) + #yd2, q2 = spreg.utils.set_endog(self.y, self.X, self.w, None, None, w_lags, True) + self.X = np.hstack((np.ones(self.y.shape),self.X)) + base_gm_lag = BaseGM_Lag(self.y, self.X, w=self.w, w_lags=w_lags, robust='white') + tbetas = np.array([[ 4.53017056e+01], [ 6.20888617e-01], [ -4.80723451e-01], [ 2.83622122e-02]]) + np.testing.assert_allclose(base_gm_lag.betas, tbetas) dbetas = D.se_betas(base_gm_lag) - se_betas = np.array([20.47077481, 0.50613931, 0.20138425, 0.38028295]) + se_betas = np.array([ 20.47077481, 0.50613931, 0.20138425, 0.38028295 ]) np.testing.assert_allclose(dbetas, se_betas) def test_init_hac_(self): @@ -175,40 +101,31 @@ def test_init_hac_(self): 
X.append(self.db.by_col("INC")) X.append(self.db.by_col("CRIME")) self.X = np.array(X).T - # yd2, q2 = spreg.utils.set_endog(self.y, self.X, self.w, None, None, w_lags, True) - self.X = np.hstack((np.ones(self.y.shape), self.X)) - gwk = libpysal.weights.Kernel.from_shapefile( - libpysal.examples.get_path("columbus.shp"), - k=15, - function="triangular", - fixed=False, - ) - base_gm_lag = BaseGM_Lag( - self.y, self.X, w=self.w, w_lags=w_lags, robust="hac", gwk=gwk - ) - tbetas = np.array( - [[4.53017056e01], [6.20888617e-01], [-4.80723451e-01], [2.83622122e-02]] - ) - np.testing.assert_allclose(base_gm_lag.betas, tbetas) + #yd2, q2 = spreg.utils.set_endog(self.y, self.X, self.w, None, None, w_lags, True) + self.X = np.hstack((np.ones(self.y.shape),self.X)) + gwk = libpysal.weights.Kernel.from_shapefile(libpysal.examples.get_path('columbus.shp'),k=15,function='triangular', fixed=False) + base_gm_lag = BaseGM_Lag(self.y, self.X, w=self.w, w_lags=w_lags, robust='hac', gwk=gwk) + tbetas = np.array([[ 4.53017056e+01], [ 6.20888617e-01], [ -4.80723451e-01], [ 2.83622122e-02]]) + np.testing.assert_allclose(base_gm_lag.betas, tbetas) dbetas = D.se_betas(base_gm_lag) - se_betas = np.array([19.08513569, 0.51769543, 0.18244862, 0.35460553]) + se_betas = np.array([ 19.08513569, 0.51769543, 0.18244862, 0.35460553]) np.testing.assert_allclose(dbetas, se_betas) def test_init_discbd(self): w_lags = 2 X = np.array(self.db.by_col("INC")) - self.X = np.reshape(X, (49, 1)) + self.X = np.reshape(X, (49,1)) yd = np.array(self.db.by_col("CRIME")) - yd = np.reshape(yd, (49, 1)) + yd = np.reshape(yd, (49,1)) q = np.array(self.db.by_col("DISCBD")) - q = np.reshape(q, (49, 1)) - # yd2, q2 = spreg.utils.set_endog(self.y, self.X, self.w, yd, q, w_lags, True) - self.X = np.hstack((np.ones(self.y.shape), self.X)) + q = np.reshape(q, (49,1)) + #yd2, q2 = spreg.utils.set_endog(self.y, self.X, self.w, yd, q, w_lags, True) + self.X = np.hstack((np.ones(self.y.shape),self.X)) reg = BaseGM_Lag(self.y, self.X, w=self.w, yend=yd, q=q, w_lags=w_lags) - tbetas = np.array([[100.79359082], [-0.50215501], [-1.14881711], [-0.38235022]]) + tbetas = np.array([[ 100.79359082], [ -0.50215501], [ -1.14881711], [ -0.38235022]]) np.testing.assert_allclose(tbetas, reg.betas) dbetas = D.se_betas(reg) - se_betas = np.array([53.0829123, 1.02511494, 0.57589064, 0.59891744]) + se_betas = np.array([ 53.0829123 , 1.02511494, 0.57589064, 0.59891744 ]) np.testing.assert_allclose(dbetas, se_betas) def test_n_k(self): @@ -217,191 +134,113 @@ def test_n_k(self): X.append(self.db.by_col("INC")) X.append(self.db.by_col("CRIME")) self.X = np.array(X).T - # yd2, q2 = spreg.utils.set_endog(self.y, self.X, self.w, None, None, w_lags, True) - self.X = np.hstack((np.ones(self.y.shape), self.X)) + #yd2, q2 = spreg.utils.set_endog(self.y, self.X, self.w, None, None, w_lags, True) + self.X = np.hstack((np.ones(self.y.shape),self.X)) reg = BaseGM_Lag(self.y, self.X, w=self.w, w_lags=w_lags, sig2n_k=True) - betas = np.array( - [[4.53017056e01], [6.20888617e-01], [-4.80723451e-01], [2.83622122e-02]] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [3.49389596e02, -5.36394351e00, -2.81960968e00, -4.35694515e00], - [-5.36394351e00, 2.99965892e-01, 6.44054000e-02, -3.13108972e-02], - [-2.81960968e00, 6.44054000e-02, 3.61800155e-02, 1.61095854e-02], - [-4.35694515e00, -3.13108972e-02, 1.61095854e-02, 1.09698285e-01], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + betas = np. 
array([[ 4.53017056e+01], [ 6.20888617e-01], [ -4.80723451e-01], [ 2.83622122e-02]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array( [[ 3.49389596e+02, -5.36394351e+00, -2.81960968e+00, -4.35694515e+00], + [ -5.36394351e+00, 2.99965892e-01, 6.44054000e-02, -3.13108972e-02], + [ -2.81960968e+00, 6.44054000e-02, 3.61800155e-02, 1.61095854e-02], + [ -4.35694515e+00, -3.13108972e-02, 1.61095854e-02, 1.09698285e-01]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) def test_lag_q(self): w_lags = 2 X = np.array(self.db.by_col("INC")) - self.X = np.reshape(X, (49, 1)) + self.X = np.reshape(X, (49,1)) yd = np.array(self.db.by_col("CRIME")) - yd = np.reshape(yd, (49, 1)) + yd = np.reshape(yd, (49,1)) q = np.array(self.db.by_col("DISCBD")) - q = np.reshape(q, (49, 1)) - # yd2, q2 = spreg.utils.set_endog(self.y, self.X, self.w, yd, q, w_lags, False) - self.X = np.hstack((np.ones(self.y.shape), self.X)) - reg = BaseGM_Lag( - self.y, self.X, w=self.w, yend=yd, q=q, w_lags=w_lags, lag_q=False - ) - tbetas = np.array([[108.83261383], [-0.48041099], [-1.18950006], [-0.56140186]]) + q = np.reshape(q, (49,1)) + #yd2, q2 = spreg.utils.set_endog(self.y, self.X, self.w, yd, q, w_lags, False) + self.X = np.hstack((np.ones(self.y.shape),self.X)) + reg = BaseGM_Lag(self.y, self.X, w=self.w, yend=yd, q=q, w_lags=w_lags, lag_q=False) + tbetas = np.array( [[ 108.83261383], [ -0.48041099], [ -1.18950006], [ -0.56140186]]) np.testing.assert_allclose(tbetas, reg.betas) dbetas = D.se_betas(reg) - se_betas = np.array([58.33203837, 1.09100446, 0.62315167, 0.68088777]) + se_betas = np.array([ 58.33203837, 1.09100446, 0.62315167, 0.68088777]) np.testing.assert_allclose(dbetas, se_betas) + class TestGMLag(unittest.TestCase): def setUp(self): - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" - self.db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' + self.w2 = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path('columbus.shp')) + self.db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), 'r') y = np.array(self.db.by_col("HOVAL")) - self.y = np.reshape(y, (49, 1)) - + self.y = np.reshape(y, (49,1)) + def test___init__(self): X = [] X.append(self.db.by_col("INC")) X.append(self.db.by_col("CRIME")) self.X = np.array(X).T reg = GM_Lag(self.y, self.X, w=self.w, w_lags=2) - betas = np.array( - [[4.53017056e01], [6.20888617e-01], [-4.80723451e-01], [2.83622122e-02]] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - e_5 = np.array( - [[29.28976367], [-6.07439501], [-15.30080685], [-0.41773375], [-5.67197968]] - ) - np.testing.assert_allclose(reg.e_pred[0:5], e_5, RTOL) - h_0 = np.array( - [1.0, 19.531, 15.72598, 18.594, 24.7142675, 13.72216667, 27.82929567] - ) + betas = np.array([[ 4.53017056e+01], [ 6.20888617e-01], [ -4.80723451e-01], [ 2.83622122e-02]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + e_5 = np.array( [[ 29.28976367], [ -6.07439501], [-15.30080685], [ -0.41773375], [ -5.67197968]]) + np.testing.assert_allclose(reg.e_pred[0:5], e_5,RTOL) + h_0 = np.array([ 1. 
, 19.531 , 15.72598 , 18.594 , + 24.7142675 , 13.72216667, 27.82929567]) np.testing.assert_allclose(reg.h[0], h_0) - hth = np.array( - [ - 49.0, - 704.371999, - 1721.312371, - 724.7435916, - 1707.35412945, - 711.31248483, - 1729.63201243, - ] - ) - np.testing.assert_allclose(reg.hth[0], hth, RTOL) - hthi = np.array( - [ - 7.33701328e00, - 2.27764882e-02, - 2.18153588e-02, - -5.11035447e-02, - 1.22515181e-03, - -2.38079378e-01, - -1.20149133e-01, - ] - ) - np.testing.assert_allclose(reg.hthi[0], hthi, RTOL) + hth = np. array([ 49. , 704.371999 , 1721.312371 , 724.7435916 , + 1707.35412945, 711.31248483, 1729.63201243]) + np.testing.assert_allclose(reg.hth[0], hth,RTOL) + hthi = np.array([ 7.33701328e+00, 2.27764882e-02, 2.18153588e-02, + -5.11035447e-02, 1.22515181e-03, -2.38079378e-01, + -1.20149133e-01]) + np.testing.assert_allclose(reg.hthi[0], hthi,RTOL) self.assertEqual(reg.k, 4) self.assertEqual(reg.kstar, 1) - np.testing.assert_allclose(reg.mean_y, 38.436224469387746, RTOL) + np.testing.assert_allclose(reg.mean_y, 38.436224469387746,RTOL) self.assertEqual(reg.n, 49) - pfora1a2 = np.array([80.5588479, -1.06625281, -0.61703759, -1.10071931]) - np.testing.assert_allclose(reg.pr2, 0.3551928222612527, RTOL) - np.testing.assert_allclose(reg.pr2_e, 0.34763857386174174, RTOL) - np.testing.assert_allclose(reg.pfora1a2[0], pfora1a2, RTOL) - predy_5 = np.array( - [[50.87411532], [50.76969931], [41.77223722], [33.44262382], [28.77418036]] - ) - np.testing.assert_allclose(reg.predy[0:5], predy_5, RTOL) - predy_e_5 = np.array( - [[51.17723933], [50.64139601], [41.65080685], [33.61773475], [28.89697968]] - ) - np.testing.assert_allclose(reg.predy_e[0:5], predy_e_5, RTOL) - q_5 = np.array([18.594, 24.7142675, 13.72216667, 27.82929567]) + pfora1a2 = np.array([ 80.5588479 , -1.06625281, -0.61703759, -1.10071931]) + np.testing.assert_allclose(reg.pr2, 0.3551928222612527,RTOL) + np.testing.assert_allclose(reg.pr2_e, 0.34763857386174174,RTOL) + np.testing.assert_allclose(reg.pfora1a2[0], pfora1a2,RTOL) + predy_5 = np.array([[ 50.87411532],[ 50.76969931],[ 41.77223722],[ 33.44262382],[ 28.77418036]]) + np.testing.assert_allclose(reg.predy[0:5], predy_5,RTOL) + predy_e_5 = np.array( [[ 51.17723933], [ 50.64139601], [ 41.65080685], [ 33.61773475], [ 28.89697968]]) + np.testing.assert_allclose(reg.predy_e[0:5], predy_e_5,RTOL) + q_5 = np.array([ 18.594 , 24.7142675 , 13.72216667, 27.82929567]) np.testing.assert_allclose(reg.q[0], q_5) - self.assertEqual(reg.robust, "unadjusted") - np.testing.assert_allclose(reg.sig2n_k, 234.54258763039289, RTOL) - np.testing.assert_allclose(reg.sig2n, 215.39625394627919, RTOL) - np.testing.assert_allclose(reg.sig2, 215.39625394627919, RTOL) - np.testing.assert_allclose(reg.std_y, 18.466069465206047, RTOL) - u_5 = np.array( - [[29.59288768], [-6.20269831], [-15.42223722], [-0.24262282], [-5.54918036]] - ) - np.testing.assert_allclose(reg.u[0:5], u_5, RTOL) - np.testing.assert_allclose(reg.utu, 10554.41644336768, RTOL) - varb = np.array( - [ - [1.48966377e00, -2.28698061e-02, -1.20217386e-02, -1.85763498e-02], - [-2.28698061e-02, 1.27893998e-03, 2.74600023e-04, -1.33497705e-04], - [-1.20217386e-02, 2.74600023e-04, 1.54257766e-04, 6.86851184e-05], - [-1.85763498e-02, -1.33497705e-04, 6.86851184e-05, 4.67711582e-04], - ] - ) - np.testing.assert_allclose(reg.varb, varb, RTOL) - vm = np.array( - [ - [3.20867996e02, -4.92607057e00, -2.58943746e00, -4.00127615e00], - [-4.92607057e00, 2.75478880e-01, 5.91478163e-02, -2.87549056e-02], - [-2.58943746e00, 5.91478163e-02, 3.32265449e-02, 
1.47945172e-02], - [-4.00127615e00, -2.87549056e-02, 1.47945172e-02, 1.00743323e-01], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - x_0 = np.array([1.0, 19.531, 15.72598]) - np.testing.assert_allclose(reg.x[0], x_0, RTOL) - y_5 = np.array([[80.467003], [44.567001], [26.35], [33.200001], [23.225]]) - np.testing.assert_allclose(reg.y[0:5], y_5, RTOL) - yend_5 = np.array( - [[35.4585005], [46.67233467], [45.36475125], [32.81675025], [30.81785714]] - ) - np.testing.assert_allclose(reg.yend[0:5], yend_5, RTOL) - z_0 = np.array([1.0, 19.531, 15.72598, 35.4585005]) - np.testing.assert_allclose(reg.z[0], z_0, RTOL) - zthhthi = np.array( - [ - [ - 1.00000000e00, - -2.22044605e-16, - -2.22044605e-16, - 2.22044605e-16, - 4.44089210e-16, - 0.00000000e00, - -8.88178420e-16, - ], - [ - 0.00000000e00, - 1.00000000e00, - -3.55271368e-15, - 3.55271368e-15, - -7.10542736e-15, - 7.10542736e-14, - 0.00000000e00, - ], - [ - 1.81898940e-12, - 2.84217094e-14, - 1.00000000e00, - 0.00000000e00, - -2.84217094e-14, - 5.68434189e-14, - 5.68434189e-14, - ], - [ - -8.31133940e00, - -3.76104678e-01, - -2.07028208e-01, - 1.32618931e00, - -8.04284562e-01, - 1.30527047e00, - 1.39136816e00, - ], - ] - ) + self.assertEqual(reg.robust, 'unadjusted') + np.testing.assert_allclose(reg.sig2n_k, 234.54258763039289,RTOL) + np.testing.assert_allclose(reg.sig2n, 215.39625394627919,RTOL) + np.testing.assert_allclose(reg.sig2, 215.39625394627919,RTOL) + np.testing.assert_allclose(reg.std_y, 18.466069465206047,RTOL) + u_5 = np.array( [[ 29.59288768], [ -6.20269831], [-15.42223722], [ -0.24262282], [ -5.54918036]]) + np.testing.assert_allclose(reg.u[0:5], u_5,RTOL) + np.testing.assert_allclose(reg.utu, 10554.41644336768,RTOL) + varb = np.array( [[ 1.48966377e+00, -2.28698061e-02, -1.20217386e-02, -1.85763498e-02], + [ -2.28698061e-02, 1.27893998e-03, 2.74600023e-04, -1.33497705e-04], + [ -1.20217386e-02, 2.74600023e-04, 1.54257766e-04, 6.86851184e-05], + [ -1.85763498e-02, -1.33497705e-04, 6.86851184e-05, 4.67711582e-04]]) + np.testing.assert_allclose(reg.varb, varb,RTOL) + vm = np.array([[ 3.20867996e+02, -4.92607057e+00, -2.58943746e+00, -4.00127615e+00], + [ -4.92607057e+00, 2.75478880e-01, 5.91478163e-02, -2.87549056e-02], + [ -2.58943746e+00, 5.91478163e-02, 3.32265449e-02, 1.47945172e-02], + [ -4.00127615e+00, -2.87549056e-02, 1.47945172e-02, 1.00743323e-01]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + x_0 = np.array([ 1. , 19.531 , 15.72598]) + np.testing.assert_allclose(reg.x[0], x_0,RTOL) + y_5 = np.array( [[ 80.467003], [ 44.567001], [ 26.35 ], [ 33.200001], [ 23.225 ]]) + np.testing.assert_allclose(reg.y[0:5], y_5,RTOL) + yend_5 = np.array( [[ 35.4585005 ], [ 46.67233467], [ 45.36475125], [ 32.81675025], [ 30.81785714]]) + np.testing.assert_allclose(reg.yend[0:5], yend_5,RTOL) + z_0 = np.array([ 1. 
, 19.531 , 15.72598 , 35.4585005]) + np.testing.assert_allclose(reg.z[0], z_0,RTOL) + zthhthi = np.array( [[ 1.00000000e+00, -2.22044605e-16, -2.22044605e-16 , 2.22044605e-16, + 4.44089210e-16, 0.00000000e+00, -8.88178420e-16], + [ 0.00000000e+00, 1.00000000e+00, -3.55271368e-15 , 3.55271368e-15, + -7.10542736e-15, 7.10542736e-14, 0.00000000e+00], + [ 1.81898940e-12, 2.84217094e-14, 1.00000000e+00 , 0.00000000e+00, + -2.84217094e-14, 5.68434189e-14, 5.68434189e-14], + [ -8.31133940e+00, -3.76104678e-01, -2.07028208e-01 , 1.32618931e+00, + -8.04284562e-01, 1.30527047e+00, 1.39136816e+00]]) # np.testing.assert_allclose(reg.zthhthi, zthhthi RTOL) #another issue with rtol np.testing.assert_array_almost_equal(reg.zthhthi, zthhthi, 7) @@ -410,13 +249,11 @@ def test_init_white_(self): X.append(self.db.by_col("INC")) X.append(self.db.by_col("CRIME")) self.X = np.array(X).T - base_gm_lag = GM_Lag(self.y, self.X, w=self.w, w_lags=2, robust="white") - tbetas = np.array( - [[4.53017056e01], [6.20888617e-01], [-4.80723451e-01], [2.83622122e-02]] - ) - np.testing.assert_allclose(base_gm_lag.betas, tbetas) + base_gm_lag = GM_Lag(self.y, self.X, w=self.w, w_lags=2, robust='white') + tbetas = np.array([[ 4.53017056e+01], [ 6.20888617e-01], [ -4.80723451e-01], [ 2.83622122e-02]]) + np.testing.assert_allclose(base_gm_lag.betas, tbetas) dbetas = D.se_betas(base_gm_lag) - se_betas = np.array([20.47077481, 0.50613931, 0.20138425, 0.38028295]) + se_betas = np.array([ 20.47077481, 0.50613931, 0.20138425, 0.38028295 ]) np.testing.assert_allclose(dbetas, se_betas) def test_init_hac_(self): @@ -424,33 +261,26 @@ def test_init_hac_(self): X.append(self.db.by_col("INC")) X.append(self.db.by_col("CRIME")) self.X = np.array(X).T - gwk = libpysal.weights.Kernel.from_shapefile( - libpysal.examples.get_path("columbus.shp"), - k=15, - function="triangular", - fixed=False, - ) - base_gm_lag = GM_Lag(self.y, self.X, w=self.w, w_lags=2, robust="hac", gwk=gwk) - tbetas = np.array( - [[4.53017056e01], [6.20888617e-01], [-4.80723451e-01], [2.83622122e-02]] - ) - np.testing.assert_allclose(base_gm_lag.betas, tbetas) + gwk = libpysal.weights.Kernel.from_shapefile(libpysal.examples.get_path('columbus.shp'),k=15,function='triangular', fixed=False) + base_gm_lag = GM_Lag(self.y, self.X, w=self.w, w_lags=2, robust='hac', gwk=gwk) + tbetas = np.array([[ 4.53017056e+01], [ 6.20888617e-01], [ -4.80723451e-01], [ 2.83622122e-02]]) + np.testing.assert_allclose(base_gm_lag.betas, tbetas) dbetas = D.se_betas(base_gm_lag) - se_betas = np.array([19.08513569, 0.51769543, 0.18244862, 0.35460553]) + se_betas = np.array([ 19.08513569, 0.51769543, 0.18244862, 0.35460553]) np.testing.assert_allclose(dbetas, se_betas) def test_init_discbd(self): X = np.array(self.db.by_col("INC")) - X = np.reshape(X, (49, 1)) + X = np.reshape(X, (49,1)) yd = np.array(self.db.by_col("CRIME")) - yd = np.reshape(yd, (49, 1)) + yd = np.reshape(yd, (49,1)) q = np.array(self.db.by_col("DISCBD")) - q = np.reshape(q, (49, 1)) + q = np.reshape(q, (49,1)) reg = GM_Lag(self.y, X, w=self.w, yend=yd, q=q, w_lags=2) - tbetas = np.array([[100.79359082], [-0.50215501], [-1.14881711], [-0.38235022]]) + tbetas = np.array([[ 100.79359082], [ -0.50215501], [ -1.14881711], [ -0.38235022]]) np.testing.assert_allclose(tbetas, reg.betas) dbetas = D.se_betas(reg) - se_betas = np.array([53.0829123, 1.02511494, 0.57589064, 0.59891744]) + se_betas = np.array([ 53.0829123 , 1.02511494, 0.57589064, 0.59891744 ]) np.testing.assert_allclose(dbetas, se_betas) def test_n_k(self): @@ -459,124 +289,144 
@@ def test_n_k(self): X.append(self.db.by_col("CRIME")) self.X = np.array(X).T reg = GM_Lag(self.y, self.X, w=self.w, w_lags=2, sig2n_k=True) - betas = np.array( - [[4.53017056e01], [6.20888617e-01], [-4.80723451e-01], [2.83622122e-02]] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [3.49389596e02, -5.36394351e00, -2.81960968e00, -4.35694515e00], - [-5.36394351e00, 2.99965892e-01, 6.44054000e-02, -3.13108972e-02], - [-2.81960968e00, 6.44054000e-02, 3.61800155e-02, 1.61095854e-02], - [-4.35694515e00, -3.13108972e-02, 1.61095854e-02, 1.09698285e-01], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + betas = np. array([[ 4.53017056e+01], [ 6.20888617e-01], [ -4.80723451e-01], [ 2.83622122e-02]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array( [[ 3.49389596e+02, -5.36394351e+00, -2.81960968e+00, -4.35694515e+00], + [ -5.36394351e+00, 2.99965892e-01, 6.44054000e-02, -3.13108972e-02], + [ -2.81960968e+00, 6.44054000e-02, 3.61800155e-02, 1.61095854e-02], + [ -4.35694515e+00, -3.13108972e-02, 1.61095854e-02, 1.09698285e-01]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) def test_lag_q(self): X = np.array(self.db.by_col("INC")) - X = np.reshape(X, (49, 1)) + X = np.reshape(X, (49,1)) yd = np.array(self.db.by_col("CRIME")) - yd = np.reshape(yd, (49, 1)) + yd = np.reshape(yd, (49,1)) q = np.array(self.db.by_col("DISCBD")) - q = np.reshape(q, (49, 1)) + q = np.reshape(q, (49,1)) reg = GM_Lag(self.y, X, w=self.w, yend=yd, q=q, w_lags=2, lag_q=False) - tbetas = np.array([[108.83261383], [-0.48041099], [-1.18950006], [-0.56140186]]) + tbetas = np.array( [[ 108.83261383], [ -0.48041099], [ -1.18950006], [ -0.56140186]]) np.testing.assert_allclose(tbetas, reg.betas) dbetas = D.se_betas(reg) - se_betas = np.array([58.33203837, 1.09100446, 0.62315167, 0.68088777]) + se_betas = np.array([ 58.33203837, 1.09100446, 0.62315167, 0.68088777]) np.testing.assert_allclose(dbetas, se_betas) def test_spatial(self): X = np.array(self.db.by_col("INC")) - X = np.reshape(X, (49, 1)) + X = np.reshape(X, (49,1)) yd = np.array(self.db.by_col("CRIME")) - yd = np.reshape(yd, (49, 1)) + yd = np.reshape(yd, (49,1)) q = np.array(self.db.by_col("DISCBD")) - q = np.reshape(q, (49, 1)) - w = libpysal.weights.Queen.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - reg = GM_Lag(self.y, X, yd, q, spat_diag=True, w=w) - betas = np.array( - [[5.46344924e01], [4.13301682e-01], [-5.92637442e-01], [-7.40490883e-03]] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [4.45202654e02, -1.50290275e01, -6.36557072e00, -5.71403440e-03], - [-1.50290275e01, 5.93124683e-01, 2.19169508e-01, -6.70675916e-03], - [-6.36557072e00, 2.19169508e-01, 1.06577542e-01, -2.96533875e-03], - [-5.71403440e-03, -6.70675916e-03, -2.96533875e-03, 1.15655425e-03], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - ak_test = np.array([2.52597326, 0.11198567]) - np.testing.assert_allclose(reg.ak_test, ak_test, RTOL) + q = np.reshape(q, (49,1)) + reg = GM_Lag(self.y, X, yd, q, spat_diag=True, w=self.w2) + betas = np.array([[ 5.46344924e+01], [ 4.13301682e-01], [ -5.92637442e-01], [ -7.40490883e-03]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array( [[ 4.45202654e+02, -1.50290275e+01, -6.36557072e+00, -5.71403440e-03], + [ -1.50290275e+01, 5.93124683e-01, 2.19169508e-01, -6.70675916e-03], + [ -6.36557072e+00, 2.19169508e-01, 1.06577542e-01, -2.96533875e-03], + [ -5.71403440e-03, -6.70675916e-03, -2.96533875e-03, 1.15655425e-03]]) + 
np.testing.assert_allclose(reg.vm, vm,RTOL) + ak_test = np.array([ 2.52597326, 0.11198567]) + np.testing.assert_allclose(reg.ak_test, ak_test,RTOL) def test_names(self): X = np.array(self.db.by_col("INC")) - X = np.reshape(X, (49, 1)) + X = np.reshape(X, (49,1)) yd = np.array(self.db.by_col("CRIME")) - yd = np.reshape(yd, (49, 1)) + yd = np.reshape(yd, (49,1)) q = np.array(self.db.by_col("DISCBD")) - q = np.reshape(q, (49, 1)) - w = libpysal.weights.Queen.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - gwk = libpysal.weights.Kernel.from_shapefile( - libpysal.examples.get_path("columbus.shp"), - k=5, - function="triangular", - fixed=False, - ) - name_x = ["inc"] - name_y = "crime" - name_yend = ["crime"] - name_q = ["discbd"] - name_w = "queen" - name_gwk = "k=5" - name_ds = "columbus" - reg = GM_Lag( - self.y, - X, - yd, - q, - spat_diag=True, - w=w, - robust="hac", - gwk=gwk, - name_x=name_x, - name_y=name_y, - name_q=name_q, - name_w=name_w, - name_yend=name_yend, - name_gwk=name_gwk, - name_ds=name_ds, - ) - betas = np.array( - [[5.46344924e01], [4.13301682e-01], [-5.92637442e-01], [-7.40490883e-03]] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [5.70817052e02, -1.83655385e01, -8.36602575e00, 2.37538877e-02], - [-1.85224661e01, 6.53311383e-01, 2.84209566e-01, -6.47694160e-03], - [-8.31105622e00, 2.78772694e-01, 1.38144928e-01, -3.98175246e-03], - [2.66662466e-02, -6.23783104e-03, -4.11092891e-03, 1.10936528e-03], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - self.assertListEqual(reg.name_x, ["CONSTANT"] + name_x) - name_yend.append("W_crime") + q = np.reshape(q, (49,1)) + gwk = libpysal.weights.Kernel.from_shapefile(libpysal.examples.get_path('columbus.shp'),k=5,function='triangular', fixed=False) + name_x = ['inc'] + name_y = 'crime' + name_yend = ['crime'] + name_q = ['discbd'] + name_w = 'queen' + name_gwk = 'k=5' + name_ds = 'columbus' + reg = GM_Lag(self.y, X, yd, q, + spat_diag=True, w=self.w2, robust='hac', gwk=gwk, + name_x=name_x, name_y=name_y, name_q=name_q, name_w=name_w, + name_yend=name_yend, name_gwk=name_gwk, name_ds=name_ds) + betas = np.array([[ 5.46344924e+01], [ 4.13301682e-01], [ -5.92637442e-01], [ -7.40490883e-03]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array( [[ 5.70817052e+02, -1.83655385e+01, -8.36602575e+00, 2.37538877e-02], + [ -1.85224661e+01, 6.53311383e-01, 2.84209566e-01, -6.47694160e-03], + [ -8.31105622e+00, 2.78772694e-01, 1.38144928e-01, -3.98175246e-03], + [ 2.66662466e-02, -6.23783104e-03, -4.11092891e-03, 1.10936528e-03]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + self.assertListEqual(reg.name_x, ['CONSTANT']+name_x) + name_yend.append('W_crime') self.assertListEqual(reg.name_yend, name_yend) - name_q.extend(["W_inc", "W_discbd"]) + name_q.extend(['W_inc', 'W_discbd']) self.assertListEqual(reg.name_q, name_q) self.assertEqual(reg.name_y, name_y) self.assertEqual(reg.name_w, name_w) self.assertEqual(reg.name_gwk, name_gwk) self.assertEqual(reg.name_ds, name_ds) + def test_slx(self): + X = [] + X.append(self.db.by_col("INC")) + X.append(self.db.by_col("CRIME")) + self.X = np.array(X).T + reg = GM_Lag(self.y, self.X, slx_lags=1, spat_diag=True, w=self.w2) + betas = np.array([[41.29886912], + [ 0.89735195], + [-0.70238066], + [-2.67204851], + [-0.11340296], + [ 1.16663877]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array( [[ 4.52762279e+03, -8.39233459e+01, 4.57574864e+01, 1.33304743e+03, + 7.66131947e+01, -6.05018197e+02], + 
[-8.39233459e+01, 2.72427983e+00, -4.27333794e-01, -1.81523886e+01, + -1.00244932e+00, 8.15816638e+00], + [ 4.57574864e+01, -4.27333794e-01, 1.34781526e+00, 2.38492740e+01, + 1.34429440e+00, -1.07935724e+01], + [ 1.33304743e+03, -1.81523886e+01, 2.38492740e+01, 5.33976362e+02, + 3.05662628e+01, -2.41933825e+02], + [ 7.66131947e+01, -1.00244932e+00, 1.34429440e+00, 3.05662628e+01, + 1.75801008e+00, -1.38579938e+01], + [-6.05018197e+02, 8.15816638e+00, -1.07935724e+01, -2.41933825e+02, + -1.38579938e+01, 1.09633338e+02]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + ak_test = np.array([0.001324, 0.970974]) + np.testing.assert_allclose(reg.ak_test, ak_test,RTOL) + cfh_test = np.array([0.10818 , 0.947347]) + np.testing.assert_allclose(reg.cfh_test, cfh_test,RTOL) + + def test_graph(self): + X = [] + X.append(self.db.by_col("INC")) + X.append(self.db.by_col("CRIME")) + self.X = np.array(X).T + reg = GM_Lag(self.y, self.X, slx_lags=1, spat_diag=True, w=libpysal.graph.Graph.from_W(self.w2)) + betas = np.array([[41.29886912], + [ 0.89735195], + [-0.70238066], + [-2.67204851], + [-0.11340296], + [ 1.16663877]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array( [[ 4.52762279e+03, -8.39233459e+01, 4.57574864e+01, 1.33304743e+03, + 7.66131947e+01, -6.05018197e+02], + [-8.39233459e+01, 2.72427983e+00, -4.27333794e-01, -1.81523886e+01, + -1.00244932e+00, 8.15816638e+00], + [ 4.57574864e+01, -4.27333794e-01, 1.34781526e+00, 2.38492740e+01, + 1.34429440e+00, -1.07935724e+01], + [ 1.33304743e+03, -1.81523886e+01, 2.38492740e+01, 5.33976362e+02, + 3.05662628e+01, -2.41933825e+02], + [ 7.66131947e+01, -1.00244932e+00, 1.34429440e+00, 3.05662628e+01, + 1.75801008e+00, -1.38579938e+01], + [-6.05018197e+02, 8.15816638e+00, -1.07935724e+01, -2.41933825e+02, + -1.38579938e+01, 1.09633338e+02]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + ak_test = np.array([0.001324, 0.970974]) + np.testing.assert_allclose(reg.ak_test, ak_test,RTOL) + cfh_test = np.array([0.10818 , 0.947347]) + np.testing.assert_allclose(reg.cfh_test, cfh_test,RTOL) + -if __name__ == "__main__": +if __name__ == '__main__': unittest.main() diff --git a/spreg/twosls.py b/spreg/twosls.py index c0a5bc39..c44937a6 100644 --- a/spreg/twosls.py +++ b/spreg/twosls.py @@ -2,9 +2,10 @@ import numpy.linalg as la from . import robust as ROBUST from . import user_output as USER +from . import diagnostics as DIAG +from .output import output, _spat_diag_out, _summary_dwh from .utils import spdot, sphstack, RegressionPropsY, RegressionPropsVM, set_warn, get_lags import pandas as pd -from .output import output, _spat_diag_out __author__ = "Luc Anselin lanselin@gmail.com, Pedro Amaral pedrovma@gmail.com, David C. 
Folch david.folch@asu.edu, Jing Yao jingyao@asu.edu" __all__ = ["TSLS"] @@ -223,15 +224,15 @@ class TSLS(BaseTSLS): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) @@ -249,10 +250,14 @@ class TSLS(BaseTSLS): slx_lags : integer Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the SLX type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged sig2n_k : boolean If True, then use n-k to estimate sigma^2. If False, use n. spat_diag : boolean If True, then compute Anselin-Kelejian test (requires w) + nonspat_diag : boolean + If True, then compute non-spatial diagnostics vm : boolean If True, include variance-covariance matrix in summary results @@ -330,6 +335,9 @@ class TSLS(BaseTSLS): ak_test : tuple Anselin-Kelejian test; tuple contains the pair (statistic, p-value) + dwh : tuple + Durbin-Wu-Hausman test; tuple contains the pair (statistic, + p-value). Only returned if dwh=True. name_y : string Name of dependent variable for use in output name_x : list of strings @@ -441,8 +449,10 @@ def __init__( robust=None, gwk=None, slx_lags=0, + slx_vars="All", sig2n_k=False, spat_diag=False, + nonspat_diag=True, vm=False, name_y=None, name_x=None, @@ -455,7 +465,8 @@ def __init__( ): n = USER.check_arrays(y, x, yend, q) - y = USER.check_y(y, n) + y, name_y = USER.check_y(y, n, name_y) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) USER.check_robust(robust, gwk) if robust == "hac" and spat_diag: set_warn( @@ -466,13 +477,14 @@ def __init__( USER.check_spat_diag(spat_diag, w) x_constant, name_x, warn = USER.check_constant(x, name_x) self.name_x = USER.set_name_x(name_x, x_constant) + w = USER.check_weights(w, y, slx_lags=slx_lags) if slx_lags>0: - USER.check_weights(w, y, w_required=True) - lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - x_constant = np.hstack((x_constant, lag_x)) - self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) - else: - USER.check_weights(w, y, w_required=False) +# lag_x = get_lags(w, x_constant[:, 1:], slx_lags) +# x_constant = np.hstack((x_constant, lag_x)) +# self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) + x_constant,self.name_x = USER.flex_wx(w,x=x_constant,name_x=self.name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + set_warn(self, warn) BaseTSLS.__init__( self, @@ -500,10 +512,15 @@ def __init__( columns=['var_names']) self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) self.output['regime'], self.output['equation'] = (0, 0) + diag_out = "" + if nonspat_diag: + self.dwh = DIAG.dwh(self) + sum_dwh = _summary_dwh(self) + diag_out += sum_dwh if spat_diag: - diag_out = _spat_diag_out(self, w, 'yend') - else: - diag_out = None + diag_out += _spat_diag_out(self, w, 'yend') + + output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) def _test(): diff --git a/spreg/twosls_regimes.py 
b/spreg/twosls_regimes.py index 26458331..abf89f86 100644 --- a/spreg/twosls_regimes.py +++ b/spreg/twosls_regimes.py @@ -22,19 +22,19 @@ class TSLS_Regimes(BaseTSLS, REGI.Regimes_Frame): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x) - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. constant_regi: string @@ -368,8 +368,9 @@ def __init__( ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) + y, name_y = USER.check_y(y, n, name_y) USER.check_robust(robust, gwk) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) if robust == "hac": if regime_err_sep: set_warn( @@ -387,16 +388,16 @@ def __init__( x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) + w = USER.check_weights(w, y, slx_lags=slx_lags) if slx_lags > 0: - USER.check_weights(w, y, w_required=True) lag_x = get_lags(w, x_constant, slx_lags) x_constant = np.hstack((x_constant, lag_x)) name_x += USER.set_name_spatial_lags(name_x, slx_lags) - else: - USER.check_weights(w, y, w_required=False) + self.constant_regi = constant_regi self.cols2regi = cols2regi self.name_ds = USER.set_name_ds(name_ds) + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) self.name_regimes = USER.set_name_ds(name_regimes) self.name_w = USER.set_name_w(name_w, w) self.name_gwk = USER.set_name_w(name_gwk, gwk) diff --git a/spreg/twosls_sp.py b/spreg/twosls_sp.py index be57004c..59c247ab 100755 --- a/spreg/twosls_sp.py +++ b/spreg/twosls_sp.py @@ -2,15 +2,15 @@ Spatial Two Stages Least Squares """ -__author__ = "Luc Anselin luc.anselin@asu.edu, David C. Folch david.folch@asu.edu" +__author__ = "Luc Anselin lanselin@gmail.com, David C. Folch david.folch@asu.edu" import numpy as np from . import twosls as TSLS from . import user_output as USER from .utils import set_endog, sp_att, set_warn -from .sputils import _spmultiplier import pandas as pd from .output import output, _spat_diag_out, _spat_pseudo_r2, _summary_impacts +from itertools import compress __all__ = ["GM_Lag"] @@ -48,6 +48,8 @@ class BaseGM_Lag(TSLS.BaseTSLS): slx_lags : integer Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the Spatial Durbin type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged robust : string If 'white', then a White consistent estimator of the variance-covariance matrix is given. 
If 'hac', then a @@ -177,18 +179,23 @@ def __init__( w=None, w_lags=1, slx_lags=0, + slx_vars="All", lag_q=True, robust=None, gwk=None, sig2n_k=False, ): + + if slx_lags > 0: - yend2, q2, wx = set_endog(y, x[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + yend2, q2, wx = set_endog(y, x[:, 1:], w, yend, q, w_lags, lag_q, slx_lags,slx_vars) x = np.hstack((x, wx)) else: yend2, q2 = set_endog(y, x[:, 1:], w, yend, q, w_lags, lag_q) + + TSLS.BaseTSLS.__init__( self, y=y, x=x, yend=yend2, q=q2, robust=robust, gwk=gwk, sig2n_k=sig2n_k ) @@ -201,15 +208,15 @@ class GM_Lag(BaseGM_Lag): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x); cannot be @@ -226,6 +233,8 @@ class GM_Lag(BaseGM_Lag): slx_lags : integer Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the Spatial Durbin type. + slx_vars : either "All" (default) or list of booleans to select x variables + to be lagged robust : string If 'white', then a White consistent estimator of the variance-covariance matrix is given. If 'hac', then a @@ -238,11 +247,11 @@ class GM_Lag(BaseGM_Lag): If True, then use n-k to estimate sigma^2. If False, use n. spat_diag : boolean If True, then compute Anselin-Kelejian test and Common Factor Hypothesis test (if applicable) - spat_impacts : string + spat_impacts : string or list Include average direct impact (ADI), average indirect impact (AII), and average total impact (ATI) in summary results. - Options are 'simple', 'full', 'power', or None. - See sputils.spmultiplier for more information. + Options are 'simple', 'full', 'power', 'all' or None. + See sputils._spmultiplier for more information. 
vm : boolean If True, include variance-covariance matrix in summary results @@ -367,7 +376,8 @@ class GM_Lag(BaseGM_Lag): :math:`Z'H(H'H)^{-1}` pfora1a2 : array n(zthhthi)'varb - + sp_multipliers: dict + Dictionary of spatial multipliers (if spat_impacts is not None) Examples -------- @@ -503,6 +513,7 @@ def __init__( w_lags=1, lag_q=True, slx_lags=0, + slx_vars="All", robust=None, gwk=None, sig2n_k=False, @@ -521,9 +532,10 @@ def __init__( ): n = USER.check_arrays(x, yend, q) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) USER.check_robust(robust, gwk) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) if robust == "hac" and spat_diag: set_warn( self, @@ -533,8 +545,22 @@ def __init__( x_constant, name_x, warn = USER.check_constant(x, name_x) name_x = USER.set_name_x(name_x, x_constant) # need to check for None and set defaults - if slx_lags > 0: - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + # kx and wkx are used to replace complex calculation for output + if slx_lags > 0: # adjust for flexwx + if (isinstance(slx_vars,list)): # slx_vars has True,False + if len(slx_vars) != x.shape[1] : + raise Exception("slx_vars incompatible with x column dimensions") + else: # use slx_vars to extract proper columns + workname = name_x[1:] + kx = len(workname) + vv = list(compress(workname,slx_vars)) + name_x += USER.set_name_spatial_lags(vv, slx_lags) + wkx = slx_vars.count(True) + else: + kx = len(name_x) - 1 + wkx = kx + name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + set_warn(self, warn) BaseGM_Lag.__init__( @@ -546,11 +572,13 @@ def __init__( q=q, w_lags=w_lags, slx_lags=slx_lags, + slx_vars=slx_vars, robust=robust, gwk=gwk, lag_q=lag_q, sig2n_k=sig2n_k, ) + self.rho = self.betas[-1] self.predy_e, self.e_pred, warn = sp_att( w, self.y, self.predy, self.yend[:, -1].reshape(self.n, 1), self.rho, hard_bound=hard_bound @@ -567,21 +595,37 @@ def __init__( self.name_yend.append(USER.set_name_yend_sp(self.name_y)) self.name_z = self.name_x + self.name_yend self.name_q = USER.set_name_q(name_q, q) + if slx_lags > 0: # need to remove all but last SLX variables from name_x self.name_x0 = [] self.name_x0.append(self.name_x[0]) # constant - kx = int((self.k - self.kstar - 1) / (slx_lags + 1)) # number of original exogenous vars - self.name_x0.extend(self.name_x[-kx:]) + if (isinstance(slx_vars,list)): # boolean list passed + # x variables that were not lagged + self.name_x0.extend(list(compress(self.name_x[1:],[not i for i in slx_vars]))) + # last wkx variables + self.name_x0.extend(self.name_x[-wkx:]) + + + else: + okx = int((self.k - self.kstar - 1) / (slx_lags + 1)) # number of original exogenous vars + + self.name_x0.extend(self.name_x[-okx:]) + self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) - var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + + #var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + var_types = ['x'] * (kx + 1) + ['wx'] * wkx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] else: self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) var_types = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + self.name_h = USER.set_name_h(self.name_x, self.name_q) self.robust = 
USER.set_robust(robust) self.name_w = USER.set_name_w(name_w, w) self.name_gwk = USER.set_name_w(name_gwk, gwk) self.slx_lags = slx_lags + self.slx_vars = slx_vars + self.output = pd.DataFrame(self.name_x + self.name_yend, columns=['var_names']) self.output['var_type'] = var_types self.output['regime'], self.output['equation'] = (0, 0) @@ -590,12 +634,12 @@ def __init__( if spat_diag: diag_out = _spat_diag_out(self, w, 'yend') - if spat_impacts and slx_lags == 0: - impacts = _summary_impacts(self, _spmultiplier(w, self.rho, method=spat_impacts), spat_impacts, slx_lags) + if spat_impacts: + self.sp_multipliers, impacts_str = _summary_impacts(self, w, spat_impacts, slx_lags,slx_vars) try: - diag_out += impacts + diag_out += impacts_str except TypeError: - diag_out = impacts + diag_out = impacts_str output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) diff --git a/spreg/twosls_sp_regimes.py b/spreg/twosls_sp_regimes.py old mode 100755 new mode 100644 index 18a443dd..7749108f --- a/spreg/twosls_sp_regimes.py +++ b/spreg/twosls_sp_regimes.py @@ -13,7 +13,6 @@ from .twosls import BaseTSLS from .utils import set_endog, set_endog_sparse, sp_att, set_warn, sphstack, spdot, optim_k from .robust import hac_multi -from .sputils import _spmultiplier from .output import output, _spat_diag_out, _spat_pseudo_r2, _summary_impacts from .skater_reg import Skater_reg from .twosls_sp import BaseGM_Lag @@ -26,18 +25,18 @@ class GM_Lag_Regimes(TSLS_Regimes, REGI.Regimes_Frame): Parameters ---------- - y : array + y : numpy.ndarray or pandas.Series nx1 array for dependent variable - x : array + x : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each independent (exogenous) variable, excluding the constant - regimes : list + regimes : list or pandas.Series List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. - yend : array + yend : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each endogenous variable - q : array + q : numpy.ndarray or pandas object Two dimensional array with n rows and one column for each external exogenous variable to use as instruments (note: this should not contain any variables from x); cannot be @@ -88,11 +87,11 @@ class GM_Lag_Regimes(TSLS_Regimes, REGI.Regimes_Frame): matrix must have ones along the main diagonal. sig2n_k : boolean If True, then use n-k to estimate sigma^2. If False, use n. - spat_impacts : string + spat_impacts : string or list Include average direct impact (ADI), average indirect impact (AII), and average total impact (ATI) in summary results. - Options are 'simple', 'full', 'power', or None. - See sputils.spmultiplier for more information. + Options are 'simple', 'full', 'power', 'all' or None. + See sputils._spmultiplier for more information. spat_diag : boolean If True, then compute Anselin-Kelejian test and Common Factor Hypothesis test (if applicable) vm : boolean @@ -265,6 +264,10 @@ class GM_Lag_Regimes(TSLS_Regimes, REGI.Regimes_Frame): n(zthhthi)'varb Only available in dictionary 'multi' when multiple regressions (see 'multi' below for details) + sp_multipliers: dict + Dictionary of spatial multipliers (if spat_impacts is not None) + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) regimes : list List of n values with the mapping of each observation to a regime. Assumed to be aligned with 'x'. 
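To make the revised impacts handling concrete: per the changes above, spat_impacts now also accepts a list of methods (or 'all'), is no longer restricted to slx_lags == 0, and the multipliers are stored on the fitted model as sp_multipliers. A minimal, illustrative sketch against the Columbus sample data shipped with libpysal (not part of the patch; exact summary formatting may differ):

import numpy as np
import libpysal
from spreg import GM_Lag

db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r")
y = np.array(db.by_col("HOVAL")).reshape(49, 1)
X = np.array([db.by_col("INC"), db.by_col("CRIME")]).T
w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path("columbus.shp"))
w.transform = "r"

# spat_impacts as a list of methods; multipliers kept on the fitted object
reg = GM_Lag(y, X, w=w, slx_lags=1, spat_impacts=["simple", "full"], spat_diag=True)
print(reg.sp_multipliers)  # dictionary of spatial multipliers
print(reg.summary)         # ADI/AII/ATI reported in the summary output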
@@ -477,8 +480,9 @@ def __init__( ): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) USER.check_robust(robust, gwk) if regime_lag_sep and not regime_err_sep: set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") @@ -506,6 +510,8 @@ def __init__( name_y = USER.set_name_y(name_y) name_yend = USER.set_name_yend(name_yend, yend) name_q = USER.set_name_q(name_q, q) + + regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) self.name_regimes = USER.set_name_ds(name_regimes) self.constant_regi = constant_regi if slx_lags > 0: @@ -644,12 +650,12 @@ def __init__( diag_out = None if spat_diag: diag_out = _spat_diag_out(self, w, 'yend') - if spat_impacts and slx_lags == 0: - impacts = _summary_impacts(self, _spmultiplier(w, self.rho, method=spat_impacts), spat_impacts, regimes=True) + if spat_impacts: + self.sp_multipliers, impacts_str = _summary_impacts(self, w, spat_impacts, slx_lags, regimes=True) try: - diag_out += impacts + diag_out += impacts_str except TypeError: - diag_out = impacts + diag_out = impacts_str output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) def GM_Lag_Regimes_Multi( @@ -848,8 +854,9 @@ def GM_Lag_Regimes_Multi( self.output = pd.concat([self.output, results[r].output], ignore_index=True) if spat_diag: results[r].other_mid += _spat_diag_out(results[r], results[r].w, 'yend') - if spat_impacts and slx_lags == 0: - results[r].other_mid += _summary_impacts(results[r], _spmultiplier(results[r].w, results[r].rho, method=spat_impacts), spat_impacts) + if spat_impacts: + results[r].sp_multipliers, impacts_str = _summary_impacts(results[r], results[r].w, spat_impacts, slx_lags) + results[r].other_mid += impacts_str counter += 1 self.multi = results if robust == "hac": @@ -974,8 +981,8 @@ def __init__( self, y, x, w, n_clusters=None, quorum=-np.inf, trace=True, **kwargs): n = USER.check_arrays(y, x) - y = USER.check_y(y, n) - USER.check_weights(w, y, w_required=True) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True) # Standardize the variables x_std = (x - np.mean(x, axis=0)) / np.std(x, axis=0) diff --git a/spreg/user_output.py b/spreg/user_output.py index 376fd3dd..34bd3e2f 100755 --- a/spreg/user_output.py +++ b/spreg/user_output.py @@ -8,11 +8,15 @@ "Jing Yao jingyao@asu.edu" ) import numpy as np +import pandas as pd import copy as COPY from . import diagnostics from . import sputils as spu from libpysal import weights +from libpysal import graph from scipy.sparse.csr import csr_matrix +from .utils import get_lags # new for flex_wx +from itertools import compress # new for lfex_wx def set_name_ds(name_ds): @@ -339,7 +343,7 @@ def set_name_multi( def check_arrays(*arrays): """Check if the objects passed by a user to a regression class are correctly structured. If the user's data is correctly formed this function - returns nothing, if not then an exception is raised. Note, this does not + returns the number of observations, if not then an exception is raised. Note, this does not check for model setup, simply the shape and types of the objects. 
Parameters @@ -376,6 +380,8 @@ def check_arrays(*arrays): for i in arrays: if i is None: continue + if isinstance(i, (pd.Series, pd.DataFrame)): + i = i.to_numpy() if not isinstance(i, (np.ndarray, csr_matrix)): raise Exception( "all input data must be either numpy arrays or sparse csr matrices" @@ -395,7 +401,7 @@ def check_arrays(*arrays): return rows[0] -def check_y(y, n): +def check_y(y, n, name_y=None): """Check if the y object passed by a user to a regression class is correctly structured. If the user's data is correctly formed this function returns nothing, if not then an exception is raised. Note, this does not @@ -409,11 +415,17 @@ def check_y(y, n): n : int number of observations + + name_y : string + Name of the y variable Returns ------- y : anything Object passed by the user to a regression class + + name_y : string + Name of the y variable Examples -------- @@ -432,9 +444,19 @@ def check_y(y, n): # should not raise an exception """ + if isinstance(y, (pd.Series, pd.DataFrame)): + if not name_y: + try: + name_y = y.name + except AttributeError: + name_y = y.columns.to_list() + if len(name_y) == 1: + name_y = name_y[0] + + y = y.to_numpy() if not isinstance(y, np.ndarray): print(y.__class__.__name__) - raise Exception("y must be a numpy array") + raise Exception("y must be a numpy array or a pandas Series") shape = y.shape if len(shape) > 2: raise Exception("all input arrays must have two dimensions") @@ -449,10 +471,35 @@ def check_y(y, n): raise Exception( "y must be a single column array matching the length of other arrays" ) - return y + return y, name_y +def check_endog(arrays, names): + """Check if each of the endogenous arrays passed by a user to a regression class are + pandas objects. In this case, the function converts them to numpy arrays and collects their names. -def check_weights(w, y, w_required=False, time=False): + Parameters + ---------- + arrays : list + List of endogenous variables passed by the user to a regression class + names : list + List of names of the endogenous variables, assumed in the same order as the arrays + """ + for i in range(len(arrays)): + if isinstance(arrays[i], (pd.Series, pd.DataFrame)): + if not names[i]: + try: + names[i] = [arrays[i].name] + except AttributeError: + names[i] = arrays[i].columns.to_list() + arrays[i] = arrays[i].to_numpy() + + if arrays[i] is None: + pass + elif len(arrays[i].shape) == 1: + arrays[i].shape = (arrays[i].shape[0], 1) + return (*arrays, *names) + +def check_weights(w, y, w_required=False, time=False, slx_lags=0): """Check if the w parameter passed by the user is a libpysal.W object and check that its dimensionality matches the y parameter. Note that this check is not performed if w set to None. @@ -470,11 +517,13 @@ def check_weights(w, y, w_required=False, time=False): time : boolean True if data contains a time dimension. False (default) if not. + slx_lags : int + Number of lags of X in the spatial lag model. 
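The pandas handling added above (check_arrays, check_y, check_endog, and check_constant) means a model can be fed pandas objects directly, with variable names picked up automatically when the name_* arguments are omitted. A rough sketch of the intended call pattern, again with the Columbus example data (illustrative only; attribute names follow the TSLS changes earlier in this diff):

import pandas as pd
import libpysal
from spreg import TSLS

db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r")
df = pd.DataFrame({"hoval": db.by_col("HOVAL"),
                   "inc": db.by_col("INC"),
                   "crime": db.by_col("CRIME"),
                   "discbd": db.by_col("DISCBD")})

# pandas DataFrames are converted to numpy internally and their column
# names are used for name_y, name_x, name_yend and name_q
reg = TSLS(df[["hoval"]], df[["inc"]], yend=df[["crime"]], q=df[["discbd"]])
print(reg.name_y, reg.name_x, reg.name_yend, reg.name_q)
print(reg.dwh)   # Durbin-Wu-Hausman test, computed since nonspat_diag defaults to True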
Returns ------- - Returns : nothing - Nothing is returned + Returns : w + weights object passed by the user to a regression class Examples -------- @@ -490,26 +539,34 @@ def check_weights(w, y, w_required=False, time=False): >>> X.append(db.by_col("HOVAL")) >>> X = np.array(X).T >>> w = libpysal.io.open(libpysal.examples.get_path("columbus.gal"), 'r').read() - >>> check_weights(w, y) + >>> w = check_weights(w, y) # should not raise an exception """ - if w_required == True or w != None: + if w_required == True or w != None or slx_lags > 0: + if isinstance(w, graph.Graph): + w = w.to_W() + if w == None: raise Exception("A weights matrix w must be provided to run this method.") + if not isinstance(w, weights.W): from warnings import warn - warn("w must be API-compatible pysal weights object") + + # check for kernel weights, if so insert zeros on diagonal + if slx_lags == 1 and isinstance(w, weights.Kernel): + w = weights.fill_diagonal(w,val=0.0) + if w.n != y.shape[0] and time == False: raise Exception("y must have n rows, and w must be an nxn PySAL W object") diag = w.sparse.diagonal() # check to make sure all entries equal 0 - if diag.min() != 0: - raise Exception("All entries on diagonal must equal 0.") - if diag.max() != 0: + if diag.min() != 0 or diag.max() != 0: raise Exception("All entries on diagonal must equal 0.") + + return w def check_robust(robust, wk): @@ -629,6 +686,49 @@ def check_spat_diag(spat_diag, w): raise Exception("w must be a libpysal.W object to run spatial diagnostics") +def check_reg_list(regimes, name_regimes, n): + """Check if the regimes parameter passed by the user is a valid list of + regimes. Note: this does not check if the regimes are valid for the + regression model. + + Parameters + ---------- + regimes : list or np.array or pd.Series + Object passed by the user to a regression class + name_regimes : string + Name of the regimes variable + n : int + number of observations + + Returns + ------- + regimes : list + regimes object passed by the user to a regression class as a list + name_regimes : string + + """ + if isinstance(regimes, list): + pass + elif isinstance(regimes, pd.Series): + if not name_regimes: + name_regimes = regimes.name + regimes = regimes.tolist() + elif isinstance(regimes, np.ndarray): + regimes = regimes.tolist() + else: + raise Exception("regimes must be a list, numpy array, or pandas Series") + if len(regimes) != n: + raise Exception( + "regimes must have the same number of observations as the dependent variable" + ) + return regimes, name_regimes + + + + + + + def check_regimes(reg_set, N=None, K=None): """Check if there are at least two regimes @@ -686,6 +786,13 @@ def check_constant(x, name_x=None, just_rem=False): (49, 3) """ + if isinstance(x, (pd.Series, pd.DataFrame)): + if name_x is None: + try: + name_x = x.columns.to_list() + except AttributeError: + name_x = x.name + x = x.to_numpy() x_constant = COPY.copy(x) keep_x = COPY.copy(name_x) warn = None @@ -717,6 +824,51 @@ def check_constant(x, name_x=None, just_rem=False): return x_constant, keep_x, warn +def flex_wx(w,x,name_x,constant=True,slx_lags=1,slx_vars="All"): + """ + Adds spatially lagged variables to an existing x matrix with or without a constant term + Adds variable names prefaced by W_ for the lagged variables + Allows flexible selection of x-variables to be lagged through list of booleans + + Arguments + --------- + w : PySAL supported spatial weights + x : input matrix of x variables + name_x : input list of variable names for x + constant : flag for whether 
constant is included in x, default = True + no spatial lags are computed for constant term + slx_lags : order of spatial lags, default = 1 + slx_vars : either "All" (default) for all variables lagged, or a list + of booleans matching the columns of x that will be lagged or not + + Returns + ------- + a tuple with + bigx : concatenation of original x matrix and spatial lags + bignamex : list of variable names including spatial lags + + """ + if constant == True: + xwork = x[:,1:] + xnamework = name_x[1:] + else: + xwork = x + xnamework = name_x + + if isinstance(slx_vars,list): + if len(slx_vars) == len(xnamework): + xwork = xwork[:,slx_vars] + xnamework = list(compress(xnamework,slx_vars)) + else: + raise Exception("Mismatch number of columns and length slx_vars") + + lagx = get_lags(w,xwork,slx_lags) + xlagname = set_name_spatial_lags(xnamework,slx_lags) + bigx = np.hstack((x,lagx)) + bignamex = name_x + xlagname + return(bigx,bignamex) + + def _test(): import doctest diff --git a/spreg/utils.py b/spreg/utils.py index ccd74926..639954ba 100755 --- a/spreg/utils.py +++ b/spreg/utils.py @@ -17,6 +17,7 @@ import copy + class RegressionPropsY(object): """ @@ -511,7 +512,7 @@ def get_lags_split(w, x, max_lags, split_at): max_lags : integer Maximum order of spatial lag split_at: integer - Separates the resulting lags into two groups: up to split_at and above + Separates the resulting lags into two groups: up to split_at and above Returns -------- @@ -670,7 +671,7 @@ def power_expansion( return running_total -def set_endog(y, x, w, yend, q, w_lags, lag_q, slx_lags=0): +def set_endog(y, x, w, yend, q, w_lags, lag_q, slx_lags=0,slx_vars="All"): # Create spatial lag of y yl = lag_spatial(w, y) # spatial and non-spatial instruments @@ -697,15 +698,17 @@ def set_endog(y, x, w, yend, q, w_lags, lag_q, slx_lags=0): raise Exception("invalid value passed to yend") if slx_lags == 0: return yend, q - else: - return yend, q, lag_x + else: # adjust returned lag_x here using slx_vars + if (isinstance(slx_vars,list)): # slx_vars has True,False + if len(slx_vars) != x.shape[1] : + raise Exception("slx_vars incompatible with x column dimensions") + else: # use slx_vars to extract proper columns + vv = slx_vars * slx_lags + lag_x = lag_x[:,vv] + return yend, q, lag_x + else: # slx_vars is "All" + return yend, q, lag_x - lag = lag_spatial(w, x) - spat_lags = lag - for i in range(w_lags - 1): - lag = lag_spatial(w, lag) - spat_lags = sphstack(spat_lags, lag) - return spat_lags def set_endog_sparse(y, x, w, yend, q, w_lags, lag_q):
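The slx_vars option threaded through set_endog and flex_wx above selects which columns of x receive spatial lags. A small illustrative sketch of flex_wx on its own, using the Columbus example data (not part of the patch; the W_ prefix for the lag names comes from set_name_spatial_lags):

import numpy as np
import libpysal
from spreg.user_output import flex_wx

db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r")
w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp"))
w.transform = "r"
X = np.array([db.by_col("INC"), db.by_col("CRIME")]).T
X = np.hstack((np.ones((X.shape[0], 1)), X))      # constant in the first column
name_x = ["CONSTANT", "INC", "CRIME"]

# lag only INC; slx_vars is aligned with the non-constant columns of X
bigx, bignames = flex_wx(w, x=X, name_x=name_x, constant=True,
                         slx_lags=1, slx_vars=[True, False])
print(bignames)    # expected: ['CONSTANT', 'INC', 'CRIME', 'W_INC']
print(bigx.shape)  # (49, 4)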