Merge pull request #171 from pedrovma/main

Release 1.8.2
pysal · Feb 10, 2025 · d68f3aa · d68f3aa
2 parents 3abfa09 + 101dda3
commit d68f3aa
Show file tree

Hide file tree

Showing 33 changed files with 2,821 additions and 3,394 deletions.
diff --git a/docs/api.rst b/docs/api.rst
@@ -26,8 +26,8 @@ These are the standard spatial regression models supported by the `spreg` packag
 
     spreg.GM_Lag
     spreg.ML_Lag
-    spreg.GMM_Error
     spreg.ML_Error
+    spreg.GMM_Error
     spreg.GM_Error
     spreg.GM_Error_Het
     spreg.GM_Error_Hom
@@ -141,6 +141,7 @@ Diagnostic tests are useful for identifying model fit, sufficiency, and specific
     spreg.panel_rLMerror
     spreg.panel_Hausman
     spreg.sputils.spmultiplier
+    spreg.diagnostics_probit.sp_tests
 
 
 Spatial Specification Search
@@ -184,4 +185,4 @@ Tools for simulating synthetic data according to data-generating processes impli
     spreg.dgp.dgp_probit
     spreg.dgp.make_bin
     spreg.dgp.make_heterror
-    spreg.dgp.make_vmult
+    spreg.dgp.make_vmult
diff --git a/spreg/__init__.py b/spreg/__init__.py
@@ -8,6 +8,7 @@
 from .diagnostics_sp import *
 from .diagnostics_sur import *
 from .diagnostics_tsls import *
+from .diagnostics_probit import *
 from .error_sp import *
 from .error_sp_het import *
 from .error_sp_het_regimes import *

diff --git a/spreg/diagnostics_probit.py b/spreg/diagnostics_probit.py
@@ -0,0 +1,343 @@
+"""
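+Diagnostics for probit regression models: prediction table and fit
+measures, likelihood ratio test, McFadden's rho, average partial effects,
+and tests for spatial dependence.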
+"""
+__author__ = (
+    "Luc Anselin [email protected], Pedro Amaral [email protected] "
+)
+
+from math import sqrt, pi
+
+from libpysal.common import MISSINGVALUE
+import numpy as np
+import numpy.linalg as la
+import scipy.sparse as SP
+from scipy import stats
+from scipy.stats import norm
+
+# Names exported by ``from .diagnostics_probit import *``.
+__all__ = [
+    "pred_table",
+    "probit_fit",
+    "probit_lrtest",
+    "mcfad_rho",
+    "probit_ape",
+    "sp_tests",
+    "moran_KP",
+]
+
+
+def pred_table(reg):
+    """
+    Cross-tabulates observed against predicted outcomes of a discrete
+    choice model
+
+    Parameters
+    ----------
+    reg             : regression object
+                      output instance from a probit regression model;
+                      its attributes y, predybin and n are read
+
+    Returns
+    -------
+    predtab_vals    : dictionary
+                      margins and cells of the observed/predicted table
+                      actpos   : observed positives (=1)
+                      actneg   : observed negatives (=0)
+                      predpos  : predicted positives
+                      predneg  : predicted negatives
+                      truepos  : predicted 1 when actual = 1
+                      falsepos : predicted 1 when actual = 0
+                      trueneg  : predicted 0 when actual = 0
+                      falseneg : predicted 0 when actual = 1
+
+    """
+    observed_pos = reg.y.sum()
+    predicted_pos = reg.predybin.sum()
+
+    # 0/1 indicator arrays for each observed and each predicted category
+    is_act1 = (reg.y == 1) * 1
+    is_act0 = (reg.y == 0) * 1
+    is_pred1 = (reg.predybin == 1) * 1
+    is_pred0 = (reg.predybin == 0) * 1
+
+    # margins first, then the four cells (products of the indicators)
+    return {
+        "actpos": int(observed_pos),
+        "actneg": int(reg.n - observed_pos),
+        "predpos": predicted_pos,
+        "predneg": reg.n - predicted_pos,
+        "truepos": (is_pred1 * is_act1).sum(),
+        "trueneg": (is_pred0 * is_act0).sum(),
+        "falsepos": (is_pred1 * is_act0).sum(),
+        "falseneg": (is_pred0 * is_act1).sum(),
+    }
+
+
+def probit_fit(reg):
+    """
+    Fit measures for discrete choice models computed from the prediction
+    table (see pred_table)
+
+    Parameters
+    ----------
+    reg             : regression object
+                      output instance from a probit regression model;
+                      must carry the predtable dictionary and n
+
+    Returns
+    -------
+    prob_fit    : dictionary with the measures of fit, in percent
+                  TPR    : true positive rate (sensitivity, recall, hit rate)
+                  TNR    : true negative rate (specificity, selectivity)
+                  BA     : balanced accuracy, the mean of TPR and TNR
+                  PREDPC : accuracy, percent correctly predicted
+
+    """
+    table = reg.predtable
+
+    sensitivity = 100.0 * table["truepos"] / table["actpos"]
+    specificity = 100.0 * table["trueneg"] / table["actneg"]
+    balanced = (sensitivity + specificity) / 2.0
+    accuracy = 100.0 * (table["truepos"] + table["trueneg"]) / reg.n
+
+    return {
+        "TPR": sensitivity,
+        "TNR": specificity,
+        "BA": balanced,
+        "PREDPC": accuracy,
+    }
+
+def probit_lrtest(regprob):
+    """
+    Likelihood ratio test statistic for probit model
+
+    The fitted log likelihood (regprob.logl) is compared with that of a
+    null model in which every observation has probability mean(y).
+
+    Parameters
+    ----------
+    regprob      : probit regression object
+                   its attributes y, n, logl and k are read
+
+    Returns
+    -------
+    likratio     : dictionary with the following keys
+                   L0      : float
+                             log likelihood of null model
+                   likr    : float
+                             likelihood ratio statistic, -2 * (L0 - logl)
+                   df      : integer
+                             degrees of freedom (regprob.k)
+                   p-value : float
+                             chi-squared upper-tail probability of likr
+    """
+    P = np.mean(regprob.y)
+    L0 = regprob.n * (P * np.log(P) + (1 - P) * np.log(1 - P))
+    LR = -2.0 * (L0 - regprob.logl)
+    # stats.chisqprob no longer exists in SciPy; chi2.sf is the same
+    # upper-tail probability.
+    pval = stats.chi2.sf(LR, regprob.k)
+
+    return {"L0": L0, "likr": LR, "df": regprob.k, "p-value": pval}
+
+def mcfad_rho(regprob):
+    """
+    McFadden's rho measure of fit
+
+    Parameters
+    ----------
+    regprob    : probit regression object
+                 its attributes logl and L0 are read
+
+    Returns
+    -------
+    rho        : McFadden's rho (1 - L/L0)
+
+    """
+    likelihood_ratio = regprob.logl / regprob.L0
+    return 1.0 - likelihood_ratio
+
+def probit_ape(regprob):
+    """
+    Average partial effects
+
+    Parameters
+    ----------
+    regprob   : probit regression object
+                its attributes scalem, xmean, betas, k and vm are read,
+                plus phiy when scalem is 'phimean'
+
+    Returns
+    -------
+    tuple with:
+        scale          : the scale of the marginal effects, determined by regprob.scalem
+                         'phimean' (Mean of individual marginal effects)
+                         'xmean' (Marginal effects at variables mean)
+        slopes         : marginal effects or average partial effects (not for constant)
+        slopes_vm      : estimates of variance of marginal effects (not for constant)
+        slopes_std_err : estimates of standard errors of marginal effects
+        slopes_z_stat  : list of (z-statistic, p-value) tuples for marginal effects
+
+    Raises
+    ------
+    ValueError
+        if regprob.scalem is neither 'xmean' nor 'phimean'
+
+    """
+    # x-bar' beta: used for the 'xmean' scale and for the variance below
+    xmb = regprob.xmean.T @ regprob.betas
+
+    if regprob.scalem == "xmean":
+        scale = stats.norm.pdf(xmb)
+    elif regprob.scalem == "phimean":
+        scale = np.mean(regprob.phiy, axis=0)
+    else:
+        # previously fell through and failed later with an unbound 'scale'
+        raise ValueError(
+            "scalem must be 'xmean' or 'phimean', got %r" % (regprob.scalem,)
+        )
+
+    # average partial effects (no constant)
+    slopes = (regprob.betas[1:, 0] * scale).reshape(-1, 1)
+
+    # variance of partial effects
+    bxt = regprob.betas @ regprob.xmean.T
+    dfdb = np.eye(regprob.k) - xmb * bxt
+    slopes_vm = (scale ** 2) * ((dfdb @ regprob.vm) @ dfdb.T)
+
+    # standard errors (constant dropped)
+    slopes_std_err = np.sqrt(slopes_vm[1:, 1:].diagonal()).reshape(-1, 1)
+
+    # z-stats and two-sided p-values
+    sl_zStat = slopes / slopes_std_err
+    slopes_z_stat = [(z, stats.norm.sf(abs(z)) * 2) for z in sl_zStat[:, 0]]
+
+    return (scale, slopes, slopes_vm[1:, 1:], slopes_std_err, slopes_z_stat)
+
+
+def sp_tests(regprob=None, obj_list=None):
+    """
+    Calculates tests for spatial dependence in Probit models
+
+    Parameters
+    ----------
+    regprob     : regression object from spreg
+                  output instance from a probit model
+    obj_list    : list
+                  list of regression elements from both libpysal and statsmodels' ProbitResults
+                  The list should be such as:
+                  [libpysal.weights, ProbitResults.fittedvalues, ProbitResults.resid_response, ProbitResults.resid_generalized]
+
+    Returns
+    -------
+    tuple with LM_Err, moran, ps, each an array holding [statistic, p-value]
+               LM_Err: Pinkse
+               moran : Kelejian-Prucha generalized Moran
+               ps    : Pinkse-Slade
+
+    Raises
+    ------
+    ValueError
+        if neither regprob nor obj_list is supplied
+
+    Examples
+    --------
+    The results of this function will be automatically added to the output of the probit model if using spreg.
+    If using the Probit estimator from statsmodels, the user can call the function with the obj_list argument.
+    The argument obj_list should be a list with the following elements, in this order:
+    [libpysal.weights, ProbitResults.fittedvalues, ProbitResults.resid_response, ProbitResults.resid_generalized]
+    The function will then return and print the results of the spatial diagnostics.
+
+    >>> import libpysal
+    >>> import statsmodels.api as sm
+    >>> import geopandas as gpd
+    >>> from spreg.diagnostics_probit import sp_tests
+
+    >>> columb = libpysal.examples.load_example('Columbus')
+    >>> dfs = gpd.read_file(columb.get_path("columbus.shp"))
+    >>> w = libpysal.weights.Queen.from_dataframe(dfs)
+    >>> w.transform='r'
+
+    >>> y = (dfs["CRIME"] > 40).astype(float)
+    >>> X = dfs[["INC","HOVAL"]]
+    >>> X = sm.add_constant(X)
+
+    >>> probit_mod = sm.Probit(y, X)
+    >>> probit_res = probit_mod.fit(disp=False)
+    >>> LM_err, moran, ps = sp_tests(obj_list=[w, probit_res.fittedvalues, probit_res.resid_response, probit_res.resid_generalized])
+    PROBIT MODEL DIAGNOSTICS FOR SPATIAL DEPENDENCE
+    TEST                              DF         VALUE           PROB
+    Kelejian-Prucha (error)           1          1.721           0.0852
+    Pinkse (error)                    1          3.132           0.0768
+    Pinkse-Slade (error)              1          2.558           0.1097
+
+    """
+    if regprob is not None:
+        w, Phi, phi = regprob.w, regprob.predy, regprob.phiy
+        u_naive, u_gen, n = regprob.u_naive, regprob.u_gen, regprob.n
+    elif obj_list:
+        w, fittedvalues, u_naive, u_gen = obj_list
+        Phi = norm.cdf(fittedvalues)
+        phi = norm.pdf(fittedvalues)
+        n = w.n
+    else:
+        raise ValueError("sp_tests requires either regprob or obj_list.")
+
+    # Use the .sparse matrix of a weights object; anything without that
+    # attribute is taken to be the matrix itself.
+    w = getattr(w, "sparse", w)
+
+    # '@' is the matrix product for sparse matrices, sparse arrays and dense
+    # arrays alike, whereas '*' is elementwise for the latter two.
+    # Pinkse_error:
+    Phi_prod = Phi * (1 - Phi)
+    sig2 = np.sum((phi * phi) / Phi_prod) / n
+    LM_err_num = np.dot(u_gen.T, (w @ u_gen)) ** 2
+    trWW = np.sum((w @ w).diagonal())
+    trWWWWp = trWW + np.sum((w @ w.T).diagonal())
+    # .item() extracts the scalar; float() on a 1x1 array is deprecated in NumPy
+    LM_err = float(np.asarray(LM_err_num / (sig2 ** 2 * trWWWWp)).item())
+    # stats.chisqprob no longer exists in SciPy; chi2.sf is the same
+    # upper-tail probability.
+    LM_err = np.array([LM_err, stats.chi2.sf(LM_err, 1)])
+    # KP_error:
+    moran = moran_KP(w, u_naive, Phi_prod)
+    # Pinkse-Slade_error:
+    u_std = u_naive / np.sqrt(Phi_prod)
+    ps_num = np.dot(u_std.T, (w @ u_std)) ** 2
+    trWpW = np.sum((w.T @ w).diagonal())
+    ps = float(np.asarray(ps_num / (trWW + trWpW)).item())
+    # chi-square instead of bootstrap.
+    ps = np.array([ps, stats.chi2.sf(ps, 1)])
+
+    if obj_list:
+        # Print the summary table when called with the obj_list interface
+        from .output import _probit_out
+        reg_simile = type('reg_simile', (object,), {})()
+        reg_simile.Pinkse_error = LM_err
+        reg_simile.KP_error = moran
+        reg_simile.PS_error = ps
+        print("PROBIT MODEL "+_probit_out(reg_simile, spat_diag=True, sptests_only=True)[1:])
+
+    return LM_err, moran, ps
+
+def moran_KP(w, u, sig2i):
+    """
+    Calculates Kelejian-Prucha Moran-flavoured tests
+
+    Parameters
+    ----------
+
+    w           : W
+                  PySAL weights instance aligned with y (its .sparse
+                  attribute is used), or a scipy sparse matrix/array
+    u           : array
+                  nx1 array of naive residuals
+    sig2i       : array
+                  nx1 array of individual variance
+
+    Returns
+    -------
+    moran       : array, Kelejian-Prucha Moran's I with two-sided normal p-value
+    """
+    # Only a missing .sparse attribute falls back to w itself; any other
+    # error raised while building the sparse matrix now propagates.
+    w = getattr(w, "sparse", w)
+
+    # '@' is the matrix product for sparse matrices, sparse arrays and dense
+    # arrays alike, whereas '*' is elementwise for the latter two.
+    moran_num = np.dot(u.T, (w @ u))
+    # Diagonal matrix holding the individual variances
+    E = SP.lil_matrix(w.shape)
+    E.setdiag(sig2i.flat)
+    E = E.asformat("csr")
+    WE = w @ E
+    moran_den = np.sqrt(np.sum((WE @ WE + (w.T @ E) @ WE).diagonal()))
+    # .item() extracts the scalar; float() on a 1x1 array is deprecated in NumPy
+    moran = float(np.asarray(moran_num / moran_den).item())
+    return np.array([moran, stats.norm.sf(abs(moran)) * 2.0])
+
+
+def _test():
+    """Run doctest.testmod() with its default arguments."""
+    from doctest import testmod
+
+    testmod()
+
+
+# Executing the file as a script runs the doctests.
+if __name__ == "__main__":
+    _test()