Add Gini-based metrics for vertical equity (#10)

Add pytest-cov to requirements. Finished pytest added edits to ratiostudy testing to see if pull works testing commit added changes split lines modified conf.py running locally fixing single line still working removed indent two lines another attempt Dam help likely final push commit fmt skip added init changes init changes test noqa post-run changes removed tox and added gitignore modified formulas deleted Rhistory delete .Rproj removed pd reformatted mki and ki edited readme added references.rst minor change in formulas removed leading # restructured formulas switched mki and ki testing with gini change changed back to pd to test transitioned to np finalish test modified reference modified reference added mki_ki doc moved mki_ki added mki_ki to docbuild modified mki_ki name modified ki modified mki modified formula futher seprarated ki_mki ran black modified language black changes modified spacing ran black modified reference Add some tabs for linting Fix mkt_met reference in docs Remove language about AVM from readme Bump version number to reflect major new functionality Update commenting and formatting in vignette Remove language about AVM from readme Bump version number to reflect major new functionality Update pages workflow to follow AssessR
ccao-data · Aug 23, 2023 · 75d96d2 · 75d96d2
1 parent 3036a64
commit 75d96d2
Show file tree

Hide file tree

Showing 17 changed files with 593 additions and 179 deletions.
diff --git a/.github/workflows/pages.yaml b/.github/workflows/pages.yaml
@@ -8,15 +8,8 @@ on:
 name: pages
 
 jobs:
-  pages:
+  build-sphinx-site:
     runs-on: ubuntu-latest
-    environment:
-      name: github-pages
-      url: ${{ steps.deployment.outputs.page_url }}
-    permissions:
-      pages: write
-      id-token: write
-
     steps:
       - name: Checkout
         uses: actions/checkout@v3
@@ -35,6 +28,17 @@ jobs:
         with:
           path: '_build/html'
 
+  deploy:
+    if: contains(fromJSON('["main", "master"]'), github.ref_name) && github.event_name != 'pull_request'
+    needs: build-sphinx-site
+    runs-on: ubuntu-latest
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    permissions:
+      pages: write
+      id-token: write
+    steps:
       - name: Deploy to GitHub Pages
         id: deployment
         uses: actions/deploy-pages@v2
diff --git a/.gitignore b/.gitignore
@@ -20,3 +20,7 @@ __pycache__/
 .coverage
 .coveragerc.txt
 venv/
+.Rproj.user
+.tox
+.Rhistory
+.Rproj
diff --git a/README.md b/README.md
@@ -5,25 +5,20 @@
 [![pre-commit](https://github.com/ccao-data/assesspy/actions/workflows/pre-commit.yaml/badge.svg)](https://github.com/ccao-data/assesspy/actions/workflows/pre-commit.yaml)
 [![codecov](https://codecov.io/gh/ccao-data/assesspy/branch/main/graph/badge.svg)](https://codecov.io/gh/ccao-data/assesspy)
 
-AssessPy is a software package for Python developed by the Cook County
-Assessor's (CCAO) Data Department. The codebase for the CCAO's CAMA system
-uses a wide range of functions regularly, and packaging these functions
-streamlines and standardizes their use. The CCAO is publishing this package
-to make it available to assessors, reporters, and citizens everywhere.
-
-For assessors, we believe that this package will reduce the complexity
-of calculating ratio statistics and detecting sales chasing. We also
-believe that reporters, taxpayers, and members of academia will find
-this package helpful in monitoring the performance of local assessors
-and conducting research.
+AssessPy is a software package for Python developed by the Cook County Assessor’s (CCAO)
+Data Department. It contains many of the functions necessary to perform a standard
+[sales ratio study](https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf).
+
+For assessors, we believe that this package will reduce the complexity of calculating
+ratio statistics and detecting sales chasing. We also hope that reporters, taxpayers,
+and members of academia will find this package helpful in monitoring the performance
+of local assessors and conducting research.
 
 For detailed documentation on included functions and data, [**visit the
-full reference
-list**](https://ccao-data.github.io/assesspy/reference.html).
+full reference list**](https://ccao-data.github.io/assesspy/reference.html).
 
 For examples of specific tasks you can complete with `assesspy`
-functions, see the [**vignettes
-page**](https://ccao-data.github.io/assesspy/vignettes.html).
+functions, see the [**vignettes page**](https://ccao-data.github.io/assesspy/vignettes.html).
 
 ## Installation
 

diff --git a/assesspy/__init__.py b/assesspy/__init__.py
@@ -1,5 +1,5 @@
 from .ci import boot_ci, cod_ci, prd_ci
-from .formulas import cod, cod_met, prb, prb_met, prd, prd_met
+from .formulas import cod, cod_met, ki, mki, mki_met, prb, prb_met, prd, prd_met  # noqa
 from .load_data import ratios_sample
 from .outliers import iqr_outlier, is_outlier, quantile_outlier
 from .sales_chasing import detect_chasing

diff --git a/assesspy/data/mki_ki.csv b/assesspy/data/mki_ki.csv
@@ -0,0 +1,30 @@
+"32900","37299"
+"36000","40166"
+"54000","56317"
+"64500","66184"
+"68000","69487"
+"70000","71515"
+"74000","75338"
+"80000","81036"
+"84900","85673"
+"89000","85021"
+"94250","90046"
+"99000","94089"
+"105900","100227"
+"109000","103157"
+"115000","108290"
+"124500","117099"
+"129900","115347"
+"135000","119678"
+"149000","131631"
+"155800","137321"
+"163500","143974"
+"175000","153572"
+"179000","148457"
+"185600","153488"
+"199900","165040"
+"215000","176940"
+"235000","192959"
+"250000","180046"
+"279000","200240"
+"295000","211445"
diff --git a/assesspy/formulas.py b/assesspy/formulas.py
@@ -1,11 +1,12 @@
 # Import necessary libraries
 import numpy as np
+import pandas as pd
 import statsmodels.api as sm
 
 from .utils import check_inputs
 
 
-# COD, PRD, PRB functions
+# COD, PRD, PRB, KI, MKI functions
 def cod(ratio):
     """
     COD is the average absolute percent deviation from the
@@ -44,8 +45,6 @@ def cod(ratio):
 
         ap.cod(ap.ratios_sample().ratio)
     """
-
-    # Input checking and error handling
     check_inputs(ratio)
 
     ratio = np.array(ratio)
@@ -98,8 +97,6 @@ def prd(assessed, sale_price):
 
     assessed = np.array(assessed)
     sale_price = np.array(sale_price)
-
-    # Input checking and error handling
     check_inputs(assessed, sale_price)
 
     ratio = assessed / sale_price
@@ -151,8 +148,6 @@ def prb(assessed, sale_price, round=None):
 
     assessed = np.array(assessed)
     sale_price = np.array(sale_price)
-
-    # Input checking and error handling
     check_inputs(assessed, sale_price)
 
     ratio = assessed / sale_price
@@ -178,7 +173,113 @@ def prb(assessed, sale_price, round=None):
     return out
 
 
-# Functions to determine whether assessment fairness criteria has been met
+# Calculate the Gini coefficients needed for KI and MKI
+def calculate_gini(assessed, sale_price):
+    df = pd.DataFrame({"av": assessed, "sp": sale_price})
+    df = df.sort_values(by="sp")
+    assessed_price = df["av"].values
+    sale_price = df["sp"].values
+    n = len(assessed_price)
+
+    sale_sum = np.sum(sale_price * np.arange(1, n + 1))
+    g_sale = 2 * sale_sum / np.sum(sale_price) - (n + 1)
+    gini_sale = g_sale / n
+
+    assessed_sum = np.sum(assessed_price * np.arange(1, n + 1))
+    g_assessed = 2 * assessed_sum / np.sum(assessed_price) - (n + 1)
+    gini_assessed = g_assessed / n
+
+    return float(gini_assessed), float(gini_sale)
+
+
+def mki(assessed, sale_price):
+    r"""
+    The Modified Kakwani Index (mki) is a Gini-based measure
+    to test for vertical equity. It first orders properties by sale price
+    (ascending), then calculates the Gini coefficient for sale values
+    and assessed values (while remaining ordered by sale price). The
+    Modified Kakwani Index is the ratio of Gini of Assessed / Gini of Sale.
+
+    For the Modified Kakwani Index:
+
+    - MKI < 1 is regressive
+    - MKI = 1 is vertical equity
+    - MKI > 1 is progressive
+
+    .. Quintos, C. (2020). A Gini measure for vertical equity in property
+        assessments. https://researchexchange.iaao.org/jptaa/vol17/iss2/2
+
+    .. Quintos, C. (2021). A Gini decomposition of the sources of inequality in
+        property assessments. https://researchexchange.iaao.org/jptaa/vol18/iss2/6
+
+    :param assessed:
+        A numeric vector of assessed values. Must be the same
+        length as ``sale_price``.
+    :param sale_price:
+        A numeric vector of sale prices. Must be the same length
+        as ``assessed``.
+    :type assessed: numeric
+    :type sale_price: numeric
+    :return: The MKI of the input vectors, as a single number.
+    :rtype: float
+
+    :Example:
+
+    .. code-block:: python
+
+        # Calculate MKI:
+        import assesspy as ap
+
+        ap.mki(ap.ratios_sample().assessed, ap.ratios_sample().sale_price)
+    """
+
+    check_inputs(assessed, sale_price)
+    gini_assessed, gini_sale = calculate_gini(assessed, sale_price)
+    MKI = gini_assessed / gini_sale
+    return float(MKI)
+
+
+def ki(assessed, sale_price):
+    r"""
+    The Kakwani Index (ki) is a Gini-based measure to test for vertical equity.
+    It first orders properties by sale price (ascending), then calculates the Gini
+    coefficient for sale values and assessed values (while remaining ordered by sale price).
+    The Kakwani Index is the Gini of Assessed minus the Gini of Sale.
+
+    For the Kakwani Index:
+
+    - KI < 0 is regressive
+    - KI = 0 is vertical equity
+    - KI > 0 is progressive
+
+    :param assessed:
+        A numeric vector of assessed values. Must be the same
+        length as ``sale_price``.
+    :param sale_price:
+        A numeric vector of sale prices. Must be the same length
+        as ``assessed``.
+    :type assessed: numeric
+    :type sale_price: numeric
+    :return: The KI of the input vectors, as a single number.
+    :rtype: float
+
+    :Example:
+
+    .. code-block:: python
+
+        # Calculate KI:
+        import assesspy as ap
+
+        ap.ki(ap.ratios_sample().assessed, ap.ratios_sample().sale_price)
+    """
+
+    check_inputs(assessed, sale_price)
+    gini_assessed, gini_sale = calculate_gini(assessed, sale_price)
+    KI = gini_assessed - gini_sale
+    return float(KI)
+
+
+# Functions to determine whether IAAO/Quintos fairness criteria have been met
 def cod_met(x):
     return 5 <= x <= 15
 
@@ -189,3 +290,7 @@ def prd_met(x):
 
 def prb_met(x):
     return -0.05 <= x <= 0.05
+
+
+def mki_met(x):
+    return 0.95 <= x <= 1.05
diff --git a/assesspy/tests/test_formulas.py b/assesspy/tests/test_formulas.py
@@ -145,3 +145,108 @@ def test_round(self):  # Rounding must be int
 
     def test_prb_met(self):  # Standard met function
         assert assesspy.prb_met(prb_out)
+
+
+with open("assesspy/data/mki_ki.csv", "r") as input_csvfile:
+    # Create a list to store the extracted columns
+    gini_data_sale = []
+    gini_data_assessed = []
+
+    # Iterate through each line in the input CSV
+    for line in input_csvfile:
+        columns = line.strip().split(",")
+
+        first_column = columns[0].split('"')[1]
+        second_column = columns[1]
+
+        gini_data_sale.append(first_column)
+        gini_data_assessed.append(second_column)
+
+gini_data_assessed = [int(value.replace('"', "")) for value in gini_data_assessed]
+gini_data_sale = [int(value.replace('"', "")) for value in gini_data_sale]
+
+mki_out = assesspy.mki(gini_data_assessed, gini_data_sale)
+
+
+class Test_MKI:
+    def test_mki(self):  # Output equal to expected
+        npt.assert_allclose(mki_out, 0.794, rtol=0.02)
+
+    def test_numeric_output(self):  # Output is numeric
+        assert type(mki_out) is float
+
+        with pt.raises(Exception):
+            assesspy.mki([1, 1, 1], [1, 1])
+
+        with pt.raises(Exception):
+            assesspy.mki(10, 10)
+
+        with pt.raises(Exception):
+            assesspy.mki(
+                pd.concat([gini_data_assessed, pd.Series(float("Inf"))]),
+                pd.concat([gini_data_sale, pd.Series(1.0)]),
+            )
+
+        with pt.raises(Exception):
+            assesspy.mki(pd.DataFrame(ratio))
+
+        with pt.raises(Exception):
+            assesspy.mki(
+                pd.concat([gini_data_assessed, pd.Series(float("NaN"))]),
+                pd.concat([gini_data_sale, pd.Series(1.0)]),
+            )
+
+        with pt.raises(Exception):
+            assesspy.mki([1] * 30, [1] * 29 + ["1"])
+
+    def test_round(self):  # mki takes no rounding argument; an extra positional argument must raise
+        with pt.raises(Exception):
+            assesspy.mki(gini_data_assessed, sale_price, "z")
+
+        with pt.raises(Exception):
+            assesspy.mki(gini_data_assessed, sale_price, 1.1)
+
+    def test_mki_met(self):  # Standard met function
+        assert not assesspy.mki_met(mki_out)
+
+
+ki_out = assesspy.ki(gini_data_assessed, gini_data_sale)
+
+
+class Test_KI:
+    def test_ki(self):  # Output equal to expected
+        npt.assert_allclose(ki_out, -0.06, rtol=0.02)
+
+    def test_numeric_output(self):  # Output is numeric
+        assert type(ki_out) is float
+
+        with pt.raises(Exception):
+            assesspy.ki([1, 1, 1], [1, 1])
+
+        with pt.raises(Exception):
+            assesspy.ki(10, 10)
+
+        with pt.raises(Exception):
+            assesspy.ki(
+                pd.concat([gini_data_assessed, pd.Series(float("Inf"))]),
+                pd.concat([gini_data_sale, pd.Series(1.0)]),
+            )
+
+        with pt.raises(Exception):
+            assesspy.ki(pd.DataFrame(ratio))
+
+        with pt.raises(Exception):
+            assesspy.ki(
+                pd.concat([gini_data_assessed, pd.Series(float("NaN"))]),
+                pd.concat([gini_data_sale, pd.Series(1.0)]),
+            )
+
+        with pt.raises(Exception):
+            assesspy.ki([1] * 30, [1] * 29 + ["1"])
+
+    def test_round(self):  # ki takes no rounding argument; an extra positional argument must raise
+        with pt.raises(Exception):
+            assesspy.ki(gini_data_assessed, gini_data_sale, "z")
+
+        with pt.raises(Exception):
+            assesspy.ki(gini_data_assessed, gini_data_sale, 1.1)
diff --git a/assesspy/utils.py b/assesspy/utils.py
@@ -13,7 +13,7 @@ def check_inputs(*args):
             args = x
 
     for x in args:
-        if type(x) == pd.core.frame.DataFrame:
+        if isinstance(x, pd.core.frame.DataFrame):
             raise Exception("Input cannot be a dataframe.")
 
         check = pd.Series(x)