Merge pull request #59 from mggg/add-margin-plot

karink520 · web-flow · commit 2d47262e413a · 2021-06-28T17:51:24.000-04:00
Add margin plot
diff --git a/pyei/examples/santa_clara_demo_r_by_c.ipynb b/pyei/examples/santa_clara_demo_r_by_c.ipynb
diff --git a/pyei/plot_utils.py b/pyei/plot_utils.py
@@ -463,6 +463,66 @@ def plot_precinct_scatterplot(ei_runs, run_names, candidate, demographic_group="
     return ax
 
 
+def plot_margin_kde(group, candidates, samples, thresholds, percentile, show_threshold, ax):
+    """
+    Plots a kde for the margin between two candidates among a given demographic group
+
+    Parameters:
+    -----------
+    samples: array
+        samples of the differences in voting preferences (candidate 1 - candidate 2)
+    thresholds: array
+        a list of thresholds for the difference in voting patterns between two groups
+    group: str
+        the name of the demographic group in question
+    candidates : list of str
+        the names of the two candidates in question
+    show_threshold: bool
+        if true, add vertical lines at the threshold on the plot
+
+    Returns
+    -------
+    ax: Matplotlib axis object
+    """
+    if ax is None:
+        _, ax = plt.subplots(figsize=FIGSIZE)
+
+    sns.histplot(
+        samples,
+        kde=True,
+        ax=ax,
+        element="step",
+        stat="density",
+        color="steelblue",
+        linewidth=0,
+    )
+    ax.set_ylabel("Density", fontsize=FONTSIZE)
+    if len(thresholds) == 1:
+        threshold_string = f"> {thresholds[0]:.2f}"
+    else:
+        threshold_string = f"in [{thresholds[0]:.2f}, {thresholds[1]:.2f}]"
+    if show_threshold:
+        for threshold in thresholds:
+            ax.axvline(threshold, c="gray")
+        if len(thresholds) == 2:
+            ax.axvspan(thresholds[0], thresholds[1], facecolor="gray", alpha=0.2)
+        else:
+            ax.axvspan(thresholds[0], 1, facecolor="gray", alpha=0.2)
+        ax.text(
+            thresholds[-1] + 0.05,
+            0.5,
+            f"Prob (margin {threshold_string} ) = {percentile:.1f}%",
+            fontsize=FONTSIZE,
+        )
+
+    ax.set_title(f"{candidates[0]} - {candidates[1]} margin among {group}", fontsize=TITLESIZE)
+    ax.set_xlabel(f"{group} support for {candidates[0]} - {candidates[1]}", fontsize=FONTSIZE)
+    ax.set_xlim((-1, 1))
+    xticks = ax.get_xticks()
+    ax.set_xticks(xticks)
+    ax.set_xticklabels(xticks, size=TICKSIZE)
+
+
 def plot_polarization_kde(
     diff_samples,
     thresholds,
diff --git a/pyei/r_by_c.py b/pyei/r_by_c.py
@@ -18,6 +18,7 @@
     plot_kdes,
     plot_intervals_all_precincts,
     plot_polarization_kde,
+    plot_margin_kde,
 )
 
 __all__ = ["ei_multinom_dirichlet_modified", "ei_multinom_dirichlet", "RowByColumnEI"]
@@ -307,6 +308,115 @@ def calculate_summary(self):
                     self.sampled_voting_prefs[:, row, col], percentiles
                 )
 
+    def _calculate_margin(self, group, candidates, threshold=None, percentile=None):
+        """
+        Calculating the Candidate 1 - Candidate 2 margin among the given group.
+        Calculate the percentile given a threshold, or vice versa. Exactly one of
+        {percentile, threshold} must be None.
+
+        Parameters:
+        ----------
+        group: str
+            Demographic group in question
+        candidates: list of str
+            Length 2 vector of candidates upon which to calculate the margin
+        threshold: float (optional)
+            A specified level for the margin between the two candidates. If specified,
+            use the threshold to calculate the percentile (% of samples with a larger margin)
+        percentile: float (opetional)
+            Between 0 and 100. Used to calculate the equal-tailed interval for the margin between
+            the two candidates.
+        """
+        candidate_index_0 = self.candidate_names.index(candidates[0])
+        candidate_index_1 = self.candidate_names.index(candidates[1])
+        group_index = self.demographic_group_names.index(group)
+
+        samples = (
+            self.sampled_voting_prefs[:, group_index, candidate_index_0]
+            - self.sampled_voting_prefs[:, group_index, candidate_index_1]
+        )
+
+        if percentile is None and threshold is not None:
+            percentile = 100 * (samples > threshold).sum() / len(self.sampled_voting_prefs)
+        elif threshold is None and percentile is not None:
+            threshold = np.percentile(samples, 100 - percentile)
+        else:
+            raise ValueError(
+                """Exactly one of threshold or percentile must be None.
+            Set a threshold to calculate the associated percentile, or a percentile
+            to calculate the associated threshold.
+            """
+            )
+        return threshold, percentile, samples, group, candidates
+
+    def margin_report(self, group, candidates, threshold=None, percentile=None, verbose=True):
+        """
+        For a given threshold, return the probability that the margin between
+        the two candidates preferences in the given demographic group is greater than
+        the threshold
+        OR
+        For a given confidence level, calculate the associated confidence interval
+        of the difference between the two candidates preference among the group.
+        Exactly one of {percentile, threshold} must be None.
+        Parameters:
+        -----------
+        group: str
+            Demographic group in question
+        candidates: list of str
+            Length 2 vector of candidates upon which to calculate the margin
+        threshold: float (optional)
+            A specified level for the margin between the two candidates. If specified,
+            use the threshold to calculate the percentile (% of samples with a larger margin)
+        percentile: float (opetional)
+            Between 0 and 100. Used to calculate the equal-tailed interval for the margin between
+            the two candidates.
+        verbose: bool
+            If true, print a report putting margin in context
+        """
+        return_interval = threshold is None
+
+        if not all(candidate in self.candidate_names for candidate in candidates):
+            raise ValueError(
+                f"""candidate names must be in the list of candidate_names provided to fit():
+                {self.candidate_names}"""
+            )
+
+        if group not in self.demographic_group_names:
+            raise ValueError(
+                f"""group name must be in the list of demographic_group_names
+                provided to fit():
+                {self.demographic_group_names}"""
+            )
+
+        if return_interval:
+            lower_percentile = (100 - percentile) / 2
+            upper_percentile = lower_percentile + percentile
+            lower_threshold, _, _, group, candidates = self._calculate_margin(
+                group, candidates, threshold, upper_percentile
+            )
+            upper_threshold, _, _, group, candidates = self._calculate_margin(
+                group, candidates, threshold, lower_percentile
+            )
+
+            if verbose:
+                print(
+                    f"There is a {percentile}% probability that the difference between"
+                    + f" {group}s' preferences for {candidates[0]} and {candidates[1]} is"
+                    + f" between [{lower_threshold:.2f}, {upper_threshold:.2f}]."
+                )
+            return (lower_threshold, upper_threshold)
+        else:
+            threshold, percentile, _, group, candidates = self._calculate_margin(
+                group, candidates, threshold, percentile
+            )
+            if verbose:
+                print(
+                    f"There is a {percentile:.1f}% probability that the difference between"
+                    + f" {group}s' preferences for {candidates[0]} and {candidates[1]}"
+                    + f" is more than {threshold:.2f}."
+                )
+            return percentile
+
     def _calculate_polarization(self, groups, candidate, threshold=None, percentile=None):
         """
         Calculate percentile given a threshold, or vice versa.
@@ -619,6 +729,48 @@ def plot_kdes(self, plot_by="candidate", axes=None):
             axes=axes,
         )
 
+    def plot_margin_kde(
+        self, group, candidates, threshold=None, percentile=None, show_threshold=False, ax=None
+    ):
+        """
+        Plot kde of the margin between two candidates among the given demographic group.
+
+        Parameters:
+        ----------
+        group: str
+            Demographic group in question
+        candidates: list of str
+            Length 2 vector of candidates upon which to calculate the margin
+        threshold: float (optional)
+            A specified level for the margin between the two candidates. If specified,
+            use the threshold to calculate the percentile (% of samples with a larger margin)
+        percentile: float (opetional)
+            Between 0 and 100. Used to calculate the equal-tailed interval for the margin between
+            the two candidates.
+        show_threshold: bool
+            Show threshold in the plot.
+        """
+        return_interval = threshold is None
+        if return_interval:
+            lower_percentile = (100 - percentile) / 2
+            upper_percentile = lower_percentile + percentile
+            lower_threshold, _, samples, group, candidates = self._calculate_margin(
+                group, candidates, threshold, upper_percentile
+            )
+            upper_threshold, _, samples, group, candidates = self._calculate_margin(
+                group, candidates, threshold, lower_percentile
+            )
+            thresholds = [lower_threshold, upper_threshold]
+        else:
+            threshold, percentile, samples, group, candidates = self._calculate_margin(
+                group, candidates, threshold, percentile
+            )
+            thresholds = [threshold]
+
+        return plot_margin_kde(
+            group, candidates, samples, thresholds, percentile, show_threshold, ax
+        )
+
     def plot_polarization_kde(
         self, groups, candidate, threshold=None, percentile=None, show_threshold=False, ax=None
     ):