Skip to content

Commit 2d47262

Browse files
authored
Merge pull request #59 from mggg/add-margin-plot
Add margin plot
2 parents f47ec7d + a349b94 commit 2d47262

File tree

3 files changed

+577
-214
lines changed

3 files changed

+577
-214
lines changed

pyei/examples/santa_clara_demo_r_by_c.ipynb

+365-214
Large diffs are not rendered by default.

pyei/plot_utils.py

+60
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,66 @@ def plot_precinct_scatterplot(ei_runs, run_names, candidate, demographic_group="
463463
return ax
464464

465465

466+
def plot_margin_kde(group, candidates, samples, thresholds, percentile, show_threshold, ax):
    """
    Plots a kde for the margin between two candidates among a given demographic group

    Parameters:
    -----------
    group: str
        the name of the demographic group in question
    candidates : list of str
        the names of the two candidates in question
    samples: array
        samples of the differences in voting preferences (candidate 1 - candidate 2)
    thresholds: array
        one or two margin thresholds; a single threshold is treated as a lower
        bound on the margin, two thresholds as the endpoints of an interval
    percentile: float
        percentage written in the annotation as the probability that the margin
        satisfies the threshold condition (only used when show_threshold is true)
    show_threshold: bool
        if true, add vertical lines at the threshold(s), shade the region they
        delimit and annotate it with the probability
    ax: Matplotlib axis object or None
        axis to draw on; if None, a new figure and axis are created

    Returns
    -------
    ax: Matplotlib axis object
    """
    if ax is None:
        _, ax = plt.subplots(figsize=FIGSIZE)

    sns.histplot(
        samples,
        kde=True,
        ax=ax,
        element="step",
        stat="density",
        color="steelblue",
        linewidth=0,
    )
    ax.set_ylabel("Density", fontsize=FONTSIZE)
    if len(thresholds) == 1:
        threshold_string = f"> {thresholds[0]:.2f}"
    else:
        threshold_string = f"in [{thresholds[0]:.2f}, {thresholds[1]:.2f}]"
    if show_threshold:
        for threshold in thresholds:
            ax.axvline(threshold, c="gray")
        if len(thresholds) == 2:
            ax.axvspan(thresholds[0], thresholds[1], facecolor="gray", alpha=0.2)
        else:
            # A single threshold is a lower bound, so shade up to the largest
            # possible margin (1).
            ax.axvspan(thresholds[0], 1, facecolor="gray", alpha=0.2)
        ax.text(
            thresholds[-1] + 0.05,
            0.5,
            f"Prob (margin {threshold_string} ) = {percentile:.1f}%",
            fontsize=FONTSIZE,
        )

    ax.set_title(f"{candidates[0]} - {candidates[1]} margin among {group}", fontsize=TITLESIZE)
    ax.set_xlabel(f"{group} support for {candidates[0]} - {candidates[1]}", fontsize=FONTSIZE)
    ax.set_xlim((-1, 1))
    # Pin the tick positions before relabelling them so the labels line up
    # with the ticks they describe.
    xticks = ax.get_xticks()
    ax.set_xticks(xticks)
    ax.set_xticklabels(xticks, size=TICKSIZE)
    # The docstring promises the axis, and callers forward this return value;
    # previously the function fell off the end and returned None.
    return ax
524+
525+
466526
def plot_polarization_kde(
467527
diff_samples,
468528
thresholds,

pyei/r_by_c.py

+152
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
plot_kdes,
1919
plot_intervals_all_precincts,
2020
plot_polarization_kde,
21+
plot_margin_kde,
2122
)
2223

2324
__all__ = ["ei_multinom_dirichlet_modified", "ei_multinom_dirichlet", "RowByColumnEI"]
@@ -307,6 +308,115 @@ def calculate_summary(self):
307308
self.sampled_voting_prefs[:, row, col], percentiles
308309
)
309310

311+
def _calculate_margin(self, group, candidates, threshold=None, percentile=None):
    """
    Compute the sampled (candidate 1 - candidate 2) margin within one demographic
    group, then derive a percentile from a threshold or a threshold from a
    percentile. Exactly one of {percentile, threshold} must be None.

    Parameters:
    ----------
    group: str
        Demographic group in question
    candidates: list of str
        Length 2 vector of candidates upon which to calculate the margin
    threshold: float (optional)
        A specified level for the margin between the two candidates. If specified,
        the returned percentile is the % of samples with a larger margin
    percentile: float (optional)
        Between 0 and 100. If specified, the returned threshold is the margin
        value located at the (100 - percentile)th percentile of the samples

    Returns
    -------
    tuple
        (threshold, percentile, samples, group, candidates), where samples is the
        array of sampled margins and group / candidates are passed through unchanged
    """
    first_col = self.candidate_names.index(candidates[0])
    second_col = self.candidate_names.index(candidates[1])
    row = self.demographic_group_names.index(group)

    prefs = self.sampled_voting_prefs
    samples = prefs[:, row, first_col] - prefs[:, row, second_col]

    have_threshold = threshold is not None
    have_percentile = percentile is not None
    if have_threshold == have_percentile:
        # Either both were supplied or neither was.
        raise ValueError(
            """Exactly one of threshold or percentile must be None.
            Set a threshold to calculate the associated percentile, or a percentile
            to calculate the associated threshold.
            """
        )

    if have_threshold:
        num_above = (samples > threshold).sum()
        percentile = 100 * num_above / len(prefs)
    else:
        threshold = np.percentile(samples, 100 - percentile)
    return threshold, percentile, samples, group, candidates
351+
352+
def margin_report(self, group, candidates, threshold=None, percentile=None, verbose=True):
    """
    For a given threshold, return the probability that the margin between
    the two candidates preferences in the given demographic group is greater than
    the threshold
    OR
    For a given confidence level, calculate the associated equal-tailed interval
    of the difference between the two candidates preference among the group.
    Exactly one of {percentile, threshold} must be None.

    Parameters:
    -----------
    group: str
        Demographic group in question
    candidates: list of str
        Length 2 vector of candidates upon which to calculate the margin
    threshold: float (optional)
        A specified level for the margin between the two candidates. If specified,
        use the threshold to calculate the percentile (% of samples with a larger margin)
    percentile: float (optional)
        Between 0 and 100. Used to calculate the equal-tailed interval for the margin between
        the two candidates.
    verbose: bool
        If true, print a report putting margin in context

    Returns
    -------
    float or tuple of float
        If threshold is given: the percentage of samples whose margin exceeds it.
        If percentile is given: (lower_threshold, upper_threshold), the endpoints
        of the equal-tailed interval containing that percentage of the samples.

    Raises
    ------
    ValueError
        If a candidate or group name is unknown, or if threshold and percentile
        are both None or both specified.
    """
    if not all(candidate in self.candidate_names for candidate in candidates):
        raise ValueError(
            f"""candidate names must be in the list of candidate_names provided to fit():
            {self.candidate_names}"""
        )

    if group not in self.demographic_group_names:
        raise ValueError(
            f"""group name must be in the list of demographic_group_names
            provided to fit():
            {self.demographic_group_names}"""
        )

    # Validate up front: without this, leaving both arguments as None reached
    # the interval arithmetic below and failed with an opaque TypeError.
    if (threshold is None) == (percentile is None):
        raise ValueError(
            "Exactly one of threshold or percentile must be None. "
            "Set a threshold to calculate the associated percentile, or a percentile "
            "to calculate the associated interval."
        )

    return_interval = threshold is None

    if return_interval:
        # Split the mass left outside the interval evenly between the two tails.
        lower_percentile = (100 - percentile) / 2
        upper_percentile = lower_percentile + percentile
        # _calculate_margin returns the margin exceeded by the given % of samples,
        # so the larger percentile yields the lower endpoint and vice versa.
        lower_threshold, _, _, group, candidates = self._calculate_margin(
            group, candidates, threshold, upper_percentile
        )
        upper_threshold, _, _, group, candidates = self._calculate_margin(
            group, candidates, threshold, lower_percentile
        )

        if verbose:
            print(
                f"There is a {percentile}% probability that the difference between"
                + f" {group}s' preferences for {candidates[0]} and {candidates[1]} is"
                + f" between [{lower_threshold:.2f}, {upper_threshold:.2f}]."
            )
        return (lower_threshold, upper_threshold)

    threshold, percentile, _, group, candidates = self._calculate_margin(
        group, candidates, threshold, percentile
    )
    if verbose:
        print(
            f"There is a {percentile:.1f}% probability that the difference between"
            + f" {group}s' preferences for {candidates[0]} and {candidates[1]}"
            + f" is more than {threshold:.2f}."
        )
    return percentile
419+
310420
def _calculate_polarization(self, groups, candidate, threshold=None, percentile=None):
311421
"""
312422
Calculate percentile given a threshold, or vice versa.
@@ -619,6 +729,48 @@ def plot_kdes(self, plot_by="candidate", axes=None):
619729
axes=axes,
620730
)
621731

732+
def plot_margin_kde(
    self, group, candidates, threshold=None, percentile=None, show_threshold=False, ax=None
):
    """
    Plot kde of the margin between two candidates among the given demographic group.
    Exactly one of {percentile, threshold} must be None.

    Parameters:
    ----------
    group: str
        Demographic group in question
    candidates: list of str
        Length 2 vector of candidates upon which to calculate the margin
    threshold: float (optional)
        A specified level for the margin between the two candidates. If specified,
        use the threshold to calculate the percentile (% of samples with a larger margin)
    percentile: float (optional)
        Between 0 and 100. Used to calculate the equal-tailed interval for the margin between
        the two candidates.
    show_threshold: bool
        Show threshold in the plot.
    ax: Matplotlib axis object (optional)
        Axis to draw on; passed through unchanged to the plotting helper.

    Returns
    -------
    The return value of the module-level plot_margin_kde plotting helper
    (documented there as the Matplotlib axis object).

    Raises
    ------
    ValueError
        If threshold and percentile are both None or both specified.
    """
    # Validate up front: without this, leaving both arguments as None reached
    # the interval arithmetic below and failed with an opaque TypeError.
    if (threshold is None) == (percentile is None):
        raise ValueError(
            "Exactly one of threshold or percentile must be None. "
            "Set a threshold to plot the associated percentile, or a percentile "
            "to plot the associated interval."
        )

    return_interval = threshold is None
    if return_interval:
        # Split the mass left outside the interval evenly between the two tails.
        lower_percentile = (100 - percentile) / 2
        upper_percentile = lower_percentile + percentile
        # _calculate_margin returns the margin exceeded by the given % of samples,
        # so the larger percentile yields the lower endpoint and vice versa.
        lower_threshold, _, samples, group, candidates = self._calculate_margin(
            group, candidates, threshold, upper_percentile
        )
        upper_threshold, _, samples, group, candidates = self._calculate_margin(
            group, candidates, threshold, lower_percentile
        )
        thresholds = [lower_threshold, upper_threshold]
    else:
        threshold, percentile, samples, group, candidates = self._calculate_margin(
            group, candidates, threshold, percentile
        )
        thresholds = [threshold]

    # Inside a method body this name resolves to the module-level plotting
    # helper imported from plot_utils, not to this method.
    return plot_margin_kde(
        group, candidates, samples, thresholds, percentile, show_threshold, ax
    )
773+
622774
def plot_polarization_kde(
623775
self, groups, candidate, threshold=None, percentile=None, show_threshold=False, ax=None
624776
):

0 commit comments

Comments
 (0)