|
18 | 18 | plot_kdes,
|
19 | 19 | plot_intervals_all_precincts,
|
20 | 20 | plot_polarization_kde,
|
| 21 | + plot_margin_kde, |
21 | 22 | )
|
22 | 23 |
|
23 | 24 | __all__ = ["ei_multinom_dirichlet_modified", "ei_multinom_dirichlet", "RowByColumnEI"]
|
@@ -307,6 +308,115 @@ def calculate_summary(self):
|
307 | 308 | self.sampled_voting_prefs[:, row, col], percentiles
|
308 | 309 | )
|
309 | 310 |
|
def _calculate_margin(self, group, candidates, threshold=None, percentile=None):
    """
    Calculate the (candidate 1 - candidate 2) margin among the given group.

    Converts a threshold into a percentile, or a percentile into a threshold.
    Exactly one of {percentile, threshold} must be None.

    Parameters:
    ----------
    group: str
        Demographic group in question
    candidates: list of str
        Length 2 vector of candidates upon which to calculate the margin
    threshold: float (optional)
        A specified level for the margin between the two candidates. If specified,
        use the threshold to calculate the percentile (% of samples with a larger margin)
    percentile: float (optional)
        Between 0 and 100. If specified, the returned threshold is the margin that
        is exceeded by (approximately) `percentile` percent of the samples.

    Returns:
    -------
    tuple
        (threshold, percentile, samples, group, candidates), where `samples` holds
        the per-sample margins and the missing one of threshold / percentile has
        been filled in.

    Raises:
    ------
    ValueError
        If threshold and percentile are both None or both given, or if the group
        or either candidate is not among the fitted names.
    """
    # Check the argument combination first so no work is done on a bad call.
    if (threshold is None) == (percentile is None):
        raise ValueError(
            "Exactly one of threshold or percentile must be None.\n"
            "Set a threshold to calculate the associated percentile, or a percentile\n"
            "to calculate the associated threshold."
        )

    # list.index would also raise ValueError for an unknown name, but only with
    # "'x' is not in list"; spell out which argument is wrong and what is allowed.
    unknown_candidates = [
        candidate for candidate in candidates[:2] if candidate not in self.candidate_names
    ]
    if unknown_candidates:
        raise ValueError(
            f"candidate names {unknown_candidates} must be in the list of candidate_names "
            f"provided to fit(): {self.candidate_names}"
        )
    if group not in self.demographic_group_names:
        raise ValueError(
            f"group name {group!r} must be in the list of demographic_group_names "
            f"provided to fit(): {self.demographic_group_names}"
        )

    candidate_index_0 = self.candidate_names.index(candidates[0])
    candidate_index_1 = self.candidate_names.index(candidates[1])
    group_index = self.demographic_group_names.index(group)

    # One margin per entry along the first axis of sampled_voting_prefs
    # (presumably one entry per posterior draw -- confirm against fit()).
    samples = (
        self.sampled_voting_prefs[:, group_index, candidate_index_0]
        - self.sampled_voting_prefs[:, group_index, candidate_index_1]
    )

    if percentile is None:
        # Share of samples whose margin strictly exceeds the threshold, in percent.
        percentile = 100 * (samples > threshold).sum() / len(samples)
    else:
        # `percentile` counts samples ABOVE the threshold, so read the margin
        # distribution at the complementary percentile.
        threshold = np.percentile(samples, 100 - percentile)

    return threshold, percentile, samples, group, candidates
| 351 | + |
def margin_report(self, group, candidates, threshold=None, percentile=None, verbose=True):
    """
    Report on the margin between two candidates within one demographic group.

    For a given threshold, return the probability that the margin between
    the two candidates' preferences in the given demographic group is greater than
    the threshold
    OR
    For a given confidence level, calculate the associated equal-tailed interval
    of the difference between the two candidates' preferences among the group.
    Exactly one of {percentile, threshold} must be None.

    Parameters:
    -----------
    group: str
        Demographic group in question
    candidates: list of str
        Length 2 vector of candidates upon which to calculate the margin
    threshold: float (optional)
        A specified level for the margin between the two candidates. If specified,
        use the threshold to calculate the percentile (% of samples with a larger margin)
    percentile: float (optional)
        Between 0 and 100. Used to calculate the equal-tailed interval for the margin between
        the two candidates.
    verbose: bool
        If true, print a report putting margin in context

    Returns:
    --------
    tuple of (float, float) or float
        (lower_threshold, upper_threshold) when `percentile` is given; the
        percentage of samples whose margin exceeds `threshold` when `threshold`
        is given.

    Raises:
    -------
    ValueError
        If threshold and percentile are both None or both given, or if the group
        or a candidate is not among the fitted names.
    """
    # Reject bad argument combinations up front. Previously, passing neither
    # argument fell into the interval branch and failed with a TypeError on
    # `100 - None` instead of the documented ValueError.
    if (threshold is None) == (percentile is None):
        raise ValueError(
            "Exactly one of threshold or percentile must be None.\n"
            "Set a threshold to calculate the associated percentile, or a percentile\n"
            "to calculate the associated interval."
        )

    unknown_candidates = [
        candidate for candidate in candidates if candidate not in self.candidate_names
    ]
    if unknown_candidates:
        raise ValueError(
            f"candidate names {unknown_candidates} must be in the list of candidate_names "
            f"provided to fit(): {self.candidate_names}"
        )

    if group not in self.demographic_group_names:
        raise ValueError(
            f"group name {group!r} must be in the list of demographic_group_names "
            f"provided to fit(): {self.demographic_group_names}"
        )

    if threshold is None:
        # Equal-tailed interval: leave (100 - percentile) / 2 percent in each tail.
        lower_percentile = (100 - percentile) / 2
        upper_percentile = lower_percentile + percentile
        # _calculate_margin returns the margin exceeded by p% of samples, so the
        # UPPER percentile yields the LOWER end of the interval and vice versa.
        lower_threshold, *_ = self._calculate_margin(group, candidates, None, upper_percentile)
        upper_threshold, *_ = self._calculate_margin(group, candidates, None, lower_percentile)

        if verbose:
            print(
                f"There is a {percentile}% probability that the difference between"
                + f" {group}s' preferences for {candidates[0]} and {candidates[1]} is"
                + f" between [{lower_threshold:.2f}, {upper_threshold:.2f}]."
            )
        return (lower_threshold, upper_threshold)

    threshold, percentile, *_ = self._calculate_margin(group, candidates, threshold, None)
    if verbose:
        print(
            f"There is a {percentile:.1f}% probability that the difference between"
            + f" {group}s' preferences for {candidates[0]} and {candidates[1]}"
            + f" is more than {threshold:.2f}."
        )
    return percentile
| 419 | + |
310 | 420 | def _calculate_polarization(self, groups, candidate, threshold=None, percentile=None):
|
311 | 421 | """
|
312 | 422 | Calculate percentile given a threshold, or vice versa.
|
@@ -619,6 +729,48 @@ def plot_kdes(self, plot_by="candidate", axes=None):
|
619 | 729 | axes=axes,
|
620 | 730 | )
|
621 | 731 |
|
def plot_margin_kde(
    self, group, candidates, threshold=None, percentile=None, show_threshold=False, ax=None
):
    """
    Plot kde of the margin between two candidates among the given demographic group.

    Exactly one of {percentile, threshold} must be None.

    Parameters:
    ----------
    group: str
        Demographic group in question
    candidates: list of str
        Length 2 vector of candidates upon which to calculate the margin
    threshold: float (optional)
        A specified level for the margin between the two candidates. If specified,
        use the threshold to calculate the percentile (% of samples with a larger margin)
    percentile: float (optional)
        Between 0 and 100. Used to calculate the equal-tailed interval for the margin between
        the two candidates.
    show_threshold: bool
        Show threshold in the plot.
    ax: (optional)
        Forwarded unchanged to the plotting helper (presumably a matplotlib
        axes to draw on -- confirm against the helper's signature).

    Returns:
    -------
    Whatever the module-level `plot_margin_kde` plotting helper returns.

    Raises:
    ------
    ValueError
        If threshold and percentile are both None or both given, or if the group
        or a candidate is not among the fitted names.
    """
    # Reject bad argument combinations up front. Previously, passing neither
    # argument failed with a TypeError on `100 - None`.
    if (threshold is None) == (percentile is None):
        raise ValueError(
            "Exactly one of threshold or percentile must be None.\n"
            "Set a threshold to plot the associated percentile, or a percentile\n"
            "to plot the associated interval."
        )

    # Same name checks as margin_report, so both entry points fail the same way.
    unknown_candidates = [
        candidate for candidate in candidates if candidate not in self.candidate_names
    ]
    if unknown_candidates:
        raise ValueError(
            f"candidate names {unknown_candidates} must be in the list of candidate_names "
            f"provided to fit(): {self.candidate_names}"
        )
    if group not in self.demographic_group_names:
        raise ValueError(
            f"group name {group!r} must be in the list of demographic_group_names "
            f"provided to fit(): {self.demographic_group_names}"
        )

    if threshold is None:
        # Equal-tailed interval: leave (100 - percentile) / 2 percent in each tail.
        lower_percentile = (100 - percentile) / 2
        upper_percentile = lower_percentile + percentile
        # _calculate_margin returns the margin exceeded by p% of samples, so the
        # UPPER percentile yields the LOWER end of the interval and vice versa.
        lower_threshold, _, samples, group, candidates = self._calculate_margin(
            group, candidates, None, upper_percentile
        )
        upper_threshold, _, samples, group, candidates = self._calculate_margin(
            group, candidates, None, lower_percentile
        )
        thresholds = [lower_threshold, upper_threshold]
    else:
        threshold, percentile, samples, group, candidates = self._calculate_margin(
            group, candidates, threshold, None
        )
        thresholds = [threshold]

    # NOTE: within a class body this bare name resolves to the module-level
    # plotting helper of the same name (it appears in the file's import list),
    # not to this method.
    return plot_margin_kde(
        group, candidates, samples, thresholds, percentile, show_threshold, ax
    )
| 773 | + |
622 | 774 | def plot_polarization_kde(
|
623 | 775 | self, groups, candidate, threshold=None, percentile=None, show_threshold=False, ax=None
|
624 | 776 | ):
|
|
0 commit comments