-
Notifications
You must be signed in to change notification settings - Fork 4
/
distributions_by_method_2D.py
255 lines (195 loc) · 8.48 KB
/
distributions_by_method_2D.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
"""
Show how the winner distribution and bias of a voting method changes with
varying dispersion of candidates, in a 2D opinion space.
Essentially a 2D and normally-distributed version of Figure 3
The distributions of the winning position with k = 3, 4, 5 candidates
from
Kiran Tomlinson, Johan Ugander, Jon Kleinberg (2023) Moderation in instant
runoff voting https://arxiv.org/abs/2303.09734
or a single-winner version of Figure 3
Histograms and sample elections for our rules and distributions
from
Edith Elkind, et al. (2019) What Do Multiwinner Voting Rules Do?
https://arxiv.org/abs/1901.09217
"""
import os
import pickle
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
# from colorcet import fire
from joblib import Parallel, delayed
from elsim.elections import normal_electorate, normed_dist_utilities
from elsim.methods import (approval, black, borda, coombs, fptp, irv, runoff,
star)
from elsim.strategies import (approval_optimal, honest_normed_scores,
honest_rankings, vote_for_k)
try:
import ehtplot.color # Creates afmhot_u colormap
except ValueError: # https://github.com/liamedeiros/ehtplot/pull/6
pass
n_elections = 1_000_000 # Roughly 13 minutes on a 2019 6-core i7-9750H
n_voters = 1_000 # 10k looks the same but takes 2 hours. Lower diffuses rings.
n_cands = 9
cand_dist = 'normal'
u_width = 5
disp = 0.5
dims = 2
vote_for = n_cands//2
# Simulate more than just one election per worker to improve efficiency
batch_size = 100
n_batches = n_elections // batch_size
assert n_batches * batch_size == n_elections
bins = 50
range_lim = [-1.5, 1.5]
def ceildiv(a, b):
return -(a // -b)
def human_format(num):
for unit in ['', 'k', 'M', 'B', 'T']:
if abs(num) < 1000:
return f"{num:.3g}{unit}"
num /= 1000.0
def simulate_batch(n_cands):
winners = defaultdict(list)
for iteration in range(batch_size):
v, c = normal_electorate(n_voters, n_cands, dims=dims, disp=disp)
# Contrived candidate at exact center
# c[0] = 0, 0
if cand_dist == 'uniform':
# Replace with uniform distribution of candidates of same shape
v = np.random.uniform(-u_width/2, +u_width/2, n_voters)
v = np.atleast_2d(v).T
c = np.random.uniform(-u_width/2, +u_width/2, n_cands)
c = np.atleast_2d(c).T
raise SystemExit
# Voter distribution
winners['Voters'].append(v)
# Candidate distribution
winners['Candidates'].append(c)
# Ideal winner method. Votes don't matter at all; pick the center.
# winner = np.argmin(abs(c)) # 1D
winner = np.argmin(np.sum(c**2, axis=1)) # 2D
winners['Best possible winner'].append(c[winner])
# FPTP voting method
utilities = normed_dist_utilities(v, c)
rankings = honest_rankings(utilities)
winner = fptp(rankings, tiebreaker='random')
winners['First Past The Post'].append(c[winner])
# Top-two runoff
utilities = normed_dist_utilities(v, c)
rankings = honest_rankings(utilities)
winner = runoff(rankings, tiebreaker='random')
winners['Top-Two Runoff/Primary'].append(c[winner])
# Instant-runoff
winner = irv(rankings, tiebreaker='random')
winners['Ranked-Choice Voting (Hare)'].append(c[winner])
# Approval voting
winner = approval(approval_optimal(utilities), tiebreaker='random')
winners['Approval Voting ("optimal")'].append(c[winner])
# Approval voting
winner = approval(vote_for_k(utilities, vote_for), tiebreaker='random')
winners[f'Approval Voting ("Vote-for-{vote_for}")'].append(c[winner])
# STAR voting
ballots = honest_normed_scores(utilities)
winner = star(ballots, tiebreaker='random')
winners['STAR Voting'].append(c[winner])
# Borda count
winner = borda(rankings, tiebreaker='random')
winners['Borda count'].append(c[winner])
# Coombs method
winner = coombs(rankings, tiebreaker='random')
winners["Coombs' method"].append(c[winner])
# Condorcet RCV
winner = black(rankings, tiebreaker='random')
winners['Condorcet RCV (Black)'].append(c[winner])
histograms = {}
variances = {}
for method, points in winners.items():
points = np.vstack(points) # Concatenate all points
histograms[method] = np.histogram2d(points[:, 0], points[:, 1],
bins=bins,
range=[range_lim, range_lim])[0]
variances[method] = np.var(points, axis=0) # x, y
return histograms, variances
title = f'{human_format(n_elections)} 2D elections, '
title += f'{human_format(n_voters)} voters, '
title += f'{human_format(n_cands)} candidates'
if cand_dist == 'normal':
title += f', both Gaussian'
title += f', {disp:.1f} relative dispersion'
# Load from .pkl file if it exists
pkl_filename = title + '.pkl'
if os.path.exists(pkl_filename):
print('Loading pickled simulation results')
with open(pkl_filename, "rb") as file:
aggregated_histograms, standard_deviations = pickle.load(file)
else:
print('Running simulations')
jobs = [delayed(simulate_batch)(n_cands)] * n_batches
print(f'{len(jobs)} tasks total:')
results = Parallel(n_jobs=-3, verbose=5)(jobs)
del jobs
# Get keys from the histograms of the first result
keys = results[0][0].keys()
aggregated_histograms = {key: np.zeros((bins, bins)) for key in keys}
aggregated_variances = {key: np.zeros(2) for key in keys}
for result in results:
histograms, variances = result
for key in histograms:
# Technically requires Bessel's correction since we are sampling
# an infinite number of elections(?), but negligible for large N.
aggregated_histograms[key] += histograms[key]
# Sum all variances for calculating mean
aggregated_variances[key] += variances[key]
del results # n_batches * bins**2 * n methods = GBs of RAM
# Average the variances and get standard deviations
standard_deviations = {key: np.zeros(2) for key in keys}
for key in aggregated_variances:
# Technically the overall variance of a union of chunks has an
# inter-chunk variance term as well, but since ours are all zero-mean,
# I don't think it matters.
std = np.sqrt(aggregated_variances[key] / n_batches)
standard_deviations[key] = std
# Save the generated data to .pkl file
with open(pkl_filename, "wb") as file:
pickle.dump((aggregated_histograms, standard_deviations), file)
# %% Measure distributions
for method, std in standard_deviations.items():
print(f"{method}:")
print(f"Winner distribution std: {std[0]:.3f}")
print()
# %% Plotting
def plot_distribution(ax, histogram, title, max_lim):
extent = [-max_lim, max_lim, -max_lim, max_lim]
ax.imshow(histogram.T, cmap='afmhot_10us', origin='lower',
aspect='auto', extent=extent, interpolation='none')
ax.set_xlim([-max_lim, max_lim])
ax.set_ylim([-max_lim, max_lim])
ax.set_aspect('equal') # Set the aspect ratio to be equal (square)
ax.set_title(title, loc='left')
ax.tick_params(left=False, bottom=False) # Remove tick marks
ax.set_xticks([]) # Remove x-axis ticks
ax.set_yticks([]) # Remove y-axis ticks
ax.set_xlabel("") # Remove x-axis label
ax.set_ylabel("") # Remove y-axis label
for spine in ax.spines.values():
spine.set_visible(True)
fig, ax = plt.subplots(nrows=ceildiv(len(aggregated_histograms), 4), ncols=4,
num=title, sharex=True, constrained_layout=True,
figsize=(11, 9.5))
fig.suptitle(title)
ax = ax.T.flatten() # Flatten the ax array for easier indexing
max_lim = 1.5
for n, method in enumerate(aggregated_histograms.keys()):
plot_distribution(ax[n], aggregated_histograms[method], method, max_lim)
# Add standard deviation text in the lower right corner
std = standard_deviations[method][1]
ax[n].text(0.98, 0.02, f'std: {std:.3f}',
verticalalignment='bottom', horizontalalignment='right',
transform=ax[n].transAxes, color='white', fontsize=9)
# Hide the last axes if they are not used
for i in range(len(aggregated_histograms), len(ax)):
fig.delaxes(ax[i])
plt.show()
# %% Save the figure
plt.savefig(title + '.png')