Skip to content

Commit

Permalink
ES&S CVR parsing: Protect against unknown columns in CVR file (#1960)
Browse files Browse the repository at this point in the history
We added a hotfix in #1954 to allow different sets of metadata columns
in the CVR file. However, we may still see metadata columns we haven't
seen before. Since there's no good way to differentiate unknown columns
from contest columns, our current approach may silently fail in that
case. The consequence would be that metadata columns are treated as
contests. This may or may not cause downstream issues.

To reduce the likelihood of that happening, we change our method of
searching for the dividing line between metadata columns and contest
columns to look for the _last_ known metadata header. That way, we'll
get it right in every case except the case where the dividing line is a
header we haven't seen before, which is much less likely to occur.
  • Loading branch information
jonahkagan authored Aug 14, 2024
1 parent 19895b9 commit 5c4bb29
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 19 deletions.
13 changes: 9 additions & 4 deletions server/api/cvrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,11 @@ def parse_ballots_file(
def parse_contest_metadata(cvr_csv: CSVIterator) -> CVR_CONTESTS_METADATA:
headers = next(cvr_csv)
# Based on files we've seen, the first few columns are metadata, and the
# rest are contest names
# rest are contest names. We want to figure out where the dividing line
# is. The challenge is that there may be metadata columns that we've
# never seen before. To maximize the chance of getting this right, we
# look for the last column that matches a known metadata header, hoping
# that the dividing line will be one of our known headers.
known_metadata_headers = [
"Election ID",
"Audit Number",
Expand All @@ -823,11 +827,12 @@ def parse_contest_metadata(cvr_csv: CSVIterator) -> CVR_CONTESTS_METADATA:
"Precinct",
"Ballot Style",
]
first_contest_column = next(
last_header_column = next(
index
for index, header in enumerate(headers)
if header not in known_metadata_headers
for index, header in reversed(list(enumerate(headers)))
if header in known_metadata_headers
)
first_contest_column = last_header_column + 1
contest_names = headers[first_contest_column:]
# { contest_name: choice_names }
contest_choices = defaultdict(set)
Expand Down
121 changes: 121 additions & 0 deletions server/tests/ballot_comparison/snapshots/snap_test_cvrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1238,6 +1238,127 @@
},
}

snapshots["test_ess_cvr_upload_cvr_file_with_tabulator_cvr_column 1"] = [
{
"ballot_position": 1,
"batch_name": "BATCH2",
"imprinted_id": "0001000415",
"interpretations": "0,1,1,0,0",
"tabulator": "0001",
},
{
"ballot_position": 2,
"batch_name": "BATCH2",
"imprinted_id": "0001000416",
"interpretations": "1,0,1,0,0",
"tabulator": "0001",
},
{
"ballot_position": 3,
"batch_name": "BATCH2",
"imprinted_id": "0001000417",
"interpretations": "0,1,0,1,0",
"tabulator": "0001",
},
{
"ballot_position": 1,
"batch_name": "BATCH1",
"imprinted_id": "0001013415",
"interpretations": "0,1,1,0,0",
"tabulator": "0001",
},
{
"ballot_position": 2,
"batch_name": "BATCH1",
"imprinted_id": "0001013416",
"interpretations": "1,0,1,0,0",
"tabulator": "0001",
},
{
"ballot_position": 3,
"batch_name": "BATCH1",
"imprinted_id": "0001013417",
"interpretations": "u,u,1,0,0",
"tabulator": "0001",
},
{
"ballot_position": 1,
"batch_name": "BATCH2",
"imprinted_id": "0002000171",
"interpretations": "1,0,0,1,0",
"tabulator": "0002",
},
{
"ballot_position": 2,
"batch_name": "BATCH2",
"imprinted_id": "0002000172",
"interpretations": "0,1,0,1,0",
"tabulator": "0002",
},
{
"ballot_position": 3,
"batch_name": "BATCH2",
"imprinted_id": "0002000173",
"interpretations": "1,0,0,1,0",
"tabulator": "0002",
},
{
"ballot_position": 4,
"batch_name": "BATCH2",
"imprinted_id": "0002000174",
"interpretations": "0,1,0,0,1",
"tabulator": "0002",
},
{
"ballot_position": 5,
"batch_name": "BATCH2",
"imprinted_id": "0002000175",
"interpretations": "1,0,0,0,1",
"tabulator": "0002",
},
{
"ballot_position": 1,
"batch_name": "BATCH1",
"imprinted_id": "0002003171",
"interpretations": "o,o,1,0,0",
"tabulator": "0002",
},
{
"ballot_position": 2,
"batch_name": "BATCH1",
"imprinted_id": "0002003172",
"interpretations": "0,1,1,0,0",
"tabulator": "0002",
},
{
"ballot_position": 3,
"batch_name": "BATCH1",
"imprinted_id": "0002003173",
"interpretations": "1,0,1,0,0",
"tabulator": "0002",
},
]

snapshots["test_ess_cvr_upload_cvr_file_with_tabulator_cvr_column 2"] = {
"Contest 1": {
"choices": {
"Choice 1-1": {"column": 0, "num_votes": 6},
"Choice 1-2": {"column": 1, "num_votes": 6},
},
"total_ballots_cast": 14,
"votes_allowed": 1,
},
"Contest 2": {
"choices": {
"Choice 2-1": {"column": 2, "num_votes": 8},
"Choice 2-2": {"column": 3, "num_votes": 4},
"Choice 2-3": {"column": 4, "num_votes": 2},
},
"total_ballots_cast": 14,
"votes_allowed": 1,
},
}

snapshots["test_hart_cvr_upload 1"] = [
{
"ballot_position": 1,
Expand Down
54 changes: 39 additions & 15 deletions server/tests/ballot_comparison/test_cvrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1394,21 +1394,21 @@ def test_clearballot_cvr_upload_invalid(
15,BATCH2,Not Reviewed,,,,N,REP 405,Election Day,0002000175,7074480632,Card,Election Day,28,405,21,0002,
"""

ESS_CVR_WITH_TABULATOR_CVR_COLUMN = """Cast Vote Record,Precinct,Ballot Style,Tabulator CVR,Contest 1,Contest 2
1,p,bs,0001013415,Choice 1-2,Choice 2-1
2,p,bs,0001013416,Choice 1-1,Choice 2-1
3,p,bs,0001013417,undervote,Choice 2-1
4,p,bs,0002003171,overvote,Choice 2-1
5,p,bs,0002003172,Choice 1-2,Choice 2-1
6,p,bs,0002003173,Choice 1-1,Choice 2-1
7,p,bs,0001000415,Choice 1-2,Choice 2-1
8,p,bs,0001000416,Choice 1-1,Choice 2-1
9,p,bs,0001000417,Choice 1-2,Choice 2-2
10,p,bs,0002000171,Choice 1-1,Choice 2-2
11,p,bs,0002000172,Choice 1-2,Choice 2-2
12,p,bs,0002000173,Choice 1-1,Choice 2-2
13,p,bs,0002000174,Choice 1-2,Choice 2-3
15,p,bs,0002000175,Choice 1-1,Choice 2-3
ESS_CVR_WITH_TABULATOR_CVR_COLUMN = """Unknown Column,Cast Vote Record,Precinct,Ballot Style,Tabulator CVR,Contest 1,Contest 2
x,1,p,bs,0001013415,Choice 1-2,Choice 2-1
x,2,p,bs,0001013416,Choice 1-1,Choice 2-1
x,3,p,bs,0001013417,undervote,Choice 2-1
x,4,p,bs,0002003171,overvote,Choice 2-1
x,5,p,bs,0002003172,Choice 1-2,Choice 2-1
x,6,p,bs,0002003173,Choice 1-1,Choice 2-1
x,7,p,bs,0001000415,Choice 1-2,Choice 2-1
x,8,p,bs,0001000416,Choice 1-1,Choice 2-1
x,9,p,bs,0001000417,Choice 1-2,Choice 2-2
x,10,p,bs,0002000171,Choice 1-1,Choice 2-2
x,11,p,bs,0002000172,Choice 1-2,Choice 2-2
x,12,p,bs,0002000173,Choice 1-1,Choice 2-2
x,13,p,bs,0002000174,Choice 1-2,Choice 2-3
x,15,p,bs,0002000175,Choice 1-1,Choice 2-3
"""


Expand Down Expand Up @@ -1743,6 +1743,7 @@ def test_ess_cvr_upload_cvr_file_with_tabulator_cvr_column(
election_id: str,
jurisdiction_ids: List[str],
ess_manifests, # pylint: disable=unused-argument
snapshot,
):
set_logged_in_user(client, UserType.AUDIT_ADMIN, DEFAULT_AA_EMAIL)
rv = client.get(f"/api/election/{election_id}/jurisdiction")
Expand Down Expand Up @@ -1825,6 +1826,29 @@ def test_ess_cvr_upload_cvr_file_with_tabulator_cvr_column(
},
)

cvr_ballots = (
CvrBallot.query.join(Batch)
.filter_by(jurisdiction_id=jurisdiction_ids[0])
.order_by(CvrBallot.imprinted_id)
.all()
)
assert len(cvr_ballots) == manifest_num_ballots - 1
snapshot.assert_match(
[
dict(
batch_name=cvr.batch.name,
tabulator=cvr.batch.tabulator,
ballot_position=cvr.ballot_position,
imprinted_id=cvr.imprinted_id,
interpretations=cvr.interpretations,
)
for cvr in cvr_ballots
]
)
snapshot.assert_match(
Jurisdiction.query.get(jurisdiction_ids[0]).cvr_contests_metadata
)


def build_hart_cvr(
batch_name: str,
Expand Down

0 comments on commit 5c4bb29

Please sign in to comment.