Skip to content

Commit

Permalink
Added logic to validate finding reference uniqueness (#4283)
Browse files Browse the repository at this point in the history
* Added logic to validate finding reference uniqueness

* #3948 Moved invalid historic record to the 'invalid historic records' section to ensure it passes with the migration flag

* #3948 Improved error message and added invalid test workbook
  • Loading branch information
sambodeme authored Sep 16, 2024
1 parent b9e318f commit bc3cbb0
Show file tree
Hide file tree
Showing 14 changed files with 180 additions and 1 deletion.
Binary file not shown.
67 changes: 67 additions & 0 deletions backend/audit/intakelib/checks/check_finding_uniqueness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from ..common.util import build_cell_error_tuple, get_message, get_range_start_row
from ..intermediate_representation import get_range_by_name, get_range_values_by_name


def _format_finding_values(values):
    """Render one row of finding values as '|a | b | c|' for the error message."""
    # Cell values are not guaranteed to be strings; str.join raises TypeError
    # on anything else, so stringify first. Blank cells (None) are shown empty.
    return "|" + " | ".join("" if v is None else str(v) for v in values) + "|"


def check_finding_uniqueness(ir, is_gsa_migration=False):
    """
    Check the uniqueness of finding associated with the same finding reference number.

    Every row that reuses a finding reference number must carry the same finding
    details as the first row that used that reference. Each later row that
    differs from that first row yields one error.

    Args:
        ir: intermediate representation of the workbook, passed through to the
            range lookup helpers.
        is_gsa_migration: when True the check is skipped entirely and an empty
            list is returned.

    Returns:
        A list of error tuples produced by build_cell_error_tuple; empty when
        no conflicting rows are found.
    """
    # Historic (migrated) records are exempt, so skip before doing any lookups.
    if is_gsa_migration:
        return []

    # "reference_number" must stay first: it is the grouping key below. The
    # order of the remaining names fixes the order shown in the error message.
    range_names = (
        "reference_number",
        "compliance_requirement",
        "modified_opinion",
        "other_matters",
        "material_weakness",
        "significant_deficiency",
        "other_findings",
        "questioned_costs",
        "repeat_prior_reference",
        "prior_references",
    )
    columns = [get_range_values_by_name(ir, name) for name in range_names]

    reference_number_range = get_range_by_name(ir, "reference_number")
    range_start = int(get_range_start_row(reference_number_range))

    errors = []
    # finding reference -> (index of first row using it, that row's values)
    first_seen = {}

    # NOTE(review): zip() stops at the shortest range, so rows beyond it are
    # never compared -- confirm all ranges are expected to have equal length.
    for ndx, finding_set in enumerate(zip(*columns)):
        reference = finding_set[0]

        if reference not in first_seen:
            first_seen[reference] = (ndx, finding_set)
            continue

        previous_row, previous_set = first_seen[reference]
        if previous_set == finding_set:
            continue

        # Row numbers in the message are offset by the start row of the
        # reference_number range.
        message = get_message("check_finding_uniqueness").format(
            range_start + previous_row,
            _format_finding_values(previous_set),
            range_start + ndx,
            _format_finding_values(finding_set),
        )
        errors.append(
            build_cell_error_tuple(ir, reference_number_range, ndx, message)
        )

    return errors
8 changes: 7 additions & 1 deletion backend/audit/intakelib/checks/runners.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from django.core.exceptions import ValidationError
import logging

from .check_finding_uniqueness import check_finding_uniqueness
from census_historical_migration.invalid_record import InvalidRecord

from .check_finding_award_references_pattern import award_references_pattern
Expand Down Expand Up @@ -112,6 +113,7 @@
finding_reference_pattern,
no_repeat_findings,
findings_grid_validation,
check_finding_uniqueness,
]

additional_eins_checks = general_checks + [
Expand Down Expand Up @@ -153,7 +155,11 @@
"federal_program_total_is_correct": federal_program_total_is_correct,
}

require_gsa_migration_flag = [findings_grid_validation, finding_reference_pattern]
require_gsa_migration_flag = [
findings_grid_validation,
finding_reference_pattern,
check_finding_uniqueness,
]


def run_all_checks(
Expand Down
1 change: 1 addition & 0 deletions backend/audit/intakelib/common/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,5 @@
"check_cluster_names": "Invalid cluster name",
"check_award_references_len_4_or_5": "Award references must all follow the pattern AWARD-#### or AWARD-#####; {} does not fit either",
"check_max_rows": "The number of rows in the {} named range exceeds the maximum allowed for this version of the workbook",
"check_finding_uniqueness": "On row {}, you reported {}, and on row {}, you reported {}. The FAC cannot accept one finding reference with different finding details",
}
105 changes: 105 additions & 0 deletions backend/audit/test_check_finding_uniqueness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from django.test import SimpleTestCase

from audit.intakelib.checks.check_finding_uniqueness import check_finding_uniqueness


class TestCheckFindingUniqueness(SimpleTestCase):
    """Tests for check_finding_uniqueness using a small hand-built IR."""

    def setUp(self):
        # One entry per named range: (range name, end row, value prefix).
        # Every range holds three rows; the first two rows are identical so
        # the repeated reference "FR1" is consistent by default.
        range_specs = [
            ("reference_number", "20001", "FR"),
            ("compliance_requirement", "2", "CR"),
            ("modified_opinion", "2", "MO"),
            ("other_matters", "20001", "OM"),
            ("material_weakness", "2", "MW"),
            ("significant_deficiency", "20001", "SD"),
            ("other_findings", "2", "OF"),
            ("questioned_costs", "20001", "QC"),
            ("repeat_prior_reference", "2", "RR"),
            ("prior_references", "20001", "PR"),
        ]
        self.ir = [
            {
                "name": "Form",
                "ranges": [
                    {
                        "name": range_name,
                        "start_cell": {"column": "A", "row": "2"},
                        "end_cell": {"column": "A", "row": end_row},
                        "values": [f"{prefix}1", f"{prefix}1", f"{prefix}2"],
                    }
                    for range_name, end_row, prefix in range_specs
                ],
            }
        ]

    def test_unique_finding_success(self):
        """
        Rows sharing a reference carry identical details, so no errors are expected.
        """
        self.assertEqual(check_finding_uniqueness(self.ir), [])

    def test_duplicate_finding_reference(self):
        """
        A reference reused with different finding details produces exactly one error.
        """
        # Make the second FR1 row disagree with the first in `other_findings`.
        other_findings = next(
            rng for rng in self.ir[0]["ranges"] if rng["name"] == "other_findings"
        )
        other_findings["values"] = ["OF1", "OF2", "OF2"]

        errors = check_finding_uniqueness(self.ir)

        # The first two data rows (indices 0 and 1) share FR1 but now differ.
        self.assertEqual(len(errors), 1)
        self.assertIn("On row", errors[0][3]["text"])

    def test_gsa_migration(self):
        """
        With is_gsa_migration=True the check is skipped and returns no errors.
        """
        self.assertEqual(
            check_finding_uniqueness(self.ir, is_gsa_migration=True), []
        )

0 comments on commit bc3cbb0

Please sign in to comment.