Added logic to validate finding reference uniqueness (#4283)

* Added logic to validate finding reference uniqueness
* #3948 Moved invalid historic record to the 'invalid historic records' section to ensure it passes with the migration flag
* #3948 Improved error message and added invalid test workbook
GSA-TTS · Sep 16, 2024 · bc3cbb0 · bc3cbb0
1 parent b9e318f
commit bc3cbb0
Show file tree

Hide file tree

Showing 14 changed files with 180 additions and 1 deletion.
diff --git a/...d_fail/audit-findings/has_duplicate_finding_reference_with_different_finding_details.xlsx b/...d_fail/audit-findings/has_duplicate_finding_reference_with_different_finding_details.xlsx
diff --git a/backend/audit/intakelib/checks/check_finding_uniqueness.py b/backend/audit/intakelib/checks/check_finding_uniqueness.py
@@ -0,0 +1,67 @@
+from ..common.util import build_cell_error_tuple, get_message, get_range_start_row
+from ..intermediate_representation import get_range_by_name, get_range_values_by_name
+
+
+def check_finding_uniqueness(ir, is_gsa_migration=False):
+    """
+    Check the uniqueness of finding associated with the same finding reference number.
+    """
+
+    errors = []
+    findings_by_reference = {}
+
+    if is_gsa_migration:
+        return errors
+
+    modified_opinion = get_range_values_by_name(ir, "modified_opinion")
+    other_matters = get_range_values_by_name(ir, "other_matters")
+    material_weakness = get_range_values_by_name(ir, "material_weakness")
+    significant_deficiency = get_range_values_by_name(ir, "significant_deficiency")
+    other_findings = get_range_values_by_name(ir, "other_findings")
+    finding_references = get_range_values_by_name(ir, "reference_number")
+    compliance_requirements = get_range_values_by_name(ir, "compliance_requirement")
+    questioned_costs = get_range_values_by_name(ir, "questioned_costs")
+    repeat_prior_reference = get_range_values_by_name(ir, "repeat_prior_reference")
+    prior_references = get_range_values_by_name(ir, "prior_references")
+
+    reference_number_range = get_range_by_name(ir, "reference_number")
+    range_start = int(get_range_start_row(reference_number_range))
+    # Iterate through the data rows
+    for ndx, (fr, cr, mo, om, mw, sd, of, qc, rr, pr) in enumerate(
+        zip(
+            finding_references,
+            compliance_requirements,
+            modified_opinion,
+            other_matters,
+            material_weakness,
+            significant_deficiency,
+            other_findings,
+            questioned_costs,
+            repeat_prior_reference,
+            prior_references,
+        )
+    ):
+
+        finding_set = (fr, cr, mo, om, mw, sd, of, qc, rr, pr)
+
+        if fr in findings_by_reference:
+            if findings_by_reference[fr]["values"] != finding_set:
+                previous_row = findings_by_reference[fr]["row"]
+                current_row = ndx
+                errors.append(
+                    build_cell_error_tuple(
+                        ir,
+                        reference_number_range,
+                        ndx,
+                        get_message("check_finding_uniqueness").format(
+                            range_start + previous_row,
+                            f'|{" | ".join(findings_by_reference[fr]["values"])}|',
+                            range_start + current_row,
+                            f'|{" | ".join(finding_set)}|',
+                        ),
+                    )
+                )
+        else:
+            findings_by_reference[fr] = {"values": finding_set, "row": ndx}
+
+    return errors
diff --git a/backend/audit/intakelib/checks/runners.py b/backend/audit/intakelib/checks/runners.py
@@ -1,6 +1,7 @@
 from django.core.exceptions import ValidationError
 import logging
 
+from .check_finding_uniqueness import check_finding_uniqueness
 from census_historical_migration.invalid_record import InvalidRecord
 
 from .check_finding_award_references_pattern import award_references_pattern
@@ -112,6 +113,7 @@
     finding_reference_pattern,
     no_repeat_findings,
     findings_grid_validation,
+    check_finding_uniqueness,
 ]
 
 additional_eins_checks = general_checks + [
@@ -153,7 +155,11 @@
     "federal_program_total_is_correct": federal_program_total_is_correct,
 }
 
-require_gsa_migration_flag = [findings_grid_validation, finding_reference_pattern]
+require_gsa_migration_flag = [
+    findings_grid_validation,
+    finding_reference_pattern,
+    check_finding_uniqueness,
+]
 
 
 def run_all_checks(

diff --git a/backend/audit/intakelib/common/error_messages.py b/backend/audit/intakelib/common/error_messages.py
@@ -85,4 +85,5 @@
     "check_cluster_names": "Invalid cluster name",
     "check_award_references_len_4_or_5": "Award references must all follow the pattern AWARD-#### or AWARD-#####; {} does not fit either",
     "check_max_rows": "The number of rows in the {} named range exceeds the maximum allowed for this version of the workbook",
+    "check_finding_uniqueness": "On row {}, you reported {}, and on row {}, you reported {}. The FAC cannot accept one finding reference with different finding details",
 }
diff --git a/backend/audit/test_check_finding_uniqueness.py b/backend/audit/test_check_finding_uniqueness.py
@@ -0,0 +1,105 @@
+from django.test import SimpleTestCase
+
+from audit.intakelib.checks.check_finding_uniqueness import check_finding_uniqueness
+
+
+class TestCheckFindingUniqueness(SimpleTestCase):
+    def setUp(self):
+        self.ir = [
+            {
+                "name": "Form",
+                "ranges": [
+                    {
+                        "name": "reference_number",
+                        "start_cell": {"column": "A", "row": "2"},
+                        "end_cell": {"column": "A", "row": "20001"},
+                        "values": ["FR1", "FR1", "FR2"],
+                    },
+                    {
+                        "name": "compliance_requirement",
+                        "start_cell": {"column": "A", "row": "2"},
+                        "end_cell": {"column": "A", "row": "2"},
+                        "values": ["CR1", "CR1", "CR2"],
+                    },
+                    {
+                        "name": "modified_opinion",
+                        "start_cell": {"column": "A", "row": "2"},
+                        "end_cell": {"column": "A", "row": "2"},
+                        "values": ["MO1", "MO1", "MO2"],
+                    },
+                    {
+                        "name": "other_matters",
+                        "start_cell": {"column": "A", "row": "2"},
+                        "end_cell": {"column": "A", "row": "20001"},
+                        "values": ["OM1", "OM1", "OM2"],
+                    },
+                    {
+                        "name": "material_weakness",
+                        "start_cell": {"column": "A", "row": "2"},
+                        "end_cell": {"column": "A", "row": "2"},
+                        "values": ["MW1", "MW1", "MW2"],
+                    },
+                    {
+                        "name": "significant_deficiency",
+                        "start_cell": {"column": "A", "row": "2"},
+                        "end_cell": {"column": "A", "row": "20001"},
+                        "values": ["SD1", "SD1", "SD2"],
+                    },
+                    {
+                        "name": "other_findings",
+                        "start_cell": {"column": "A", "row": "2"},
+                        "end_cell": {"column": "A", "row": "2"},
+                        "values": ["OF1", "OF1", "OF2"],
+                    },
+                    {
+                        "name": "questioned_costs",
+                        "start_cell": {"column": "A", "row": "2"},
+                        "end_cell": {"column": "A", "row": "20001"},
+                        "values": ["QC1", "QC1", "QC2"],
+                    },
+                    {
+                        "name": "repeat_prior_reference",
+                        "start_cell": {"column": "A", "row": "2"},
+                        "end_cell": {"column": "A", "row": "2"},
+                        "values": ["RR1", "RR1", "RR2"],
+                    },
+                    {
+                        "name": "prior_references",
+                        "start_cell": {"column": "A", "row": "2"},
+                        "end_cell": {"column": "A", "row": "20001"},
+                        "values": ["PR1", "PR1", "PR2"],
+                    },
+                ],
+            }
+        ]
+
+    def test_unique_finding_success(self):
+        """
+        Test case where all findings are unique for each reference.
+        """
+        errors = check_finding_uniqueness(self.ir)
+        self.assertEqual(errors, [])
+
+    def test_duplicate_finding_reference(self):
+        """
+        Test case where a finding reference has multiple different findings associated with it.
+        """
+        # Modify the finding sets to simulate a mismatch for FR1
+        self.ir[0]["ranges"][6]["values"] = [
+            "OF1",
+            "OF2",
+            "OF2",
+        ]  # Change in `other_findings`
+
+        errors = check_finding_uniqueness(self.ir)
+
+        # Expect an error for FR1 due to different findings in rows 0 and 1
+        self.assertEqual(len(errors), 1)
+        self.assertIn("On row", errors[0][3]["text"])
+
+    def test_gsa_migration(self):
+        """
+        Test case where is_gsa_migration is True and no errors should be returned.
+        """
+        errors = check_finding_uniqueness(self.ir, is_gsa_migration=True)
+        self.assertEqual(errors, [])
diff --git a/...ass/191734-22/additional-eins-191734.xlsx → ...ass/191734-22/additional-eins-191734.xlsx b/...ass/191734-22/additional-eins-191734.xlsx → ...ass/191734-22/additional-eins-191734.xlsx
diff --git a/...ass/191734-22/additional-ueis-191734.xlsx → ...ass/191734-22/additional-ueis-191734.xlsx b/...ass/191734-22/additional-ueis-191734.xlsx → ...ass/191734-22/additional-ueis-191734.xlsx
diff --git a/...pass/191734-22/audit-findings-191734.xlsx → ...pass/191734-22/audit-findings-191734.xlsx b/...pass/191734-22/audit-findings-191734.xlsx → ...pass/191734-22/audit-findings-191734.xlsx
diff --git a/...191734-22/audit-findings-text-191734.xlsx → ...191734-22/audit-findings-text-191734.xlsx b/...191734-22/audit-findings-text-191734.xlsx → ...191734-22/audit-findings-text-191734.xlsx
diff --git a/...734-22/corrective-action-plan-191734.xlsx → ...734-22/corrective-action-plan-191734.xlsx b/...734-22/corrective-action-plan-191734.xlsx → ...734-22/corrective-action-plan-191734.xlsx
diff --git a/...pass/191734-22/federal-awards-191734.xlsx → ...pass/191734-22/federal-awards-191734.xlsx b/...pass/191734-22/federal-awards-191734.xlsx → ...pass/191734-22/federal-awards-191734.xlsx
diff --git a/..._pass/191734-22/notes-to-sefa-191734.xlsx → ..._pass/191734-22/notes-to-sefa-191734.xlsx b/..._pass/191734-22/notes-to-sefa-191734.xlsx → ..._pass/191734-22/notes-to-sefa-191734.xlsx
diff --git a/.../191734-22/secondary-auditors-191734.xlsx → .../191734-22/secondary-auditors-191734.xlsx b/.../191734-22/secondary-auditors-191734.xlsx → .../191734-22/secondary-auditors-191734.xlsx
diff --git a/...uld_pass/191734-22/test-array-191734.json → ...uld_pass/191734-22/test-array-191734.json b/...uld_pass/191734-22/test-array-191734.json → ...uld_pass/191734-22/test-array-191734.json