Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix create NA tickets for the same Individual (HardDocumentDeduplication) #4014

Merged
merged 15 commits into from
Jul 10, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,13 @@ def deduplicate(
new_document_signatures_duplicated_in_batch = [
d for d in new_document_signatures if new_document_signatures.count(d) > 1
]
# use this dict to skip ticket creation for the same Individual with the same doc number
ind_and_new_document_signatures_duplicated_in_batch_dict = defaultdict(list)
for d in documents_to_dedup:
ind_and_new_document_signatures_duplicated_in_batch_dict[str(d.individual_id)].append(
self._generate_signature(d)
)

# added order_by because the test was failing randomly
all_matching_number_documents = (
Document.objects.select_related("individual", "individual__household", "individual__business_area")
Expand Down Expand Up @@ -708,6 +715,13 @@ def deduplicate(

for new_document in documents_to_dedup:
new_document_signature = self._generate_signature(new_document)
# use this dict to skip ticket creation for the same Individual with the same doc number
is_duplicated_document_number_for_individual: bool = (
ind_and_new_document_signatures_duplicated_in_batch_dict.get(
str(new_document.individual_id), []
).count(new_document_signature)
> 1
)

if new_document_signature in all_matching_number_documents_signatures:
new_document.status = Document.STATUS_NEED_INVESTIGATION
Expand All @@ -726,6 +740,7 @@ def deduplicate(
if (
new_document_signature in new_document_signatures_duplicated_in_batch
and new_document_signature in already_processed_signatures
and not is_duplicated_document_number_for_individual
):
new_document.status = Document.STATUS_NEED_INVESTIGATION
ticket_data_dict[new_document_signature]["possible_duplicates"].append(new_document)
Expand All @@ -735,7 +750,10 @@ def deduplicate(
new_document.status = Document.STATUS_VALID
already_processed_signatures.append(new_document_signature)

if new_document_signature in new_document_signatures_duplicated_in_batch:
if (
new_document_signature in new_document_signatures_duplicated_in_batch
and not is_duplicated_document_number_for_individual
):
ticket_data_dict[new_document_signature] = {
"original": new_document,
"possible_duplicates": [],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,8 @@
HardDocumentDeduplication,
)
from hct_mis_api.apps.utils.models import MergeStatusModel
from hct_mis_api.conftest import disabled_locally_test


@disabled_locally_test
class TestGoldenRecordDeduplication(BaseElasticSearchTestCase):
databases = "__all__"
fixtures = (f"{settings.PROJECT_ROOT}/apps/geo/fixtures/data.json",)
Expand Down Expand Up @@ -131,80 +129,80 @@ def setUpTestData(cls) -> None:
},
],
)
country = geo_models.Country.objects.get(iso_code2="PL")
dt = DocumentTypeFactory(label="national_id", key="national_id", valid_for_deduplication=False)
dt_tax_id = DocumentTypeFactory(label="tax_id", key="tax_id", valid_for_deduplication=False)
cls.country = geo_models.Country.objects.get(iso_code2="PL")
cls.dt = DocumentTypeFactory(label="national_id", key="national_id", valid_for_deduplication=False)
cls.dt_tax_id = DocumentTypeFactory(label="tax_id", key="tax_id", valid_for_deduplication=False)

cls.document1 = Document.objects.create(
country=country,
type=dt,
country=cls.country,
type=cls.dt,
document_number="ASD123",
individual=cls.individuals[0],
status=Document.STATUS_VALID,
program=cls.individuals[0].program,
rdi_merge_status=MergeStatusModel.MERGED,
)
cls.document2 = Document.objects.create(
type=dt,
type=cls.dt,
document_number="ASD123",
individual=cls.individuals[1],
country=country,
country=cls.country,
program=cls.individuals[1].program,
rdi_merge_status=MergeStatusModel.MERGED,
)
cls.document3 = Document.objects.create(
type=dt,
type=cls.dt,
document_number="BBC999",
individual=cls.individuals[2],
country=country,
country=cls.country,
program=cls.individuals[2].program,
rdi_merge_status=MergeStatusModel.MERGED,
)
cls.document4 = Document.objects.create(
type=dt,
type=cls.dt,
document_number="ASD123",
individual=cls.individuals[3],
country=country,
country=cls.country,
program=cls.individuals[3].program,
rdi_merge_status=MergeStatusModel.MERGED,
)
cls.document5 = Document.objects.create(
country=country,
type=dt,
country=cls.country,
type=cls.dt,
document_number="TOTALY UNIQ",
individual=cls.individuals[4],
status=Document.STATUS_VALID,
program=cls.individuals[4].program,
rdi_merge_status=MergeStatusModel.MERGED,
)
cls.document6 = Document.objects.create(
country=country,
type=dt_tax_id,
country=cls.country,
type=cls.dt_tax_id,
document_number="ASD123",
individual=cls.individuals[2],
status=Document.STATUS_VALID,
program=cls.individuals[2].program,
rdi_merge_status=MergeStatusModel.MERGED,
)
cls.document7 = Document.objects.create(
country=country,
type=dt,
country=cls.country,
type=cls.dt,
document_number="ASD123",
individual=cls.individuals[1],
program=cls.individuals[1].program,
rdi_merge_status=MergeStatusModel.MERGED,
)
cls.document8 = Document.objects.create(
country=country,
type=dt,
country=cls.country,
type=cls.dt,
document_number="ASD123",
individual=cls.individuals[4],
program=cls.individuals[4].program,
rdi_merge_status=MergeStatusModel.MERGED,
)
cls.document9 = Document.objects.create(
country=country,
type=dt,
country=cls.country,
type=cls.dt,
document_number="UNIQ",
individual=cls.individuals[5],
program=cls.individuals[5].program,
Expand Down Expand Up @@ -325,11 +323,10 @@ def test_ticket_created_correctly(self) -> None:
self.assertEqual(grievance_ticket.programs.first().id, self.program.id)

def test_valid_for_deduplication_doc_type(self) -> None:
pl = geo_models.Country.objects.get(iso_code2="PL")
dt_tax_id = DocumentType.objects.get(key="tax_id")
dt_national_id = DocumentType.objects.get(key="national_id")
Document.objects.create(
country=pl,
country=self.country,
type=dt_tax_id,
document_number="TAX_ID_DOC_123",
individual=self.individuals[2],
Expand All @@ -338,15 +335,15 @@ def test_valid_for_deduplication_doc_type(self) -> None:
rdi_merge_status=MergeStatusModel.MERGED,
)
doc_national_id_1 = Document.objects.create(
country=pl,
country=self.country,
type=dt_national_id,
document_number="TAX_ID_DOC_123", # the same doc number
individual=self.individuals[2],
program=self.program,
rdi_merge_status=MergeStatusModel.MERGED,
)
doc_national_id_2 = Document.objects.create(
country=pl,
country=self.country,
type=dt_national_id,
document_number="TAX_ID_DOC_123", # the same doc number
individual=self.individuals[2],
Expand Down Expand Up @@ -427,3 +424,93 @@ def test_hard_documents_deduplication_for_the_diff_program(self) -> None:
HardDocumentDeduplication().deduplicate(self.get_documents_query([new_document_from_other_program]))
new_document_from_other_program.refresh_from_db()
self.assertEqual(new_document_from_other_program.status, Document.STATUS_VALID)

def test_ticket_creation_for_the_same_ind_doc_numbers(self) -> None:
    """Deduplicate a batch where one Individual holds the same doc number twice.

    The batch contains two documents of different types that share the
    Individual, country and document number, plus several other documents
    whose numbers repeat across Individuals. The test expects three
    grievance tickets to be created and both same-Individual documents to
    end up with the valid status.
    """

    def create_document(
        doc_type: DocumentType, number: str, individual_idx: int, **extra: object
    ) -> Document:
        # Every document in this test shares country, program and merge status;
        # only type, number and owning Individual (plus optional extras) vary.
        return Document.objects.create(
            country=self.country,
            type=doc_type,
            document_number=number,
            individual=self.individuals[individual_idx],
            program=self.program,
            rdi_merge_status=MergeStatusModel.MERGED,
            **extra,
        )

    # Same Individual, same country, same doc number - only the type differs.
    passport = create_document(self.dt, "123444444", 2)
    tax_id = create_document(self.dt_tax_id, "123444444", 2)

    d1 = create_document(self.dt, "123321321", 2)
    # The remaining documents were added to cover more deduplication branches.
    # This one is not part of the deduplicated batch and is already valid.
    create_document(self.dt, "123321321", 1, status=Document.STATUS_VALID)
    d2 = create_document(self.dt, "222", 3)
    d3 = create_document(self.dt_tax_id, "222", 4)
    d4 = create_document(self.dt, "111", 0)
    d5 = create_document(self.dt_tax_id, "111", 1)
    d6 = create_document(DocumentTypeFactory(label="other_type", key="other_type"), "111", 2)

    # No tickets may exist before deduplication runs.
    self.assertEqual(GrievanceTicket.objects.all().count(), 0)

    batch = [passport, tax_id, d1, d2, d3, d4, d5, d6]
    HardDocumentDeduplication().deduplicate(
        self.get_documents_query(batch),
        self.registration_data_import,
    )

    self.assertEqual(GrievanceTicket.objects.all().count(), 3)

    # Both same-Individual documents must be valid after deduplication.
    for same_individual_document in (passport, tax_id):
        same_individual_document.refresh_from_db()
        self.assertEqual(same_individual_document.status, Document.STATUS_VALID)
14 changes: 0 additions & 14 deletions frontend/data/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -2021,7 +2021,6 @@ type ImportedDocumentNode implements Node {
program: ProgramNode
isMigrationHandled: Boolean!
copiedFrom: DocumentNode
copiedTo(offset: Int, before: String, after: String, first: Int, last: Int): DocumentNodeConnection!
}

type ImportedDocumentNodeConnection {
Expand Down Expand Up @@ -3833,7 +3832,6 @@ type Query {
importedHousehold(id: ID!): ImportedHouseholdNode
allImportedHouseholds(offset: Int, before: String, after: String, first: Int, last: Int, rdiId: String, businessArea: String, orderBy: String): ImportedHouseholdNodeConnection
registrationDataImportDatahub(id: ID!): RegistrationDataImportDatahubNode
allRegistrationDataImportsDatahub(offset: Int, before: String, after: String, first: Int, last: Int): RegistrationDataImportDatahubNodeConnection
importedIndividual(id: ID!): ImportedIndividualNode
allImportedIndividuals(offset: Int, before: String, after: String, first: Int, last: Int, household: ID, rdiId: String, duplicatesOnly: Boolean, businessArea: String, orderBy: String): ImportedIndividualNodeConnection
importData(id: ID!): ImportDataNode
Expand Down Expand Up @@ -3965,18 +3963,6 @@ type RegistrationDataImportDatahubNode implements Node {
businessAreaSlug: String!
}

type RegistrationDataImportDatahubNodeConnection {
pageInfo: PageInfo!
edges: [RegistrationDataImportDatahubNodeEdge]!
totalCount: Int
edgeCount: Int
}

type RegistrationDataImportDatahubNodeEdge {
node: RegistrationDataImportDatahubNode
cursor: String!
}

type RegistrationDataImportNode implements Node {
id: ID!
createdAt: DateTime!
Expand Down
Loading
Loading