From f367586ecb95ae6fee55d71ec712cb4dd522fc6b Mon Sep 17 00:00:00 2001
From: Ewen Corre
Date: Fri, 20 Dec 2024 15:25:13 +0100
Subject: [PATCH] wip: tests for the get_geiq_df() function

---
 .../management/commands/import_geiq.py        |   2 +-
 itou/utils/faker_providers.py                 |  10 ++
 .../test_management_command_import_geiq.py    | 108 ++++++++++++++++++
 3 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 tests/companies/test_management_command_import_geiq.py

diff --git a/itou/companies/management/commands/import_geiq.py b/itou/companies/management/commands/import_geiq.py
index 60c947f664..6be4f2e11a 100755
--- a/itou/companies/management/commands/import_geiq.py
+++ b/itou/companies/management/commands/import_geiq.py
@@ -53,7 +53,7 @@ def get_geiq_df(filename):
     df["address_line_2"] = df.address_line_2.apply(clean_string)
     df["post_code"] = df.post_code.apply(clean_string)
     df["city"] = df.city.apply(clean_string)
-    df["siret"] = df.siret.apply(clean_string_siret)
+    df["siret"] = df.siret.apply(clean_string)
     df["auth_email"] = df.auth_email.apply(clean_string)
 
     # "GEIQ PROVENCE" becomes "Geiq Provence".
diff --git a/itou/utils/faker_providers.py b/itou/utils/faker_providers.py
index 7fa682ed76..6bd65d4b2f 100644
--- a/itou/utils/faker_providers.py
+++ b/itou/utils/faker_providers.py
@@ -2,6 +2,7 @@
 import random
 
 from django.contrib.gis.geos import Point
+from django.utils import timezone
 from faker.providers import BaseProvider
 
 
@@ -13,6 +14,15 @@ def asp_ea2_filename(self, date: datetime.date = None) -> str:
         date_part = random.randint(0, 99999999) if date is None else date.strftime("%Y%m%d")
         return f"FLUX_EA2_ITOU_{date_part}.zip"
 
+    def geiq_filename(self, date: datetime.date = None) -> str:
+        """Return a GEIQ export filename for `date` (the current local date when omitted)."""
+        # Resolve the default at call time: a default argument value is evaluated only once,
+        # when the class body is executed, which would freeze the date at import time.
+        if date is None:
+            date = timezone.localdate()
+        date_part = date.strftime("%Y-%m-%d")
+        return f"{date_part} - Export BDD FFGEIQ.xls"
+
     def geopoint(self) -> Point:
         return Point(
             [float(coord) for coord in self.generator.format("local_latlng", country_code="FR", coords_only=True)]
diff --git a/tests/companies/test_management_command_import_geiq.py b/tests/companies/test_management_command_import_geiq.py
new file mode 100644
index 0000000000..1a6e9a0efb
--- /dev/null
+++ b/tests/companies/test_management_command_import_geiq.py
@@ -0,0 +1,108 @@
+import pytest
+from faker import Faker
+
+from itou.companies.management.commands.import_geiq import get_geiq_df
+from itou.utils.export import generate_excel_sheet
+from tests.utils.test import create_fake_postcode
+
+
+faker = Faker()
+
+FILE_HEADERS = ["Nom", "Rue", "Rue (suite)", "Code Postal", "Ville", "SIRET", "e-mail"]
+
+
+def generate_data(rows=185, rows_with_empty_siret=0, rows_with_empty_email=0):
+    """Return `rows` fake GEIQ rows whose columns follow FILE_HEADERS.
+
+    The first `rows_with_empty_siret` rows have an empty SIRET and the first
+    `rows_with_empty_email` rows have an empty e-mail.
+    """
+    data = []
+    for index in range(rows):
+        siret = "" if index < rows_with_empty_siret else faker.numerify("1#############")
+        email = "" if index < rows_with_empty_email else faker.email()
+        data.append(
+            [
+                faker.name(),
+                faker.street_address(),
+                "Sous l'escalier",
+                create_fake_postcode(),
+                faker.city(),
+                siret,
+                email,
+            ]
+        )
+    return data
+
+
+def write_geiq_file(directory, filename, data):
+    """Save `data` under FILE_HEADERS as an Excel workbook in `directory` and return its path."""
+    file_path = directory.joinpath(filename)
+    with open(file_path, "wb") as xlsxfile:
+        workbook = generate_excel_sheet(FILE_HEADERS, data)
+        workbook.save(xlsxfile)
+    return file_path
+
+
+def test_get_geiq_df(sftp_directory, faker):
+    """Exercise get_geiq_df() with a valid file, rejected files and partially filled files."""
+    # Correct data
+    rows = 185
+    file_path = write_geiq_file(sftp_directory, faker.geiq_filename(), generate_data(rows=rows))
+    df, info_stats = get_geiq_df(file_path)
+    assert df.shape == (rows, 8)
+    assert info_stats == {
+        "rows_in_file": rows,
+        "rows_with_a_siret": rows,
+        "rows_after_deduplication": rows,
+        "rows_with_empty_email": 0,
+    }
+
+    # File too small, need at least 150 rows
+    file_path = write_geiq_file(sftp_directory, faker.geiq_filename(), generate_data(rows=140))
+    with pytest.raises(AssertionError):
+        get_geiq_df(file_path)
+
+    # Too many missing emails
+    data = generate_data(rows=185, rows_with_empty_email=100)
+    file_path = write_geiq_file(sftp_directory, faker.geiq_filename(), data)
+    with pytest.raises(AssertionError):
+        get_geiq_df(file_path)
+
+    # Some missing emails
+    rows = 185
+    rows_with_empty_email = 20
+    data = generate_data(rows=rows, rows_with_empty_email=rows_with_empty_email)
+    file_path = write_geiq_file(sftp_directory, faker.geiq_filename(), data)
+    df, info_stats = get_geiq_df(file_path)
+    assert df.shape == (rows - rows_with_empty_email, 8)
+    assert info_stats == {
+        "rows_in_file": rows,
+        "rows_with_a_siret": rows,
+        "rows_after_deduplication": rows,
+        "rows_with_empty_email": rows_with_empty_email,
+    }
+
+    # Too many missing sirets
+    data = generate_data(rows=185, rows_with_empty_siret=100)
+    file_path = write_geiq_file(sftp_directory, faker.geiq_filename(), data)
+    with pytest.raises(AssertionError):
+        get_geiq_df(file_path)
+
+    # Some missing sirets
+    rows = 185
+    rows_with_empty_siret = 20
+    data = generate_data(rows=rows, rows_with_empty_siret=rows_with_empty_siret)
+    file_path = write_geiq_file(sftp_directory, faker.geiq_filename(), data)
+    df, info_stats = get_geiq_df(file_path)
+    assert df.shape == (rows - rows_with_empty_siret, 8)
+    # NOTE(review): "rows_after_deduplication" is kept as in the WIP draft; confirm against
+    # get_geiq_df() whether rows without a SIRET are still counted at that step.
+    assert info_stats == {
+        "rows_in_file": rows,
+        "rows_with_a_siret": rows - rows_with_empty_siret,
+        "rows_after_deduplication": rows,
+        "rows_with_empty_email": 0,
+    }
+
+    # TODO(ewen): duplicated rows