Skip to content

Commit

Permalink
wip: tests for the get_geiq_df() function
Browse files Browse the repository at this point in the history
  • Loading branch information
EwenKorr committed Dec 20, 2024
1 parent 013fd43 commit f367586
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 1 deletion.
2 changes: 1 addition & 1 deletion itou/companies/management/commands/import_geiq.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def get_geiq_df(filename):
df["address_line_2"] = df.address_line_2.apply(clean_string)
df["post_code"] = df.post_code.apply(clean_string)
df["city"] = df.city.apply(clean_string)
df["siret"] = df.siret.apply(clean_string_siret)
df["siret"] = df.siret.apply(clean_string)
df["auth_email"] = df.auth_email.apply(clean_string)

# "GEIQ PROVENCE" becomes "Geiq Provence".
Expand Down
5 changes: 5 additions & 0 deletions itou/utils/faker_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import random

from django.contrib.gis.geos import Point
from django.utils import timezone
from faker.providers import BaseProvider


Expand All @@ -13,6 +14,10 @@ def asp_ea2_filename(self, date: datetime.date = None) -> str:
date_part = random.randint(0, 99999999) if date is None else date.strftime("%Y%m%d")
return f"FLUX_EA2_ITOU_{date_part}.zip"

def geiq_filename(self, date: datetime.date = None) -> str:
    """Return a fake FFGEIQ export file name for the given date.

    Args:
        date: Day embedded in the file name. When omitted, the current
            local date is used.

    Returns:
        A name shaped like ``"YYYY-MM-DD - Export BDD FFGEIQ.xls"``.
    """
    # Resolve the default at call time: a default written in the signature is
    # evaluated only once, when the function is defined, which would freeze
    # the date for the whole life of the process.
    if date is None:
        date = timezone.localdate()
    date_part = date.strftime("%Y-%m-%d")
    return f"{date_part} - Export BDD FFGEIQ.xls"

def geopoint(self) -> Point:
return Point(
[float(coord) for coord in self.generator.format("local_latlng", country_code="FR", coords_only=True)]
Expand Down
146 changes: 146 additions & 0 deletions tests/companies/test_management_command_import_geiq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import pytest
from faker import Faker

from itou.companies.management.commands.import_geiq import get_geiq_df
from itou.utils.export import generate_excel_sheet
from tests.utils.test import create_fake_postcode


# Module-level Faker instance used by generate_data(). Note that
# test_get_geiq_df() takes a parameter also named `faker`, which shadows this
# one inside the test body.
faker = Faker()

# Header row written to the generated spreadsheet; each row built by
# generate_data() lists its values in this same column order.
FILE_HEADERS = ["Nom", "Rue", "Rue (suite)", "Code Postal", "Ville", "SIRET", "e-mail"]


def generate_data(rows=185, rows_with_empty_siret=0, rows_with_empty_email=0):
    """Build fake spreadsheet rows matching the FILE_HEADERS column order.

    Args:
        rows: Total number of rows to produce.
        rows_with_empty_siret: How many of the leading rows get an empty SIRET.
        rows_with_empty_email: How many of the leading rows get an empty e-mail.

    Returns:
        A list of ``rows`` lists, each holding name, street, second address
        line, post code, city, SIRET and e-mail, in that order.
    """
    records = []
    for index in range(rows):
        # Blank values always land on the first rows of the data set.
        siret = "" if index < rows_with_empty_siret else faker.numerify("1#############")
        email = "" if index < rows_with_empty_email else faker.email()

        record = [
            faker.name(),
            faker.street_address(),
            "Sous l'escalier",
            create_fake_postcode(),
            faker.city(),
            siret,
            email,
        ]
        records.append(record)
    return records


def _write_geiq_file(sftp_directory, faker, rows, rows_with_empty_siret=0, rows_with_empty_email=0):
    """Write a fake FFGEIQ export into ``sftp_directory`` and return its path."""
    data = generate_data(
        rows=rows, rows_with_empty_siret=rows_with_empty_siret, rows_with_empty_email=rows_with_empty_email
    )
    file_path = sftp_directory.joinpath(faker.geiq_filename())
    with open(file_path, "wb") as xlsxfile:
        workbook = generate_excel_sheet(FILE_HEADERS, data)
        workbook.save(xlsxfile)
    return file_path


def test_get_geiq_df(sftp_directory, faker):
    """Exercise get_geiq_df() against generated files of varying quality.

    Each scenario writes a fresh export file, then checks either the returned
    dataframe shape and statistics, or that the import refuses the file with
    an AssertionError.
    """
    # Correct data
    rows = 185
    file_path = _write_geiq_file(sftp_directory, faker, rows)
    df, info_stats = get_geiq_df(file_path)
    assert df.shape == (rows, 8)
    assert info_stats == {
        "rows_in_file": rows,
        "rows_with_a_siret": rows,
        "rows_after_deduplication": rows,
        "rows_with_empty_email": 0,
    }

    # File too small, need at least 150 rows
    file_path = _write_geiq_file(sftp_directory, faker, 140)
    with pytest.raises(AssertionError):
        get_geiq_df(file_path)

    # Too many missing emails
    file_path = _write_geiq_file(sftp_directory, faker, 185, rows_with_empty_email=100)
    with pytest.raises(AssertionError):
        get_geiq_df(file_path)

    # Some missing emails
    rows = 185
    rows_with_empty_email = 20
    file_path = _write_geiq_file(sftp_directory, faker, rows, rows_with_empty_email=rows_with_empty_email)
    df, info_stats = get_geiq_df(file_path)
    assert df.shape == (rows - rows_with_empty_email, 8)
    assert info_stats == {
        "rows_in_file": rows,
        "rows_with_a_siret": rows,
        "rows_after_deduplication": rows,
        "rows_with_empty_email": rows_with_empty_email,
    }

    # Too many missing sirets
    file_path = _write_geiq_file(sftp_directory, faker, 185, rows_with_empty_siret=100)
    with pytest.raises(AssertionError):
        get_geiq_df(file_path)

    # Some missing sirets
    rows = 185
    # Must stay below `rows`: a larger count blanks every SIRET and makes the
    # expected row count below negative, so the shape assertion could never hold.
    rows_with_empty_siret = 20
    file_path = _write_geiq_file(sftp_directory, faker, rows, rows_with_empty_siret=rows_with_empty_siret)
    df, info_stats = get_geiq_df(file_path)
    assert df.shape == (rows - rows_with_empty_siret, 8)
    assert info_stats == {
        "rows_in_file": rows,
        "rows_with_a_siret": rows - rows_with_empty_siret,
        # NOTE(review): presumably rows without a SIRET are dropped (or collapsed)
        # before this counter is computed, in which case `rows` is too high.
        # Confirm against get_geiq_df() before relying on this value.
        "rows_after_deduplication": rows,
        "rows_with_empty_email": 0,
    }

# TODO(ewen): duplicated rows

0 comments on commit f367586

Please sign in to comment.