diff --git a/itou/companies/management/commands/import_geiq.py b/itou/companies/management/commands/import_geiq.py index 8c1811d1f5..a80e2bcb7f 100755 --- a/itou/companies/management/commands/import_geiq.py +++ b/itou/companies/management/commands/import_geiq.py @@ -34,6 +34,9 @@ def get_geiq_df(filename): } df = remap_columns(df, column_mapping=column_mapping) + # Force siret type to integer, otherwise replacing NaN elements to None blindly converts them to float. + df["siret"] = df["siret"].astype("Int64") + # Replace NaN elements with None. df = df.replace({np.nan: None}) diff --git a/tests/companies/test_management_command_import_geiq.py b/tests/companies/test_management_command_import_geiq.py index 93c85cc0fb..4e6e33a65f 100644 --- a/tests/companies/test_management_command_import_geiq.py +++ b/tests/companies/test_management_command_import_geiq.py @@ -132,6 +132,26 @@ def test_get_geiq_df(sftp_directory, faker): with pytest.raises(AssertionError): df, info_stats = get_geiq_df(file_path) + # Missing some sirets + rows = 185 + rows_with_empty_siret = 20 + rows_with_empty_email = 0 + data = generate_data( + rows=rows, rows_with_empty_siret=rows_with_empty_siret, rows_with_empty_email=rows_with_empty_email + ) + file_path = sftp_directory.joinpath(faker.geiq_filename()) + with open(file_path, "wb") as xlsxfile: + workbook = generate_excel_sheet(FILE_HEADERS, data) + workbook.save(xlsxfile) + df, info_stats = get_geiq_df(file_path) + assert df.shape == (rows - rows_with_empty_siret, 8) + assert info_stats == { + "rows_in_file": rows, + "rows_with_a_siret": rows - rows_with_empty_siret, + "rows_after_deduplication": rows - rows_with_empty_siret, + "rows_with_empty_email": 0, + } + # Duplicated rows rows = 250 rows_with_empty_siret = 0