diff --git a/backend/geonature/core/imports/checks/sql/extra.py b/backend/geonature/core/imports/checks/sql/extra.py index 80959e5332..31a7977c54 100644 --- a/backend/geonature/core/imports/checks/sql/extra.py +++ b/backend/geonature/core/imports/checks/sql/extra.py @@ -640,7 +640,7 @@ def check_entity_data_consistency(imprt, entity, fields, grouping_field): select(hashedRows.c.grouping_col.label("grouping_col")) .group_by(hashedRows.c.grouping_col) .having(func.count(func.distinct(hashedRows.c.hashed)) > 1) - ) + ).cte() # note: rows are unidentified (None) instead of being marked as invalid (False) in order to avoid running checks report_erroneous_rows( diff --git a/backend/geonature/core/imports/routes/imports.py b/backend/geonature/core/imports/routes/imports.py index 12284a1e3f..6ba4343374 100644 --- a/backend/geonature/core/imports/routes/imports.py +++ b/backend/geonature/core/imports/routes/imports.py @@ -476,7 +476,7 @@ def preview_valid_data(scope, imprt): # Retrieve data for each entity from entries in the transient table which are related to the import transient_table = imprt.destination.get_transient_table() - entities = db.session.scalars( + entities: list[Entity] = db.session.scalars( select(Entity).filter_by(destination=imprt.destination).order_by(Entity.order) ).all() @@ -493,46 +493,54 @@ def preview_valid_data(scope, imprt): .all() ) columns = [{"prop": field.dest_column, "name": field.name_field} for field in fields] - columns_to_count_unique_entities = [ - transient_table.c[field.dest_column] for field in fields - ] - valid_data = db.session.execute( - select(*[transient_table.c[field.dest_field] for field in fields]) - .distinct() - .where( - transient_table.c.id_import == imprt.id_import, - transient_table.c[entity.validity_column] == True, - ) - .limit(100) - ).all() + id_field = ( + entity.unique_column.dest_field if entity.unique_column.dest_field in fields else None + ) + data_fields_query = [transient_table.c[field.dest_field] for field 
in fields] - n_valid_data = db.session.execute( - select(func.count(func.distinct(*columns_to_count_unique_entities))) - .select_from(transient_table) - .where( - transient_table.c.id_import == imprt.id_import, - transient_table.c[entity.validity_column] == True, - ) - ).scalar() + query = select(*data_fields_query).where( + transient_table.c.id_import == imprt.id_import, + transient_table.c[entity.validity_column] == True, + ) + valid_data = db.session.execute(query.limit(100)).all() - n_invalid_data = db.session.execute( - select(func.count(func.distinct(*columns_to_count_unique_entities))) - .select_from(transient_table) + def count_select(query_cte): + count_ = "*" + # if multiple entities and the entity has a unique column we base the count on the unique column + + if entity.unique_column and len(entities) > 1 and id_field: + count_ = func.distinct(query_cte.c[id_field]) + return count_ + + valid_data_cte = query.cte() + n_valid_data = db.session.scalar( + select(func.count(count_select(valid_data_cte))).select_from(valid_data_cte) + ) + + invalid_data_cte = ( + select(*data_fields_query) .where( transient_table.c.id_import == imprt.id_import, transient_table.c[entity.validity_column] == False, ) - ).scalar() + .cte() + ) + + n_invalid_data = db.session.scalar( + select(func.count(count_select(invalid_data_cte))).select_from(invalid_data_cte) + ) + data["entities"].append( { "entity": entity.as_dict(), "columns": columns, "valid_data": valid_data, "n_valid_data": n_valid_data, - "n_invalid_data": n_invalid_data, + "n_invalid_data": n_invalid_data, # NOTE: Not used in the frontend ...
} ) + return jsonify(data) diff --git a/backend/geonature/tests/imports/test_imports_occhab.py b/backend/geonature/tests/imports/test_imports_occhab.py index b0fe44064f..d2f6b79958 100644 --- a/backend/geonature/tests/imports/test_imports_occhab.py +++ b/backend/geonature/tests/imports/test_imports_occhab.py @@ -633,3 +633,27 @@ def test_bbox_computation_transient( ] ], } + + @pytest.mark.parametrize("import_file_name", ["valid_file.csv"]) + def test_preview_data(self, client, prepared_import): + valid_numbers = { + "station_valid": 7, + "station_invalid": 8, + "habitat_valid": 11, + "habitat_invalid": 23, + } + imprt = prepared_import + with logged_user(client, imprt.authors[0]): + response = client.get(url_for("import.preview_valid_data", import_id=imprt.id_import)) + assert response.status_code == 200 + data = response.json + + index_data_station = 0 if data["entities"][0]["entity"]["code"] == "station" else 1 + data_station = data["entities"][index_data_station] + data_habitat = data["entities"][0 if index_data_station == 1 else 1] + + assert data_station["n_valid_data"] == valid_numbers["station_valid"] + assert data_station["n_invalid_data"] == valid_numbers["station_invalid"] + + assert data_habitat["n_valid_data"] == valid_numbers["habitat_valid"] + assert data_habitat["n_invalid_data"] == valid_numbers["habitat_invalid"] diff --git a/contrib/gn_module_occhab/backend/gn_module_occhab/migrations/9c3e1f98361f_fix_typo.py b/contrib/gn_module_occhab/backend/gn_module_occhab/migrations/9c3e1f98361f_fix_typo.py new file mode 100644 index 0000000000..666fe068c6 --- /dev/null +++ b/contrib/gn_module_occhab/backend/gn_module_occhab/migrations/9c3e1f98361f_fix_typo.py @@ -0,0 +1,76 @@ +"""fix_typo + +Revision ID: 9c3e1f98361f +Revises: c1a6b0793360 +Create Date: 2025-01-20 16:09:12.490217 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = "9c3e1f98361f" +down_revision = "c1a6b0793360" +branch_labels = None +depends_on = None + +OLD_NAME_MAPPING = "Occhab" +NEW_NAME_MAPPING = "Occhab GeoNature" + + +def get_table(): + conn = op.get_bind() + metadata = sa.MetaData(bind=conn) + bib_fields = sa.Table("bib_fields", metadata, schema="gn_imports", autoload_with=op.get_bind()) + destinations = sa.Table( + "bib_destinations", metadata, schema="gn_imports", autoload_with=op.get_bind() + ) + t_mappings = sa.Table("t_mappings", metadata, schema="gn_imports", autoload_with=op.get_bind()) + return bib_fields, destinations, t_mappings + + +def get_id_dest_occhab(): + _, destinations, _ = get_table() + id_destination_occhab = ( + op.get_bind() + .execute(sa.select(destinations.c.id_destination).where(destinations.c.code == "occhab")) + .scalar() + ) + return id_destination_occhab + + +def upgrade(): + bib_fields, destinations, t_mappings = get_table() + op.execute( + sa.update(bib_fields) + .where( + bib_fields.c.name_field == "depth_max", + bib_fields.c.id_destination == get_id_dest_occhab(), + ) + .values(dest_field="depth_max") + ) + + op.execute( + sa.update(t_mappings) + .where(t_mappings.c.label == OLD_NAME_MAPPING) + .values(label=NEW_NAME_MAPPING) + ) + + +def downgrade(): + bib_fields, _, t_mappings = get_table() + op.execute( + sa.update(bib_fields) + .where( + bib_fields.c.name_field == "depth_max", + bib_fields.c.id_destination == get_id_dest_occhab(), + ) + .values(dest_field="depth_min") + ) + op.execute( + sa.update(t_mappings) + .where(t_mappings.c.label == NEW_NAME_MAPPING) + .values(label=OLD_NAME_MAPPING) + ) diff --git a/frontend/src/app/modules/imports/components/import_process/import-step/import-step.component.html b/frontend/src/app/modules/imports/components/import_process/import-step/import-step.component.html index 51a29f10a2..2e2dbb6441 100644 --- a/frontend/src/app/modules/imports/components/import_process/import-step/import-step.component.html +++ 
b/frontend/src/app/modules/imports/components/import_process/import-step/import-step.component.html @@ -111,7 +111,7 @@