From dfd60a7d14e64457c9c998c606caa1caa137fc9b Mon Sep 17 00:00:00 2001 From: spwoodcock Date: Tue, 24 Sep 2024 11:02:52 +0100 Subject: [PATCH] fix(xlsform): do not filter duplicates from choices sheet --- osm_fieldwork/update_xlsform.py | 16 +++++++--------- tests/test_update_xlsform.py | 10 +--------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/osm_fieldwork/update_xlsform.py b/osm_fieldwork/update_xlsform.py index cbde72de..8aed029c 100644 --- a/osm_fieldwork/update_xlsform.py +++ b/osm_fieldwork/update_xlsform.py @@ -34,19 +34,12 @@ def merge_dataframes(mandatory_df: pd.DataFrame, user_question_df: pd.DataFrame, # mandatory_df, user_question_df, digitisation_df, fields=["label", "hint", "required_message"] # ) - # Find common fields between user_question_df and mandatory_df or digitisation_df - duplicate_fields = set(user_question_df[NAME_COLUMN]).intersection( - set(mandatory_df[NAME_COLUMN]).union(set(digitisation_df[NAME_COLUMN])) - ) - - # Is choices sheet, return ordered merged choices + # If processing the choices sheet, retain all duplicates if "list_name" in user_question_df.columns: - user_question_df_filtered = user_question_df[~user_question_df[NAME_COLUMN].isin(duplicate_fields)] - return pd.concat( [ mandatory_df, - user_question_df_filtered, + user_question_df, digitisation_df, ], ignore_index=True, @@ -54,6 +47,11 @@ def merge_dataframes(mandatory_df: pd.DataFrame, user_question_df: pd.DataFrame, # Else we are processing the survey sheet, continue + # Find common fields between user_question_df and mandatory_df or digitisation_df + duplicate_fields = set(user_question_df[NAME_COLUMN]).intersection( + set(mandatory_df[NAME_COLUMN]).union(set(digitisation_df[NAME_COLUMN])) + ) + # NOTE filter out 'end group' from duplicate check as they have empty NAME_COLUMN end_group_rows = user_question_df[user_question_df["type"].isin(["end group", "end_group"])] user_question_df_filtered = user_question_df[ diff --git a/tests/test_update_xlsform.py b/tests/test_update_xlsform.py index b5c6f17c..0cfcba86 100644 --- a/tests/test_update_xlsform.py +++ b/tests/test_update_xlsform.py @@ -41,7 +41,7 @@ async def test_merge_mandatory_fields(): merged_xlsform.write(updated_form.getvalue()) check_survey_sheet(workbook) - check_choices_sheet(workbook) + # NOTE the choices sheet can have duplicates in the 'name' field without issue check_entities_sheet(workbook) check_form_title(workbook) @@ -113,14 +113,6 @@ def check_survey_sheet(workbook: Workbook) -> None: check_for_duplicates(survey_sheet, name_col_index) -def check_choices_sheet(workbook: Workbook) -> None: - """Check the 'choices' sheet and ensure no duplicates in 'name' column.""" - choices_sheet = get_sheet(workbook, "choices") - name_col_index = get_column_index(choices_sheet, "name") - - check_for_duplicates(choices_sheet, name_col_index) - - def check_entities_sheet(workbook: Workbook) -> None: """Check the 'entities' sheet values.""" entities_sheet = get_sheet(workbook, "entities")