From 4bedcdc7368804bbda35dea1cacdcd5fe6c980e6 Mon Sep 17 00:00:00 2001
From: Hendrik Huyskens
Date: Fri, 6 Sep 2024 14:13:12 +0200
Subject: [PATCH] Fix None values in FK column

---
 CHANGELOG.md                  | 4 ++++
 data_adapter/preprocessing.py | 9 +++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a3edbd4..782cedf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,10 @@ Here is a template for new release sections
 -
 ```
 
+## [Unreleased]
+### Changed
+- "None" values in possible FK column are overwritten by FK mapping
+
 ## [0.22.0] - 2024-06-25
 ### Changed
 - in case of bandwidth values, first value is used for process
diff --git a/data_adapter/preprocessing.py b/data_adapter/preprocessing.py
index 7754025..4f962a9 100644
--- a/data_adapter/preprocessing.py
+++ b/data_adapter/preprocessing.py
@@ -447,11 +447,16 @@ def _get_foreign_keys(process: str, df: pd.DataFrame) -> dict[str, ForeignKey]:
     # Check if Fks are unique (cannot have different FKs per process/subprocess)
     fk_candidates = {}
     for fk_column in fk_column_candidates:
-        if len(df[fk_column].unique()) > 1:
+        column_data_without_none = df[fk_column][~df[fk_column].isnull()]
+        if len(column_data_without_none.unique()) > 1:
             continue  # no candidate
-        fk = df[fk_column].iloc[0]
+        fk = column_data_without_none.iloc[0]
         if "." not in fk:
             continue  # no candidate
+        if df[fk_column].isnull().sum() > 0:
+            logging.warning(
+                f"None values in column '{fk_column}' of process '{process}' will be overwritten by FK values."
+            )
         fk_candidates[fk_column] = ForeignKey(*fk.split("."))
     return fk_candidates
 