Skip to content

Commit

Permalink
Fix None values in FK column
Browse files Browse the repository at this point in the history
  • Loading branch information
henhuy committed Sep 24, 2024
1 parent f7db205 commit 4bedcdc
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ Here is a template for new release sections
-
```

## [Unreleased]
### Changed
- "None" values in a possible FK column are overwritten by the FK mapping

## [0.22.0] - 2024-06-25
### Changed
- in case of bandwidth values, first value is used for process
Expand Down
9 changes: 7 additions & 2 deletions data_adapter/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,11 +447,16 @@ def _get_foreign_keys(process: str, df: pd.DataFrame) -> dict[str, ForeignKey]:
# Check if FKs are unique (cannot have different FKs per process/subprocess)
fk_candidates = {}
for fk_column in fk_column_candidates:
if len(df[fk_column].unique()) > 1:
column_data_without_none = df[fk_column][~df[fk_column].isnull()]
if len(column_data_without_none.unique()) > 1:
continue # no candidate
fk = df[fk_column].iloc[0]
fk = column_data_without_none.iloc[0]
if "." not in fk:
continue # no candidate
if df[fk_column].isnull().sum() > 0:
logging.warning(
f"None values in column '{fk_column}' of process '{process}' will be overwritten by FK values."
)
fk_candidates[fk_column] = ForeignKey(*fk.split("."))
return fk_candidates

Expand Down

0 comments on commit 4bedcdc

Please sign in to comment.