From 6112a6f47d78bbc61ef0d23e9a5d5fd38fe80135 Mon Sep 17 00:00:00 2001
From: Chris Frick
Date: Thu, 3 Oct 2024 10:13:51 -0700
Subject: [PATCH] fix height percentile error in generate_main_manifest

---
 nuc_morph_analysis/lib/preprocessing/load_data.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/nuc_morph_analysis/lib/preprocessing/load_data.py b/nuc_morph_analysis/lib/preprocessing/load_data.py
index f5212a67..b01aa0fb 100644
--- a/nuc_morph_analysis/lib/preprocessing/load_data.py
+++ b/nuc_morph_analysis/lib/preprocessing/load_data.py
@@ -52,15 +52,19 @@ def get_dataframe_by_info(info):
     # Load dataframe by file format
     if path.endswith("csv"):
         df = pd.read_csv(path)
+
         # use height calculated from 1st to 99th percentile values
         # rather than the most extreme values
-        df["height"] = df["height_percentile"]
+        if "height_percentile" in df.columns:  # only some datasets have this column
+            df["height"] = df["height_percentile"]
         return df
     elif path.endswith("parquet"):
         df = pd.read_parquet(path)
+
         # use height calculated from 1st to 99th percentile values
         # rather than the most extreme values
-        df["height"] = df["height_percentile"]
+        if "height_percentile" in df.columns:  # only some datasets have this column
+            df["height"] = df["height_percentile"]
         return df
     else:
         raise ValueError(f"Unknown format {path.split('.')[-1]}")