refactor: handle instances of empty data in some plotting functions

This was implemented to assist users running analyses on wind farms that have limited datasets.
resgroup · Oct 29, 2024 · 980f6db · 980f6db
1 parent 0254b45
commit 980f6db
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 11 deletions.
diff --git a/wind_up/detrend.py b/wind_up/detrend.py
@@ -233,9 +233,16 @@ def apply_wsratio_v_wd_scen(
     ref_ws_col: str,
     ref_wd_col: str,
 ) -> pd.DataFrame:
+    try:
+        scenario_set_wsratio_dir_scen = set(
+            wsratio_v_dir_scen.dropna(subset="ws_rom").index.unique(level="waking_scenario")
+        )
+    except KeyError as e:  # KeyError: 'Requested level (waking_scenario) does not match index name (None)'
+        logger.warning(e)
+        scenario_set_wsratio_dir_scen = set()
+
     scen_list = list(
-        set(p_df.dropna(subset=[ref_ws_col, ref_wd_col])["waking_scenario"].unique())
-        & set(wsratio_v_dir_scen.dropna(subset="ws_rom").index.unique(level="waking_scenario")),
+        set(p_df.dropna(subset=[ref_ws_col, ref_wd_col])["waking_scenario"].unique()) & scenario_set_wsratio_dir_scen
     )
     all_scens_df = pd.DataFrame()
     for scen in scen_list:

diff --git a/wind_up/plots/detrend_plots.py b/wind_up/plots/detrend_plots.py
@@ -216,6 +216,12 @@ def plot_check_wsratio_v_dir(
     ref_wd_col: str,
     plot_cfg: PlotConfig,
 ) -> None:
+    if pre_wsratio_v_dir.empty:
+        logger.warning("pre_wsratio_v_dir is empty")
+        return
+    if post_wsratio_v_dir.empty:  # skip plotting when the post frame has no rows
+        logger.warning("post_wsratio_v_dir is empty")
+        return
     scen_to_plot = (
         pd.concat([pre_wsratio_v_dir, post_wsratio_v_dir])
         .dropna(subset="ws_rom")

diff --git a/wind_up/plots/pp_analysis_plots.py b/wind_up/plots/pp_analysis_plots.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import logging
 from typing import TYPE_CHECKING
 
 import matplotlib.pyplot as plt
@@ -13,6 +14,9 @@
     from wind_up.models import PlotConfig
 
 
+logger = logging.getLogger(__name__)
+
+
 def plot_pre_post_binned_power_curves(
     *,
     test_name: str,
@@ -195,10 +199,11 @@ def plot_pre_post_condition_histogram(
         msg = f"plot_pre_post_condition_histogram ref_name={ref_name} post_df missing required column {col}"
         result_manager.warning(msg)
         return
-    if first_bin_start is None:
-        first_bin_start = round(min(pre_df[col].min(), post_df[col].min()) - bin_width / 2)
-    if last_bin_end is None:
-        last_bin_end = max(pre_df[col].max(), post_df[col].max())
+    if pre_df.empty or post_df.empty:
+        logger.warning("ref: %s, test: %s - Cannot plot condition histogram as some required data is empty", ref_name, test_name)
+        return  # bail out: bin edges cannot be derived from empty data
+    first_bin_start = round(min(pre_df[col].min(), post_df[col].min()) - bin_width / 2) if first_bin_start is None else first_bin_start
+    last_bin_end = max(pre_df[col].max(), post_df[col].max()) if last_bin_end is None else last_bin_end  # `is None` keeps an explicit 0
     bins = list(
         np.arange(
             first_bin_start,

diff --git a/wind_up/smart_data.py b/wind_up/smart_data.py
@@ -122,9 +122,9 @@ def check_and_convert_scada_raw(
     scada_raw.set_index([scada_raw.index, "TurbineName"], verify_integrity=True)
 
     turbine_rows = scada_raw.groupby("TurbineName", observed=False)["TurbineName"].count().to_frame()
-    rows_per_turbine = turbine_rows.max().iloc[0]
-    if rows_per_turbine != turbine_rows.min().iloc[0]:
-        msg = f"turbines have different number of rows: {rows_per_turbine} != {turbine_rows.min().iloc[0]}"
+    max_rows_per_turbine = turbine_rows.max().iloc[0]
+    if max_rows_per_turbine != turbine_rows.min().iloc[0]:
+        msg = f"turbines have different number of rows: {max_rows_per_turbine} != {turbine_rows.min().iloc[0]}"
         result_manager.warning(msg)
         logger.info("attempting to repair")
         rows_before = len(scada_raw)
@@ -147,15 +147,16 @@ def check_and_convert_scada_raw(
 
         turbine_rows = reindexed_df.groupby("TurbineName", observed=False)["TurbineName"].count().to_frame()
         new_rows_per_turbine = turbine_rows.max().iloc[0]
-        if new_rows_per_turbine == turbine_rows.min().iloc[0] and new_rows_per_turbine == rows_per_turbine:
+        if new_rows_per_turbine == turbine_rows.min().iloc[0]:
             logger.info(f"repair successful. rows before: {rows_before}, rows after: {rows_after}")
             scada_raw = reindexed_df
         else:
             msg = f"turbines have different number of rows: {new_rows_per_turbine} != {turbine_rows.min().iloc[0]}"
             raise RuntimeError(msg)
     rows_per_hour = 3600 / timebase_s
     logger.info(
-        f"loaded {len(turbine_rows)} turbines, {rows_per_turbine / rows_per_hour / 24 / 365.25:.1f} years per turbine"
+        f"loaded {len(turbine_rows)} turbines, {max_rows_per_turbine / rows_per_hour / 24 / 365.25:.1f} "
+        "years per turbine"
     )
     return scada_raw