Skip to content

Commit

Permalink
refactor: handle instances of empty data in some plotting functions
Browse files Browse the repository at this point in the history
This was implemented to assist users running analyses on wind farms
that have limited datasets
  • Loading branch information
samuelwnaylor committed Oct 29, 2024
1 parent 0254b45 commit 980f6db
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 11 deletions.
11 changes: 9 additions & 2 deletions wind_up/detrend.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,16 @@ def apply_wsratio_v_wd_scen(
ref_ws_col: str,
ref_wd_col: str,
) -> pd.DataFrame:
try:
scenario_set_wsratio_dir_scen = set(
wsratio_v_dir_scen.dropna(subset="ws_rom").index.unique(level="waking_scenario")
)
except KeyError as e: # KeyError: 'Requested level (waking_scenario) does not match index name (None)'
logger.warning(e)
scenario_set_wsratio_dir_scen = set()

scen_list = list(
set(p_df.dropna(subset=[ref_ws_col, ref_wd_col])["waking_scenario"].unique())
& set(wsratio_v_dir_scen.dropna(subset="ws_rom").index.unique(level="waking_scenario")),
set(p_df.dropna(subset=[ref_ws_col, ref_wd_col])["waking_scenario"].unique()) & scenario_set_wsratio_dir_scen
)
all_scens_df = pd.DataFrame()
for scen in scen_list:
Expand Down
6 changes: 6 additions & 0 deletions wind_up/plots/detrend_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,12 @@ def plot_check_wsratio_v_dir(
ref_wd_col: str,
plot_cfg: PlotConfig,
) -> None:
if pre_wsratio_v_dir.empty:
logger.warning("pre_wsratio_v_dir is empty")
return
if post_wsratio_v_dir.empty:
logger.warning("post_wsratio_v_dir is empty")
return
scen_to_plot = (
pd.concat([pre_wsratio_v_dir, post_wsratio_v_dir])
.dropna(subset="ws_rom")
Expand Down
13 changes: 9 additions & 4 deletions wind_up/plots/pp_analysis_plots.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import logging
from typing import TYPE_CHECKING

import matplotlib.pyplot as plt
Expand All @@ -13,6 +14,9 @@
from wind_up.models import PlotConfig


logger = logging.getLogger(__name__)


def plot_pre_post_binned_power_curves(
*,
test_name: str,
Expand Down Expand Up @@ -195,10 +199,11 @@ def plot_pre_post_condition_histogram(
msg = f"plot_pre_post_condition_histogram ref_name={ref_name} post_df missing required column {col}"
result_manager.warning(msg)
return
if first_bin_start is None:
first_bin_start = round(min(pre_df[col].min(), post_df[col].min()) - bin_width / 2)
if last_bin_end is None:
last_bin_end = max(pre_df[col].max(), post_df[col].max())
if pre_df.empty or post_df.empty:
_ref_and_test_str = f"ref: {ref_name}, test: {test_name}"
logger.warning("%s - Cannot plot condition histogram as some required data is empty", _ref_and_test_str)
first_bin_start = first_bin_start or round(min(pre_df[col].min(), post_df[col].min()) - bin_width / 2)
last_bin_end = last_bin_end or max(pre_df[col].max(), post_df[col].max())
bins = list(
np.arange(
first_bin_start,
Expand Down
11 changes: 6 additions & 5 deletions wind_up/smart_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ def check_and_convert_scada_raw(
scada_raw.set_index([scada_raw.index, "TurbineName"], verify_integrity=True)

turbine_rows = scada_raw.groupby("TurbineName", observed=False)["TurbineName"].count().to_frame()
rows_per_turbine = turbine_rows.max().iloc[0]
if rows_per_turbine != turbine_rows.min().iloc[0]:
msg = f"turbines have different number of rows: {rows_per_turbine} != {turbine_rows.min().iloc[0]}"
max_rows_per_turbine = turbine_rows.max().iloc[0]
if max_rows_per_turbine != turbine_rows.min().iloc[0]:
msg = f"turbines have different number of rows: {max_rows_per_turbine} != {turbine_rows.min().iloc[0]}"
result_manager.warning(msg)
logger.info("attempting to repair")
rows_before = len(scada_raw)
Expand All @@ -147,15 +147,16 @@ def check_and_convert_scada_raw(

turbine_rows = reindexed_df.groupby("TurbineName", observed=False)["TurbineName"].count().to_frame()
new_rows_per_turbine = turbine_rows.max().iloc[0]
if new_rows_per_turbine == turbine_rows.min().iloc[0] and new_rows_per_turbine == rows_per_turbine:
if new_rows_per_turbine == turbine_rows.min().iloc[0]:
logger.info(f"repair successful. rows before: {rows_before}, rows after: {rows_after}")
scada_raw = reindexed_df
else:
msg = f"turbines have different number of rows: {new_rows_per_turbine} != {turbine_rows.min().iloc[0]}"
raise RuntimeError(msg)
rows_per_hour = 3600 / timebase_s
logger.info(
f"loaded {len(turbine_rows)} turbines, {rows_per_turbine / rows_per_hour / 24 / 365.25:.1f} years per turbine"
f"loaded {len(turbine_rows)} turbines, {max_rows_per_turbine / rows_per_hour / 24 / 365.25:.1f} "
"years per turbine"
)
return scada_raw

Expand Down

0 comments on commit 980f6db

Please sign in to comment.