diff --git a/examples/smarteole_example.ipynb b/examples/smarteole_example.ipynb
index e294b1b..1e4a2ac 100644
--- a/examples/smarteole_example.ipynb
+++ b/examples/smarteole_example.ipynb
@@ -62,7 +62,8 @@
"import logging\n",
"import re\n",
"\n",
- "from IPython.display import Markdown, display\n",
+ "from IPython.display import Markdown\n",
+ "from IPython.display import display as ipy_display\n",
"\n",
"logging.basicConfig(format=\"%(message)s\", level=logging.INFO)\n",
"\n",
@@ -72,7 +73,7 @@
" message = self.format(record)\n",
" # Replace newline characters with <br> tags\n",
" message = re.sub(r\"\\n\", \"<br>\", message)\n",
- " display(Markdown(message))\n",
+ " ipy_display(Markdown(message))\n",
"\n",
"\n",
"logger = logging.getLogger()\n",
@@ -2786,7 +2787,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "a2457ea9318744fd80c413a42676fcd0",
+ "model_id": "3fbc8142f44a49bf8500c4e04e97b1ed",
"version_major": 2,
"version_minor": 0
},
@@ -2908,7 +2909,7 @@
{
"data": {
"text/markdown": [
- "{'ref': 'SMV7', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 314.4465998943834, 'bearing_deg': 173.69483366777283, 'ref_max_northing_error_v_reanalysis': np.float64(2.6590754551807976), 'ref_max_northing_error_v_wf': np.float64(2.842170943040401e-14), 'ref_max_ws_drift': np.float64(0.08697706942338845), 'ref_max_ws_drift_pp_period': np.float64(0.08697706942338845), 'ref_powercurve_shift': np.float64(0.003117456887993697), 'ref_rpm_shift': np.float64(0.0015313319638985412), 'ref_pitch_shift': np.float64(-0.05548555519736481), 'detrend_pre_r2_improvement': np.float64(0.004942384513915488), 'detrend_post_r2_improvement': np.float64(0.0011487228730558963), 'mean_power_pre': np.float64(1149.2323289820358), 'mean_power_post': np.float64(1148.4157066585956), 'mean_test_yaw_offset_pre': np.float64(-0.022511156888877885), 'mean_test_yaw_offset_post': np.float64(3.8213264238880478), 'mean_test_yaw_offset_command_pre': np.float64(0.0002638323353293413), 'mean_test_yaw_offset_command_post': np.float64(6.636967675544794), 'mean_ref_yaw_offset_command_pre': np.float64(0.0), 'test_ref_warning_counts': 0, 'time_calculated': Timestamp('2024-11-04 14:53:53.646317+0000', tz='UTC'), 'uplift_frc': np.float64(-0.010361770845398625), 'unc_one_sigma_frc': np.float64(0.0057851151530948705), 't_value_one_sigma': np.float64(1.000630119597717), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(132.5), 'pp_valid_hours_post': np.float64(136.0), 'pp_valid_hours': np.float64(268.5), 'pp_data_coverage': np.float64(0.11496467565831728), 'pp_invalid_bin_count': np.int64(16), 'uplift_noadj_frc': np.float64(-0.011505660672016103), 'unc_one_sigma_noadj_frc': np.float64(0.0057851151530948705), 'poweronly_uplift_frc': np.float64(-0.012003308408347353), 'reversed_uplift_frc': np.float64(-0.009715528755112396), 'reversal_error': np.float64(0.0022877796532349576), 'unc_one_sigma_lowerbound_frc': np.float64(0.0011438898266174788), 'unc_one_sigma_bootstrap_frc': 
np.float64(0.0049406532714673), 'uplift_p5_frc': np.float64(-0.000846103203498606), 'uplift_p95_frc': np.float64(-0.019877438487298643), 'wind_up_version': '0.1.9', 'test_wtg': 'SMV6', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(0.03653354205095605), 'test_max_ws_drift_pp_period': np.float64(0.03653354205095605), 'test_powercurve_shift': np.float64(0.0010615707256107498), 'test_rpm_shift': np.float64(0.0011316163321652972), 'test_pitch_shift': np.float64(-0.037158903030505286), 'preprocess_warning_counts': 0, 'test_warning_counts': 0}"
+ "{'ref': 'SMV7', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 314.4465998943834, 'bearing_deg': 173.69483366777283, 'ref_max_northing_error_v_reanalysis': np.float64(2.6590754551807976), 'ref_max_northing_error_v_wf': np.float64(2.842170943040401e-14), 'ref_max_ws_drift': np.float64(0.08697706942338845), 'ref_max_ws_drift_pp_period': np.float64(0.08697706942338845), 'ref_powercurve_shift': np.float64(0.003117456887993697), 'ref_rpm_shift': np.float64(0.0015313319638985412), 'ref_pitch_shift': np.float64(-0.05548555519736481), 'ref_wind_speed_shift': np.float64(-0.0009373123752394186), 'detrend_pre_r2_improvement': np.float64(0.004942384513915488), 'detrend_post_r2_improvement': np.float64(0.0011487228730558963), 'mean_power_pre': np.float64(1149.2323289820358), 'mean_power_post': np.float64(1148.4157066585956), 'mean_test_yaw_offset_pre': np.float64(-0.022511156888877885), 'mean_test_yaw_offset_post': np.float64(3.8213264238880478), 'mean_test_yaw_offset_command_pre': np.float64(0.0002638323353293413), 'mean_test_yaw_offset_command_post': np.float64(6.636967675544794), 'mean_ref_yaw_offset_command_pre': np.float64(0.0), 'test_ref_warning_counts': 0, 'time_calculated': Timestamp('2024-12-05 12:46:03.895478+0000', tz='UTC'), 'uplift_frc': np.float64(-0.010361770845398625), 'unc_one_sigma_frc': np.float64(0.0057851151530948705), 't_value_one_sigma': np.float64(1.000630119597717), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(132.5), 'pp_valid_hours_post': np.float64(136.0), 'pp_valid_hours': np.float64(268.5), 'pp_data_coverage': np.float64(0.11496467565831728), 'pp_invalid_bin_count': np.int64(16), 'uplift_noadj_frc': np.float64(-0.011505660672016103), 'unc_one_sigma_noadj_frc': np.float64(0.0057851151530948705), 'poweronly_uplift_frc': np.float64(-0.012003308408347353), 'reversed_uplift_frc': np.float64(-0.009715528755112396), 'reversal_error': np.float64(0.0022877796532349576), 'unc_one_sigma_lowerbound_frc': 
np.float64(0.0011438898266174788), 'unc_one_sigma_bootstrap_frc': np.float64(0.0049406532714673), 'uplift_p5_frc': np.float64(-0.000846103203498606), 'uplift_p95_frc': np.float64(-0.019877438487298643), 'wind_up_version': '0.1.10', 'test_wtg': 'SMV6', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(0.03653354205095605), 'test_max_ws_drift_pp_period': np.float64(0.03653354205095605), 'test_powercurve_shift': np.float64(0.0010615707256107498), 'test_rpm_shift': np.float64(0.0011316163321652972), 'test_pitch_shift': np.float64(-0.037158903030505286), 'test_wind_speed_shift': np.float64(-0.0063495234304998815), 'preprocess_warning_counts': 0, 'test_warning_counts': 0}"
],
"text/plain": [
""
@@ -3748,7 +3749,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "df804baf4f2041b18c2dc67189a83f67",
+ "model_id": "45ef05430cb541a890aa60886e175144",
"version_major": 2,
"version_minor": 0
},
@@ -3870,7 +3871,7 @@
{
"data": {
"text/markdown": [
- "{'ref': 'SMV7', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 591.1178519927024, 'bearing_deg': 190.23567745705736, 'ref_max_northing_error_v_reanalysis': np.float64(2.6590754551807976), 'ref_max_northing_error_v_wf': np.float64(2.842170943040401e-14), 'ref_max_ws_drift': np.float64(0.08697706942338845), 'ref_max_ws_drift_pp_period': np.float64(0.08697706942338845), 'ref_powercurve_shift': np.float64(0.003117456887993697), 'ref_rpm_shift': np.float64(0.0015313319638985412), 'ref_pitch_shift': np.float64(-0.05548555519736481), 'detrend_pre_r2_improvement': np.float64(0.09621188863947527), 'detrend_post_r2_improvement': np.float64(0.11890364717818414), 'mean_power_pre': np.float64(955.493497245509), 'mean_power_post': np.float64(993.6911992736077), 'mean_test_yaw_offset_pre': np.float64(-2.2725466102034675), 'mean_test_yaw_offset_post': np.float64(-2.876981850327039), 'mean_test_yaw_offset_command_pre': np.float64(0.0), 'mean_test_yaw_offset_command_post': np.float64(0.0), 'mean_ref_yaw_offset_command_pre': np.float64(0.0), 'test_ref_warning_counts': 0, 'time_calculated': Timestamp('2024-11-04 14:55:21.795722+0000', tz='UTC'), 'uplift_frc': np.float64(0.030879346731271313), 'unc_one_sigma_frc': np.float64(0.01167847006525424), 't_value_one_sigma': np.float64(1.0006277462668354), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(133.0), 'pp_valid_hours_post': np.float64(137.16666666666669), 'pp_valid_hours': np.float64(270.1666666666667), 'pp_data_coverage': np.float64(0.11567829872261472), 'pp_invalid_bin_count': np.int64(16), 'uplift_noadj_frc': np.float64(0.030509447623790466), 'unc_one_sigma_noadj_frc': np.float64(0.01004760180633488), 'poweronly_uplift_frc': np.float64(0.029990866525649328), 'reversed_uplift_frc': np.float64(0.03073066474061102), 'reversal_error': np.float64(0.0007397982149616941), 'unc_one_sigma_lowerbound_frc': np.float64(0.00036989910748084706), 'unc_one_sigma_bootstrap_frc': np.float64(0.01167847006525424), 
'uplift_p5_frc': np.float64(0.050088720575348945), 'uplift_p95_frc': np.float64(0.01166997288719368), 'wind_up_version': '0.1.9', 'test_wtg': 'SMV5', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(0.10726609831004863), 'test_max_ws_drift_pp_period': np.float64(0.10726609831004863), 'test_powercurve_shift': np.float64(-0.005678000921447213), 'test_rpm_shift': np.float64(0.0013951853610039144), 'test_pitch_shift': np.float64(-0.02783487184623068), 'preprocess_warning_counts': 0, 'test_warning_counts': 0}"
+ "{'ref': 'SMV7', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 591.1178519927024, 'bearing_deg': 190.23567745705736, 'ref_max_northing_error_v_reanalysis': np.float64(2.6590754551807976), 'ref_max_northing_error_v_wf': np.float64(2.842170943040401e-14), 'ref_max_ws_drift': np.float64(0.08697706942338845), 'ref_max_ws_drift_pp_period': np.float64(0.08697706942338845), 'ref_powercurve_shift': np.float64(0.003117456887993697), 'ref_rpm_shift': np.float64(0.0015313319638985412), 'ref_pitch_shift': np.float64(-0.05548555519736481), 'ref_wind_speed_shift': np.float64(-0.0009373123752394186), 'detrend_pre_r2_improvement': np.float64(0.09621188863947527), 'detrend_post_r2_improvement': np.float64(0.11890364717818414), 'mean_power_pre': np.float64(955.493497245509), 'mean_power_post': np.float64(993.6911992736077), 'mean_test_yaw_offset_pre': np.float64(-2.2725466102034675), 'mean_test_yaw_offset_post': np.float64(-2.876981850327039), 'mean_test_yaw_offset_command_pre': np.float64(0.0), 'mean_test_yaw_offset_command_post': np.float64(0.0), 'mean_ref_yaw_offset_command_pre': np.float64(0.0), 'test_ref_warning_counts': 0, 'time_calculated': Timestamp('2024-12-05 12:47:18.340985+0000', tz='UTC'), 'uplift_frc': np.float64(0.030879346731271313), 'unc_one_sigma_frc': np.float64(0.01167847006525424), 't_value_one_sigma': np.float64(1.0006277462668354), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(133.0), 'pp_valid_hours_post': np.float64(137.16666666666669), 'pp_valid_hours': np.float64(270.1666666666667), 'pp_data_coverage': np.float64(0.11567829872261472), 'pp_invalid_bin_count': np.int64(16), 'uplift_noadj_frc': np.float64(0.030509447623790466), 'unc_one_sigma_noadj_frc': np.float64(0.01004760180633488), 'poweronly_uplift_frc': np.float64(0.029990866525649328), 'reversed_uplift_frc': np.float64(0.03073066474061102), 'reversal_error': np.float64(0.0007397982149616941), 'unc_one_sigma_lowerbound_frc': np.float64(0.00036989910748084706), 
'unc_one_sigma_bootstrap_frc': np.float64(0.01167847006525424), 'uplift_p5_frc': np.float64(0.050088720575348945), 'uplift_p95_frc': np.float64(0.01166997288719368), 'wind_up_version': '0.1.10', 'test_wtg': 'SMV5', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(0.10726609831004863), 'test_max_ws_drift_pp_period': np.float64(0.10726609831004863), 'test_powercurve_shift': np.float64(-0.005678000921447213), 'test_rpm_shift': np.float64(0.0013951853610039144), 'test_pitch_shift': np.float64(-0.02783487184623068), 'test_wind_speed_shift': np.float64(-0.003742689588962622), 'preprocess_warning_counts': 0, 'test_warning_counts': 0}"
],
"text/plain": [
""
diff --git a/pyproject.toml b/pyproject.toml
index 14076a0..513e721 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -132,7 +132,10 @@ filterwarnings = [
omit = [
"wind_up/plots/*.py",
]
-exclude_lines = ["if __name__ == .__main__.:"]
+exclude_lines = [
+ "if __name__ == .__main__.:",
+ "if TYPE_CHECKING:"
+]
[tool.poe.tasks]
[tool.poe.tasks.lint]
diff --git a/tests/test_ops_curve_shift.py b/tests/test_ops_curve_shift.py
new file mode 100644
index 0000000..fd55484
--- /dev/null
+++ b/tests/test_ops_curve_shift.py
@@ -0,0 +1,360 @@
+import logging
+from unittest.mock import Mock, patch
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from wind_up.ops_curve_shift import (
+ CurveConfig,
+ CurveShiftInput,
+ CurveShiftOutput,
+ CurveTypes,
+ OpsCurveRequiredColumns,
+ calculate_curve_shift,
+ check_for_ops_curve_shift,
+)
+
+
+@pytest.fixture
+def fake_required_columns() -> OpsCurveRequiredColumns:
+    """Map each required operational-curve signal to the column name used in the fake data."""
+    column_names = {
+        "wind_speed": "wind_speed",
+        "power": "active_power",
+        "rpm": "gen_rpm",
+        "pitch": "pitch_angle",
+    }
+    return OpsCurveRequiredColumns(**column_names)
+
+
+@pytest.fixture
+def fake_curve_df(fake_required_columns: OpsCurveRequiredColumns) -> pd.DataFrame:
+    """Build a 15-row synthetic dataset with one row per integer wind speed from 0 to 14."""
+    cols = fake_required_columns
+    wind_speed = list(range(15))
+    # the single NaN lets tests check how missing values are handled
+    power = [0, 0, np.nan, 1, 3, 6, 10, 15, 22, 30, 36, 39, 40, 40, 40]
+    rpm = [900, 900, 850, 875, 900, 1000, 1100, 1200, 1350, 1500, 1600, 1600, 1600, 1600, 1600]
+    pitch = [4, 4, 4, 3, 2, 1, 1, 1, 2, 5, 8, 11, 13, 14, 15]
+    return pd.DataFrame(
+        {
+            cols.wind_speed: wind_speed,
+            cols.power: power,
+            cols.rpm: rpm,
+            cols.pitch: pitch,
+        }
+    )
+
+
+class TestCurveShiftInput:
+    """Checks on the validation performed when a ``CurveShiftInput`` is constructed."""
+
+    @staticmethod
+    def _explicit_power_curve_config(cols: OpsCurveRequiredColumns) -> CurveConfig:
+        """Return a power curve config with bin width and warning threshold given explicitly."""
+        return CurveConfig(
+            name=CurveTypes.POWER_CURVE.value,
+            x_col=cols.wind_speed,
+            y_col=cols.power,
+            x_bin_width=1,
+            warning_threshold=0.01,
+        )
+
+    @staticmethod
+    def test_acceptable_inputs(fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns) -> None:
+        """Construction succeeds (raises nothing) when both dataframes hold every required column."""
+        CurveShiftInput(
+            turbine_name="anything",
+            pre_df=fake_curve_df,
+            post_df=fake_curve_df,
+            curve_config=TestCurveShiftInput._explicit_power_curve_config(fake_required_columns),
+            ops_curve_required_columns=fake_required_columns,
+        )
+
+    @pytest.mark.parametrize("column_name", ["wind_speed", "active_power"])
+    def test_missing_column_in_pre_df(
+        self, column_name: str, fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns
+    ) -> None:
+        """An ``IndexError`` naming the column is raised when the pre-dataframe lacks a curve column."""
+        expected_message = f"'{column_name}' column name missing in pre-dataframe"
+        incomplete_pre_df = fake_curve_df.drop(columns=column_name)
+        complete_post_df = fake_curve_df + 2
+        curve_config = self._explicit_power_curve_config(fake_required_columns)
+
+        with pytest.raises(IndexError, match=expected_message):
+            CurveShiftInput(
+                turbine_name="anything",
+                pre_df=incomplete_pre_df,
+                post_df=complete_post_df,
+                curve_config=curve_config,
+                ops_curve_required_columns=fake_required_columns,
+            )
+
+    @pytest.mark.parametrize("column_name", ["wind_speed", "active_power"])
+    def test_missing_column_in_post_df(
+        self, column_name: str, fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns
+    ) -> None:
+        """An ``IndexError`` naming the column is raised when the post-dataframe lacks a curve column."""
+        expected_message = f"'{column_name}' column name missing in post-dataframe"
+        incomplete_post_df = (fake_curve_df + 2).drop(columns=column_name)
+        curve_config = self._explicit_power_curve_config(fake_required_columns)
+
+        with pytest.raises(IndexError, match=expected_message):
+            CurveShiftInput(
+                turbine_name="anything",
+                pre_df=fake_curve_df,
+                post_df=incomplete_post_df,
+                curve_config=curve_config,
+                ops_curve_required_columns=fake_required_columns,
+            )
+
+
+@pytest.mark.parametrize(
+    ("shift_amount", "expected"),
+    [
+        pytest.param(0.0, 0.0, id="zero"),
+        pytest.param(2.0, -0.1376912378303199, id="shift DOES exceed threshold"),
+        pytest.param(0.05, -0.004489831851395176, id="shift DOES NOT exceed threshold"),
+    ],
+)
+def test_calculate_power_curve_shift(
+    shift_amount: float, expected: float, fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns
+) -> None:
+    """Adding ``shift_amount`` to every column of the post data gives the expected power curve shift."""
+    power_curve_config = CurveConfig(
+        name=CurveTypes.POWER_CURVE,
+        x_col=fake_required_columns.wind_speed,
+        y_col=fake_required_columns.power,
+    )
+    shifted_df = fake_curve_df + shift_amount
+    curve_shift_input = CurveShiftInput(
+        turbine_name="anything",
+        pre_df=fake_curve_df,
+        post_df=shifted_df,
+        curve_config=power_curve_config,
+        ops_curve_required_columns=fake_required_columns,
+    )
+
+    # check that CurveShiftInput pydantic model has removed NaNs
+    for validated_df in (curve_shift_input.pre_df, curve_shift_input.post_df):
+        assert not validated_df.isna().to_numpy().any()
+
+    result = calculate_curve_shift(curve_shift_input=curve_shift_input)
+
+    np.testing.assert_almost_equal(actual=result.value, desired=expected)
+
+
+@pytest.mark.parametrize(
+    ("shift_amount", "expected"),
+    [
+        pytest.param(0.2, -0.00865091569970633, id="shift DOES exceed threshold"),
+        pytest.param(0.1, -0.004926790475744736, id="shift DOES NOT exceed threshold"),
+    ],
+)
+def test_calculate_rpm_curve_shift(
+    shift_amount: float,
+    expected: float,
+    fake_curve_df: pd.DataFrame,
+    fake_required_columns: OpsCurveRequiredColumns,
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Adding ``shift_amount`` to every column of the post data gives the expected rpm curve shift."""
+    # NOTE(review): log records are captured at WARNING level but nothing is asserted on them
+    with caplog.at_level(logging.WARNING):
+        rpm_config = CurveConfig(name=CurveTypes.RPM, x_col="wind_speed", y_col="gen_rpm")
+        rpm_shift_input = CurveShiftInput(
+            turbine_name="anything",
+            pre_df=fake_curve_df,
+            post_df=(fake_curve_df + shift_amount),
+            curve_config=rpm_config,
+            ops_curve_required_columns=fake_required_columns,
+        )
+        result = calculate_curve_shift(curve_shift_input=rpm_shift_input)
+
+    np.testing.assert_almost_equal(actual=result.value, desired=expected)
+
+
+@pytest.mark.parametrize(
+    ("shift_amount", "expected"),
+    [
+        pytest.param(0.0, 0.0, id="zero"),
+        pytest.param(0.6, 0.10714285714285765, id="shift DOES exceed threshold"),
+        pytest.param(0.5, 0.08928571428571441, id="shift DOES NOT exceed threshold"),
+    ],
+)
+def test_calculate_pitch_curve_shift(
+    shift_amount: float, expected: float, fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns
+) -> None:
+    """Adding ``shift_amount`` to every column of the post data gives the expected pitch curve shift."""
+    pitch_config = CurveConfig(
+        name=CurveTypes.PITCH,
+        x_col=fake_required_columns.wind_speed,
+        y_col=fake_required_columns.pitch,
+    )
+    pitch_shift_input = CurveShiftInput(
+        turbine_name="anything",
+        pre_df=fake_curve_df,
+        post_df=(fake_curve_df + shift_amount),
+        curve_config=pitch_config,
+        ops_curve_required_columns=fake_required_columns,
+    )
+
+    result = calculate_curve_shift(curve_shift_input=pitch_shift_input)
+
+    np.testing.assert_almost_equal(actual=result.value, desired=expected)
+
+
+@pytest.mark.parametrize(
+    ("shift_amount", "expected"),
+    [
+        pytest.param(2.0, 0.13811720414537776, id="shift DOES exceed threshold"),
+        # With no shift applied the result is still non-zero, and its magnitude (about 0.046) is
+        # larger than the 0.01 default wind speed warning threshold, so this case is labelled
+        # "zero" like the zero-shift cases of the other curve tests rather than "DOES NOT exceed".
+        pytest.param(0.0, -0.04629629629629639, id="zero"),
+    ],
+)
+def test_calculate_wind_speed_curve_shift(
+    shift_amount: float, expected: float, fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns
+) -> None:
+    """Adding ``shift_amount`` to every column of the post data gives the expected wind speed curve shift.
+
+    Unlike the power and pitch curves, this curve uses power as the x column and wind speed as the y column.
+    """
+    wind_speed_config = CurveConfig(
+        name=CurveTypes.WIND_SPEED,
+        x_col=fake_required_columns.power,
+        y_col=fake_required_columns.wind_speed,
+    )
+    # ``fake_curve_df + shift_amount`` builds a new frame, so no defensive copy of the fixture is needed
+    actual = calculate_curve_shift(
+        curve_shift_input=CurveShiftInput(
+            turbine_name="anything",
+            pre_df=fake_curve_df,
+            post_df=(fake_curve_df + shift_amount),
+            curve_config=wind_speed_config,
+            ops_curve_required_columns=fake_required_columns,
+        )
+    )
+
+    np.testing.assert_almost_equal(actual=actual.value, desired=expected)
+
+
+class TestCheckForOpsCurveShift:
+    """Tests for the ``check_for_ops_curve_shift`` entry point."""
+
+    @staticmethod
+    def _all_nan_shifts() -> dict[str, float]:
+        """Return the expected result when no shift is calculated: one NaN entry per curve type."""
+        return {f"{curve_type.value}_shift": np.nan for curve_type in CurveTypes}
+
+    @pytest.mark.parametrize(
+        ("pre_df_or_post_df", "missing_column"),
+        [
+            ("pre", "wind_speed"),
+            ("pre", "active_power"),
+            ("pre", "gen_rpm"),
+            ("pre", "pitch_angle"),
+            ("post", "wind_speed"),
+            ("post", "active_power"),
+            ("post", "gen_rpm"),
+            ("post", "pitch_angle"),
+        ],
+    )
+    def test_missing_required_column(
+        self,
+        pre_df_or_post_df: str,
+        missing_column: str,
+        fake_curve_df: pd.DataFrame,
+    ) -> None:
+        """Every shift is NaN when one required column is absent from the pre or the post dataframe."""
+        _df = fake_curve_df.copy()
+
+        pre_df = _df.drop(columns=missing_column) if pre_df_or_post_df == "pre" else _df
+        post_df = _df.drop(columns=missing_column) if pre_df_or_post_df == "post" else _df
+
+        # The column names passed here must be the ones present in the fake data. Otherwise the
+        # power and pitch columns would be missing in every case and the parametrized column
+        # would never be the reason for the NaN result.
+        actual = check_for_ops_curve_shift(
+            pre_df=pre_df,
+            post_df=post_df,
+            wtg_name="anything",
+            scada_ws_col="wind_speed",
+            pw_col="active_power",
+            rpm_col="gen_rpm",
+            pt_col="pitch_angle",
+            cfg=Mock(),
+            plot_cfg=Mock(),
+            plot=False,
+        )
+
+        assert actual == self._all_nan_shifts()
+
+    def test_calls_funcs_as_intended(
+        self, fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns
+    ) -> None:
+        """The shift calculation and the plotting function are called with the expected arguments."""
+        _df = fake_curve_df.copy()
+
+        wtg_name = "anything"
+
+        with (
+            patch(
+                "wind_up.ops_curve_shift.calculate_curve_shift",
+                return_value=CurveShiftOutput(value=np.nan, warning_msg=None),
+            ) as mock_curve_shift,
+            patch("wind_up.ops_curve_shift.compare_ops_curves_pre_post", return_value=None) as mock_plot_func,
+        ):
+            mock_wind_up_conf = Mock()
+            mock_wind_up_conf.toggle = True
+            mock_plot_conf = Mock()
+
+            actual = check_for_ops_curve_shift(
+                pre_df=_df,
+                post_df=_df,
+                wtg_name=wtg_name,
+                scada_ws_col=fake_required_columns.wind_speed,
+                pw_col=fake_required_columns.power,
+                rpm_col=fake_required_columns.rpm,
+                pt_col=fake_required_columns.pitch,
+                cfg=mock_wind_up_conf,
+                plot_cfg=mock_plot_conf,
+            )
+
+        # define expected call inputs
+        curve_input_power = CurveShiftInput(
+            turbine_name=wtg_name,
+            pre_df=_df,
+            post_df=_df,
+            ops_curve_required_columns=fake_required_columns,
+            curve_config=CurveConfig(
+                name=CurveTypes.POWER_CURVE, x_col=fake_required_columns.wind_speed, y_col=fake_required_columns.power
+            ),
+        )
+        curve_input_rpm = CurveShiftInput(
+            turbine_name=wtg_name,
+            pre_df=_df,
+            post_df=_df,
+            ops_curve_required_columns=fake_required_columns,
+            curve_config=CurveConfig(
+                name=CurveTypes.RPM, x_col=fake_required_columns.power, y_col=fake_required_columns.rpm
+            ),
+        )
+        curve_input_pitch = CurveShiftInput(
+            turbine_name=wtg_name,
+            pre_df=_df,
+            post_df=_df,
+            ops_curve_required_columns=fake_required_columns,
+            curve_config=CurveConfig(
+                name=CurveTypes.PITCH, x_col=fake_required_columns.wind_speed, y_col=fake_required_columns.pitch
+            ),
+        )
+        curve_input_wind_speed = CurveShiftInput(
+            turbine_name=wtg_name,
+            pre_df=_df,
+            post_df=_df,
+            ops_curve_required_columns=fake_required_columns,
+            curve_config=CurveConfig(
+                name=CurveTypes.WIND_SPEED, x_col=fake_required_columns.power, y_col=fake_required_columns.wind_speed
+            ),
+        )
+        _call_inputs_list = [curve_input_power, curve_input_rpm, curve_input_pitch, curve_input_wind_speed]
+
+        # zip() stops at the shorter sequence, so confirm the number of calls first; without this
+        # the loop below would pass even if some (or all) of the calculations were never requested
+        assert mock_curve_shift.call_count == len(_call_inputs_list)
+
+        # check calls are made with expected inputs
+        for _call, _input in zip(mock_curve_shift.mock_calls, _call_inputs_list):
+            pd.testing.assert_frame_equal(_call.kwargs["curve_shift_input"].pre_df, _input.pre_df)
+            pd.testing.assert_frame_equal(_call.kwargs["curve_shift_input"].post_df, _input.post_df)
+            assert _call.kwargs["curve_shift_input"].model_dump(exclude=["pre_df", "post_df"]) == _input.model_dump(
+                exclude=["pre_df", "post_df"]
+            )
+
+        mock_plot_func.assert_called_once_with(
+            pre_df=_df,
+            post_df=_df,
+            wtg_name=wtg_name,
+            ws_col=fake_required_columns.wind_speed,
+            pw_col=fake_required_columns.power,
+            pt_col=fake_required_columns.pitch,
+            rpm_col=fake_required_columns.rpm,
+            plot_cfg=mock_plot_conf,
+            is_toggle_test=mock_wind_up_conf.toggle is not None,
+            sub_dir=None,
+        )
+
+        # the patched calculation always returns NaN, so every reported shift is NaN
+        assert actual == self._all_nan_shifts()
diff --git a/wind_up/main_analysis.py b/wind_up/main_analysis.py
index 17d3d19..7ac8ac2 100644
--- a/wind_up/main_analysis.py
+++ b/wind_up/main_analysis.py
@@ -24,9 +24,10 @@
from wind_up.northing import (
check_wtg_northing,
)
+from wind_up.ops_curve_shift import CurveTypes, check_for_ops_curve_shift
from wind_up.plots.data_coverage_plots import plot_detrend_data_cov, plot_pre_post_data_cov
from wind_up.plots.detrend_plots import plot_apply_wsratio_v_wd_scen
-from wind_up.plots.scada_funcs_plots import compare_ops_curves_pre_post, print_filter_stats
+from wind_up.plots.scada_funcs_plots import print_filter_stats
from wind_up.plots.yaw_direction_plots import plot_yaw_direction_pre_post
from wind_up.pp_analysis import pre_post_pp_analysis_with_reversal_and_bootstrapping
from wind_up.result_manager import result_manager
@@ -365,78 +366,6 @@ def yaw_offset_results(
return results
-def check_for_ops_curve_shift(
- pre_df: pd.DataFrame,
- post_df: pd.DataFrame,
- *,
- wtg_name: str,
- scada_ws_col: str,
- pw_col: str,
- rpm_col: str,
- pt_col: str,
- cfg: WindUpConfig,
- plot_cfg: PlotConfig,
- sub_dir: str | None = None,
-) -> dict[str, float]:
- results_dict = {
- "powercurve_shift": np.nan,
- "rpm_shift": np.nan,
- "pitch_shift": np.nan,
- }
- # check if all required columns are present
- required_cols = [scada_ws_col, pw_col, pt_col, rpm_col]
- for req_col in required_cols:
- if req_col not in pre_df.columns:
- msg = f"check_for_ops_curve_shift {wtg_name} pre_df missing required column {req_col}"
- result_manager.warning(msg)
- return results_dict
- if req_col not in post_df.columns:
- msg = f"check_for_ops_curve_shift {wtg_name} post_df missing required column {req_col}"
- result_manager.warning(msg)
- return results_dict
- pre_dropna_df = pre_df.dropna(subset=[scada_ws_col, pw_col, pt_col, rpm_col]).copy()
- post_dropna_df = post_df.dropna(subset=[scada_ws_col, pw_col, pt_col, rpm_col]).copy()
-
- warning_msg: str | None = None
- for descr, x_var, y_var, x_bin_width, warn_thresh in [
- ("powercurve_shift", scada_ws_col, pw_col, 1, 0.01),
- ("rpm_shift", pw_col, rpm_col, 0, 0.005),
- ("pitch_shift", scada_ws_col, pt_col, 1, 0.1),
- ]:
- bins = np.arange(0, pre_dropna_df[x_var].max() + x_bin_width, x_bin_width) if x_bin_width > 0 else 10
- mean_curve = pre_dropna_df.groupby(pd.cut(pre_dropna_df[x_var], bins=bins, retbins=False), observed=True).agg(
- x_mean=pd.NamedAgg(column=x_var, aggfunc="mean"),
- y_mean=pd.NamedAgg(column=y_var, aggfunc="mean"),
- )
- post_dropna_df["expected_y"] = np.interp(post_dropna_df[x_var], mean_curve["x_mean"], mean_curve["y_mean"])
- mean_df = post_dropna_df.mean()
- if y_var == pt_col:
- results_dict[descr] = mean_df[y_var] - mean_df["expected_y"]
- else:
- results_dict[descr] = (mean_df[y_var] / mean_df["expected_y"] - 1).clip(-1, 1)
- if abs(results_dict[descr]) > warn_thresh:
- if warning_msg is None:
- warning_msg = f"{wtg_name} check_for_ops_curve_shift warnings:"
- warning_msg += f" abs({descr}) > {warn_thresh}: {abs(results_dict[descr]):.3f}"
- if warning_msg is not None:
- result_manager.warning(warning_msg)
-
- compare_ops_curves_pre_post(
- pre_df=pre_df,
- post_df=post_df,
- wtg_name=wtg_name,
- ws_col=scada_ws_col,
- pw_col=pw_col,
- pt_col=pt_col,
- rpm_col=rpm_col,
- plot_cfg=plot_cfg,
- is_toggle_test=(cfg.toggle is not None),
- sub_dir=sub_dir,
- )
-
- return results_dict
-
-
def calc_test_ref_results(
*,
test_df: pd.DataFrame,
@@ -723,9 +652,10 @@ def calc_test_ref_results(
"ref_max_northing_error_v_wf": ref_max_northing_error_v_wf,
"ref_max_ws_drift": ref_max_ws_drift,
"ref_max_ws_drift_pp_period": ref_max_ws_drift_pp_period,
- "ref_powercurve_shift": ref_ops_curve_shift_dict["powercurve_shift"],
- "ref_rpm_shift": ref_ops_curve_shift_dict["rpm_shift"],
- "ref_pitch_shift": ref_ops_curve_shift_dict["pitch_shift"],
+ "ref_powercurve_shift": ref_ops_curve_shift_dict[f"{CurveTypes.POWER_CURVE.value}_shift"],
+ "ref_rpm_shift": ref_ops_curve_shift_dict[f"{CurveTypes.RPM.value}_shift"],
+ "ref_pitch_shift": ref_ops_curve_shift_dict[f"{CurveTypes.PITCH.value}_shift"],
+ "ref_wind_speed_shift": ref_ops_curve_shift_dict[f"{CurveTypes.WIND_SPEED.value}_shift"],
"detrend_pre_r2_improvement": detrend_pre_r2_improvement,
"detrend_post_r2_improvement": detrend_post_r2_improvement,
"mean_power_pre": pre_df.dropna(subset=[detrend_ws_col, test_pw_col, ref_wd_col])[test_pw_col].mean(),
@@ -874,9 +804,10 @@ def run_wind_up_analysis(
"lt_wtg_hours_filt": lt_df_filt["observed_hours"].sum() if lt_df_filt is not None else 0,
"test_max_ws_drift": test_max_ws_drift,
"test_max_ws_drift_pp_period": test_max_ws_drift_pp_period,
- "test_powercurve_shift": test_ops_curve_shift_dict["powercurve_shift"],
- "test_rpm_shift": test_ops_curve_shift_dict["rpm_shift"],
- "test_pitch_shift": test_ops_curve_shift_dict["pitch_shift"],
+ "test_powercurve_shift": test_ops_curve_shift_dict[f"{CurveTypes.POWER_CURVE.value}_shift"],
+ "test_rpm_shift": test_ops_curve_shift_dict[f"{CurveTypes.RPM.value}_shift"],
+ "test_pitch_shift": test_ops_curve_shift_dict[f"{CurveTypes.PITCH.value}_shift"],
+ "test_wind_speed_shift": test_ops_curve_shift_dict[f"{CurveTypes.WIND_SPEED.value}_shift"],
"preprocess_warning_counts": preprocess_warning_counts,
"test_warning_counts": len(result_manager.stored_warnings),
}
diff --git a/wind_up/ops_curve_shift.py b/wind_up/ops_curve_shift.py
new file mode 100644
index 0000000..9f210fb
--- /dev/null
+++ b/wind_up/ops_curve_shift.py
@@ -0,0 +1,239 @@
+from __future__ import annotations
+
+from enum import Enum
+from typing import TYPE_CHECKING, NamedTuple
+
+import numpy as np
+import pandas as pd
+from pydantic import BaseModel, ConfigDict, model_validator
+
+from wind_up.plots.scada_funcs_plots import compare_ops_curves_pre_post
+from wind_up.result_manager import result_manager
+
+if TYPE_CHECKING:
+ from collections.abc import Iterator
+
+ from wind_up.models import PlotConfig, WindUpConfig
+
+
+class CurveTypes(str, Enum):
+    """Operational curves that are checked for a shift.
+
+    Members also inherit from ``str``, so each member compares equal to its string value.
+    """
+
+    # "<value>_shift" is the key used for each curve in the dictionary returned by check_for_ops_curve_shift
+    POWER_CURVE = "powercurve"
+    RPM = "rpm"
+    PITCH = "pitch"
+    WIND_SPEED = "windspeed"
+
+
+class CurveShiftOutput(NamedTuple):
+    """Result of a single curve shift calculation."""
+
+    value: float  # the calculated shift
+    warning_msg: str | None  # message to log when the shift exceeds the warning threshold, otherwise None
+
+
+# Default settings per curve type, used by CurveConfig for any field that is not supplied.
+# warning_threshold: calculate_curve_shift produces a warning message when the absolute shift exceeds this value
+#   (the pitch shift is an absolute difference; the other shifts are relative differences).
+# x_bin_width: width of the x bins used to average the pre curve; when it is not greater than 0,
+#   calculate_curve_shift asks pd.cut for 10 bins instead of fixed-width bins.
+CURVE_CONSTANTS = {
+    CurveTypes.POWER_CURVE.value: {"warning_threshold": 0.01, "x_bin_width": 1},
+    CurveTypes.RPM.value: {"warning_threshold": 0.005, "x_bin_width": 0},
+    CurveTypes.PITCH.value: {"warning_threshold": 0.1, "x_bin_width": 1},
+    CurveTypes.WIND_SPEED.value: {"warning_threshold": 0.01, "x_bin_width": 0.5},
+}
+
+
+class CurveConfig(BaseModel):
+    """Settings describing one operational curve to check for a shift.
+
+    ``x_bin_width`` and ``warning_threshold`` are optional: any of them left as ``None`` is filled in from
+    ``CURVE_CONSTANTS`` for the curve type given by ``name``.
+    """
+
+    name: CurveTypes
+    x_col: str
+    y_col: str
+    x_bin_width: int | float | None = None
+    warning_threshold: float | None = None
+
+    @model_validator(mode="after")
+    def validate_constants(self) -> CurveConfig:
+        """Replace unset optional fields with the defaults registered for this curve type."""
+        for field_name in ("x_bin_width", "warning_threshold"):
+            # the defaults table is only consulted for fields the caller left unset
+            if getattr(self, field_name) is None:
+                setattr(self, field_name, CURVE_CONSTANTS[self.name][field_name])
+        return self
+
+
+class OpsCurveRequiredColumns(BaseModel):
+    """Names of the dataframe columns required by the ops curve shift checks."""
+
+    wind_speed: str
+    power: str
+    pitch: str
+    rpm: str
+
+    def __iter__(self) -> Iterator[str]:  # type: ignore[override]
+        """Iterate over the column names in the order wind speed, power, pitch, rpm."""
+        column_names = (self.wind_speed, self.power, self.pitch, self.rpm)
+        return iter(column_names)
+
+
+class CurveShiftInput(BaseModel):
+    """Validated inputs for a single curve shift calculation.
+
+    After construction both dataframes are known to contain the curve's x and y columns and every required
+    ops curve column, and ``pre_df`` / ``post_df`` have been replaced by copies from which rows with NA in any
+    required column have been dropped.
+    """
+
+    turbine_name: str
+    pre_df: pd.DataFrame
+    post_df: pd.DataFrame
+    ops_curve_required_columns: OpsCurveRequiredColumns
+    curve_config: CurveConfig
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    @model_validator(mode="after")
+    def validate_dataframes(self) -> CurveShiftInput:
+        """Check the expected columns exist, then drop rows with NA in the required columns.
+
+        Raises:
+            IndexError: if a curve config column or a required column is missing from either dataframe.
+        """
+        pre_columns = self.pre_df.columns
+        post_columns = self.post_df.columns
+
+        # the curve's x and y columns must exist in both dataframes
+        for c in (self.curve_config.x_col, self.curve_config.y_col):
+            if c not in pre_columns:
+                err_msg = f"'{c}' column name missing in pre-dataframe"
+                raise IndexError(err_msg)
+            if c not in post_columns:
+                err_msg = f"'{c}' column name missing in post-dataframe"
+                raise IndexError(err_msg)
+
+        # every required ops curve column must exist in both dataframes
+        required_cols = set(self.ops_curve_required_columns)
+        if not (required_cols.issubset(pre_columns) and required_cols.issubset(post_columns)):
+            err_msg = "Column name missing in dataframe"
+            raise IndexError(err_msg)
+
+        # keep only the rows where all required columns are populated
+        na_subset = list(required_cols)
+        self.pre_df = self.pre_df.dropna(subset=na_subset).copy()
+        self.post_df = self.post_df.dropna(subset=na_subset).copy()
+
+        return self
+
+
+def check_for_ops_curve_shift(
+    pre_df: pd.DataFrame,
+    post_df: pd.DataFrame,
+    *,
+    wtg_name: str,
+    scada_ws_col: str,
+    pw_col: str,
+    rpm_col: str,
+    pt_col: str,
+    cfg: WindUpConfig,
+    plot_cfg: PlotConfig,
+    sub_dir: str | None = None,
+    plot: bool = True,
+) -> dict[str, float]:
+    """Calculate the shift of each operational curve between two dataframes for one turbine.
+
+    Four curves are checked: power vs wind speed, rpm vs power, pitch vs wind speed and wind speed vs power.
+    All warning messages produced by the individual checks are logged as a single warning.
+
+    Args:
+        pre_df: data used to build the reference curves.
+        post_df: data compared against the reference curves.
+        wtg_name: turbine name, used in warning messages and passed to the plot.
+        scada_ws_col: name of the wind speed column.
+        pw_col: name of the power column.
+        rpm_col: name of the rpm column.
+        pt_col: name of the pitch column.
+        cfg: analysis configuration; only ``cfg.toggle`` is read, to tell the plot whether this is a toggle test.
+        plot_cfg: plot configuration passed to ``compare_ops_curves_pre_post``.
+        sub_dir: optional sub directory passed to ``compare_ops_curves_pre_post``.
+        plot: whether to call ``compare_ops_curves_pre_post``.
+
+    Returns:
+        Dictionary with one ``"<curve type value>_shift"`` entry per curve type. Every entry is NaN when a
+        required column is missing from either dataframe.
+    """
+    # (curve type, x column, y column) for each check, in the order the results are reported
+    curve_specs = (
+        (CurveTypes.POWER_CURVE, scada_ws_col, pw_col),
+        (CurveTypes.RPM, pw_col, rpm_col),
+        (CurveTypes.PITCH, scada_ws_col, pt_col),
+        (CurveTypes.WIND_SPEED, pw_col, scada_ws_col),
+    )
+    results_dict: dict[str, float] = {f"{curve_type.value}_shift": np.nan for curve_type, _, _ in curve_specs}
+
+    required_cols = OpsCurveRequiredColumns(wind_speed=scada_ws_col, power=pw_col, pitch=pt_col, rpm=rpm_col)
+
+    if not _required_cols_are_present(
+        pre_df=pre_df, post_df=post_df, turbine_name=wtg_name, required_ops_curve_columns=required_cols
+    ):
+        return results_dict
+
+    warning_msgs: list[str] = []
+    for curve_type, x_col, y_col in curve_specs:
+        shift = calculate_curve_shift(
+            curve_shift_input=CurveShiftInput(
+                turbine_name=wtg_name,
+                pre_df=pre_df,
+                post_df=post_df,
+                curve_config=CurveConfig(name=curve_type, x_col=x_col, y_col=y_col),
+                ops_curve_required_columns=required_cols,
+            )
+        )
+        results_dict[f"{curve_type.value}_shift"] = shift.value
+        if shift.warning_msg:
+            warning_msgs.append(shift.warning_msg)
+
+    if warning_msgs:
+        # one warning per turbine keeps the stored warning count unchanged; the separator stops the
+        # individual messages from running into each other
+        result_manager.warning("; ".join(warning_msgs))
+
+    if plot:
+        compare_ops_curves_pre_post(
+            pre_df=pre_df,
+            post_df=post_df,
+            wtg_name=wtg_name,
+            ws_col=scada_ws_col,
+            pw_col=pw_col,
+            pt_col=pt_col,
+            rpm_col=rpm_col,
+            plot_cfg=plot_cfg,
+            is_toggle_test=(cfg.toggle is not None),
+            sub_dir=sub_dir,
+        )
+
+    return results_dict
+
+
+def _required_cols_are_present(
+    pre_df: pd.DataFrame, post_df: pd.DataFrame, turbine_name: str, required_ops_curve_columns: OpsCurveRequiredColumns
+) -> bool:
+    """Return True when every required column exists in both dataframes.
+
+    The first missing column found is reported through ``result_manager.warning`` and False is returned;
+    for each column ``pre_df`` is checked before ``post_df``.
+    """
+    pre_columns = pre_df.columns
+    post_columns = post_df.columns
+    for req_col in required_ops_curve_columns:
+        if req_col in pre_columns and req_col in post_columns:
+            continue
+        if req_col not in pre_columns:
+            msg = f"check_for_ops_curve_shift {turbine_name} pre_df missing required column {req_col}"
+        else:
+            msg = f"check_for_ops_curve_shift {turbine_name} post_df missing required column {req_col}"
+        result_manager.warning(msg)
+        return False
+    return True
+
+
+def calculate_curve_shift(curve_shift_input: CurveShiftInput) -> CurveShiftOutput:
+    """Estimate the shift of one operational curve between the pre and post dataframes.
+
+    The pre data is binned on the curve's x column and averaged per bin to give a reference curve. The reference
+    is interpolated at every post x value to give an expected y, which is stored in an ``expected_y`` column of
+    ``curve_shift_input.post_df``. The shift compares the mean observed y with the mean expected y.
+
+    Args:
+        curve_shift_input: validated dataframes, turbine name and curve configuration.
+
+    Returns:
+        ``CurveShiftOutput`` whose ``value`` is ``mean(y) - mean(expected_y)`` for the pitch curve and
+        ``mean(y) / mean(expected_y) - 1`` clipped to [-1, 1] for every other curve. ``value`` is NaN when no
+        reference curve can be built from the pre data. ``warning_msg`` is ``None`` unless the absolute shift
+        exceeds ``warning_threshold`` or no reference curve could be built.
+    """
+    conf = curve_shift_input.curve_config
+    pre_df = curve_shift_input.pre_df
+    post_df = curve_shift_input.post_df
+    wtg_name = curve_shift_input.turbine_name
+    # format the plain value: f-string output for a str-mixin Enum member is not the same on every Python version
+    curve_name = CurveTypes(conf.name).value
+
+    no_reference_curve = CurveShiftOutput(
+        value=np.nan,
+        warning_msg=f"{wtg_name} Ops Curve Shift warning: no pre data available to build the {curve_name} curve",
+    )
+
+    pre_x = pre_df[conf.x_col]
+    if not pre_x.notna().any():
+        # binning needs at least one x value; report NaN instead of raising from np.arange / pd.cut
+        return no_reference_curve
+
+    # a positive bin width gives fixed-width bins starting at 0; otherwise pd.cut is asked for 10 bins
+    bins = np.arange(0, pre_x.max() + conf.x_bin_width, conf.x_bin_width) if conf.x_bin_width > 0 else 10  # type: ignore[operator,var-annotated]
+
+    mean_curve = pre_df.groupby(pd.cut(pre_x, bins=bins, retbins=False), observed=True).agg(
+        x_mean=pd.NamedAgg(column=conf.x_col, aggfunc="mean"),
+        y_mean=pd.NamedAgg(column=conf.y_col, aggfunc="mean"),
+    )
+    if mean_curve.empty:
+        # every pre x value fell outside the bins, so there is nothing to interpolate against
+        return no_reference_curve
+
+    post_df["expected_y"] = np.interp(post_df[conf.x_col], mean_curve["x_mean"], mean_curve["y_mean"])
+    # only the two columns involved are averaged, so unrelated (possibly non-numeric) columns cannot break the mean
+    mean_y = post_df[conf.y_col].mean()
+    mean_expected_y = post_df["expected_y"].mean()
+
+    # equality, not ``in``: ``in`` on a str-mixin Enum member is a substring test
+    if conf.name == CurveTypes.PITCH:
+        result = mean_y - mean_expected_y
+    else:
+        result = np.clip(mean_y / mean_expected_y - 1, -1, 1)
+
+    # log warning
+    warning_msg = None
+    if abs(result) > conf.warning_threshold:
+        warning_msg = f"{wtg_name} Ops Curve Shift warning: abs({curve_name}) > {conf.warning_threshold}: {result:.3f}"
+
+    return CurveShiftOutput(value=result, warning_msg=warning_msg)