diff --git a/examples/smarteole_example.ipynb b/examples/smarteole_example.ipynb index e294b1b..1e4a2ac 100644 --- a/examples/smarteole_example.ipynb +++ b/examples/smarteole_example.ipynb @@ -62,7 +62,8 @@ "import logging\n", "import re\n", "\n", - "from IPython.display import Markdown, display\n", + "from IPython.display import Markdown\n", + "from IPython.display import display as ipy_display\n", "\n", "logging.basicConfig(format=\"%(message)s\", level=logging.INFO)\n", "\n", @@ -72,7 +73,7 @@ " message = self.format(record)\n", " # Replace newline characters with
tags\n", " message = re.sub(r\"\\n\", \"
\", message)\n", - " display(Markdown(message))\n", + " ipy_display(Markdown(message))\n", "\n", "\n", "logger = logging.getLogger()\n", @@ -2786,7 +2787,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a2457ea9318744fd80c413a42676fcd0", + "model_id": "3fbc8142f44a49bf8500c4e04e97b1ed", "version_major": 2, "version_minor": 0 }, @@ -2908,7 +2909,7 @@ { "data": { "text/markdown": [ - "{'ref': 'SMV7', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 314.4465998943834, 'bearing_deg': 173.69483366777283, 'ref_max_northing_error_v_reanalysis': np.float64(2.6590754551807976), 'ref_max_northing_error_v_wf': np.float64(2.842170943040401e-14), 'ref_max_ws_drift': np.float64(0.08697706942338845), 'ref_max_ws_drift_pp_period': np.float64(0.08697706942338845), 'ref_powercurve_shift': np.float64(0.003117456887993697), 'ref_rpm_shift': np.float64(0.0015313319638985412), 'ref_pitch_shift': np.float64(-0.05548555519736481), 'detrend_pre_r2_improvement': np.float64(0.004942384513915488), 'detrend_post_r2_improvement': np.float64(0.0011487228730558963), 'mean_power_pre': np.float64(1149.2323289820358), 'mean_power_post': np.float64(1148.4157066585956), 'mean_test_yaw_offset_pre': np.float64(-0.022511156888877885), 'mean_test_yaw_offset_post': np.float64(3.8213264238880478), 'mean_test_yaw_offset_command_pre': np.float64(0.0002638323353293413), 'mean_test_yaw_offset_command_post': np.float64(6.636967675544794), 'mean_ref_yaw_offset_command_pre': np.float64(0.0), 'test_ref_warning_counts': 0, 'time_calculated': Timestamp('2024-11-04 14:53:53.646317+0000', tz='UTC'), 'uplift_frc': np.float64(-0.010361770845398625), 'unc_one_sigma_frc': np.float64(0.0057851151530948705), 't_value_one_sigma': np.float64(1.000630119597717), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(132.5), 'pp_valid_hours_post': np.float64(136.0), 'pp_valid_hours': np.float64(268.5), 'pp_data_coverage': np.float64(0.11496467565831728), 'pp_invalid_bin_count': 
np.int64(16), 'uplift_noadj_frc': np.float64(-0.011505660672016103), 'unc_one_sigma_noadj_frc': np.float64(0.0057851151530948705), 'poweronly_uplift_frc': np.float64(-0.012003308408347353), 'reversed_uplift_frc': np.float64(-0.009715528755112396), 'reversal_error': np.float64(0.0022877796532349576), 'unc_one_sigma_lowerbound_frc': np.float64(0.0011438898266174788), 'unc_one_sigma_bootstrap_frc': np.float64(0.0049406532714673), 'uplift_p5_frc': np.float64(-0.000846103203498606), 'uplift_p95_frc': np.float64(-0.019877438487298643), 'wind_up_version': '0.1.9', 'test_wtg': 'SMV6', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(0.03653354205095605), 'test_max_ws_drift_pp_period': np.float64(0.03653354205095605), 'test_powercurve_shift': np.float64(0.0010615707256107498), 'test_rpm_shift': np.float64(0.0011316163321652972), 'test_pitch_shift': np.float64(-0.037158903030505286), 'preprocess_warning_counts': 0, 'test_warning_counts': 0}" + "{'ref': 'SMV7', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 314.4465998943834, 'bearing_deg': 173.69483366777283, 'ref_max_northing_error_v_reanalysis': np.float64(2.6590754551807976), 'ref_max_northing_error_v_wf': np.float64(2.842170943040401e-14), 'ref_max_ws_drift': np.float64(0.08697706942338845), 'ref_max_ws_drift_pp_period': np.float64(0.08697706942338845), 'ref_powercurve_shift': np.float64(0.003117456887993697), 'ref_rpm_shift': np.float64(0.0015313319638985412), 'ref_pitch_shift': np.float64(-0.05548555519736481), 'ref_wind_speed_shift': np.float64(-0.0009373123752394186), 'detrend_pre_r2_improvement': np.float64(0.004942384513915488), 'detrend_post_r2_improvement': np.float64(0.0011487228730558963), 'mean_power_pre': np.float64(1149.2323289820358), 'mean_power_post': np.float64(1148.4157066585956), 'mean_test_yaw_offset_pre': np.float64(-0.022511156888877885), 'mean_test_yaw_offset_post': np.float64(3.8213264238880478), 'mean_test_yaw_offset_command_pre': 
np.float64(0.0002638323353293413), 'mean_test_yaw_offset_command_post': np.float64(6.636967675544794), 'mean_ref_yaw_offset_command_pre': np.float64(0.0), 'test_ref_warning_counts': 0, 'time_calculated': Timestamp('2024-12-05 12:46:03.895478+0000', tz='UTC'), 'uplift_frc': np.float64(-0.010361770845398625), 'unc_one_sigma_frc': np.float64(0.0057851151530948705), 't_value_one_sigma': np.float64(1.000630119597717), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(132.5), 'pp_valid_hours_post': np.float64(136.0), 'pp_valid_hours': np.float64(268.5), 'pp_data_coverage': np.float64(0.11496467565831728), 'pp_invalid_bin_count': np.int64(16), 'uplift_noadj_frc': np.float64(-0.011505660672016103), 'unc_one_sigma_noadj_frc': np.float64(0.0057851151530948705), 'poweronly_uplift_frc': np.float64(-0.012003308408347353), 'reversed_uplift_frc': np.float64(-0.009715528755112396), 'reversal_error': np.float64(0.0022877796532349576), 'unc_one_sigma_lowerbound_frc': np.float64(0.0011438898266174788), 'unc_one_sigma_bootstrap_frc': np.float64(0.0049406532714673), 'uplift_p5_frc': np.float64(-0.000846103203498606), 'uplift_p95_frc': np.float64(-0.019877438487298643), 'wind_up_version': '0.1.10', 'test_wtg': 'SMV6', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(0.03653354205095605), 'test_max_ws_drift_pp_period': np.float64(0.03653354205095605), 'test_powercurve_shift': np.float64(0.0010615707256107498), 'test_rpm_shift': np.float64(0.0011316163321652972), 'test_pitch_shift': np.float64(-0.037158903030505286), 'test_wind_speed_shift': np.float64(-0.0063495234304998815), 'preprocess_warning_counts': 0, 'test_warning_counts': 0}" ], "text/plain": [ "" @@ -3748,7 +3749,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "df804baf4f2041b18c2dc67189a83f67", + "model_id": "45ef05430cb541a890aa60886e175144", "version_major": 2, "version_minor": 0 }, @@ -3870,7 +3871,7 @@ { "data": { 
"text/markdown": [ - "{'ref': 'SMV7', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 591.1178519927024, 'bearing_deg': 190.23567745705736, 'ref_max_northing_error_v_reanalysis': np.float64(2.6590754551807976), 'ref_max_northing_error_v_wf': np.float64(2.842170943040401e-14), 'ref_max_ws_drift': np.float64(0.08697706942338845), 'ref_max_ws_drift_pp_period': np.float64(0.08697706942338845), 'ref_powercurve_shift': np.float64(0.003117456887993697), 'ref_rpm_shift': np.float64(0.0015313319638985412), 'ref_pitch_shift': np.float64(-0.05548555519736481), 'detrend_pre_r2_improvement': np.float64(0.09621188863947527), 'detrend_post_r2_improvement': np.float64(0.11890364717818414), 'mean_power_pre': np.float64(955.493497245509), 'mean_power_post': np.float64(993.6911992736077), 'mean_test_yaw_offset_pre': np.float64(-2.2725466102034675), 'mean_test_yaw_offset_post': np.float64(-2.876981850327039), 'mean_test_yaw_offset_command_pre': np.float64(0.0), 'mean_test_yaw_offset_command_post': np.float64(0.0), 'mean_ref_yaw_offset_command_pre': np.float64(0.0), 'test_ref_warning_counts': 0, 'time_calculated': Timestamp('2024-11-04 14:55:21.795722+0000', tz='UTC'), 'uplift_frc': np.float64(0.030879346731271313), 'unc_one_sigma_frc': np.float64(0.01167847006525424), 't_value_one_sigma': np.float64(1.0006277462668354), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(133.0), 'pp_valid_hours_post': np.float64(137.16666666666669), 'pp_valid_hours': np.float64(270.1666666666667), 'pp_data_coverage': np.float64(0.11567829872261472), 'pp_invalid_bin_count': np.int64(16), 'uplift_noadj_frc': np.float64(0.030509447623790466), 'unc_one_sigma_noadj_frc': np.float64(0.01004760180633488), 'poweronly_uplift_frc': np.float64(0.029990866525649328), 'reversed_uplift_frc': np.float64(0.03073066474061102), 'reversal_error': np.float64(0.0007397982149616941), 'unc_one_sigma_lowerbound_frc': np.float64(0.00036989910748084706), 'unc_one_sigma_bootstrap_frc': 
np.float64(0.01167847006525424), 'uplift_p5_frc': np.float64(0.050088720575348945), 'uplift_p95_frc': np.float64(0.01166997288719368), 'wind_up_version': '0.1.9', 'test_wtg': 'SMV5', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(0.10726609831004863), 'test_max_ws_drift_pp_period': np.float64(0.10726609831004863), 'test_powercurve_shift': np.float64(-0.005678000921447213), 'test_rpm_shift': np.float64(0.0013951853610039144), 'test_pitch_shift': np.float64(-0.02783487184623068), 'preprocess_warning_counts': 0, 'test_warning_counts': 0}" + "{'ref': 'SMV7', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 591.1178519927024, 'bearing_deg': 190.23567745705736, 'ref_max_northing_error_v_reanalysis': np.float64(2.6590754551807976), 'ref_max_northing_error_v_wf': np.float64(2.842170943040401e-14), 'ref_max_ws_drift': np.float64(0.08697706942338845), 'ref_max_ws_drift_pp_period': np.float64(0.08697706942338845), 'ref_powercurve_shift': np.float64(0.003117456887993697), 'ref_rpm_shift': np.float64(0.0015313319638985412), 'ref_pitch_shift': np.float64(-0.05548555519736481), 'ref_wind_speed_shift': np.float64(-0.0009373123752394186), 'detrend_pre_r2_improvement': np.float64(0.09621188863947527), 'detrend_post_r2_improvement': np.float64(0.11890364717818414), 'mean_power_pre': np.float64(955.493497245509), 'mean_power_post': np.float64(993.6911992736077), 'mean_test_yaw_offset_pre': np.float64(-2.2725466102034675), 'mean_test_yaw_offset_post': np.float64(-2.876981850327039), 'mean_test_yaw_offset_command_pre': np.float64(0.0), 'mean_test_yaw_offset_command_post': np.float64(0.0), 'mean_ref_yaw_offset_command_pre': np.float64(0.0), 'test_ref_warning_counts': 0, 'time_calculated': Timestamp('2024-12-05 12:47:18.340985+0000', tz='UTC'), 'uplift_frc': np.float64(0.030879346731271313), 'unc_one_sigma_frc': np.float64(0.01167847006525424), 't_value_one_sigma': np.float64(1.0006277462668354), 
'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(133.0), 'pp_valid_hours_post': np.float64(137.16666666666669), 'pp_valid_hours': np.float64(270.1666666666667), 'pp_data_coverage': np.float64(0.11567829872261472), 'pp_invalid_bin_count': np.int64(16), 'uplift_noadj_frc': np.float64(0.030509447623790466), 'unc_one_sigma_noadj_frc': np.float64(0.01004760180633488), 'poweronly_uplift_frc': np.float64(0.029990866525649328), 'reversed_uplift_frc': np.float64(0.03073066474061102), 'reversal_error': np.float64(0.0007397982149616941), 'unc_one_sigma_lowerbound_frc': np.float64(0.00036989910748084706), 'unc_one_sigma_bootstrap_frc': np.float64(0.01167847006525424), 'uplift_p5_frc': np.float64(0.050088720575348945), 'uplift_p95_frc': np.float64(0.01166997288719368), 'wind_up_version': '0.1.10', 'test_wtg': 'SMV5', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(0.10726609831004863), 'test_max_ws_drift_pp_period': np.float64(0.10726609831004863), 'test_powercurve_shift': np.float64(-0.005678000921447213), 'test_rpm_shift': np.float64(0.0013951853610039144), 'test_pitch_shift': np.float64(-0.02783487184623068), 'test_wind_speed_shift': np.float64(-0.003742689588962622), 'preprocess_warning_counts': 0, 'test_warning_counts': 0}" ], "text/plain": [ "" diff --git a/pyproject.toml b/pyproject.toml index 14076a0..513e721 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -132,7 +132,10 @@ filterwarnings = [ omit = [ "wind_up/plots/*.py", ] -exclude_lines = ["if __name__ == .__main__.:"] +exclude_lines = [ + "if __name__ == .__main__.:", + "if TYPE_CHECKING:" +] [tool.poe.tasks] [tool.poe.tasks.lint] diff --git a/tests/test_ops_curve_shift.py b/tests/test_ops_curve_shift.py new file mode 100644 index 0000000..fd55484 --- /dev/null +++ b/tests/test_ops_curve_shift.py @@ -0,0 +1,360 @@ +import logging +from unittest.mock import Mock, patch + +import numpy as np +import pandas as pd +import pytest + 
from wind_up.ops_curve_shift import (
    CurveConfig,
    CurveShiftInput,
    CurveShiftOutput,
    CurveTypes,
    OpsCurveRequiredColumns,
    calculate_curve_shift,
    check_for_ops_curve_shift,
)


def _explicit_power_curve_config(columns: OpsCurveRequiredColumns) -> CurveConfig:
    """Return a power-curve config whose bin width and warning threshold are passed explicitly."""
    return CurveConfig(
        name=CurveTypes.POWER_CURVE.value,
        x_col=columns.wind_speed,
        y_col=columns.power,
        x_bin_width=1,
        warning_threshold=0.01,
    )


@pytest.fixture
def fake_required_columns() -> OpsCurveRequiredColumns:
    """Provide the column-name mapping used by the fake SCADA dataframe."""
    column_names = {
        "wind_speed": "wind_speed",
        "power": "active_power",
        "rpm": "gen_rpm",
        "pitch": "pitch_angle",
    }
    return OpsCurveRequiredColumns(**column_names)


@pytest.fixture
def fake_curve_df(fake_required_columns: OpsCurveRequiredColumns) -> pd.DataFrame:
    """Provide a 15-row dataframe of fake operational curves; the power column holds one NaN."""
    cols = fake_required_columns
    wind_speed = list(range(15))
    power = [0, 0, np.nan, 1, 3, 6, 10, 15, 22, 30, 36, 39, 40, 40, 40]
    rpm = [900, 900, 850, 875, 900, 1000, 1100, 1200, 1350, 1500, 1600, 1600, 1600, 1600, 1600]
    pitch = [4, 4, 4, 3, 2, 1, 1, 1, 2, 5, 8, 11, 13, 14, 15]
    return pd.DataFrame(
        {
            cols.wind_speed: wind_speed,
            cols.power: power,
            cols.rpm: rpm,
            cols.pitch: pitch,
        }
    )


class TestCurveShiftInput:
    """Construction-time checks on ``CurveShiftInput``."""

    @staticmethod
    def test_acceptable_inputs(fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns) -> None:
        """Building the model from complete dataframes must not raise."""
        power_curve = _explicit_power_curve_config(fake_required_columns)
        _input = CurveShiftInput(
            turbine_name="anything",
            pre_df=fake_curve_df,
            post_df=fake_curve_df,
            curve_config=power_curve,
            ops_curve_required_columns=fake_required_columns,
        )

    @pytest.mark.parametrize("column_name", ["wind_speed", "active_power"])
    def test_missing_column_in_pre_df(
        self, column_name: str, fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns
    ) -> None:
        """An x/y column absent from the pre-dataframe is reported with an IndexError."""
        incomplete_pre_df = fake_curve_df.drop(columns=column_name)
        shifted_post_df = fake_curve_df + 2
        power_curve = _explicit_power_curve_config(fake_required_columns)
        expected_error = f"'{column_name}' column name missing in pre-dataframe"

        with pytest.raises(IndexError, match=expected_error):
            CurveShiftInput(
                turbine_name="anything",
                pre_df=incomplete_pre_df,
                post_df=shifted_post_df,
                curve_config=power_curve,
                ops_curve_required_columns=fake_required_columns,
            )

    @pytest.mark.parametrize("column_name", ["wind_speed", "active_power"])
    def test_missing_column_in_post_df(
        self, column_name: str, fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns
    ) -> None:
        """An x/y column absent from the post-dataframe is reported with an IndexError."""
        incomplete_post_df = (fake_curve_df + 2).drop(columns=column_name)
        power_curve = _explicit_power_curve_config(fake_required_columns)
        expected_error = f"'{column_name}' column name missing in post-dataframe"

        with pytest.raises(IndexError, match=expected_error):
            CurveShiftInput(
                turbine_name="anything",
                pre_df=fake_curve_df,
                post_df=incomplete_post_df,
                curve_config=power_curve,
                ops_curve_required_columns=fake_required_columns,
            )


@pytest.mark.parametrize(
    ("shift_amount", "expected"),
    [
        pytest.param(0.0, 0.0, id="zero"),
        pytest.param(2.0, -0.1376912378303199, id="shift DOES exceed threshold"),
        pytest.param(0.05, -0.004489831851395176, id="shift DOES NOT exceed threshold"),
    ],
)
def test_calculate_power_curve_shift(
    shift_amount: float, expected: float, fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns
) -> None:
    """Adding a constant to every column of the post-dataframe gives the expected power-curve shift."""
    power_curve = CurveConfig(
        name=CurveTypes.POWER_CURVE,
        x_col=fake_required_columns.wind_speed,
        y_col=fake_required_columns.power,
    )
    shift_input = CurveShiftInput(
        turbine_name="anything",
        pre_df=fake_curve_df,
        post_df=(fake_curve_df + shift_amount),
        curve_config=power_curve,
        ops_curve_required_columns=fake_required_columns,
    )

    # the CurveShiftInput pydantic model is expected to have removed the NaN rows
    for cleaned_df in (shift_input.pre_df, shift_input.post_df):
        assert not cleaned_df.isna().to_numpy().any()

    shift = calculate_curve_shift(curve_shift_input=shift_input)

    np.testing.assert_almost_equal(actual=shift.value, desired=expected)

+@pytest.mark.parametrize( + ("shift_amount", "expected"), + [ + pytest.param(0.2, -0.00865091569970633, id="shift DOES exceed threshold"), + pytest.param(0.1, -0.004926790475744736, id="shift DOES NOT exceed threshold"), + ], +) +def test_calculate_rpm_curve_shift( + shift_amount: float, + expected: float, + fake_curve_df: pd.DataFrame, + fake_required_columns: OpsCurveRequiredColumns, + caplog: pytest.LogCaptureFixture, +) -> None: + with caplog.at_level(logging.WARNING): + actual = calculate_curve_shift( + curve_shift_input=CurveShiftInput( + turbine_name="anything", + pre_df=fake_curve_df, + post_df=(fake_curve_df + shift_amount), + curve_config=CurveConfig(name=CurveTypes.RPM, x_col="wind_speed", y_col="gen_rpm"), + ops_curve_required_columns=fake_required_columns, + ), + ) + + np.testing.assert_almost_equal(actual=actual.value, desired=expected) + + +@pytest.mark.parametrize( + ("shift_amount", "expected"), + [ + pytest.param(0.0, 0.0, id="zero"), + pytest.param(0.6, 0.10714285714285765, id="shift DOES exceed threshold"), + pytest.param(0.5, 0.08928571428571441, id="shift DOES NOT exceed threshold"), + ], +) +def test_calculate_pitch_curve_shift( + shift_amount: float, expected: float, fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns +) -> None: + actual = calculate_curve_shift( + curve_shift_input=CurveShiftInput( + turbine_name="anything", + pre_df=fake_curve_df, + post_df=(fake_curve_df + shift_amount), + curve_config=CurveConfig( + name=CurveTypes.PITCH, x_col=fake_required_columns.wind_speed, y_col=fake_required_columns.pitch + ), + ops_curve_required_columns=fake_required_columns, + ) + ) + + np.testing.assert_almost_equal(actual=actual.value, desired=expected) + + +@pytest.mark.parametrize( + ("shift_amount", "expected"), + [ + pytest.param(2.0, 0.13811720414537776, id="shift DOES exceed threshold"), + pytest.param(0.0, -0.04629629629629639, id="shift DOES NOT exceed threshold"), + ], +) +def 
test_calculate_wind_speed_curve_shift( + shift_amount: float, expected: float, fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns +) -> None: + _df = fake_curve_df.copy() + actual = calculate_curve_shift( + curve_shift_input=CurveShiftInput( + turbine_name="anything", + pre_df=_df, + post_df=(_df + shift_amount), + curve_config=CurveConfig( + name=CurveTypes.WIND_SPEED, x_col=fake_required_columns.power, y_col=fake_required_columns.wind_speed + ), + ops_curve_required_columns=fake_required_columns, + ) + ) + + np.testing.assert_almost_equal(actual=actual.value, desired=expected) + + +class TestCheckForOpsCurveShift: + @pytest.mark.parametrize( + ("pre_df_or_post_df", "missing_column"), + [ + ("pre", "wind_speed"), + ("pre", "active_power"), + ("pre", "gen_rpm"), + ("pre", "pitch_angle"), + ("post", "wind_speed"), + ("post", "active_power"), + ("post", "gen_rpm"), + ("post", "pitch_angle"), + ], + ) + def test_missing_required_column( + self, + pre_df_or_post_df: str, + missing_column: str, + fake_curve_df: pd.DataFrame, + ) -> None: + _df = fake_curve_df.copy() + + pre_df = _df.drop(columns=missing_column) if pre_df_or_post_df == "pre" else _df + post_df = _df.drop(columns=missing_column) if pre_df_or_post_df == "post" else _df + + actual = check_for_ops_curve_shift( + pre_df=pre_df, + post_df=post_df, + wtg_name="anything", + scada_ws_col="wind_speed", + pw_col="power", + rpm_col="gen_rpm", + pt_col="pitch", + cfg=Mock(), + plot_cfg=Mock(), + plot=False, + ) + + expected = { + f"{CurveTypes.POWER_CURVE.value}_shift": np.nan, + f"{CurveTypes.RPM.value}_shift": np.nan, + f"{CurveTypes.PITCH.value}_shift": np.nan, + f"{CurveTypes.WIND_SPEED.value}_shift": np.nan, + } + + assert actual == expected + + def test_calls_funcs_as_intended( + self, fake_curve_df: pd.DataFrame, fake_required_columns: OpsCurveRequiredColumns + ) -> None: + _df = fake_curve_df.copy() + + wtg_name = "anything" + + with ( + patch( + 
"wind_up.ops_curve_shift.calculate_curve_shift", + return_value=CurveShiftOutput(value=np.nan, warning_msg=None), + ) as mock_curve_shift, + patch("wind_up.ops_curve_shift.compare_ops_curves_pre_post", return_value=None) as mock_plot_func, + ): + mock_wind_up_conf = Mock() + mock_wind_up_conf.toggle = True + mock_plot_conf = Mock() + + actual = check_for_ops_curve_shift( + pre_df=_df, + post_df=_df, + wtg_name=wtg_name, + scada_ws_col=fake_required_columns.wind_speed, + pw_col=fake_required_columns.power, + rpm_col=fake_required_columns.rpm, + pt_col=fake_required_columns.pitch, + cfg=mock_wind_up_conf, + plot_cfg=mock_plot_conf, + ) + + # define expected call inputs + curve_input_power = CurveShiftInput( + turbine_name=wtg_name, + pre_df=_df, + post_df=_df, + ops_curve_required_columns=fake_required_columns, + curve_config=CurveConfig( + name=CurveTypes.POWER_CURVE, x_col=fake_required_columns.wind_speed, y_col=fake_required_columns.power + ), + ) + curve_input_rpm = CurveShiftInput( + turbine_name=wtg_name, + pre_df=_df, + post_df=_df, + ops_curve_required_columns=fake_required_columns, + curve_config=CurveConfig( + name=CurveTypes.RPM, x_col=fake_required_columns.power, y_col=fake_required_columns.rpm + ), + ) + curve_input_pitch = CurveShiftInput( + turbine_name=wtg_name, + pre_df=_df, + post_df=_df, + ops_curve_required_columns=fake_required_columns, + curve_config=CurveConfig( + name=CurveTypes.PITCH, x_col=fake_required_columns.wind_speed, y_col=fake_required_columns.pitch + ), + ) + curve_input_wind_speed = CurveShiftInput( + turbine_name=wtg_name, + pre_df=_df, + post_df=_df, + ops_curve_required_columns=fake_required_columns, + curve_config=CurveConfig( + name=CurveTypes.WIND_SPEED, x_col=fake_required_columns.power, y_col=fake_required_columns.wind_speed + ), + ) + _call_inputs_list = [curve_input_power, curve_input_rpm, curve_input_pitch, curve_input_wind_speed] + + # check calls are made with expected inputs + for _call, _input in 
zip(mock_curve_shift.mock_calls, _call_inputs_list): + pd.testing.assert_frame_equal(_call.kwargs["curve_shift_input"].pre_df, _input.pre_df) + pd.testing.assert_frame_equal(_call.kwargs["curve_shift_input"].post_df, _input.post_df) + assert _call.kwargs["curve_shift_input"].model_dump(exclude=["pre_df", "post_df"]) == _input.model_dump( + exclude=["pre_df", "post_df"] + ) + + mock_plot_func.assert_called_once_with( + pre_df=_df, + post_df=_df, + wtg_name=wtg_name, + ws_col=fake_required_columns.wind_speed, + pw_col=fake_required_columns.power, + pt_col=fake_required_columns.pitch, + rpm_col=fake_required_columns.rpm, + plot_cfg=mock_plot_conf, + is_toggle_test=mock_wind_up_conf.toggle is not None, + sub_dir=None, + ) + + expected = { + f"{CurveTypes.POWER_CURVE.value}_shift": np.nan, + f"{CurveTypes.RPM.value}_shift": np.nan, + f"{CurveTypes.PITCH.value}_shift": np.nan, + f"{CurveTypes.WIND_SPEED.value}_shift": np.nan, + } + + assert actual == expected diff --git a/wind_up/main_analysis.py b/wind_up/main_analysis.py index 17d3d19..7ac8ac2 100644 --- a/wind_up/main_analysis.py +++ b/wind_up/main_analysis.py @@ -24,9 +24,10 @@ from wind_up.northing import ( check_wtg_northing, ) +from wind_up.ops_curve_shift import CurveTypes, check_for_ops_curve_shift from wind_up.plots.data_coverage_plots import plot_detrend_data_cov, plot_pre_post_data_cov from wind_up.plots.detrend_plots import plot_apply_wsratio_v_wd_scen -from wind_up.plots.scada_funcs_plots import compare_ops_curves_pre_post, print_filter_stats +from wind_up.plots.scada_funcs_plots import print_filter_stats from wind_up.plots.yaw_direction_plots import plot_yaw_direction_pre_post from wind_up.pp_analysis import pre_post_pp_analysis_with_reversal_and_bootstrapping from wind_up.result_manager import result_manager @@ -365,78 +366,6 @@ def yaw_offset_results( return results -def check_for_ops_curve_shift( - pre_df: pd.DataFrame, - post_df: pd.DataFrame, - *, - wtg_name: str, - scada_ws_col: str, - pw_col: str, - 
rpm_col: str, - pt_col: str, - cfg: WindUpConfig, - plot_cfg: PlotConfig, - sub_dir: str | None = None, -) -> dict[str, float]: - results_dict = { - "powercurve_shift": np.nan, - "rpm_shift": np.nan, - "pitch_shift": np.nan, - } - # check if all required columns are present - required_cols = [scada_ws_col, pw_col, pt_col, rpm_col] - for req_col in required_cols: - if req_col not in pre_df.columns: - msg = f"check_for_ops_curve_shift {wtg_name} pre_df missing required column {req_col}" - result_manager.warning(msg) - return results_dict - if req_col not in post_df.columns: - msg = f"check_for_ops_curve_shift {wtg_name} post_df missing required column {req_col}" - result_manager.warning(msg) - return results_dict - pre_dropna_df = pre_df.dropna(subset=[scada_ws_col, pw_col, pt_col, rpm_col]).copy() - post_dropna_df = post_df.dropna(subset=[scada_ws_col, pw_col, pt_col, rpm_col]).copy() - - warning_msg: str | None = None - for descr, x_var, y_var, x_bin_width, warn_thresh in [ - ("powercurve_shift", scada_ws_col, pw_col, 1, 0.01), - ("rpm_shift", pw_col, rpm_col, 0, 0.005), - ("pitch_shift", scada_ws_col, pt_col, 1, 0.1), - ]: - bins = np.arange(0, pre_dropna_df[x_var].max() + x_bin_width, x_bin_width) if x_bin_width > 0 else 10 - mean_curve = pre_dropna_df.groupby(pd.cut(pre_dropna_df[x_var], bins=bins, retbins=False), observed=True).agg( - x_mean=pd.NamedAgg(column=x_var, aggfunc="mean"), - y_mean=pd.NamedAgg(column=y_var, aggfunc="mean"), - ) - post_dropna_df["expected_y"] = np.interp(post_dropna_df[x_var], mean_curve["x_mean"], mean_curve["y_mean"]) - mean_df = post_dropna_df.mean() - if y_var == pt_col: - results_dict[descr] = mean_df[y_var] - mean_df["expected_y"] - else: - results_dict[descr] = (mean_df[y_var] / mean_df["expected_y"] - 1).clip(-1, 1) - if abs(results_dict[descr]) > warn_thresh: - if warning_msg is None: - warning_msg = f"{wtg_name} check_for_ops_curve_shift warnings:" - warning_msg += f" abs({descr}) > {warn_thresh}: 
{abs(results_dict[descr]):.3f}" - if warning_msg is not None: - result_manager.warning(warning_msg) - - compare_ops_curves_pre_post( - pre_df=pre_df, - post_df=post_df, - wtg_name=wtg_name, - ws_col=scada_ws_col, - pw_col=pw_col, - pt_col=pt_col, - rpm_col=rpm_col, - plot_cfg=plot_cfg, - is_toggle_test=(cfg.toggle is not None), - sub_dir=sub_dir, - ) - - return results_dict - - def calc_test_ref_results( *, test_df: pd.DataFrame, @@ -723,9 +652,10 @@ def calc_test_ref_results( "ref_max_northing_error_v_wf": ref_max_northing_error_v_wf, "ref_max_ws_drift": ref_max_ws_drift, "ref_max_ws_drift_pp_period": ref_max_ws_drift_pp_period, - "ref_powercurve_shift": ref_ops_curve_shift_dict["powercurve_shift"], - "ref_rpm_shift": ref_ops_curve_shift_dict["rpm_shift"], - "ref_pitch_shift": ref_ops_curve_shift_dict["pitch_shift"], + "ref_powercurve_shift": ref_ops_curve_shift_dict[f"{CurveTypes.POWER_CURVE.value}_shift"], + "ref_rpm_shift": ref_ops_curve_shift_dict[f"{CurveTypes.RPM.value}_shift"], + "ref_pitch_shift": ref_ops_curve_shift_dict[f"{CurveTypes.PITCH.value}_shift"], + "ref_wind_speed_shift": ref_ops_curve_shift_dict[f"{CurveTypes.WIND_SPEED.value}_shift"], "detrend_pre_r2_improvement": detrend_pre_r2_improvement, "detrend_post_r2_improvement": detrend_post_r2_improvement, "mean_power_pre": pre_df.dropna(subset=[detrend_ws_col, test_pw_col, ref_wd_col])[test_pw_col].mean(), @@ -874,9 +804,10 @@ def run_wind_up_analysis( "lt_wtg_hours_filt": lt_df_filt["observed_hours"].sum() if lt_df_filt is not None else 0, "test_max_ws_drift": test_max_ws_drift, "test_max_ws_drift_pp_period": test_max_ws_drift_pp_period, - "test_powercurve_shift": test_ops_curve_shift_dict["powercurve_shift"], - "test_rpm_shift": test_ops_curve_shift_dict["rpm_shift"], - "test_pitch_shift": test_ops_curve_shift_dict["pitch_shift"], + "test_powercurve_shift": test_ops_curve_shift_dict[f"{CurveTypes.POWER_CURVE.value}_shift"], + "test_rpm_shift": 
# NOTE: the commented text below is patch context that shared a physical line with
# the start of this module in the collapsed diff. It is preserved verbatim: first the
# tail of the previous file's hunk, then the header of this new file.
#
#   test_ops_curve_shift_dict[f"{CurveTypes.RPM.value}_shift"],
#   + "test_pitch_shift": test_ops_curve_shift_dict[f"{CurveTypes.PITCH.value}_shift"],
#   + "test_wind_speed_shift": test_ops_curve_shift_dict[f"{CurveTypes.WIND_SPEED.value}_shift"],
#   "preprocess_warning_counts": preprocess_warning_counts,
#   "test_warning_counts": len(result_manager.stored_warnings),
#   }
#
#   diff --git a/wind_up/ops_curve_shift.py b/wind_up/ops_curve_shift.py
#   new file mode 100644
#   index 0000000..9f210fb
#   --- /dev/null
#   +++ b/wind_up/ops_curve_shift.py
#   @@ -0,0 +1,239 @@
"""Check whether a turbine's operational curves shifted between a pre and a post data set.

For each curve in :class:`CurveTypes` a binned mean curve is built from the pre data,
evaluated at the post data's x values, and compared with the post data's actual mean.
"""

from __future__ import annotations

from enum import Enum
from typing import TYPE_CHECKING, NamedTuple

import numpy as np
import pandas as pd
from pydantic import BaseModel, ConfigDict, model_validator

from wind_up.plots.scada_funcs_plots import compare_ops_curves_pre_post
from wind_up.result_manager import result_manager

if TYPE_CHECKING:
    from collections.abc import Iterator

    from wind_up.models import PlotConfig, WindUpConfig


class CurveTypes(str, Enum):
    """Curves checked for a shift; each value is the prefix of a ``<value>_shift`` result key."""

    POWER_CURVE = "powercurve"
    RPM = "rpm"
    PITCH = "pitch"
    WIND_SPEED = "windspeed"


class CurveShiftOutput(NamedTuple):
    """Result of one curve-shift calculation."""

    # shift of the post data relative to the pre-data curve (NaN when it cannot be computed)
    value: float
    # message to report when abs(value) exceeds the curve's warning threshold, else None
    warning_msg: str | None


# Per-curve defaults, keyed by ``CurveTypes.<X>.value``.
# ``warning_threshold``: abs(shift) above this produces a warning message.
# ``x_bin_width``: width of the x bins used to build the pre-data mean curve; a value of 0
# makes ``calculate_curve_shift`` fall back to 10 equal-width bins.
CURVE_CONSTANTS: dict[str, dict[str, float]] = {
    CurveTypes.POWER_CURVE.value: {"warning_threshold": 0.01, "x_bin_width": 1},
    CurveTypes.RPM.value: {"warning_threshold": 0.005, "x_bin_width": 0},
    CurveTypes.PITCH.value: {"warning_threshold": 0.1, "x_bin_width": 1},
    CurveTypes.WIND_SPEED.value: {"warning_threshold": 0.01, "x_bin_width": 0.5},
}


class CurveConfig(BaseModel):
    """Which columns form a curve, plus its binning width and warning threshold.

    ``x_bin_width`` and ``warning_threshold`` default to the entry for ``name`` in
    ``CURVE_CONSTANTS`` when they are not given.
    """

    name: CurveTypes
    x_col: str
    y_col: str
    x_bin_width: int | float | None = None
    warning_threshold: float | None = None

    @model_validator(mode="after")
    def validate_constants(self) -> CurveConfig:
        """Fill any unset constant from ``CURVE_CONSTANTS``."""
        # CURVE_CONSTANTS is keyed by the enum *values*, so look up by value explicitly.
        defaults = CURVE_CONSTANTS[self.name.value]
        if self.x_bin_width is None:
            self.x_bin_width = defaults["x_bin_width"]
        if self.warning_threshold is None:
            self.warning_threshold = defaults["warning_threshold"]
        return self


class OpsCurveRequiredColumns(BaseModel):
    """Names of the four data columns the ops-curve checks need."""

    wind_speed: str
    power: str
    pitch: str
    rpm: str

    def __iter__(self) -> Iterator[str]:  # type: ignore[override]
        """Iterate over the column *names* (not over field/value pairs as BaseModel does)."""
        return iter([self.wind_speed, self.power, self.pitch, self.rpm])


class CurveShiftInput(BaseModel):
    """Validated input bundle for :func:`calculate_curve_shift`.

    After validation ``pre_df`` and ``post_df`` are new frames with every row that has
    an NA in a required column removed.

    Raises:
        IndexError: if a curve column or a required column is missing from either frame.
    """

    turbine_name: str
    pre_df: pd.DataFrame
    post_df: pd.DataFrame
    ops_curve_required_columns: OpsCurveRequiredColumns
    curve_config: CurveConfig
    model_config = ConfigDict(arbitrary_types_allowed=True)

    @model_validator(mode="after")
    def validate_dataframes(self) -> CurveShiftInput:
        """Check the needed columns exist, then drop rows with NA in the required columns."""
        frames = (("pre", self.pre_df), ("post", self.post_df))

        # check curve config columns are present in dataframes
        for c in [self.curve_config.x_col, self.curve_config.y_col]:
            for label, df in frames:
                if c not in df.columns:
                    err_msg = f"'{c}' column name missing in {label}-dataframe"
                    raise IndexError(err_msg)

        # check required columns are present in dataframes
        required_cols = set(self.ops_curve_required_columns)
        missing_in_pre = required_cols - set(self.pre_df.columns)
        missing_in_post = required_cols - set(self.post_df.columns)
        if missing_in_pre or missing_in_post:
            # keep the original message as a prefix and say which columns are missing
            err_msg = (
                "Column name missing in dataframe "
                f"(pre: {sorted(missing_in_pre)}, post: {sorted(missing_in_post)})"
            )
            raise IndexError(err_msg)

        # remove NA
        na_subset = sorted(required_cols)
        self.pre_df = self.pre_df.dropna(subset=na_subset).copy()
        self.post_df = self.post_df.dropna(subset=na_subset).copy()

        return self


def check_for_ops_curve_shift(
    pre_df: pd.DataFrame,
    post_df: pd.DataFrame,
    *,
    wtg_name: str,
    scada_ws_col: str,
    pw_col: str,
    rpm_col: str,
    pt_col: str,
    cfg: WindUpConfig,
    plot_cfg: PlotConfig,
    sub_dir: str | None = None,
    plot: bool = True,
) -> dict[str, float]:
    """Calculate the shift of every operational curve and report any that exceed their threshold.

    Args:
        pre_df: data the reference curves are built from.
        post_df: data compared against the reference curves.
        wtg_name: turbine name, used in warning messages and passed to the plot function.
        scada_ws_col: name of the wind speed column.
        pw_col: name of the power column.
        rpm_col: name of the rpm column.
        pt_col: name of the pitch column.
        cfg: only ``cfg.toggle`` is read here, to tell the plot function whether this is a toggle test.
        plot_cfg: passed through to ``compare_ops_curves_pre_post``.
        sub_dir: passed through to ``compare_ops_curves_pre_post``.
        plot: when true, call ``compare_ops_curves_pre_post`` after the calculations.

    Returns:
        Dict with keys ``powercurve_shift``, ``rpm_shift``, ``pitch_shift`` and ``windspeed_shift``.
        All values are NaN (and a warning is logged) if a required column is missing from either frame.
    """
    # (curve, x column, y column) in the order results are reported
    curve_specs = (
        (CurveTypes.POWER_CURVE, scada_ws_col, pw_col),
        (CurveTypes.RPM, pw_col, rpm_col),
        (CurveTypes.PITCH, scada_ws_col, pt_col),
        (CurveTypes.WIND_SPEED, pw_col, scada_ws_col),
    )
    results_dict: dict[str, float] = {f"{curve.value}_shift": np.nan for curve, _, _ in curve_specs}

    required_cols = OpsCurveRequiredColumns(wind_speed=scada_ws_col, power=pw_col, pitch=pt_col, rpm=rpm_col)

    if not _required_cols_are_present(
        pre_df=pre_df, post_df=post_df, turbine_name=wtg_name, required_ops_curve_columns=required_cols
    ):
        return results_dict

    warning_msgs: list[str] = []
    for curve, x_col, y_col in curve_specs:
        shift = calculate_curve_shift(
            curve_shift_input=CurveShiftInput(
                turbine_name=wtg_name,
                pre_df=pre_df,
                post_df=post_df,
                curve_config=CurveConfig(name=curve, x_col=x_col, y_col=y_col),
                ops_curve_required_columns=required_cols,
            )
        )
        results_dict[f"{curve.value}_shift"] = shift.value
        if shift.warning_msg is not None:
            warning_msgs.append(shift.warning_msg)

    if warning_msgs:
        # one combined warning per turbine; separate the messages so they stay readable
        result_manager.warning("; ".join(warning_msgs))

    if plot:
        compare_ops_curves_pre_post(
            pre_df=pre_df,
            post_df=post_df,
            wtg_name=wtg_name,
            ws_col=scada_ws_col,
            pw_col=pw_col,
            pt_col=pt_col,
            rpm_col=rpm_col,
            plot_cfg=plot_cfg,
            is_toggle_test=(cfg.toggle is not None),
            sub_dir=sub_dir,
        )

    return results_dict


def _required_cols_are_present(
    pre_df: pd.DataFrame, post_df: pd.DataFrame, turbine_name: str, required_ops_curve_columns: OpsCurveRequiredColumns
) -> bool:
    """Return True if both frames have every required column; otherwise warn about the first gap.

    Columns are checked one at a time, ``pre_df`` before ``post_df``; only the first missing
    column is reported through ``result_manager.warning``.
    """
    for req_col in required_ops_curve_columns:
        for label, df in (("pre_df", pre_df), ("post_df", post_df)):
            if req_col not in df.columns:
                msg = f"check_for_ops_curve_shift {turbine_name} {label} missing required column {req_col}"
                result_manager.warning(msg)
                return False
    return True


def calculate_curve_shift(curve_shift_input: CurveShiftInput) -> CurveShiftOutput:
    """Compare the post data's mean y with the y expected from the pre-data curve.

    The pre data is binned on the x column (bins of ``x_bin_width`` starting at 0, or 10
    equal-width bins when ``x_bin_width`` is not positive) and the per-bin means of x and y
    form the reference curve. That curve is linearly interpolated at every post x value.

    Returns:
        ``value`` is ``mean(post y) - mean(expected y)`` for the pitch curve and
        ``mean(post y) / mean(expected y) - 1`` clipped to [-1, 1] for every other curve.
        ``value`` is NaN when either frame is empty or no reference curve can be built.
        ``warning_msg`` is set only when ``abs(value)`` exceeds the curve's warning threshold.
    """
    conf = curve_shift_input.curve_config
    pre_df = curve_shift_input.pre_df
    post_df = curve_shift_input.post_df
    wtg_name = curve_shift_input.turbine_name

    no_result = CurveShiftOutput(value=np.nan, warning_msg=None)
    if pre_df.empty or post_df.empty:
        return no_result

    x_max = pre_df[conf.x_col].max()
    if not np.isfinite(x_max):
        # every pre x value is NaN (or infinite): bin edges cannot be built
        return no_result

    bins = np.arange(0, x_max + conf.x_bin_width, conf.x_bin_width) if conf.x_bin_width > 0 else 10  # type: ignore[operator,var-annotated]
    if not isinstance(bins, int) and len(bins) < 2:  # noqa: PLR2004
        # fewer than two edges means there is no interval to bin into
        return no_result

    mean_curve = pre_df.groupby(pd.cut(pre_df[conf.x_col], bins=bins, retbins=False), observed=True).agg(
        x_mean=pd.NamedAgg(column=conf.x_col, aggfunc="mean"),
        y_mean=pd.NamedAgg(column=conf.y_col, aggfunc="mean"),
    )
    if mean_curve.empty:
        # no pre data fell inside any bin, so there is nothing to interpolate
        return no_result

    # Keep the interpolated values local instead of writing a column into the input frame,
    # and average only the two series needed: a whole-frame mean can raise on non-numeric columns.
    expected_y = np.interp(post_df[conf.x_col], mean_curve["x_mean"], mean_curve["y_mean"])
    actual_mean = np.float64(post_df[conf.y_col].mean())
    expected_mean = np.float64(pd.Series(expected_y).mean())

    if conf.name == CurveTypes.PITCH:
        # pitch is compared as an absolute difference rather than a ratio
        result = actual_mean - expected_mean
    else:
        result = np.clip(actual_mean / expected_mean - 1, -1, 1)

    # log warning
    warning_msg = None
    if abs(result) > conf.warning_threshold:
        # use .value explicitly: how an f-string renders a str-mixin Enum member
        # depends on the Python version
        warning_msg = (
            f"{wtg_name} Ops Curve Shift warning: abs({conf.name.value}) > {conf.warning_threshold}: {result:.3f}"
        )

    return CurveShiftOutput(value=result, warning_msg=warning_msg)