resgroup · izofat · Sep 6, 2024 · Sep 8, 2024 · Sep 8, 2024 · Sep 13, 2024
diff --git a/pyproject.toml b/pyproject.toml
@@ -48,6 +48,7 @@ build-backend = "setuptools.build_meta"
 [project.optional-dependencies]
 dev = [
     'pytest',
+    'pytest-benchmark',
     'coverage',
     'poethepoet',
     'types-pyyaml',

diff --git a/tests/test_detrend.py b/tests/test_detrend.py
@@ -3,6 +3,7 @@
 import pandas as pd
 import pytest
 from pandas.testing import assert_frame_equal
+from pytest_benchmark.fixture import BenchmarkFixture
 
 from wind_up.detrend import apply_wsratio_v_wd_scen, calc_wsratio_v_wd_scen, check_applied_detrend
 from wind_up.models import WindUpConfig
@@ -66,7 +67,7 @@ def test_check_applied_detrend(test_lsa_t13_config: WindUpConfig) -> None:
     assert detrend_post_r2_improvement == pytest.approx(0.03776561982402227)
 
 
-def test_calc_wsratio_v_wd_scen(test_lsa_t13_config: WindUpConfig) -> None:
+def test_calc_wsratio_v_wd_scen(benchmark: BenchmarkFixture, test_lsa_t13_config: WindUpConfig) -> None:
     # this test case borrows logic and results from check_applied_detrend where data which has already been detrended
     # is used to calculate the wsratio_v_wd_scen again to check it is flat
     cfg = test_lsa_t13_config
@@ -85,7 +86,8 @@ def test_calc_wsratio_v_wd_scen(test_lsa_t13_config: WindUpConfig) -> None:
     expected_pre_df = pd.read_parquet(
         Path(__file__).parents[0] / "test_data/LSA_T13_LSA_T12_check_pre_wsratio_v_dir_scen.parquet",
     )
-    actual_pre_df = calc_wsratio_v_wd_scen(
+    actual_pre_df = benchmark(
+        calc_wsratio_v_wd_scen,
         test_name=test_name,
         ref_name=ref_name,
         ref_lat=ref_lat,

diff --git a/wind_up/detrend.py b/wind_up/detrend.py
@@ -35,40 +35,57 @@ def calc_wsratio_v_wd(
     # IEC says only use 4-16 m/s
     test_ws_ll = 4
     test_ws_ul = 16
-    ref_ws_ll = test_ws_ll * detrend_df[ref_ws_col].mean() / detrend_df[test_ws_col].mean()
-    ref_ws_ul = test_ws_ul * detrend_df[ref_ws_col].mean() / detrend_df[test_ws_col].mean()
-    detrend_df = detrend_df[(detrend_df[test_ws_col] >= test_ws_ll) & (detrend_df[test_ws_col] < test_ws_ul)]
-    detrend_df = detrend_df[(detrend_df[ref_ws_col] >= ref_ws_ll) & (detrend_df[ref_ws_col] < ref_ws_ul)]
+    test_ws_mean = detrend_df[test_ws_col].mean()
+    ref_ws_mean = detrend_df[ref_ws_col].mean()
 
-    directions = []
-    hours = []
+    ref_ws_ll = test_ws_ll * ref_ws_mean / test_ws_mean
+    ref_ws_ul = test_ws_ul * ref_ws_mean / test_ws_mean
+
+    detrend_df = detrend_df[
+        (detrend_df[test_ws_col] >= test_ws_ll)
+        & (detrend_df[test_ws_col] < test_ws_ul)
+        & (detrend_df[ref_ws_col] >= ref_ws_ll)
+        & (detrend_df[ref_ws_col] < ref_ws_ul)
+    ]
+
+    rows_per_hour = 3600 / timebase_s
+    min_count = min_hours * rows_per_hour
+    iec_ws_threshold = 8
+
+    # Precompute the circular difference from ref wind direction to each 1-degree direction (one row per direction)
+    directions = np.arange(0, 360, 1)
+    circ_diffs = np.array([circ_diff(detrend_df[ref_wd_col], d) for d in directions])
+
+    within_dir_bins = np.abs(circ_diffs) < dir_bin_width / 2
+
+    valid_directions = []
+    valid_hours = []
     test_rf_ws_roms = []
-    for d in list(range(0, 360, 1)):
-        detrend_df["circ_diff_to_d"] = circ_diff(detrend_df[ref_wd_col], d)
-        detrend_df["within_dir_bin"] = detrend_df["circ_diff_to_d"].abs() < dir_bin_width / 2
-        subsector_df = detrend_df[detrend_df["within_dir_bin"]].copy()
-        if len(subsector_df) > 0:
-            directions.append(d)
-            rows_per_hour = 3600 / timebase_s
-            hours.append(len(subsector_df) / rows_per_hour)
-            # 61400-12-1 requires >=24h data, >=6h above 8m/s, >= below 8m/s
-            min_count = min_hours * rows_per_hour
-            accept_sector = len(subsector_df) >= min_count
-            iec_ws_threshold = 8
-            accept_sector = accept_sector and ((subsector_df[test_ws_col] < iec_ws_threshold).sum() >= (min_count / 4))
-            accept_sector = accept_sector and ((subsector_df[test_ws_col] >= iec_ws_threshold).sum() >= (min_count / 4))
-            if accept_sector:
-                rom = subsector_df[test_ws_col].mean() / subsector_df[ref_ws_col].mean()
-                test_rf_ws_roms.append(rom)
+    for i, direction in enumerate(directions):
+        subsector_df = detrend_df[within_dir_bins[i]].copy()
+
+        if (subsector_df_len := len(subsector_df)) > 0:
+            valid_directions.append(direction)
+            valid_hours.append(subsector_df_len / rows_per_hour)
+
+            if subsector_df_len >= min_count:
+                below_thresh = (subsector_df[test_ws_col] < iec_ws_threshold).sum()
+                above_thresh = (subsector_df[test_ws_col] >= iec_ws_threshold).sum()
+
+                if below_thresh >= (min_count / 4) and above_thresh >= (min_count / 4):
+                    rom = subsector_df[test_ws_col].mean() / subsector_df[ref_ws_col].mean()
+                    test_rf_ws_roms.append(rom)
+                else:
+                    test_rf_ws_roms.append(np.nan)
             else:
                 test_rf_ws_roms.append(np.nan)
 
     return pd.DataFrame(
         {
-            "direction": directions,
-            "hours": hours,
+            "direction": valid_directions,
+            "hours": valid_hours,
             "ws_rom": test_rf_ws_roms,
-        },
+        }
     )