diff --git a/README.md b/README.md index 5d2372a..c445fc1 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,8 @@ Use `poe all` to run all required pre-push commands (make sure the virtual envir ## Running tests Install dev dependencies and use `poe test` to run unit tests (make sure the virtual environment is activated) +For convenience when developing locally, run `poe test-fast` to avoid running the tests marked as slow. + ## License See [`LICENSE.txt`](LICENSE.txt) diff --git a/examples/smarteole_example.py b/examples/smarteole_example.py index 2791345..1d5758c 100644 --- a/examples/smarteole_example.py +++ b/examples/smarteole_example.py @@ -1,19 +1,21 @@ +from __future__ import annotations + import logging import sys import zipfile from functools import partial from pathlib import Path +from typing import IO, NamedTuple import pandas as pd from pandas.testing import assert_frame_equal from scipy.stats import circmean -from wind_up.caching import with_parquet_cache from wind_up.combine_results import calc_net_uplift from wind_up.constants import OUTPUT_DIR, PROJECTROOT_DIR, TIMESTAMP_COL, DataColumns from wind_up.interface import AssessmentInputs from wind_up.main_analysis import run_wind_up_analysis -from wind_up.models import PlotConfig, WindUpConfig +from wind_up.models import Asset, PlotConfig, Toggle, Turbine, WindUpConfig from wind_up.reanalysis_data import ReanalysisDataset sys.path.append(str(PROJECTROOT_DIR)) @@ -32,9 +34,14 @@ ZIP_FILENAME = "SMARTEOLE-WFC-open-dataset.zip" MINIMUM_DATA_COUNT_COVERAGE = 0.5 # 50% of the data must be present +DEFAULT_SCADA_FILE_PATH = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_SCADA_1minData.csv" +DEFAULT_METADATA_FILE_PATH = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_Coordinates_staticData.csv" +DEFAULT_TOGGLE_FILE_PATH = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_ControlLog_1minData.csv" + -@with_parquet_cache(CACHE_SUBDIR / "smarteole_scada.parquet") -def unpack_smarteole_scada(timebase_s: 
int) -> pd.DataFrame: +def unpack_smarteole_scada( + timebase_s: int, scada_data_file: Path | str | IO[bytes] = DEFAULT_SCADA_FILE_PATH +) -> pd.DataFrame: """ Function that translates 1-minute SCADA data to x minute data in the wind-up expected format """ @@ -81,64 +88,60 @@ def _map_and_mask_cols(df: pd.DataFrame) -> pd.DataFrame: ) # unzipping the data in memory and only reading the relevant files - scada_fpath = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_SCADA_1minData.csv" circular_mean = partial(circmean, low=0, high=360) - with zipfile.ZipFile(CACHE_DIR / ZIP_FILENAME) as zf: - return ( - pd.read_csv(zf.open(scada_fpath), parse_dates=[0], index_col=0) - .pipe(_make_turbine_id_a_column) - .groupby(DataColumns.turbine_name) - .resample(f"{timebase_s}s") - .aggregate( - { - "active_power_avg": "mean", - "active_power_std": "mean", - "active_power_count": "sum", - "wind_speed_avg": "mean", - "wind_speed_std": "mean", - "wind_speed_count": "sum", - "blade_1_pitch_angle_avg": "mean", # no need for circular_mean because no wrap - "blade_1_pitch_angle_count": "sum", - "generator_speed_avg": "mean", - "generator_speed_count": "sum", - "temperature_avg": "mean", - "temperature_count": "sum", - "nacelle_position_avg": circular_mean, - "nacelle_position_max": "max", - "nacelle_position_min": "min", - "nacelle_position_count": "sum", - } - ) - .reset_index(DataColumns.turbine_name) - .pipe(_map_and_mask_cols) - .loc[:, DataColumns.all()] - .rename_axis(TIMESTAMP_COL, axis=0) - .rename_axis(None, axis=1) + return ( + pd.read_csv(scada_data_file, parse_dates=[0], index_col=0) + .pipe(_make_turbine_id_a_column) + .groupby(DataColumns.turbine_name) + .resample(f"{timebase_s}s") + .aggregate( + { + "active_power_avg": "mean", + "active_power_std": "mean", + "active_power_count": "sum", + "wind_speed_avg": "mean", + "wind_speed_std": "mean", + "wind_speed_count": "sum", + "blade_1_pitch_angle_avg": "mean", # no need for circular_mean because no wrap + 
"blade_1_pitch_angle_count": "sum", + "generator_speed_avg": "mean", + "generator_speed_count": "sum", + "temperature_avg": "mean", + "temperature_count": "sum", + "nacelle_position_avg": circular_mean, + "nacelle_position_max": "max", + "nacelle_position_min": "min", + "nacelle_position_count": "sum", + } ) + .reset_index(DataColumns.turbine_name) + .pipe(_map_and_mask_cols) + .loc[:, DataColumns.all()] + .rename_axis(TIMESTAMP_COL, axis=0) + .rename_axis(None, axis=1) + ) -@with_parquet_cache(CACHE_DIR / "smarteole_metadata.parquet") -def unpack_smarteole_metadata(timebase_s: int) -> pd.DataFrame: - md_fpath = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_Coordinates_staticData.csv" - with zipfile.ZipFile(CACHE_DIR / ZIP_FILENAME) as zf: - return ( - pd.read_csv(zf.open(md_fpath), index_col=0) - .reset_index() - .rename(columns={"Turbine": "Name"}) - .query("Name.str.startswith('SMV')") # is a turbine - .loc[:, ["Name", "Latitude", "Longitude"]] - .assign(TimeZone="UTC", TimeSpanMinutes=timebase_s / 60, TimeFormat="Start") - ) +def unpack_smarteole_metadata( + timebase_s: int, metadata_file: Path | str | IO[bytes] = DEFAULT_METADATA_FILE_PATH +) -> pd.DataFrame: + return ( + pd.read_csv(metadata_file, index_col=0) + .reset_index() + .rename(columns={"Turbine": "Name"}) + .query("Name.str.startswith('SMV')") # is a turbine + .loc[:, ["Name", "Latitude", "Longitude"]] + .assign(TimeZone="UTC", TimeSpanMinutes=timebase_s / 60, TimeFormat="Start") + ) -@with_parquet_cache(CACHE_SUBDIR / "smarteole_toggle.parquet") -def unpack_smarteole_toggle_data(timebase_s: int) -> pd.DataFrame: +def unpack_smarteole_toggle_data( + timebase_s: int, toggle_file: Path | str | IO[bytes] = DEFAULT_TOGGLE_FILE_PATH +) -> pd.DataFrame: ten_minutes_count_lower_limit = timebase_s * MINIMUM_DATA_COUNT_COVERAGE toggle_value_threshold: float = 0.95 - _fpath = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_ControlLog_1minData.csv" - with zipfile.ZipFile(CACHE_DIR / ZIP_FILENAME) as 
zf: - raw_df = pd.read_csv(zf.open(_fpath), parse_dates=[0], index_col=0) + raw_df = pd.read_csv(toggle_file, parse_dates=[0], index_col=0) required_in_cols = [ "control_log_offset_active_avg", @@ -169,21 +172,26 @@ def unpack_smarteole_toggle_data(timebase_s: int) -> pd.DataFrame: return toggle_df[["toggle_on", "toggle_off", "yaw_offset_command"]] -def define_smarteole_example_config() -> WindUpConfig: +def define_smarteole_example_config( + analysis_timebase_s: int, + analysis_output_dir: Path, +) -> WindUpConfig: wtg_map = { - f"SMV{i}": { - "name": f"SMV{i}", - "turbine_type": { - "turbine_type": "Senvion-MM82-2050", - "rotor_diameter_m": 82.0, - "rated_power_kw": 2050.0, - "cutout_ws_mps": 25, - "normal_operation_pitch_range": (-10.0, 35.0), - "normal_operation_genrpm_range": (250.0, 2000.0), - "rpm_v_pw_margin_factor": 0.05, - "pitch_to_stall": False, - }, - } + f"SMV{i}": Turbine.model_validate( + { + "name": f"SMV{i}", + "turbine_type": { + "turbine_type": "Senvion-MM82-2050", + "rotor_diameter_m": 82.0, + "rated_power_kw": 2050.0, + "cutout_ws_mps": 25, + "normal_operation_pitch_range": (-10.0, 35.0), + "normal_operation_genrpm_range": (250.0, 2000.0), + "rpm_v_pw_margin_factor": 0.05, + "pitch_to_stall": False, + }, + } + ) for i in range(1, 7 + 1) } northing_corrections_utc = [ @@ -196,16 +204,16 @@ def define_smarteole_example_config() -> WindUpConfig: ("SMV7", pd.Timestamp("2020-02-17 16:30:00+0000"), 4.605999999999972), ] - wd_filter_margin = 3 + 7 * ANALYSIS_TIMEBASE_S / 600 + wd_filter_margin = 3 + 7 * analysis_timebase_s / 600 return WindUpConfig( assessment_name="smarteole_example", - timebase_s=ANALYSIS_TIMEBASE_S, + timebase_s=analysis_timebase_s, require_ref_wake_free=True, detrend_min_hours=12, ref_wd_filter=[207 - wd_filter_margin, 236 + wd_filter_margin], # steer is from 207-236 filter_all_test_wtgs_together=True, use_lt_distribution=False, - out_dir=ANALYSIS_OUTPUT_DIR, + out_dir=analysis_output_dir, test_wtgs=[wtg_map["SMV6"], 
wtg_map["SMV5"]], ref_wtgs=[wtg_map["SMV7"]], ref_super_wtgs=[], @@ -220,21 +228,25 @@ def define_smarteole_example_config() -> WindUpConfig: years_for_lt_distribution=0, years_for_detrend=0, ws_bin_width=1.0, - asset={ - "name": "Sole du Moulin Vieux", - "wtgs": list(wtg_map.values()), - "masts_and_lidars": [], - }, + asset=Asset.model_validate( + { + "name": "Sole du Moulin Vieux", + "wtgs": list(wtg_map.values()), + "masts_and_lidars": [], + } + ), northing_corrections_utc=northing_corrections_utc, - toggle={ - "name": "wake steering", - "toggle_file_per_turbine": False, - "toggle_filename": "SMV_offset_active_toggle_df.parquet", - "detrend_data_selection": "use_toggle_off_data", - "pairing_filter_method": "any_within_timedelta", - "pairing_filter_timedelta_seconds": 3600, - "toggle_change_settling_filter_seconds": 120, - }, + toggle=Toggle.model_validate( + { + "name": "wake steering", + "toggle_file_per_turbine": False, + "toggle_filename": "SMV_offset_active_toggle_df.parquet", + "detrend_data_selection": "use_toggle_off_data", + "pairing_filter_method": "any_within_timedelta", + "pairing_filter_timedelta_seconds": 3600, + "toggle_change_settling_filter_seconds": 120, + } + ), ) @@ -263,49 +275,74 @@ def print_smarteole_results( assert_frame_equal(print_df, expected_print_df) -if __name__ == "__main__": - setup_logger(ANALYSIS_OUTPUT_DIR / "analysis.log") - logger = logging.getLogger(__name__) +class SmarteoleData(NamedTuple): + scada_df: pd.DataFrame + metadata_df: pd.DataFrame + toggle_df: pd.DataFrame - logger.info("Downloading example data from Zenodo") - download_zenodo_data(record_id="7342466", output_dir=CACHE_DIR, filenames={ZIP_FILENAME}) - logger.info("Preprocessing (and caching) turbine SCADA data") - scada_df = unpack_smarteole_scada(ANALYSIS_TIMEBASE_S) - logger.info("Preprocessing (and caching) turbine metadata") - metadata_df = unpack_smarteole_metadata(ANALYSIS_TIMEBASE_S) - logger.info("Preprocessing (and caching) toggle data") - toggle_df = 
unpack_smarteole_toggle_data(ANALYSIS_TIMEBASE_S) + + +def _download_data_from_zenodo(analysis_timebase_s: int, cache_dir: Path, zip_filename: str) -> SmarteoleData: + download_zenodo_data(record_id="7342466", output_dir=cache_dir, filenames={zip_filename}) + with zipfile.ZipFile(cache_dir / zip_filename) as zf: + scada_df = unpack_smarteole_scada(analysis_timebase_s, scada_data_file=zf.open(DEFAULT_SCADA_FILE_PATH)) + metadata_df = unpack_smarteole_metadata(analysis_timebase_s, metadata_file=zf.open(DEFAULT_METADATA_FILE_PATH)) + toggle_df = unpack_smarteole_toggle_data(analysis_timebase_s, toggle_file=zf.open(DEFAULT_TOGGLE_FILE_PATH)) + return SmarteoleData(scada_df=scada_df, metadata_df=metadata_df, toggle_df=toggle_df) + + +def main_smarteole_analysis( + *, + smarteole_data: SmarteoleData, + analysis_timebase_s: int = ANALYSIS_TIMEBASE_S, + check_results: bool = CHECK_RESULTS, + analysis_output_dir: Path = ANALYSIS_OUTPUT_DIR, + cache_sub_dir: Path = CACHE_SUBDIR, + reanalysis_file_path: Path | str = PARENT_DIR + / "smarteole_data/ERA5T_50.00N_2.75E_100m_1hr_20200201_20200531.parquet", +) -> None: + setup_logger(analysis_output_dir / "analysis.log") + logger = logging.getLogger(__name__) logger.info("Merging SMV6 yaw offset command signal into SCADA data") - toggle_df_no_tz = toggle_df.copy() + toggle_df_no_tz = smarteole_data.toggle_df.copy() toggle_df_no_tz.index = toggle_df_no_tz.index.tz_localize(None) - scada_df = scada_df.merge(toggle_df_no_tz["yaw_offset_command"], left_index=True, right_index=True, how="left") + scada_df = smarteole_data.scada_df.merge( + toggle_df_no_tz["yaw_offset_command"], left_index=True, right_index=True, how="left" + ) scada_df["yaw_offset_command"] = scada_df["yaw_offset_command"].where(scada_df["TurbineName"] == "SMV6", 0) del toggle_df_no_tz logger.info("Loading reference reanalysis data") reanalysis_dataset = ReanalysisDataset( id="ERA5T_50.00N_2.75E_100m_1hr", - data=pd.read_parquet(PARENT_DIR / "smarteole_data" / 
"ERA5T_50.00N_2.75E_100m_1hr_20200201_20200531.parquet"), + data=pd.read_parquet(reanalysis_file_path), ) logger.info("Defining Assessment Configuration") - cfg = define_smarteole_example_config() + cfg = define_smarteole_example_config( + analysis_timebase_s=analysis_timebase_s, analysis_output_dir=analysis_output_dir + ) plot_cfg = PlotConfig(show_plots=False, save_plots=True, plots_dir=cfg.out_dir / "plots") assessment_inputs = AssessmentInputs.from_cfg( cfg=cfg, plot_cfg=plot_cfg, - toggle_df=toggle_df, + toggle_df=smarteole_data.toggle_df, scada_df=scada_df, - metadata_df=metadata_df, + metadata_df=smarteole_data.metadata_df, reanalysis_datasets=[reanalysis_dataset], - cache_dir=CACHE_SUBDIR, + cache_dir=cache_sub_dir, ) results_per_test_ref_df = run_wind_up_analysis(assessment_inputs) net_p50, net_p95, net_p5 = calc_net_uplift(results_per_test_ref_df, confidence=0.9) print(f"net P50: {net_p50:.1%}, net P95: {net_p95:.1%}, net P5: {net_p5:.1%}") - print_smarteole_results(results_per_test_ref_df, check_results=CHECK_RESULTS) + print_smarteole_results(results_per_test_ref_df, check_results=check_results) + + +if __name__ == "__main__": + smarteole_data = _download_data_from_zenodo( + analysis_timebase_s=ANALYSIS_TIMEBASE_S, cache_dir=CACHE_DIR, zip_filename=ZIP_FILENAME + ) + main_smarteole_analysis(smarteole_data=smarteole_data) diff --git a/pyproject.toml b/pyproject.toml index b17db2f..2bcb1d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ dev = [ 'types-requests', 'ruff', 'mypy', + 'requests', ] examples = [ 'jupyterlab', @@ -131,6 +132,7 @@ filterwarnings = [ "ignore:Passing unrecognized arguments to super:DeprecationWarning", # pycharm debugger issue "ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning", ] +markers = ["slow: mark test as slow."] [tool.coverage.report] omit = [ @@ -155,6 +157,10 @@ sequence = [ { cmd = "mypy ." 
} ] +[tool.poe.tasks.test-fast] +help = "Runs tests that are not marked as slow" +sequence = [{ cmd = 'python -m pytest -m "not slow"' }] + [tool.poe.tasks.test] help = "Runs unit tests and show coverage" sequence = [ diff --git a/tests/test_smarteole.py b/tests/test_smarteole.py new file mode 100644 index 0000000..9147230 --- /dev/null +++ b/tests/test_smarteole.py @@ -0,0 +1,38 @@ +"""Tests running the Smarteole dataset through wind-up analysis.""" + +from pathlib import Path + +import pytest + +from examples.smarteole_example import ( + ANALYSIS_TIMEBASE_S, + CACHE_DIR, + ZIP_FILENAME, + _download_data_from_zenodo, + main_smarteole_analysis, +) + +SMARTEOLE_DATA_DIR = Path(__file__).parents[1] / "tests/test_data/smarteole" + + +@pytest.mark.slow +@pytest.mark.filterwarnings("ignore") +def test_smarteole_analysis_zenodo(tmp_path: Path) -> None: + """Test downloading data from zenodo and running the Smarteole analysis.""" + + timebase_s = ANALYSIS_TIMEBASE_S + cache_subdir = tmp_path / f"timebase_{timebase_s}" + cache_subdir.mkdir(parents=True, exist_ok=True) + + smarteole_data = _download_data_from_zenodo( + analysis_timebase_s=timebase_s, cache_dir=CACHE_DIR, zip_filename=ZIP_FILENAME + ) + + main_smarteole_analysis( + smarteole_data=smarteole_data, + reanalysis_file_path=SMARTEOLE_DATA_DIR / "ERA5T_50.00N_2.75E_100m_1hr_20200201_20200531.parquet", + analysis_timebase_s=timebase_s, + check_results=True, # asserts expected results + analysis_output_dir=tmp_path, + cache_sub_dir=cache_subdir, + )