add smarteole example to pytest test suite
samuelwnaylor committed Jan 20, 2025
1 parent c15a31c commit 892e1fa
Showing 4 changed files with 186 additions and 103 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -44,6 +44,8 @@ Use `poe all` to run all required pre-push commands (make sure the virtual environment is activated
## Running tests
Install dev dependencies and use `poe test` to run unit tests (make sure the virtual environment is activated)

For convenience when developing locally, run `poe test-fast` to avoid running the tests marked as slow.

## License
See [`LICENSE.txt`](LICENSE.txt)

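The `slow` marker that `poe test-fast` deselects is registered in this commit's `pyproject.toml` change further down (`markers = ["slow: mark test as slow."]`), and the `test-fast` task simply runs `python -m pytest -m "not slow"`. As a minimal sketch of how a test opts into the marker (the test name and body here are hypothetical, not part of this commit):

```python
import pytest


@pytest.mark.slow  # deselected by `poe test-fast`, which runs `python -m pytest -m "not slow"`
def test_expensive_end_to_end_case() -> None:
    ...  # placeholder for a long-running check
```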
243 changes: 140 additions & 103 deletions examples/smarteole_example.py
@@ -1,19 +1,21 @@
from __future__ import annotations

import logging
import sys
import zipfile
from functools import partial
from pathlib import Path
from typing import IO, NamedTuple

import pandas as pd
from pandas.testing import assert_frame_equal
from scipy.stats import circmean

from wind_up.caching import with_parquet_cache
from wind_up.combine_results import calc_net_uplift
from wind_up.constants import OUTPUT_DIR, PROJECTROOT_DIR, TIMESTAMP_COL, DataColumns
from wind_up.interface import AssessmentInputs
from wind_up.main_analysis import run_wind_up_analysis
from wind_up.models import PlotConfig, WindUpConfig
from wind_up.models import Asset, PlotConfig, Toggle, Turbine, WindUpConfig
from wind_up.reanalysis_data import ReanalysisDataset

sys.path.append(str(PROJECTROOT_DIR))
@@ -32,9 +34,14 @@
ZIP_FILENAME = "SMARTEOLE-WFC-open-dataset.zip"
MINIMUM_DATA_COUNT_COVERAGE = 0.5 # 50% of the data must be present

DEFAULT_SCADA_FILE_PATH = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_SCADA_1minData.csv"
DEFAULT_METADATA_FILE_PATH = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_Coordinates_staticData.csv"
DEFAULT_TOGGLE_FILE_PATH = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_ControlLog_1minData.csv"


@with_parquet_cache(CACHE_SUBDIR / "smarteole_scada.parquet")
def unpack_smarteole_scada(timebase_s: int) -> pd.DataFrame:
def unpack_smarteole_scada(
timebase_s: int, scada_data_file: Path | str | IO[bytes] = DEFAULT_SCADA_FILE_PATH
) -> pd.DataFrame:
"""
Function that translates 1-minute SCADA data to x minute data in the wind-up expected format
"""
@@ -81,64 +88,60 @@ def _map_and_mask_cols(df: pd.DataFrame) -> pd.DataFrame:
)

# unzipping the data in memory and only reading the relevant files
scada_fpath = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_SCADA_1minData.csv"
circular_mean = partial(circmean, low=0, high=360)
with zipfile.ZipFile(CACHE_DIR / ZIP_FILENAME) as zf:
return (
pd.read_csv(zf.open(scada_fpath), parse_dates=[0], index_col=0)
.pipe(_make_turbine_id_a_column)
.groupby(DataColumns.turbine_name)
.resample(f"{timebase_s}s")
.aggregate(
{
"active_power_avg": "mean",
"active_power_std": "mean",
"active_power_count": "sum",
"wind_speed_avg": "mean",
"wind_speed_std": "mean",
"wind_speed_count": "sum",
"blade_1_pitch_angle_avg": "mean", # no need for circular_mean because no wrap
"blade_1_pitch_angle_count": "sum",
"generator_speed_avg": "mean",
"generator_speed_count": "sum",
"temperature_avg": "mean",
"temperature_count": "sum",
"nacelle_position_avg": circular_mean,
"nacelle_position_max": "max",
"nacelle_position_min": "min",
"nacelle_position_count": "sum",
}
)
.reset_index(DataColumns.turbine_name)
.pipe(_map_and_mask_cols)
.loc[:, DataColumns.all()]
.rename_axis(TIMESTAMP_COL, axis=0)
.rename_axis(None, axis=1)
return (
pd.read_csv(scada_data_file, parse_dates=[0], index_col=0)
.pipe(_make_turbine_id_a_column)
.groupby(DataColumns.turbine_name)
.resample(f"{timebase_s}s")
.aggregate(
{
"active_power_avg": "mean",
"active_power_std": "mean",
"active_power_count": "sum",
"wind_speed_avg": "mean",
"wind_speed_std": "mean",
"wind_speed_count": "sum",
"blade_1_pitch_angle_avg": "mean", # no need for circular_mean because no wrap
"blade_1_pitch_angle_count": "sum",
"generator_speed_avg": "mean",
"generator_speed_count": "sum",
"temperature_avg": "mean",
"temperature_count": "sum",
"nacelle_position_avg": circular_mean,
"nacelle_position_max": "max",
"nacelle_position_min": "min",
"nacelle_position_count": "sum",
}
)
.reset_index(DataColumns.turbine_name)
.pipe(_map_and_mask_cols)
.loc[:, DataColumns.all()]
.rename_axis(TIMESTAMP_COL, axis=0)
.rename_axis(None, axis=1)
)
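The `circular_mean` aggregation applied to `nacelle_position_avg` above is needed because nacelle headings wrap at 360 degrees, so an arithmetic mean of values either side of north is meaningless (hence the comment that blade pitch, which does not wrap, can use a plain mean). A standalone illustration, not part of the commit:

```python
from functools import partial

from scipy.stats import circmean

circular_mean = partial(circmean, low=0, high=360)

headings = [350.0, 10.0]  # two nacelle positions straddling north
print(sum(headings) / len(headings))  # 180.0 -- the arithmetic mean points due south
print(circular_mean(headings))  # ~0.0, i.e. north, the physically sensible answer
```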


@with_parquet_cache(CACHE_DIR / "smarteole_metadata.parquet")
def unpack_smarteole_metadata(timebase_s: int) -> pd.DataFrame:
md_fpath = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_Coordinates_staticData.csv"
with zipfile.ZipFile(CACHE_DIR / ZIP_FILENAME) as zf:
return (
pd.read_csv(zf.open(md_fpath), index_col=0)
.reset_index()
.rename(columns={"Turbine": "Name"})
.query("Name.str.startswith('SMV')") # is a turbine
.loc[:, ["Name", "Latitude", "Longitude"]]
.assign(TimeZone="UTC", TimeSpanMinutes=timebase_s / 60, TimeFormat="Start")
)
def unpack_smarteole_metadata(
timebase_s: int, metadata_file: Path | str | IO[bytes] = DEFAULT_METADATA_FILE_PATH
) -> pd.DataFrame:
return (
pd.read_csv(metadata_file, index_col=0)
.reset_index()
.rename(columns={"Turbine": "Name"})
.query("Name.str.startswith('SMV')") # is a turbine
.loc[:, ["Name", "Latitude", "Longitude"]]
.assign(TimeZone="UTC", TimeSpanMinutes=timebase_s / 60, TimeFormat="Start")
)


@with_parquet_cache(CACHE_SUBDIR / "smarteole_toggle.parquet")
def unpack_smarteole_toggle_data(timebase_s: int) -> pd.DataFrame:
def unpack_smarteole_toggle_data(
timebase_s: int, toggle_file: Path | str | IO[bytes] = DEFAULT_TOGGLE_FILE_PATH
) -> pd.DataFrame:
ten_minutes_count_lower_limit = timebase_s * MINIMUM_DATA_COUNT_COVERAGE
toggle_value_threshold: float = 0.95

_fpath = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_ControlLog_1minData.csv"
with zipfile.ZipFile(CACHE_DIR / ZIP_FILENAME) as zf:
raw_df = pd.read_csv(zf.open(_fpath), parse_dates=[0], index_col=0)
raw_df = pd.read_csv(toggle_file, parse_dates=[0], index_col=0)

required_in_cols = [
"control_log_offset_active_avg",
@@ -169,21 +172,26 @@ def unpack_smarteole_toggle_data(timebase_s: int) -> pd.DataFrame:
return toggle_df[["toggle_on", "toggle_off", "yaw_offset_command"]]


def define_smarteole_example_config() -> WindUpConfig:
def define_smarteole_example_config(
analysis_timebase_s: int,
analysis_output_dir: Path,
) -> WindUpConfig:
wtg_map = {
f"SMV{i}": {
"name": f"SMV{i}",
"turbine_type": {
"turbine_type": "Senvion-MM82-2050",
"rotor_diameter_m": 82.0,
"rated_power_kw": 2050.0,
"cutout_ws_mps": 25,
"normal_operation_pitch_range": (-10.0, 35.0),
"normal_operation_genrpm_range": (250.0, 2000.0),
"rpm_v_pw_margin_factor": 0.05,
"pitch_to_stall": False,
},
}
f"SMV{i}": Turbine.model_validate(
{
"name": f"SMV{i}",
"turbine_type": {
"turbine_type": "Senvion-MM82-2050",
"rotor_diameter_m": 82.0,
"rated_power_kw": 2050.0,
"cutout_ws_mps": 25,
"normal_operation_pitch_range": (-10.0, 35.0),
"normal_operation_genrpm_range": (250.0, 2000.0),
"rpm_v_pw_margin_factor": 0.05,
"pitch_to_stall": False,
},
}
)
for i in range(1, 7 + 1)
}
northing_corrections_utc = [
@@ -196,16 +204,16 @@ def define_smarteole_example_config() -> WindUpConfig:
("SMV7", pd.Timestamp("2020-02-17 16:30:00+0000"), 4.605999999999972),
]

wd_filter_margin = 3 + 7 * ANALYSIS_TIMEBASE_S / 600
wd_filter_margin = 3 + 7 * analysis_timebase_s / 600
return WindUpConfig(
assessment_name="smarteole_example",
timebase_s=ANALYSIS_TIMEBASE_S,
timebase_s=analysis_timebase_s,
require_ref_wake_free=True,
detrend_min_hours=12,
ref_wd_filter=[207 - wd_filter_margin, 236 + wd_filter_margin], # steer is from 207-236
filter_all_test_wtgs_together=True,
use_lt_distribution=False,
out_dir=ANALYSIS_OUTPUT_DIR,
out_dir=analysis_output_dir,
test_wtgs=[wtg_map["SMV6"], wtg_map["SMV5"]],
ref_wtgs=[wtg_map["SMV7"]],
ref_super_wtgs=[],
@@ -220,21 +228,25 @@
years_for_lt_distribution=0,
years_for_detrend=0,
ws_bin_width=1.0,
asset={
"name": "Sole du Moulin Vieux",
"wtgs": list(wtg_map.values()),
"masts_and_lidars": [],
},
asset=Asset.model_validate(
{
"name": "Sole du Moulin Vieux",
"wtgs": list(wtg_map.values()),
"masts_and_lidars": [],
}
),
northing_corrections_utc=northing_corrections_utc,
toggle={
"name": "wake steering",
"toggle_file_per_turbine": False,
"toggle_filename": "SMV_offset_active_toggle_df.parquet",
"detrend_data_selection": "use_toggle_off_data",
"pairing_filter_method": "any_within_timedelta",
"pairing_filter_timedelta_seconds": 3600,
"toggle_change_settling_filter_seconds": 120,
},
toggle=Toggle.model_validate(
{
"name": "wake steering",
"toggle_file_per_turbine": False,
"toggle_filename": "SMV_offset_active_toggle_df.parquet",
"detrend_data_selection": "use_toggle_off_data",
"pairing_filter_method": "any_within_timedelta",
"pairing_filter_timedelta_seconds": 3600,
"toggle_change_settling_filter_seconds": 120,
}
),
)


@@ -263,49 +275,74 @@ def print_smarteole_results(
assert_frame_equal(print_df, expected_print_df)


if __name__ == "__main__":
setup_logger(ANALYSIS_OUTPUT_DIR / "analysis.log")
logger = logging.getLogger(__name__)
class SmarteoleData(NamedTuple):
scada_df: pd.DataFrame
metadata_df: pd.DataFrame
toggle_df: pd.DataFrame

logger.info("Downloading example data from Zenodo")
download_zenodo_data(record_id="7342466", output_dir=CACHE_DIR, filenames={ZIP_FILENAME})

logger.info("Preprocessing (and caching) turbine SCADA data")
scada_df = unpack_smarteole_scada(ANALYSIS_TIMEBASE_S)
logger.info("Preprocessing (and caching) turbine metadata")
metadata_df = unpack_smarteole_metadata(ANALYSIS_TIMEBASE_S)
logger.info("Preprocessing (and caching) toggle data")
toggle_df = unpack_smarteole_toggle_data(ANALYSIS_TIMEBASE_S)
def _download_data_from_zenodo(analysis_timebase_s: int, cache_dir: Path, zip_filename: str) -> SmarteoleData:
download_zenodo_data(record_id="7342466", output_dir=cache_dir, filenames={zip_filename})
with zipfile.ZipFile(cache_dir / zip_filename) as zf:
scada_df = unpack_smarteole_scada(analysis_timebase_s, scada_data_file=zf.open(DEFAULT_SCADA_FILE_PATH))
metadata_df = unpack_smarteole_metadata(analysis_timebase_s, metadata_file=zf.open(DEFAULT_METADATA_FILE_PATH))
toggle_df = unpack_smarteole_toggle_data(analysis_timebase_s, toggle_file=zf.open(DEFAULT_TOGGLE_FILE_PATH))
return SmarteoleData(scada_df=scada_df, metadata_df=metadata_df, toggle_df=toggle_df)


def main_smarteole_analysis(
*,
smarteole_data: SmarteoleData,
analysis_timebase_s: int = ANALYSIS_TIMEBASE_S,
check_results: bool = CHECK_RESULTS,
analysis_output_dir: Path = ANALYSIS_OUTPUT_DIR,
cache_sub_dir: Path = CACHE_SUBDIR,
reanalysis_file_path: Path | str = PARENT_DIR
/ "smarteole_data/ERA5T_50.00N_2.75E_100m_1hr_20200201_20200531.parquet",
) -> None:
setup_logger(analysis_output_dir / "analysis.log")
logger = logging.getLogger(__name__)

logger.info("Merging SMV6 yaw offset command signal into SCADA data")
toggle_df_no_tz = toggle_df.copy()
toggle_df_no_tz = smarteole_data.toggle_df.copy()
toggle_df_no_tz.index = toggle_df_no_tz.index.tz_localize(None)
scada_df = scada_df.merge(toggle_df_no_tz["yaw_offset_command"], left_index=True, right_index=True, how="left")
scada_df = smarteole_data.scada_df.merge(
toggle_df_no_tz["yaw_offset_command"], left_index=True, right_index=True, how="left"
)
scada_df["yaw_offset_command"] = scada_df["yaw_offset_command"].where(scada_df["TurbineName"] == "SMV6", 0)
del toggle_df_no_tz

logger.info("Loading reference reanalysis data")
reanalysis_dataset = ReanalysisDataset(
id="ERA5T_50.00N_2.75E_100m_1hr",
data=pd.read_parquet(PARENT_DIR / "smarteole_data" / "ERA5T_50.00N_2.75E_100m_1hr_20200201_20200531.parquet"),
data=pd.read_parquet(reanalysis_file_path),
)

logger.info("Defining Assessment Configuration")
cfg = define_smarteole_example_config()
cfg = define_smarteole_example_config(
analysis_timebase_s=analysis_timebase_s, analysis_output_dir=analysis_output_dir
)
plot_cfg = PlotConfig(show_plots=False, save_plots=True, plots_dir=cfg.out_dir / "plots")

assessment_inputs = AssessmentInputs.from_cfg(
cfg=cfg,
plot_cfg=plot_cfg,
toggle_df=toggle_df,
toggle_df=smarteole_data.toggle_df,
scada_df=scada_df,
metadata_df=metadata_df,
metadata_df=smarteole_data.metadata_df,
reanalysis_datasets=[reanalysis_dataset],
cache_dir=CACHE_SUBDIR,
cache_dir=cache_sub_dir,
)
results_per_test_ref_df = run_wind_up_analysis(assessment_inputs)

net_p50, net_p95, net_p5 = calc_net_uplift(results_per_test_ref_df, confidence=0.9)
print(f"net P50: {net_p50:.1%}, net P95: {net_p95:.1%}, net P5: {net_p5:.1%}")

print_smarteole_results(results_per_test_ref_df, check_results=CHECK_RESULTS)
print_smarteole_results(results_per_test_ref_df, check_results=check_results)


if __name__ == "__main__":
smarteole_data = _download_data_from_zenodo(
analysis_timebase_s=ANALYSIS_TIMEBASE_S, cache_dir=CACHE_DIR, zip_filename=ZIP_FILENAME
)
main_smarteole_analysis(smarteole_data=smarteole_data)
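The fourth changed file (presumably the new pytest test module) is not rendered on this page, but the refactor above is what makes the example usable from pytest: the unpack functions now accept file-like objects, the configuration takes its timebase and output directory as arguments, and the whole analysis is exposed as `main_smarteole_analysis`. A hedged sketch of how such a test might drive it, assuming the `examples` directory is importable and that the Zenodo download (or its cached copy) is available; the test name is hypothetical:

```python
from pathlib import Path

import pytest

from examples.smarteole_example import (
    ANALYSIS_TIMEBASE_S,
    CACHE_DIR,
    ZIP_FILENAME,
    _download_data_from_zenodo,
    main_smarteole_analysis,
)


@pytest.mark.slow
def test_smarteole_example_end_to_end(tmp_path: Path) -> None:
    # download (or reuse the cached copy of) the SMARTEOLE zip and unpack SCADA,
    # metadata and toggle data into a SmarteoleData named tuple
    smarteole_data = _download_data_from_zenodo(
        analysis_timebase_s=ANALYSIS_TIMEBASE_S, cache_dir=CACHE_DIR, zip_filename=ZIP_FILENAME
    )
    # check_results=True makes print_smarteole_results assert against the expected uplift table
    main_smarteole_analysis(
        smarteole_data=smarteole_data,
        analysis_timebase_s=ANALYSIS_TIMEBASE_S,
        check_results=True,
        analysis_output_dir=tmp_path,
    )
```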
6 changes: 6 additions & 0 deletions pyproject.toml
@@ -57,6 +57,7 @@ dev = [
'types-requests',
'ruff',
'mypy',
'requests',
]
examples = [
'jupyterlab',
@@ -131,6 +132,7 @@ filterwarnings = [
"ignore:Passing unrecognized arguments to super:DeprecationWarning", # pycharm debugger issue
"ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning",
]
markers = ["slow: mark test as slow."]

[tool.coverage.report]
omit = [
@@ -155,6 +157,10 @@ sequence = [
{ cmd = "mypy ." }
]

[tool.poe.tasks.test-fast]
help = "Runs tests that are not marked as slow"
sequence = [{ cmd = 'python -m pytest -m "not slow"' }]

[tool.poe.tasks.test]
help = "Runs unit tests and show coverage"
sequence = [