Skip to content

Commit

Permalink
use ERA5 for northing
Browse files Browse the repository at this point in the history
  • Loading branch information
aclerc committed Dec 6, 2024
1 parent 1db159b commit c951268
Showing 1 changed file with 161 additions and 16 deletions.
177 changes: 161 additions & 16 deletions examples/kelmarsh_kaggle.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,26 @@

import pandas as pd

from examples.helpers import format_and_print_results_table, setup_logger
from examples.wedowind_example import create_fake_wedowind_reanalysis_dataset
from wind_up.constants import OUTPUT_DIR, PROJECTROOT_DIR, TIMESTAMP_COL, DataColumns
from examples.helpers import setup_logger
from wind_up.constants import (
OUTPUT_DIR,
PROJECTROOT_DIR,
REANALYSIS_WD_COL,
REANALYSIS_WS_COL,
TIMESTAMP_COL,
WINDFARM_YAWDIR_COL,
DataColumns,
)
from wind_up.detrend import calc_wsratio_v_wd_scen
from wind_up.interface import AssessmentInputs
from wind_up.main_analysis import run_wind_up_analysis
from wind_up.models import PlotConfig, WindUpConfig
from wind_up.northing import check_wtg_northing
from wind_up.plots.data_coverage_plots import plot_detrend_data_cov
from wind_up.reanalysis_data import ReanalysisDataset
from wind_up.waking_state import add_waking_scen, get_distance_and_bearing
from wind_up.windspeed_drift import check_windspeed_drift

DATA_DIR = Path("kelmarsh_kaggle_data")
CACHE_DIR = PROJECTROOT_DIR / "cache" / "kelmarsh_kaggle"
ASSESSMENT_NAME = "kelmarsh_kaggle"
ANALYSIS_OUTPUT_DIR = OUTPUT_DIR / ASSESSMENT_NAME
Expand Down Expand Up @@ -71,25 +84,24 @@ def kelmarsh_kaggle_metadata_df(data_dir: Path) -> pd.DataFrame:

def main(analysis_name: str) -> None:
# verify the data is in the correct location
data_dir = Path("kelmarsh_kaggle_data")
expected_files = [
"train.csv",
"test.csv",
"sample_submission.csv",
"metaData.csv",
]
data_ok = all((data_dir / file).exists() for file in expected_files)
data_ok = all((DATA_DIR / file).exists() for file in expected_files)
if not data_ok:
data_url = r"https://www.kaggle.com/competitions/predict-the-wind-speed-at-a-wind-turbine/data"
msg = (
f"Expected files not found in {data_dir}.\nPlease download the data from the Kaggle "
f"at {data_url} and save them in {data_dir.resolve()}."
f"Expected files not found in {DATA_DIR}.\nPlease download the data from the Kaggle "
f"at {data_url} and save them in {DATA_DIR.resolve()}."
)
raise FileNotFoundError(msg)

logger.info("Unpacking turbine SCADA data")
scada_df = KelmarshKaggleScadaUnpacker(data_dir=data_dir).unpack()
metadata_df = kelmarsh_kaggle_metadata_df(data_dir=data_dir)
scada_df = KelmarshKaggleScadaUnpacker(data_dir=DATA_DIR).unpack()
metadata_df = kelmarsh_kaggle_metadata_df(data_dir=DATA_DIR)

# Construct wind-up Configurations
wtg_map = {
Expand All @@ -107,7 +119,21 @@ def main(analysis_name: str) -> None:
}

# is it OK to use ERA5???
reanalysis_dataset = create_fake_wedowind_reanalysis_dataset(start_datetime=scada_df.index.min())
reanalysis_dataset = ReanalysisDataset(
id="ERA5T_52.50N_-1.00E_100m_1hr",
data=pd.read_parquet(DATA_DIR / "ERA5T_52.50N_-1.00E_100m_1hr.parquet"),
)

# calculated previously by setting optimize_northing_corrections to True
northing_corrections_utc = [
("Kelmarsh 2", pd.Timestamp("2017-10-01 00:00:00+0000"), 3.4831420898439944),
("Kelmarsh 3", pd.Timestamp("2017-10-01 00:00:00+0000"), 1.6804382324219773),
("Kelmarsh 4", pd.Timestamp("2017-10-01 00:00:00+0000"), 3.7531753316334004),
("Kelmarsh 5", pd.Timestamp("2017-10-01 00:00:00+0000"), 7.918688964843739),
("Kelmarsh 5", pd.Timestamp("2020-04-16 10:00:00+0000"), 12.944992828369152),
("Kelmarsh 5", pd.Timestamp("2020-04-23 18:10:00+0000"), 8.455931250697915),
("Kelmarsh 6", pd.Timestamp("2017-10-01 00:00:00+0000"), 5.209234619141114),
]

cfg = WindUpConfig(
assessment_name=analysis_name,
Expand Down Expand Up @@ -136,7 +162,8 @@ def main(analysis_name: str) -> None:
"post_first_dt_utc_start": scada_df.index.min() + (scada_df.index.max() - scada_df.index.min()) / 2,
"post_last_dt_utc_start": scada_df.index.max(),
},
optimize_northing_corrections=False,
optimize_northing_corrections=False, # switch to True to recalculate northing_corrections_utc
northing_corrections_utc=northing_corrections_utc,
)

plot_cfg = PlotConfig(show_plots=False, save_plots=True, plots_dir=cfg.out_dir / "plots")
Expand All @@ -153,10 +180,128 @@ def main(analysis_name: str) -> None:
cache_dir=cache_assessment,
)

# Run Analysis
results_per_test_ref_df = run_wind_up_analysis(assessment_inputs)
results_per_test_ref_df.to_csv(cfg.out_dir / "results_per_test_ref.csv", index=False)
_ = format_and_print_results_table(results_per_test_ref_df)
predict_kelmarsh_t1_windspeed(assessment_inputs)


def predict_kelmarsh_t1_windspeed(assessment_inputs: AssessmentInputs) -> None:
    """Build detrended wind-speed-ratio relationships between the test turbine and each reference turbine.

    Takes the prepared wind-up inputs, runs data-quality checks (wind speed drift, yaw northing),
    then for every configured reference turbine merges test/ref data over the detrend period,
    attaches the waking scenario, and computes the wind-speed ratio versus wind direction
    (``calc_wsratio_v_wd_scen``), which is printed per reference.

    Side effects: the various ``check_*`` and ``plot_*`` calls emit diagnostic plots under
    ``plot_cfg`` output directories; results are printed to stdout rather than returned.

    :param assessment_inputs: fully-prepared wind-up inputs (wind farm dataframe, power curves
        per turbine type, analysis config, plot config).
    :return: None — output is plots plus printed ``wsratio_v_dir_scen`` tables.
    """
    # Unpack the pieces of AssessmentInputs used below.
    wf_df = assessment_inputs.wf_df
    pc_per_ttype = assessment_inputs.pc_per_ttype
    cfg = assessment_inputs.cfg
    plot_cfg = assessment_inputs.plot_cfg
    # NOTE(review): pre_post_splitter is unpacked but never used in this function.
    pre_post_splitter = assessment_inputs.pre_post_splitter

    # The Kaggle target turbine (Kelmarsh T1) is assumed to be the first configured test turbine.
    test_wtg = cfg.test_wtgs[0]
    test_pw_col = "pw_clipped"
    test_ws_col = "raw_WindSpeedMean"
    # wf_df is indexed by turbine name at its first level; copy to avoid mutating the shared frame.
    test_df = wf_df.loc[test_wtg.name].copy()
    test_name = test_wtg.name

    # Prefix all test-turbine columns so they stay distinguishable after the merge with ref data.
    test_df.columns = ["test_" + x for x in test_df.columns]
    test_pw_col = "test_" + test_pw_col
    test_ws_col = "test_" + test_ws_col

    # Screen the test turbine's anemometer for drift relative to reanalysis wind speed.
    # NOTE(review): the returned drift magnitude/period are not used afterwards — the call is
    # kept for its diagnostic plots; confirm whether the values should gate the analysis.
    test_max_ws_drift, test_max_ws_drift_pp_period = check_windspeed_drift(
        wtg_df=test_df,
        wtg_name=test_name,
        ws_col=test_ws_col,
        reanalysis_ws_col="test_" + REANALYSIS_WS_COL,
        cfg=cfg,
        plot_cfg=plot_cfg,
    )

    # Power curve for the test turbine's type.
    # NOTE(review): scada_pc is currently unused below; the lookup also validates the type key.
    scada_pc = pc_per_ttype[test_wtg.turbine_type.turbine_type]

    # One pass per reference turbine: northing checks, drift check, detrend-period merge,
    # waking scenario, then wind-speed-ratio vs direction.
    for ref_wtg in cfg.ref_wtgs:
        ref_name = ref_wtg.name
        ref_pw_col = "pw_clipped"
        ref_wd_col = "YawAngleMean"
        ref_ws_col = "ws_est_blend"
        ref_df = wf_df.loc[ref_name].copy()
        # Check yaw northing against the reanalysis wind direction...
        # NOTE(review): both northing-error return values are unused; calls kept for plots.
        ref_max_northing_error_v_reanalysis = check_wtg_northing(
            ref_df,
            wtg_name=ref_name,
            north_ref_wd_col=REANALYSIS_WD_COL,
            timebase_s=cfg.timebase_s,
            plot_cfg=plot_cfg,
            sub_dir=f"{test_name}/{ref_name}",
        )
        # ...and against the wind-farm-level yaw direction.
        ref_max_northing_error_v_wf = check_wtg_northing(
            ref_df,
            wtg_name=ref_name,
            north_ref_wd_col=WINDFARM_YAWDIR_COL,
            timebase_s=cfg.timebase_s,
            plot_cfg=plot_cfg,
            sub_dir=f"{test_name}/{ref_name}",
        )

        # Prefix ref columns (mirrors the "test_" prefixing above) before merging.
        ref_pw_col = "ref_" + ref_pw_col
        ref_ws_col = "ref_" + ref_ws_col
        ref_wd_col = "ref_" + ref_wd_col
        ref_df.columns = ["ref_" + x for x in ref_df.columns]

        test_lat = test_wtg.latitude
        test_long = test_wtg.longitude
        ref_lat = ref_wtg.latitude
        ref_long = ref_wtg.longitude

        # NOTE(review): distance/bearing between test and ref are computed but never used here —
        # presumably informational or a leftover; confirm intent.
        distance_m, bearing_deg = get_distance_and_bearing(
            lat1=test_lat,
            long1=test_long,
            lat2=ref_lat,
            long2=ref_long,
        )

        # Same anemometer-drift screen for the reference turbine (plots only; result unused).
        ref_max_ws_drift, ref_max_ws_drift_pp_period = check_windspeed_drift(
            wtg_df=ref_df,
            wtg_name=ref_name,
            ws_col=ref_ws_col,
            reanalysis_ws_col="ref_" + REANALYSIS_WS_COL,
            cfg=cfg,
            plot_cfg=plot_cfg,
            sub_dir=f"{test_name}/{ref_name}",
        )

        # Align test and ref rows on timestamp, then restrict to the configured detrend window.
        detrend_df = test_df.merge(ref_df, how="left", left_index=True, right_index=True)
        detrend_df = detrend_df[cfg.detrend_first_dt_utc_start : cfg.detrend_last_dt_utc_start]  # type: ignore[misc]

        # Attach the waking scenario (which turbines wake which, by direction) to each row.
        detrend_df = add_waking_scen(
            test_name=test_name,
            ref_name=ref_name,
            test_ref_df=detrend_df,
            cfg=cfg,
            wf_df=wf_df,
            ref_wd_col=ref_wd_col,
            ref_lat=ref_lat,
            ref_long=ref_long,
        )

        # Diagnostic plot: data coverage available for detrending this test/ref pair.
        plot_detrend_data_cov(
            cfg=cfg,
            test_name=test_name,
            ref_name=ref_name,
            test_df=test_df,
            test_ws_col=test_ws_col,
            ref_df=ref_df,
            ref_ws_col=ref_ws_col,
            ref_wd_col=ref_wd_col,
            detrend_df=detrend_df,
            plot_cfg=plot_cfg,
        )

        # The core result: test/ref wind-speed ratio as a function of wind direction and
        # waking scenario, used to predict the test turbine's wind speed from the reference.
        wsratio_v_dir_scen = calc_wsratio_v_wd_scen(
            test_name=test_name,
            ref_name=ref_name,
            ref_lat=ref_lat,
            ref_long=ref_long,
            detrend_df=detrend_df,
            test_ws_col=test_ws_col,
            ref_ws_col=ref_ws_col,
            ref_wd_col=ref_wd_col,
            cfg=cfg,
            plot_cfg=plot_cfg,
        )

        # NOTE(review): consider logger.info instead of print for consistency with the
        # module's logging elsewhere in this example.
        print(wsratio_v_dir_scen)


if __name__ == "__main__":
Expand Down

0 comments on commit c951268

Please sign in to comment.