Skip to content

Commit

Permalink
Merge pull request #494 from openeemeter/fix/datetime64-units
Browse files Browse the repository at this point in the history
Fix issues with datetimeindex
  • Loading branch information
travis-recurve authored May 7, 2024
2 parents 22bdffc + d318ca5 commit a99ff40
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 2 deletions.
2 changes: 1 addition & 1 deletion eemeter/eemeter/common/data_processor_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def as_freq(
.mean()
)
if include_coverage:
n_total = resampled.resample(atomic_freq).count().resample(freq).count()
n_total = resampled.resample(atomic_freq).count().resample(freq, origin=resampled.index[0]).count()
resampled = resampled.to_frame("value")
resampled["coverage"] = n_coverage / n_total

Expand Down
7 changes: 7 additions & 0 deletions eemeter/eemeter/models/daily/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,13 @@ def _set_data(self, data: pd.DataFrame):
)
self.tz = df.index.tz

# prevent later issues when merging on generated datetimes, which default to ns precision
# there is almost certainly a smoother way to accomplish this conversion, but this works
if df.index.dtype.unit != "ns":
utc_index = df.index.tz_convert("UTC")
ns_index = utc_index.astype("datetime64[ns, UTC]")
df.index = ns_index.tz_convert(self.tz)

# Convert electricity data having 0 meter values to NaNs
if self.is_electricity_data:
df.loc[df["observed"] == 0, "observed"] = np.nan
Expand Down
27 changes: 26 additions & 1 deletion tests/daily_model/test_daily_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -789,4 +789,29 @@ def test_offset_temperature_aggregations(baseline_data_daily_params, tz, hour):
abs_diff = 0
for day in baseline.df.index:
abs_diff += abs(temp_series[day:day+pd.Timedelta(hours=23)].mean() - baseline.df.temperature.loc[day].squeeze())
assert abs_diff < 0.000001
assert abs_diff < 0.000001

def test_non_ns_datetime_index():
    """Build DailyBaselineData from series indexed at microsecond resolution.

    Loads the hourly Illinois electricity sample, keeps only rows whose index
    year is 2017, casts both indexes to ``datetime64[us, UTC]`` and checks
    that the resulting frame exists and has ``NUM_DAYS_IN_YEAR`` rows.
    """
    meter, temperature, _ = load_sample("il-electricity-cdd-hdd-hourly")

    # Keep only the observations stamped within 2017.
    meter = meter[meter.index.year == 2017]
    temperature = temperature[temperature.index.year == 2017]

    # Swap the indexes over to microsecond ("us") resolution.
    microsecond_utc = "datetime64[us, UTC]"
    meter.index = meter.index.astype(microsecond_utc)
    temperature.index = temperature.index.astype(microsecond_utc)

    baseline = DailyBaselineData.from_series(
        meter,
        temperature,
        is_electricity_data=True,
    )

    assert baseline.df is not None
    assert len(baseline.df) == NUM_DAYS_IN_YEAR

def test_offset_aggregations_hourly(il_electricity_cdd_hdd_hourly):
    """Build DailyBaselineData from hourly meter data with its first rows dropped.

    Takes the baseline-period meter data ending at the fixture's
    ``blackout_start_date``, discards its first three rows, and checks that
    the resulting daily frame has ``NUM_DAYS_IN_YEAR`` rows.
    """
    fixture = il_electricity_cdd_hdd_hourly
    meter_data = fixture["meter_data"]
    temperature_data = fixture["temperature_data"]
    blackout_start_date = fixture["blackout_start_date"]

    # The second element returned by get_baseline_data is not inspected here.
    baseline_meter_data, _warnings = get_baseline_data(
        meter_data,
        end=blackout_start_date,
    )

    # Skip the first three rows; the original author's note says this makes
    # the series begin from 3AM UTC.
    baseline_meter_data = baseline_meter_data.iloc[3:]

    baseline = DailyBaselineData.from_series(
        baseline_meter_data,
        temperature_data,
        is_electricity_data=True,
    )

    assert baseline is not None
    assert len(baseline.df) == NUM_DAYS_IN_YEAR

0 comments on commit a99ff40

Please sign in to comment.