Skip to content

Commit

Permalink
Merge pull request #159 from openclimatefix/no-satellite-nans
Browse files Browse the repository at this point in the history
No satellite nans
  • Loading branch information
peterdudfield authored Dec 17, 2024
2 parents 96c5041 + ecf112b commit f22d779
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 8 deletions.
6 changes: 6 additions & 0 deletions pvnet_app/consts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# File names of the local zarr stores used for satellite and NWP data
sat_path = "sat.zarr"
nwp_ukv_path = "nwp_ukv.zarr"
nwp_ecmwf_path = "nwp_ecmwf.zarr"


# [min, max] limits along each geostationary axis
# NOTE(review): presumably metres in the satellite's geostationary projection,
# bounding the UK - confirm against where this box is used
uk_box = {
    "x_geostationary": [-996_133.85, -480_064.6],
    "y_geostationary": [4_512_606.3, 5_058_679.8],
}
15 changes: 9 additions & 6 deletions pvnet_app/data/satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,9 @@ def preprocess_sat_data(t0: pd.Timestamp, use_legacy: bool = False) -> pd.Dateti
# Deal with switching between the 5 and 15 minutely satellite data
combine_5_and_15_sat_data()

# Check for nans in the satellite data
check_for_constant_values(value=np.nan, threshold=0)

# Interpolate missing satellite timestamps
interpolate_missing_satellite_timestamps(pd.Timedelta("15min"))

Expand All @@ -343,30 +346,30 @@ def preprocess_sat_data(t0: pd.Timestamp, use_legacy: bool = False) -> pd.Dateti
extend_satellite_data_with_nans(t0)

# Check for zeros in the satellite data
check_for_zeros()
check_for_constant_values()

return sat_timestamps


def check_for_constant_values(value: Optional[float] = 0, threshold: Optional[float] = ERROR_ZERO_PERCENTAGE) -> None:
    """Check the satellite data for a constant value and raise an exception

    This sometimes happen when the satellite data is corrupt

    Note that in the UK, even at night, the values are not zero.

    Args:
        value: The value to look for in each time step. NaN is supported and is
            matched with `np.isnan` rather than `==`.
        threshold: The fraction of data points in a single time step that may
            equal `value`. An exception is raised when the fraction is strictly
            greater than this.

    Raises:
        Exception: If any time step has more than `threshold` of its data points
            equal to `value`.
    """
    logger.info(f"Checking satellite data for constant value ({value})")
    ds_sat = xr.open_zarr(sat_path)
    shape = ds_sat.data.shape
    n_data_points_per_timestep = shape[1] * shape[2] * shape[3]
    n_time_steps = shape[0]

    # NaN never compares equal to anything (including itself), so `data == np.nan`
    # is always False and a NaN check must use np.isnan instead
    looking_for_nan = value is not None and np.isnan(value)

    # Load one time step at a time rather than the whole array at once
    for i in range(n_time_steps):
        data = ds_sat.data[i].values
        matches = np.isnan(data) if looking_for_nan else (data == value)
        if matches.sum() / n_data_points_per_timestep > threshold:
            time = ds_sat.time[i].values
            message = (
                f"Satellite data contains the constant value {value} "
                f"(fraction greater than {threshold}), "
                f"This is for time step {time}"
            )
            raise Exception(message)
Expand Down
25 changes: 23 additions & 2 deletions tests/data/test_satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
sat_path,
sat_5_path,
sat_15_path,
extend_satellite_data_with_nans
extend_satellite_data_with_nans,
)


Expand Down Expand Up @@ -242,7 +242,7 @@ def test_extend_satellite_data_with_nans_over_3_hours(sat_5_data, test_t0):


def test_zeros_in_sat_data(sat_15_data_small, test_t0):
"""Download and process only the 15 minute satellite data"""
"""Check error is made if data has zeros"""

# make temporary directory
with tempfile.TemporaryDirectory() as tmpdirname:
Expand All @@ -262,3 +262,24 @@ def test_zeros_in_sat_data(sat_15_data_small, test_t0):
# check an error is made
with pytest.raises(Exception):
preprocess_sat_data(test_t0)


def test_remove_satellite_data(sat_15_data_small, test_t0):
    """Check error is made if data has nans"""
    # Remember where we started so the working directory can be restored
    try:
        original_cwd = os.getcwd()
    except FileNotFoundError:
        # The current directory may already have been deleted (e.g. a temporary
        # directory left behind as cwd); fall back to a directory that exists
        original_cwd = tempfile.gettempdir()

    # make temporary directory
    with tempfile.TemporaryDirectory() as tmpdirname:
        try:
            # Change to temporary working directory
            os.chdir(tmpdirname)

            # make half the values nans
            sat_15_data_small.data[::2] = np.nan

            # Make 15-minutely satellite data available
            save_to_zarr_zip(sat_15_data_small, filename="latest.zarr.zip")

            os.environ["SATELLITE_ZARR_PATH"] = "latest.zarr.zip"
            download_all_sat_data()

            # check an error is made
            with pytest.raises(Exception):
                preprocess_sat_data(test_t0)
        finally:
            # Leave the temporary directory before it is removed, so the process
            # is not left with a deleted working directory
            os.chdir(original_cwd)

0 comments on commit f22d779

Please sign in to comment.