From 7fd093912364ca44d2668befcad7ecdc24cdec07 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Thu, 16 Nov 2023 09:42:23 +1300 Subject: [PATCH 1/8] :rotating_light: Fix E501 Line too long Wrapping docstrings in scripts/datacube.py to under 88 characters. --- scripts/datacube.py | 132 ++++++++++++++++++++++++++++++-------------- 1 file changed, 90 insertions(+), 42 deletions(-) diff --git a/scripts/datacube.py b/scripts/datacube.py index 2b2a4e23..e6c553aa 100644 --- a/scripts/datacube.py +++ b/scripts/datacube.py @@ -1,22 +1,36 @@ """ STAC Data Processing Script -This Python script processes Sentinel-2, Sentinel-1, and DEM (Digital Elevation Model) data. It utilizes the Planetary Computer API for data retrieval and manipulation. +This Python script processes Sentinel-2, Sentinel-1, and Copernicus DEM +(Digital Elevation Model) data. It utilizes Microsoft's Planetary Computer API +for data retrieval and manipulation. Constants: - STAC_API: Planetary Computer API endpoint - S2_BANDS: Bands used in Sentinel-2 data processing Functions: -- random_date(start_year, end_year): Generate a random date within a specified range. -- get_week(year, month, day): Get the week range for a given date. -- get_conditions(year1, year2, cloud_cover_percentage): Get random conditions (date, year, month, day, cloud cover) within a specified year range. -- search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage): Search for Sentinel-2 items within a given week and area of interest. -- search_sentinel1(BBOX, catalog, week): Search for Sentinel-1 items within a given bounding box, STAC catalog, and week. -- search_dem(BBOX, catalog, epsg): Search for DEM items within a given bounding box. -- make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): Create xarray DataArrays for Sentinel-2, Sentinel-1, and DEM data. -- merge_datarrays(da_sen2, da_sen1, da_dem): Merge xarray DataArrays for Sentinel-2, Sentinel-1, and DEM. 
-- process(year1, year2, aoi, resolution): Process Sentinel-2, Sentinel-1, and DEM data for a specified time range, area of interest, and resolution. +- random_date(start_year, end_year): + Generate a random date within a specified range. +- get_week(year, month, day): + Get the week range for a given date. +- get_conditions(year1, year2, cloud_cover_percentage): + Get random conditions (date, year, month, day, cloud cover) within a + specified year range. +- search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage): + Search for Sentinel-2 items within a given week and area of interest. +- search_sentinel1(BBOX, catalog, week): + Search for Sentinel-1 items within a given bounding box, STAC catalog, + and week. +- search_dem(BBOX, catalog, epsg): + Search for DEM items within a given bounding box. +- make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): + Create xarray DataArrays for Sentinel-2, Sentinel-1, and DEM data. +- merge_datarrays(da_sen2, da_sen1, da_dem): + Merge xarray DataArrays for Sentinel-2, Sentinel-1, and DEM. +- process(year1, year2, aoi, resolution): + Process Sentinel-2, Sentinel-1, and DEM data for a specified time range, + area of interest, and resolution. """ import random @@ -63,7 +77,8 @@ def get_week(year, month, day): - day (int): The day of the date. Returns: - - str: A string representing the start and end dates of the week in the format 'start_date/end_date'. + - str: A string representing the start and end dates of the week in the + format 'start_date/end_date'. """ date = datetime(year, month, day) start_of_week = date - timedelta(days=date.weekday()) @@ -75,15 +90,18 @@ def get_week(year, month, day): def get_conditions(year1, year2, cloud_cover_percentage): """ - Get random conditions (date, year, month, day, cloud cover) within the specified year range. + Get random conditions (date, year, month, day, cloud cover) within the + specified year range. 
Parameters: - year1 (int): The starting year of the date range. - year2 (int): The ending year of the date range. - - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage for Sentinel-2 images. + - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage + for Sentinel-2 images. Returns: - - tuple: A tuple containing date, year, month, day, and a constant cloud cover value. + - tuple: A tuple containing date, year, month, day, and a constant cloud + cover value. """ date = random_date(year1, year2) YEAR = date.year @@ -95,20 +113,29 @@ def get_conditions(year1, year2, cloud_cover_percentage): def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage): """ - Search for Sentinel-2 items within a given week and area of interest (AOI) with specified conditions. + Search for Sentinel-2 items within a given week and area of interest (AOI) + with specified conditions. Parameters: - week (str): The week in the format 'start_date/end_date'. - - aoi (shapely.geometry.base.BaseGeometry): Geometry object for an Area of Interest (AOI). - - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage for Sentinel-2 images. - - nodata_pixel_percentage (int): Maximum acceptable percentage of nodata pixels in Sentinel-2 images. + - aoi (shapely.geometry.base.BaseGeometry): Geometry object for an Area of + Interest (AOI). + - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage + for Sentinel-2 images. + - nodata_pixel_percentage (int): Maximum acceptable percentage of nodata + pixels in Sentinel-2 images. Returns: - - tuple: A tuple containing the STAC catalog, Sentinel-2 items, the bounding box (BBOX), and an EPSG code for the coordinate reference system. + - tuple: A tuple containing the STAC catalog, Sentinel-2 items, the + bounding box (BBOX), and an EPSG code for the coordinate reference + system. 
Note: - The function filters Sentinel-2 items based on the specified conditions such as geometry, date, cloud cover, and nodata pixel percentage. - The result is returned as a tuple containing the STAC catalog, Sentinel-2 items, the bounding box of the first item, and an EPSG code for the coordinate reference system. + The function filters Sentinel-2 items based on the specified conditions + such as geometry, date, cloud cover, and nodata pixel percentage. The + result is returned as a tuple containing the STAC catalog, Sentinel-2 + items, the bounding box of the first item, and an EPSG code for the + coordinate reference system. """ CENTROID = aoi.centroid @@ -165,7 +192,8 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage) s2_items_gdf = s2_items_gdf[s2_items_gdf["eo:cloud_cover"] == best_clouds] - # Get the item ID for the filtered Sentinel 2 dataframe containing the best cloud free scene + # Get the datetime for the filtered Sentinel 2 dataframe + # containing the best cloud free scene s2_items_gdf_datetime_id = s2_items_gdf["datetime"] for item in s2_items: if item.properties["datetime"] == s2_items_gdf_datetime_id[0]: @@ -184,19 +212,24 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage) def search_sentinel1(BBOX, catalog, week): """ - Search for Sentinel-1 items within a given bounding box (BBOX), STAC catalog, and week. + Search for Sentinel-1 items within a given bounding box (BBOX), STAC + catalog, and week. Parameters: - - BBOX (tuple): Bounding box coordinates in the format (minx, miny, maxx, maxy). + - BBOX (tuple): Bounding box coordinates in the format + (minx, miny, maxx, maxy). - catalog (pystac.Catalog): STAC catalog containing Sentinel-1 items. - week (str): The week in the format 'start_date/end_date'. Returns: - - pystac.Collection: A collection of Sentinel-1 items filtered by specified conditions. 
+ - pystac.Collection: A collection of Sentinel-1 items filtered by specified + conditions. Note: - This function retrieves Sentinel-1 items from the catalog that intersect with the given bounding box and fall within the provided time window. - The function filters items based on orbit state and returns the collection of Sentinel-1 items that meet the defined criteria. + This function retrieves Sentinel-1 items from the catalog that intersect + with the given bounding box and fall within the provided time window. The + function filters items based on orbit state and returns the collection of + Sentinel-1 items that meet the defined criteria. """ geom_BBOX = box(*BBOX) # Create poly geom object from the bbox @@ -238,15 +271,18 @@ def search_sentinel1(BBOX, catalog, week): def search_dem(BBOX, catalog, epsg): """ - Search for Digital Elevation Model (DEM) items within a given bounding box (BBOX), STAC catalog, week, and Sentinel-2 items. + Search for Copernicus Digital Elevation Model (DEM) items within a given + bounding box (BBOX), STAC catalog, and Sentinel-2 items. Parameters: - - BBOX (tuple): Bounding box coordinates in the format (minx, miny, maxx, maxy). + - BBOX (tuple): Bounding box coordinates in the format + (minx, miny, maxx, maxy). - catalog (pystac.Catalog): STAC catalog containing DEM items. - epsg (int): EPSG code for the coordinate reference system. Returns: - - pystac.Collection: A collection of Digital Elevation Model (DEM) items filtered by specified conditions. + - pystac.Collection: A collection of Digital Elevation Model (DEM) items + filtered by specified conditions. """ search = catalog.search(collections=["cop-dem-glo-30"], bbox=BBOX) dem_items = search.item_collection() @@ -261,18 +297,21 @@ def search_dem(BBOX, catalog, epsg): def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): """ - Create xarray DataArrays for Sentinel-2, Sentinel-1, and DEM data. 
+ Create xarray DataArrays for Sentinel-2, Sentinel-1, and Copernicus DEM + data. Parameters: - s2_items (list): List of Sentinel-2 items. - s1_items (list): List of Sentinel-1 items. - dem_items (list): List of DEM items. - - BBOX (tuple): Bounding box coordinates in the format (minx, miny, maxx, maxy). + - BBOX (tuple): Bounding box coordinates in the format + (minx, miny, maxx, maxy). - resolution (int): Spatial resolution. - epsg (int): EPSG code for the coordinate reference system. Returns: - - tuple: A tuple containing xarray DataArrays for Sentinel-2, Sentinel-1, and DEM. + - tuple: A tuple containing xarray DataArrays for Sentinel-2, Sentinel-1, + and Copernicus DEM. """ da_sen2: xr.DataArray = stackstac.stack( items=s2_items, @@ -286,7 +325,7 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): ) da_sen1: xr.DataArray = stackstac.stack( - items=s1_items, # To only accept the same orbit state and date. Need better way to do this. + items=s1_items, assets=["vh", "vv"], # SAR polarizations epsg=epsg, bounds_latlon=BBOX, # W, S, E, N @@ -363,17 +402,22 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): def merge_datarrays(da_sen2, da_sen1, da_dem): """ - Merge xarray DataArrays for Sentinel-2, Sentinel-1, and DEM. + Merge xarray DataArrays for Sentinel-2, Sentinel-1, and Copernicus DEM. Parameters: - da_sen2 (xr.DataArray): xarray DataArray for Sentinel-2 data. - da_sen1 (xr.DataArray): xarray DataArray for Sentinel-1 data. - - da_dem (xr.DataArray): xarray DataArray for DEM data. + - da_dem (xr.DataArray): xarray DataArray for Copernicus DEM data. Returns: - xr.DataArray: Merged xarray DataArray. 
""" - # print("Platform variables (S2, S1, DEM): ", da_sen2.platform.values, da_sen1.platform.values, da_dem.platform.values) + # print( + # "Platform variables (S2, S1, DEM): ", + # da_sen2.platform.values, + # da_sen1.platform.values, + # da_dem.platform.values, + # ) # da_sen2 = da_sen2.drop(["platform", "constellation"]) # da_sen1 = da_sen1.drop(["platform", "constellation"]) # da_dem = da_dem.drop(["platform"]) @@ -390,17 +434,21 @@ def process( year1, year2, aoi, resolution, cloud_cover_percentage, nodata_pixel_percentage ): """ - Process Sentinel-2, Sentinel-1, and DEM data for a specified time range, area of interest (AOI), - resolution, EPSG code, cloud cover percentage, and nodata pixel percentage. + Process Sentinel-2, Sentinel-1, and Copernicus DEM data for a specified + time range, area of interest (AOI), resolution, EPSG code, cloud cover + percentage, and nodata pixel percentage. Parameters: - year1 (int): The starting year of the date range. - year2 (int): The ending year of the date range. - - aoi (shapely.geometry.base.BaseGeometry): Geometry object for an Area of Interest (AOI). + - aoi (shapely.geometry.base.BaseGeometry): Geometry object for an Area of + Interest (AOI). - resolution (int): Spatial resolution. - epsg (int): EPSG code for the coordinate reference system. - - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage for Sentinel-2 images. - - nodata_pixel_percentage (int): Maximum acceptable percentage of nodata pixels in Sentinel-2 images. + - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage + for Sentinel-2 images. + - nodata_pixel_percentage (int): Maximum acceptable percentage of nodata + pixels in Sentinel-2 images. Returns: - xr.DataArray: Merged xarray DataArray containing processed data. 
From cc99ae46ecbdc827d5aabe2940ff481602749d91 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Thu, 16 Nov 2023 10:07:03 +1300 Subject: [PATCH 2/8] :recycle: Refactor best_nodata and best_clouds into single sort function Fixes F841 Local variable `best_nodata` is assigned to but never used. Only the best_clouds variable was used, and best_nodata was omitted, but both should be used. Doing this in a single pandas sort_values function. --- scripts/datacube.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/scripts/datacube.py b/scripts/datacube.py index e6c553aa..aae5553f 100644 --- a/scripts/datacube.py +++ b/scripts/datacube.py @@ -174,26 +174,15 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage) s2_items_gdf = gpd.GeoDataFrame.from_features(s2_items.to_dict()) - best_nodata = ( - s2_items_gdf[["s2:nodata_pixel_percentage"]] - .groupby(["s2:nodata_pixel_percentage"]) - .sum() - .sort_values(by="s2:nodata_pixel_percentage", ascending=True) - .index[0] - ) - - best_clouds = ( - s2_items_gdf[["eo:cloud_cover"]] - .groupby(["eo:cloud_cover"]) - .sum() - .sort_values(by="eo:cloud_cover", ascending=True) - .index[0] - ) + least_nodata_and_clouds = s2_items_gdf.sort_values( + by=["s2:nodata_pixel_percentage", "eo:cloud_cover"], ascending=True + ).index[0] - s2_items_gdf = s2_items_gdf[s2_items_gdf["eo:cloud_cover"] == best_clouds] + s2_items_gdf = s2_items_gdf.iloc[least_nodata_and_clouds] + s2_items_gdf # Get the datetime for the filtered Sentinel 2 dataframe - # containing the best cloud free scene + # containing the least nodata and least cloudy scene s2_items_gdf_datetime_id = s2_items_gdf["datetime"] for item in s2_items: if item.properties["datetime"] == s2_items_gdf_datetime_id[0]: @@ -202,7 +191,7 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage) else: continue - BBOX = s2_items_gdf.iloc[0].geometry.bounds + BBOX = 
s2_items_gdf.iloc[0].bounds epsg = s2_item.properties["proj:epsg"] print("EPSG code based on Sentinel-2 item: ", epsg) From 262aaa606eb58a74b95501faf4e14ae0aa6b8fd2 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Thu, 16 Nov 2023 10:36:55 +1300 Subject: [PATCH 3/8] :ambulance: Quickfix with getting the STAC item with a specific datetime Patch cc99ae46ecbdc827d5aabe2940ff481602749d91 --- scripts/datacube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/datacube.py b/scripts/datacube.py index aae5553f..58d1835f 100644 --- a/scripts/datacube.py +++ b/scripts/datacube.py @@ -183,9 +183,9 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage) # Get the datetime for the filtered Sentinel 2 dataframe # containing the least nodata and least cloudy scene - s2_items_gdf_datetime_id = s2_items_gdf["datetime"] + s2_items_gdf_datetime = s2_items_gdf["datetime"] for item in s2_items: - if item.properties["datetime"] == s2_items_gdf_datetime_id[0]: + if item.properties["datetime"] == s2_items_gdf_datetime: s2_item = item # print(s2_item.properties["datetime"]) else: From 2af24be47fb78960bad5f08d080b542b5f22a44b Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Thu, 16 Nov 2023 10:39:01 +1300 Subject: [PATCH 4/8] :label: Rename variables to ds_ (xr.Dataset) or da (xr.DataArray) Using ds_ prefix for xr.Dataset objects, and da_ prefix for xr.DataArray objects. --- scripts/datacube.py | 67 ++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/scripts/datacube.py b/scripts/datacube.py index 58d1835f..a5af15d1 100644 --- a/scripts/datacube.py +++ b/scripts/datacube.py @@ -26,7 +26,7 @@ Search for DEM items within a given bounding box. - make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): Create xarray DataArrays for Sentinel-2, Sentinel-1, and DEM data. 
-- merge_datarrays(da_sen2, da_sen1, da_dem): +- merge_datarrays(ds_sen2, ds_sen1, da_dem): Merge xarray DataArrays for Sentinel-2, Sentinel-1, and DEM. - process(year1, year2, aoi, resolution): Process Sentinel-2, Sentinel-1, and DEM data for a specified time range, @@ -286,7 +286,7 @@ def search_dem(BBOX, catalog, epsg): def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): """ - Create xarray DataArrays for Sentinel-2, Sentinel-1, and Copernicus DEM + Create xarray Datasets for Sentinel-2, Sentinel-1, and Copernicus DEM data. Parameters: @@ -299,7 +299,7 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): - epsg (int): EPSG code for the coordinate reference system. Returns: - - tuple: A tuple containing xarray DataArrays for Sentinel-2, Sentinel-1, + - tuple: A tuple containing xarray Datasets for Sentinel-2, Sentinel-1, and Copernicus DEM. """ da_sen2: xr.DataArray = stackstac.stack( @@ -313,18 +313,8 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): fill_value=np.nan, ) - da_sen1: xr.DataArray = stackstac.stack( - items=s1_items, - assets=["vh", "vv"], # SAR polarizations - epsg=epsg, - bounds_latlon=BBOX, # W, S, E, N - xy_coords="center", # pixel centroid coords instead of topleft corner - dtype=np.float32, - fill_value=np.nan, - ) - - # Create xarray.Dataset datacube with VH and VV channels from SAR - # 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B11', 'B12', 'B8A', 'SCL' + # Create xarray.Dataset datacube with all 10m and 20m bands from Sentinel-2 + # 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B11', 'B12', 'SCL' da_s2_0: xr.DataArray = da_sen2.sel(band="B02", drop=True).rename("B02").squeeze() da_s2_1: xr.DataArray = da_sen2.sel(band="B03", drop=True).rename("B03").squeeze() da_s2_2: xr.DataArray = da_sen2.sel(band="B04", drop=True).rename("B04").squeeze() @@ -337,7 +327,7 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): da_s2_9: 
xr.DataArray = da_sen2.sel(band="B11", drop=True).rename("B11").squeeze() da_s2_10: xr.DataArray = da_sen2.sel(band="SCL", drop=True).rename("SCL").squeeze() - da_sen2_all: xr.Dataset = xr.merge( + ds_sen2: xr.Dataset = xr.merge( objects=[ da_s2_0, da_s2_1, @@ -353,8 +343,17 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): ], join="override", ) + ds_sen2.assign(time=da_sen2.time) - da_sen2_all.assign(time=da_sen2.time) + da_sen1: xr.DataArray = stackstac.stack( + items=s1_items, + assets=["vh", "vv"], # SAR polarizations + epsg=epsg, + bounds_latlon=BBOX, # W, S, E, N + xy_coords="center", # pixel centroid coords instead of topleft corner + dtype=np.float32, + fill_value=np.nan, + ) # To fix TypeError: Invalid value for attr 'spec' da_sen1.attrs["spec"] = str(da_sen1.spec) @@ -380,42 +379,41 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): dtype=np.float32, fill_value=np.nan, ) - - da_dem = stackstac.mosaic(da_dem, dim="time").squeeze().rename("DEM") + da_dem: xr.DataArray = stackstac.mosaic(da_dem, dim="time").squeeze().rename("DEM") # _, index = np.unique(da_dem['time'], return_index=True) # Remove redundant time # da_dem = da_dem.isel(time=index) - return da_sen2_all, ds_sen1, da_dem + return ds_sen2, ds_sen1, da_dem -def merge_datarrays(da_sen2, da_sen1, da_dem): +def merge_datarrays(ds_sen2, ds_sen1, da_dem): """ - Merge xarray DataArrays for Sentinel-2, Sentinel-1, and Copernicus DEM. + Merge xarray Dataset for Sentinel-2, Sentinel-1, and Copernicus DEM. Parameters: - - da_sen2 (xr.DataArray): xarray DataArray for Sentinel-2 data. - - da_sen1 (xr.DataArray): xarray DataArray for Sentinel-1 data. + - ds_sen2 (xr.Dataset): xarray Dataset for Sentinel-2 data. + - ds_sen1 (xr.Dataset): xarray Dataset for Sentinel-1 data. - da_dem (xr.DataArray): xarray DataArray for Copernicus DEM data. Returns: - - xr.DataArray: Merged xarray DataArray. + - xr.Dataset: Merged xarray Dataset. 
""" # print( # "Platform variables (S2, S1, DEM): ", - # da_sen2.platform.values, - # da_sen1.platform.values, + # ds_sen2.platform.values, + # ds_sen1.platform.values, # da_dem.platform.values, # ) - # da_sen2 = da_sen2.drop(["platform", "constellation"]) - # da_sen1 = da_sen1.drop(["platform", "constellation"]) + # ds_sen2 = ds_sen2.drop(["platform", "constellation"]) + # ds_sen1 = ds_sen1.drop(["platform", "constellation"]) # da_dem = da_dem.drop(["platform"]) - da_merge = xr.merge([da_sen2, da_sen1, da_dem], compat="override") + da_merge = xr.merge([ds_sen2, ds_sen1, da_dem], compat="override") print("Merged datarray: ", da_merge) print( - "Time variables (S2, merged): ", da_sen2.time.values, da_merge.time.values - ) # da_sen1.time.values, da_dem.time.values + "Time variables (S2, merged): ", ds_sen2.time.values, da_merge.time.values + ) # ds_sen1.time.values, da_dem.time.values return da_merge @@ -433,7 +431,6 @@ def process( - aoi (shapely.geometry.base.BaseGeometry): Geometry object for an Area of Interest (AOI). - resolution (int): Spatial resolution. - - epsg (int): EPSG code for the coordinate reference system. - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage for Sentinel-2 images. 
- nodata_pixel_percentage (int): Maximum acceptable percentage of nodata @@ -454,11 +451,11 @@ def process( dem_items = search_dem(BBOX, catalog, epsg) - da_sen2, da_sen1, da_dem = make_dataarrays( + ds_sen2, ds_sen1, da_dem = make_dataarrays( s2_items, s1_items, dem_items, BBOX, resolution, epsg ) - da_merge = merge_datarrays(da_sen2, da_sen1, da_dem) + da_merge = merge_datarrays(ds_sen2, ds_sen1, da_dem) return da_merge From 4664b59befc6534b380a5c67e3e43b228b1a2ca6 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Thu, 16 Nov 2023 10:57:54 +1300 Subject: [PATCH 5/8] :wrench: Set pylint max-args to 6 Increase max-args from the default value of 5 to 6. --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index d3a96c17..c2b3a2b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,3 +13,6 @@ select = [ "UP", # pyupgrade "W", # pycodestyle warnings ] + +[tool.ruff.lint.pylint] +max-args = 6 From a396c5db20d0503d275aa57060f58f3159f3d42b Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Thu, 16 Nov 2023 11:10:54 +1300 Subject: [PATCH 6/8] :wastebasket: Replace .get_all_items() with .item_collection() Fixes `FutureWarning: get_all_items() is deprecated, use item_collection() instead`. 
--- scripts/datacube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/datacube.py b/scripts/datacube.py index a5af15d1..e99bae85 100644 --- a/scripts/datacube.py +++ b/scripts/datacube.py @@ -169,7 +169,7 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage) }, ) - s2_items = search.get_all_items() + s2_items = search.item_collection() print(f"Found {len(s2_items)} Sentinel-2 items") s2_items_gdf = gpd.GeoDataFrame.from_features(s2_items.to_dict()) From f65e34a9c4f72950b4f8a68a6ea2d4c580878ecd Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Thu, 16 Nov 2023 14:20:05 +1300 Subject: [PATCH 7/8] :fire: Remove sorting by nodata and just sort by least cloud cover No need to sort by `s2:nodata_pixel_percentage` anymore, just get the Sentinel-2 STAC item with the least cloud cover. --- scripts/datacube.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/datacube.py b/scripts/datacube.py index e99bae85..0614951e 100644 --- a/scripts/datacube.py +++ b/scripts/datacube.py @@ -132,10 +132,10 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage) Note: The function filters Sentinel-2 items based on the specified conditions - such as geometry, date, cloud cover, and nodata pixel percentage. The - result is returned as a tuple containing the STAC catalog, Sentinel-2 - items, the bounding box of the first item, and an EPSG code for the - coordinate reference system. + such as geometry, date, cloud cover, and nodata pixel percentage. Only one + result with the least cloud cover will be returned. The result is returned + as a tuple containing the STAC catalog, Sentinel-2 items, the bounding box + of the first item, and an EPSG code for the coordinate reference system. 
""" CENTROID = aoi.centroid @@ -174,11 +174,11 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage) s2_items_gdf = gpd.GeoDataFrame.from_features(s2_items.to_dict()) - least_nodata_and_clouds = s2_items_gdf.sort_values( - by=["s2:nodata_pixel_percentage", "eo:cloud_cover"], ascending=True + least_clouds = s2_items_gdf.sort_values( + by=["eo:cloud_cover"], ascending=True ).index[0] - s2_items_gdf = s2_items_gdf.iloc[least_nodata_and_clouds] + s2_items_gdf = s2_items_gdf.iloc[least_clouds] s2_items_gdf # Get the datetime for the filtered Sentinel 2 dataframe From 7b208781ce54133a8ce72dd226be8575688780ae Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Thu, 16 Nov 2023 14:24:35 +1300 Subject: [PATCH 8/8] :memo: More DataArray to Dataset renames Missed a few more da_ to ds_ renames, following from 2af24be47fb78960bad5f08d080b542b5f22a44b --- scripts/datacube.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/scripts/datacube.py b/scripts/datacube.py index 0614951e..0af29a61 100644 --- a/scripts/datacube.py +++ b/scripts/datacube.py @@ -24,10 +24,10 @@ and week. - search_dem(BBOX, catalog, epsg): Search for DEM items within a given bounding box. -- make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): - Create xarray DataArrays for Sentinel-2, Sentinel-1, and DEM data. -- merge_datarrays(ds_sen2, ds_sen1, da_dem): - Merge xarray DataArrays for Sentinel-2, Sentinel-1, and DEM. +- make_datasets(s2_items, s1_items, dem_items, BBOX, resolution, epsg): + Create xarray Datasets for Sentinel-2, Sentinel-1, and DEM data. +- merge_datasets(ds_sen2, ds_sen1, da_dem): + Merge xarray Datasets for Sentinel-2, Sentinel-1, and DEM. - process(year1, year2, aoi, resolution): Process Sentinel-2, Sentinel-1, and DEM data for a specified time range, area of interest, and resolution. 
@@ -284,7 +284,7 @@ def search_dem(BBOX, catalog, epsg): return dem_items -def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): +def make_datasets(s2_items, s1_items, dem_items, BBOX, resolution, epsg): """ Create xarray Datasets for Sentinel-2, Sentinel-1, and Copernicus DEM data. @@ -387,7 +387,7 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): return ds_sen2, ds_sen1, da_dem -def merge_datarrays(ds_sen2, ds_sen1, da_dem): +def merge_datasets(ds_sen2, ds_sen1, da_dem): """ Merge xarray Dataset for Sentinel-2, Sentinel-1, and Copernicus DEM. @@ -409,12 +409,12 @@ def merge_datarrays(ds_sen2, ds_sen1, da_dem): # ds_sen1 = ds_sen1.drop(["platform", "constellation"]) # da_dem = da_dem.drop(["platform"]) - da_merge = xr.merge([ds_sen2, ds_sen1, da_dem], compat="override") - print("Merged datarray: ", da_merge) + ds_merge = xr.merge([ds_sen2, ds_sen1, da_dem], compat="override") + print("Merged dataset: ", ds_merge) print( - "Time variables (S2, merged): ", ds_sen2.time.values, da_merge.time.values + "Time variables (S2, merged): ", ds_sen2.time.values, ds_merge.time.values ) # ds_sen1.time.values, da_dem.time.values - return da_merge + return ds_merge def process( @@ -437,7 +437,7 @@ def process( pixels in Sentinel-2 images. Returns: - - xr.DataArray: Merged xarray DataArray containing processed data. + - xr.Dataset: Merged xarray Dataset containing processed data. """ date, YEAR, MONTH, DAY, CLOUD = get_conditions(year1, year2, cloud_cover_percentage) @@ -451,12 +451,12 @@ def process( dem_items = search_dem(BBOX, catalog, epsg) - ds_sen2, ds_sen1, da_dem = make_dataarrays( + ds_sen2, ds_sen1, da_dem = make_datasets( s2_items, s1_items, dem_items, BBOX, resolution, epsg ) - da_merge = merge_datarrays(ds_sen2, ds_sen1, da_dem) - return da_merge + ds_merge = merge_datasets(ds_sen2, ds_sen1, da_dem) + return ds_merge if __name__ == "__main__":