From 7fd093912364ca44d2668befcad7ecdc24cdec07 Mon Sep 17 00:00:00 2001
From: Wei Ji <23487320+weiji14@users.noreply.github.com>
Date: Thu, 16 Nov 2023 09:42:23 +1300
Subject: [PATCH 1/8] :rotating_light: Fix E501 Line too long

Wrapping docstrings in scripts/datacube.py to under 88 characters.
---
 scripts/datacube.py | 132 ++++++++++++++++++++++++++++++--------------
 1 file changed, 90 insertions(+), 42 deletions(-)

diff --git a/scripts/datacube.py b/scripts/datacube.py
index 2b2a4e23..e6c553aa 100644
--- a/scripts/datacube.py
+++ b/scripts/datacube.py
@@ -1,22 +1,36 @@
 """
 STAC Data Processing Script
 
-This Python script processes Sentinel-2, Sentinel-1, and DEM (Digital Elevation Model) data. It utilizes the Planetary Computer API for data retrieval and manipulation.
+This Python script processes Sentinel-2, Sentinel-1, and Copernicus DEM
+(Digital Elevation Model) data. It utilizes Microsoft's Planetary Computer API
+for data retrieval and manipulation.
 
 Constants:
 - STAC_API: Planetary Computer API endpoint
 - S2_BANDS: Bands used in Sentinel-2 data processing
 
 Functions:
-- random_date(start_year, end_year): Generate a random date within a specified range.
-- get_week(year, month, day): Get the week range for a given date.
-- get_conditions(year1, year2, cloud_cover_percentage): Get random conditions (date, year, month, day, cloud cover) within a specified year range.
-- search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage): Search for Sentinel-2 items within a given week and area of interest.
-- search_sentinel1(BBOX, catalog, week): Search for Sentinel-1 items within a given bounding box, STAC catalog, and week.
-- search_dem(BBOX, catalog, epsg): Search for DEM items within a given bounding box.
-- make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg): Create xarray DataArrays for Sentinel-2, Sentinel-1, and DEM data.
-- merge_datarrays(da_sen2, da_sen1, da_dem): Merge xarray DataArrays for Sentinel-2, Sentinel-1, and DEM.
-- process(year1, year2, aoi, resolution): Process Sentinel-2, Sentinel-1, and DEM data for a specified time range, area of interest, and resolution.
+- random_date(start_year, end_year):
+      Generate a random date within a specified range.
+- get_week(year, month, day):
+      Get the week range for a given date.
+- get_conditions(year1, year2, cloud_cover_percentage):
+      Get random conditions (date, year, month, day, cloud cover) within a
+      specified year range.
+- search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage):
+      Search for Sentinel-2 items within a given week and area of interest.
+- search_sentinel1(BBOX, catalog, week):
+      Search for Sentinel-1 items within a given bounding box, STAC catalog,
+      and week.
+- search_dem(BBOX, catalog, epsg):
+      Search for DEM items within a given bounding box.
+- make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
+      Create xarray DataArrays for Sentinel-2, Sentinel-1, and DEM data.
+- merge_datarrays(da_sen2, da_sen1, da_dem):
+      Merge xarray DataArrays for Sentinel-2, Sentinel-1, and DEM.
+- process(year1, year2, aoi, resolution):
+      Process Sentinel-2, Sentinel-1, and DEM data for a specified time range,
+      area of interest, and resolution.
 """
 
 import random
@@ -63,7 +77,8 @@ def get_week(year, month, day):
     - day (int): The day of the date.
 
     Returns:
-    - str: A string representing the start and end dates of the week in the format 'start_date/end_date'.
+    - str: A string representing the start and end dates of the week in the
+        format 'start_date/end_date'.
     """
     date = datetime(year, month, day)
     start_of_week = date - timedelta(days=date.weekday())
@@ -75,15 +90,18 @@ def get_week(year, month, day):
 
 def get_conditions(year1, year2, cloud_cover_percentage):
     """
-    Get random conditions (date, year, month, day, cloud cover) within the specified year range.
+    Get random conditions (date, year, month, day, cloud cover) within the
+    specified year range.
 
     Parameters:
     - year1 (int): The starting year of the date range.
     - year2 (int): The ending year of the date range.
-    - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage for Sentinel-2 images.
+    - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage
+        for Sentinel-2 images.
 
     Returns:
-    - tuple: A tuple containing date, year, month, day, and a constant cloud cover value.
+    - tuple: A tuple containing date, year, month, day, and a constant cloud
+        cover value.
     """
     date = random_date(year1, year2)
     YEAR = date.year
@@ -95,20 +113,29 @@ def get_conditions(year1, year2, cloud_cover_percentage):
 
 def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage):
     """
-    Search for Sentinel-2 items within a given week and area of interest (AOI) with specified conditions.
+    Search for Sentinel-2 items within a given week and area of interest (AOI)
+    with specified conditions.
 
     Parameters:
     - week (str): The week in the format 'start_date/end_date'.
-    - aoi (shapely.geometry.base.BaseGeometry): Geometry object for an Area of Interest (AOI).
-    - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage for Sentinel-2 images.
-    - nodata_pixel_percentage (int): Maximum acceptable percentage of nodata pixels in Sentinel-2 images.
+    - aoi (shapely.geometry.base.BaseGeometry): Geometry object for an Area of
+        Interest (AOI).
+    - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage
+        for Sentinel-2 images.
+    - nodata_pixel_percentage (int): Maximum acceptable percentage of nodata
+        pixels in Sentinel-2 images.
 
     Returns:
-    - tuple: A tuple containing the STAC catalog, Sentinel-2 items, the bounding box (BBOX), and an EPSG code for the coordinate reference system.
+    - tuple: A tuple containing the STAC catalog, Sentinel-2 items, the
+        bounding box (BBOX), and an EPSG code for the coordinate reference
+        system.
 
     Note:
-    The function filters Sentinel-2 items based on the specified conditions such as geometry, date, cloud cover, and nodata pixel percentage.
-    The result is returned as a tuple containing the STAC catalog, Sentinel-2 items, the bounding box of the first item, and an EPSG code for the coordinate reference system.
+    The function filters Sentinel-2 items based on the specified conditions
+    such as geometry, date, cloud cover, and nodata pixel percentage. The
+    result is returned as a tuple containing the STAC catalog, Sentinel-2
+    items, the bounding box of the first item, and an EPSG code for the
+    coordinate reference system.
     """
 
     CENTROID = aoi.centroid
@@ -165,7 +192,8 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage)
 
     s2_items_gdf = s2_items_gdf[s2_items_gdf["eo:cloud_cover"] == best_clouds]
 
-    # Get the item ID for the filtered Sentinel 2 dataframe containing the best cloud free scene
+    # Get the datetime for the filtered Sentinel 2 dataframe
+    # containing the best cloud free scene
     s2_items_gdf_datetime_id = s2_items_gdf["datetime"]
     for item in s2_items:
         if item.properties["datetime"] == s2_items_gdf_datetime_id[0]:
@@ -184,19 +212,24 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage)
 
 def search_sentinel1(BBOX, catalog, week):
     """
-    Search for Sentinel-1 items within a given bounding box (BBOX), STAC catalog, and week.
+    Search for Sentinel-1 items within a given bounding box (BBOX), STAC
+    catalog, and week.
 
     Parameters:
-    - BBOX (tuple): Bounding box coordinates in the format (minx, miny, maxx, maxy).
+    - BBOX (tuple): Bounding box coordinates in the format
+        (minx, miny, maxx, maxy).
     - catalog (pystac.Catalog): STAC catalog containing Sentinel-1 items.
     - week (str): The week in the format 'start_date/end_date'.
 
     Returns:
-    - pystac.Collection: A collection of Sentinel-1 items filtered by specified conditions.
+    - pystac.Collection: A collection of Sentinel-1 items filtered by specified
+        conditions.
 
     Note:
-    This function retrieves Sentinel-1 items from the catalog that intersect with the given bounding box and fall within the provided time window.
-    The function filters items based on orbit state and returns the collection of Sentinel-1 items that meet the defined criteria.
+    This function retrieves Sentinel-1 items from the catalog that intersect
+    with the given bounding box and fall within the provided time window. The
+    function filters items based on orbit state and returns the collection of
+    Sentinel-1 items that meet the defined criteria.
     """
 
     geom_BBOX = box(*BBOX)  # Create poly geom object from the bbox
@@ -238,15 +271,18 @@ def search_sentinel1(BBOX, catalog, week):
 
 def search_dem(BBOX, catalog, epsg):
     """
-    Search for Digital Elevation Model (DEM) items within a given bounding box (BBOX), STAC catalog, week, and Sentinel-2 items.
+    Search for Copernicus Digital Elevation Model (DEM) items within a given
+    bounding box (BBOX), STAC catalog, and Sentinel-2 items.
 
     Parameters:
-    - BBOX (tuple): Bounding box coordinates in the format (minx, miny, maxx, maxy).
+    - BBOX (tuple): Bounding box coordinates in the format
+        (minx, miny, maxx, maxy).
     - catalog (pystac.Catalog): STAC catalog containing DEM items.
     - epsg (int): EPSG code for the coordinate reference system.
 
     Returns:
-    - pystac.Collection: A collection of Digital Elevation Model (DEM) items filtered by specified conditions.
+    - pystac.Collection: A collection of Digital Elevation Model (DEM) items
+        filtered by specified conditions.
     """
     search = catalog.search(collections=["cop-dem-glo-30"], bbox=BBOX)
     dem_items = search.item_collection()
@@ -261,18 +297,21 @@ def search_dem(BBOX, catalog, epsg):
 
 def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
     """
-    Create xarray DataArrays for Sentinel-2, Sentinel-1, and DEM data.
+    Create xarray DataArrays for Sentinel-2, Sentinel-1, and Copernicus DEM
+    data.
 
     Parameters:
     - s2_items (list): List of Sentinel-2 items.
     - s1_items (list): List of Sentinel-1 items.
     - dem_items (list): List of DEM items.
-    - BBOX (tuple): Bounding box coordinates in the format (minx, miny, maxx, maxy).
+    - BBOX (tuple): Bounding box coordinates in the format
+        (minx, miny, maxx, maxy).
     - resolution (int): Spatial resolution.
     - epsg (int): EPSG code for the coordinate reference system.
 
     Returns:
-    - tuple: A tuple containing xarray DataArrays for Sentinel-2, Sentinel-1, and DEM.
+    - tuple: A tuple containing xarray DataArrays for Sentinel-2, Sentinel-1,
+        and Copernicus DEM.
     """
     da_sen2: xr.DataArray = stackstac.stack(
         items=s2_items,
@@ -286,7 +325,7 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
     )
 
     da_sen1: xr.DataArray = stackstac.stack(
-        items=s1_items,  # To only accept the same orbit state and date. Need better way to do this.
+        items=s1_items,
         assets=["vh", "vv"],  # SAR polarizations
         epsg=epsg,
         bounds_latlon=BBOX,  # W, S, E, N
@@ -363,17 +402,22 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
 
 def merge_datarrays(da_sen2, da_sen1, da_dem):
     """
-    Merge xarray DataArrays for Sentinel-2, Sentinel-1, and DEM.
+    Merge xarray DataArrays for Sentinel-2, Sentinel-1, and Copernicus DEM.
 
     Parameters:
     - da_sen2 (xr.DataArray): xarray DataArray for Sentinel-2 data.
     - da_sen1 (xr.DataArray): xarray DataArray for Sentinel-1 data.
-    - da_dem (xr.DataArray): xarray DataArray for DEM data.
+    - da_dem (xr.DataArray): xarray DataArray for Copernicus DEM data.
 
     Returns:
     - xr.DataArray: Merged xarray DataArray.
     """
-    # print("Platform variables (S2, S1, DEM): ", da_sen2.platform.values, da_sen1.platform.values, da_dem.platform.values)
+    # print(
+    #     "Platform variables (S2, S1, DEM): ",
+    #     da_sen2.platform.values,
+    #     da_sen1.platform.values,
+    #     da_dem.platform.values,
+    # )
     # da_sen2 = da_sen2.drop(["platform", "constellation"])
     # da_sen1 = da_sen1.drop(["platform", "constellation"])
     # da_dem = da_dem.drop(["platform"])
@@ -390,17 +434,21 @@ def process(
     year1, year2, aoi, resolution, cloud_cover_percentage, nodata_pixel_percentage
 ):
     """
-    Process Sentinel-2, Sentinel-1, and DEM data for a specified time range, area of interest (AOI),
-    resolution, EPSG code, cloud cover percentage, and nodata pixel percentage.
+    Process Sentinel-2, Sentinel-1, and Copernicus DEM data for a specified
+    time range, area of interest (AOI), resolution, EPSG code, cloud cover
+    percentage, and nodata pixel percentage.
 
     Parameters:
     - year1 (int): The starting year of the date range.
     - year2 (int): The ending year of the date range.
-    - aoi (shapely.geometry.base.BaseGeometry): Geometry object for an Area of Interest (AOI).
+    - aoi (shapely.geometry.base.BaseGeometry): Geometry object for an Area of
+        Interest (AOI).
     - resolution (int): Spatial resolution.
     - epsg (int): EPSG code for the coordinate reference system.
-    - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage for Sentinel-2 images.
-    - nodata_pixel_percentage (int): Maximum acceptable percentage of nodata pixels in Sentinel-2 images.
+    - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage
+        for Sentinel-2 images.
+    - nodata_pixel_percentage (int): Maximum acceptable percentage of nodata
+        pixels in Sentinel-2 images.
 
     Returns:
     - xr.DataArray: Merged xarray DataArray containing processed data.

From cc99ae46ecbdc827d5aabe2940ff481602749d91 Mon Sep 17 00:00:00 2001
From: Wei Ji <23487320+weiji14@users.noreply.github.com>
Date: Thu, 16 Nov 2023 10:07:03 +1300
Subject: [PATCH 2/8] :recycle: Refactor best_nodata and best_clouds into
 single sort function

Fixes F841 Local variable `best_nodata` is assigned to but never used. Only the `best_clouds` variable was used and `best_nodata` was omitted, but both should be used. Do this with a single pandas `sort_values` call that sorts by both columns.
---
 scripts/datacube.py | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/scripts/datacube.py b/scripts/datacube.py
index e6c553aa..aae5553f 100644
--- a/scripts/datacube.py
+++ b/scripts/datacube.py
@@ -174,26 +174,15 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage)
 
     s2_items_gdf = gpd.GeoDataFrame.from_features(s2_items.to_dict())
 
-    best_nodata = (
-        s2_items_gdf[["s2:nodata_pixel_percentage"]]
-        .groupby(["s2:nodata_pixel_percentage"])
-        .sum()
-        .sort_values(by="s2:nodata_pixel_percentage", ascending=True)
-        .index[0]
-    )
-
-    best_clouds = (
-        s2_items_gdf[["eo:cloud_cover"]]
-        .groupby(["eo:cloud_cover"])
-        .sum()
-        .sort_values(by="eo:cloud_cover", ascending=True)
-        .index[0]
-    )
+    least_nodata_and_clouds = s2_items_gdf.sort_values(
+        by=["s2:nodata_pixel_percentage", "eo:cloud_cover"], ascending=True
+    ).index[0]
 
-    s2_items_gdf = s2_items_gdf[s2_items_gdf["eo:cloud_cover"] == best_clouds]
+    s2_items_gdf = s2_items_gdf.iloc[least_nodata_and_clouds]
+    s2_items_gdf
 
     # Get the datetime for the filtered Sentinel 2 dataframe
-    # containing the best cloud free scene
+    # containing the least nodata and least cloudy scene
     s2_items_gdf_datetime_id = s2_items_gdf["datetime"]
     for item in s2_items:
         if item.properties["datetime"] == s2_items_gdf_datetime_id[0]:
@@ -202,7 +191,7 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage)
         else:
             continue
 
-    BBOX = s2_items_gdf.iloc[0].geometry.bounds
+    BBOX = s2_items_gdf.iloc[0].bounds
 
     epsg = s2_item.properties["proj:epsg"]
     print("EPSG code based on Sentinel-2 item: ", epsg)

From 262aaa606eb58a74b95501faf4e14ae0aa6b8fd2 Mon Sep 17 00:00:00 2001
From: Wei Ji <23487320+weiji14@users.noreply.github.com>
Date: Thu, 16 Nov 2023 10:36:55 +1300
Subject: [PATCH 3/8] :ambulance: Quickfix with getting the STAC item with a
 specific datetime

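Patches cc99ae46ecbdc827d5aabe2940ff481602749d91. After that commit, `s2_items_gdf` holds a single selected row instead of a dataframe, so its `datetime` value is compared directly rather than being indexed with `[0]`.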
---
 scripts/datacube.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/datacube.py b/scripts/datacube.py
index aae5553f..58d1835f 100644
--- a/scripts/datacube.py
+++ b/scripts/datacube.py
@@ -183,9 +183,9 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage)
 
     # Get the datetime for the filtered Sentinel 2 dataframe
     # containing the least nodata and least cloudy scene
-    s2_items_gdf_datetime_id = s2_items_gdf["datetime"]
+    s2_items_gdf_datetime = s2_items_gdf["datetime"]
     for item in s2_items:
-        if item.properties["datetime"] == s2_items_gdf_datetime_id[0]:
+        if item.properties["datetime"] == s2_items_gdf_datetime:
             s2_item = item
             # print(s2_item.properties["datetime"])
         else:

From 2af24be47fb78960bad5f08d080b542b5f22a44b Mon Sep 17 00:00:00 2001
From: Wei Ji <23487320+weiji14@users.noreply.github.com>
Date: Thu, 16 Nov 2023 10:39:01 +1300
Subject: [PATCH 4/8] :label: Rename variables to ds_ (xr.Dataset) or da
 (xr.DataArray)

Using ds_ prefix for xr.Dataset objects, and da_ prefix for xr.DataArray objects.
---
 scripts/datacube.py | 67 ++++++++++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 35 deletions(-)

diff --git a/scripts/datacube.py b/scripts/datacube.py
index 58d1835f..a5af15d1 100644
--- a/scripts/datacube.py
+++ b/scripts/datacube.py
@@ -26,7 +26,7 @@
       Search for DEM items within a given bounding box.
 - make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
       Create xarray DataArrays for Sentinel-2, Sentinel-1, and DEM data.
-- merge_datarrays(da_sen2, da_sen1, da_dem):
+- merge_datarrays(ds_sen2, ds_sen1, da_dem):
       Merge xarray DataArrays for Sentinel-2, Sentinel-1, and DEM.
 - process(year1, year2, aoi, resolution):
       Process Sentinel-2, Sentinel-1, and DEM data for a specified time range,
@@ -286,7 +286,7 @@ def search_dem(BBOX, catalog, epsg):
 
 def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
     """
-    Create xarray DataArrays for Sentinel-2, Sentinel-1, and Copernicus DEM
+    Create xarray Datasets for Sentinel-2, Sentinel-1, and Copernicus DEM
     data.
 
     Parameters:
@@ -299,7 +299,7 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
     - epsg (int): EPSG code for the coordinate reference system.
 
     Returns:
-    - tuple: A tuple containing xarray DataArrays for Sentinel-2, Sentinel-1,
+    - tuple: A tuple containing xarray Datasets for Sentinel-2, Sentinel-1,
         and Copernicus DEM.
     """
     da_sen2: xr.DataArray = stackstac.stack(
@@ -313,18 +313,8 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
         fill_value=np.nan,
     )
 
-    da_sen1: xr.DataArray = stackstac.stack(
-        items=s1_items,
-        assets=["vh", "vv"],  # SAR polarizations
-        epsg=epsg,
-        bounds_latlon=BBOX,  # W, S, E, N
-        xy_coords="center",  # pixel centroid coords instead of topleft corner
-        dtype=np.float32,
-        fill_value=np.nan,
-    )
-
-    # Create xarray.Dataset datacube with VH and VV channels from SAR
-    # 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B11', 'B12', 'B8A', 'SCL'
+    # Create xarray.Dataset datacube with all 10m and 20m bands from Sentinel-2
+    # 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B11', 'B12', 'SCL'
     da_s2_0: xr.DataArray = da_sen2.sel(band="B02", drop=True).rename("B02").squeeze()
     da_s2_1: xr.DataArray = da_sen2.sel(band="B03", drop=True).rename("B03").squeeze()
     da_s2_2: xr.DataArray = da_sen2.sel(band="B04", drop=True).rename("B04").squeeze()
@@ -337,7 +327,7 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
     da_s2_9: xr.DataArray = da_sen2.sel(band="B11", drop=True).rename("B11").squeeze()
     da_s2_10: xr.DataArray = da_sen2.sel(band="SCL", drop=True).rename("SCL").squeeze()
 
-    da_sen2_all: xr.Dataset = xr.merge(
+    ds_sen2: xr.Dataset = xr.merge(
         objects=[
             da_s2_0,
             da_s2_1,
@@ -353,8 +343,17 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
         ],
         join="override",
     )
+    ds_sen2.assign(time=da_sen2.time)
 
-    da_sen2_all.assign(time=da_sen2.time)
+    da_sen1: xr.DataArray = stackstac.stack(
+        items=s1_items,
+        assets=["vh", "vv"],  # SAR polarizations
+        epsg=epsg,
+        bounds_latlon=BBOX,  # W, S, E, N
+        xy_coords="center",  # pixel centroid coords instead of topleft corner
+        dtype=np.float32,
+        fill_value=np.nan,
+    )
 
     # To fix TypeError: Invalid value for attr 'spec'
     da_sen1.attrs["spec"] = str(da_sen1.spec)
@@ -380,42 +379,41 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
         dtype=np.float32,
         fill_value=np.nan,
     )
-
-    da_dem = stackstac.mosaic(da_dem, dim="time").squeeze().rename("DEM")
+    da_dem: xr.DataArray = stackstac.mosaic(da_dem, dim="time").squeeze().rename("DEM")
 
     # _, index = np.unique(da_dem['time'], return_index=True)  # Remove redundant time
     # da_dem = da_dem.isel(time=index)
 
-    return da_sen2_all, ds_sen1, da_dem
+    return ds_sen2, ds_sen1, da_dem
 
 
-def merge_datarrays(da_sen2, da_sen1, da_dem):
+def merge_datarrays(ds_sen2, ds_sen1, da_dem):
     """
-    Merge xarray DataArrays for Sentinel-2, Sentinel-1, and Copernicus DEM.
+    Merge xarray Dataset for Sentinel-2, Sentinel-1, and Copernicus DEM.
 
     Parameters:
-    - da_sen2 (xr.DataArray): xarray DataArray for Sentinel-2 data.
-    - da_sen1 (xr.DataArray): xarray DataArray for Sentinel-1 data.
+    - ds_sen2 (xr.Dataset): xarray Dataset for Sentinel-2 data.
+    - ds_sen1 (xr.Dataset): xarray Dataset for Sentinel-1 data.
     - da_dem (xr.DataArray): xarray DataArray for Copernicus DEM data.
 
     Returns:
-    - xr.DataArray: Merged xarray DataArray.
+    - xr.Dataset: Merged xarray Dataset.
     """
     # print(
     #     "Platform variables (S2, S1, DEM): ",
-    #     da_sen2.platform.values,
-    #     da_sen1.platform.values,
+    #     ds_sen2.platform.values,
+    #     ds_sen1.platform.values,
     #     da_dem.platform.values,
     # )
-    # da_sen2 = da_sen2.drop(["platform", "constellation"])
-    # da_sen1 = da_sen1.drop(["platform", "constellation"])
+    # ds_sen2 = ds_sen2.drop(["platform", "constellation"])
+    # ds_sen1 = ds_sen1.drop(["platform", "constellation"])
     # da_dem = da_dem.drop(["platform"])
 
-    da_merge = xr.merge([da_sen2, da_sen1, da_dem], compat="override")
+    da_merge = xr.merge([ds_sen2, ds_sen1, da_dem], compat="override")
     print("Merged datarray: ", da_merge)
     print(
-        "Time variables (S2, merged): ", da_sen2.time.values, da_merge.time.values
-    )  # da_sen1.time.values, da_dem.time.values
+        "Time variables (S2, merged): ", ds_sen2.time.values, da_merge.time.values
+    )  # ds_sen1.time.values, da_dem.time.values
     return da_merge
 
 
@@ -433,7 +431,6 @@ def process(
     - aoi (shapely.geometry.base.BaseGeometry): Geometry object for an Area of
         Interest (AOI).
     - resolution (int): Spatial resolution.
-    - epsg (int): EPSG code for the coordinate reference system.
     - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage
         for Sentinel-2 images.
     - nodata_pixel_percentage (int): Maximum acceptable percentage of nodata
@@ -454,11 +451,11 @@ def process(
 
     dem_items = search_dem(BBOX, catalog, epsg)
 
-    da_sen2, da_sen1, da_dem = make_dataarrays(
+    ds_sen2, ds_sen1, da_dem = make_dataarrays(
         s2_items, s1_items, dem_items, BBOX, resolution, epsg
     )
 
-    da_merge = merge_datarrays(da_sen2, da_sen1, da_dem)
+    da_merge = merge_datarrays(ds_sen2, ds_sen1, da_dem)
     return da_merge
 
 

From 4664b59befc6534b380a5c67e3e43b228b1a2ca6 Mon Sep 17 00:00:00 2001
From: Wei Ji <23487320+weiji14@users.noreply.github.com>
Date: Thu, 16 Nov 2023 10:57:54 +1300
Subject: [PATCH 5/8] :wrench: Set pylint max-args to 6

Increase from the default value of 5 to 6.
---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index d3a96c17..c2b3a2b7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,3 +13,6 @@ select = [
     "UP",   # pyupgrade
     "W",    # pycodestyle warnings
 ]
+
+[tool.ruff.lint.pylint]
+max-args = 6

From a396c5db20d0503d275aa57060f58f3159f3d42b Mon Sep 17 00:00:00 2001
From: Wei Ji <23487320+weiji14@users.noreply.github.com>
Date: Thu, 16 Nov 2023 11:10:54 +1300
Subject: [PATCH 6/8] :wastebasket: Replace .get_all_items() with
 .item_collection()

Fixes `FutureWarning: get_all_items() is deprecated, use item_collection() instead`.
---
 scripts/datacube.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/datacube.py b/scripts/datacube.py
index a5af15d1..e99bae85 100644
--- a/scripts/datacube.py
+++ b/scripts/datacube.py
@@ -169,7 +169,7 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage)
         },
     )
 
-    s2_items = search.get_all_items()
+    s2_items = search.item_collection()
     print(f"Found {len(s2_items)} Sentinel-2 items")
 
     s2_items_gdf = gpd.GeoDataFrame.from_features(s2_items.to_dict())

From f65e34a9c4f72950b4f8a68a6ea2d4c580878ecd Mon Sep 17 00:00:00 2001
From: Wei Ji <23487320+weiji14@users.noreply.github.com>
Date: Thu, 16 Nov 2023 14:20:05 +1300
Subject: [PATCH 7/8] :fire: Remove sorting by nodata and just sort by least
 cloud cover

No need to sort by `s2:nodata_pixel_percentage` anymore, just get the Sentinel-2 STAC item with the least cloud cover.
---
 scripts/datacube.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/scripts/datacube.py b/scripts/datacube.py
index e99bae85..0614951e 100644
--- a/scripts/datacube.py
+++ b/scripts/datacube.py
@@ -132,10 +132,10 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage)
 
     Note:
     The function filters Sentinel-2 items based on the specified conditions
-    such as geometry, date, cloud cover, and nodata pixel percentage. The
-    result is returned as a tuple containing the STAC catalog, Sentinel-2
-    items, the bounding box of the first item, and an EPSG code for the
-    coordinate reference system.
+    such as geometry, date, cloud cover, and nodata pixel percentage. Only one
+    result with the least cloud cover will be returned. The result is returned
+    as a tuple containing the STAC catalog, Sentinel-2 items, the bounding box
+    of the first item, and an EPSG code for the coordinate reference system.
     """
 
     CENTROID = aoi.centroid
@@ -174,11 +174,11 @@ def search_sentinel2(week, aoi, cloud_cover_percentage, nodata_pixel_percentage)
 
     s2_items_gdf = gpd.GeoDataFrame.from_features(s2_items.to_dict())
 
-    least_nodata_and_clouds = s2_items_gdf.sort_values(
-        by=["s2:nodata_pixel_percentage", "eo:cloud_cover"], ascending=True
+    least_clouds = s2_items_gdf.sort_values(
+        by=["eo:cloud_cover"], ascending=True
     ).index[0]
 
-    s2_items_gdf = s2_items_gdf.iloc[least_nodata_and_clouds]
+    s2_items_gdf = s2_items_gdf.iloc[least_clouds]
     s2_items_gdf
 
     # Get the datetime for the filtered Sentinel 2 dataframe

From 7b208781ce54133a8ce72dd226be8575688780ae Mon Sep 17 00:00:00 2001
From: Wei Ji <23487320+weiji14@users.noreply.github.com>
Date: Thu, 16 Nov 2023 14:24:35 +1300
Subject: [PATCH 8/8] :memo: More DataArray to Dataset renames

Missed a few more da_ to ds_ renames, following from 2af24be47fb78960bad5f08d080b542b5f22a44b
---
 scripts/datacube.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/scripts/datacube.py b/scripts/datacube.py
index 0614951e..0af29a61 100644
--- a/scripts/datacube.py
+++ b/scripts/datacube.py
@@ -24,10 +24,10 @@
       and week.
 - search_dem(BBOX, catalog, epsg):
       Search for DEM items within a given bounding box.
-- make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
-      Create xarray DataArrays for Sentinel-2, Sentinel-1, and DEM data.
-- merge_datarrays(ds_sen2, ds_sen1, da_dem):
-      Merge xarray DataArrays for Sentinel-2, Sentinel-1, and DEM.
+- make_datasets(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
+      Create xarray Datasets for Sentinel-2, Sentinel-1, and DEM data.
+- merge_datasets(ds_sen2, ds_sen1, da_dem):
+      Merge xarray Datasets for Sentinel-2, Sentinel-1, and DEM.
 - process(year1, year2, aoi, resolution):
       Process Sentinel-2, Sentinel-1, and DEM data for a specified time range,
       area of interest, and resolution.
@@ -284,7 +284,7 @@ def search_dem(BBOX, catalog, epsg):
     return dem_items
 
 
-def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
+def make_datasets(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
     """
     Create xarray Datasets for Sentinel-2, Sentinel-1, and Copernicus DEM
     data.
@@ -387,7 +387,7 @@ def make_dataarrays(s2_items, s1_items, dem_items, BBOX, resolution, epsg):
     return ds_sen2, ds_sen1, da_dem
 
 
-def merge_datarrays(ds_sen2, ds_sen1, da_dem):
+def merge_datasets(ds_sen2, ds_sen1, da_dem):
     """
     Merge xarray Dataset for Sentinel-2, Sentinel-1, and Copernicus DEM.
 
@@ -409,12 +409,12 @@ def merge_datarrays(ds_sen2, ds_sen1, da_dem):
     # ds_sen1 = ds_sen1.drop(["platform", "constellation"])
     # da_dem = da_dem.drop(["platform"])
 
-    da_merge = xr.merge([ds_sen2, ds_sen1, da_dem], compat="override")
-    print("Merged datarray: ", da_merge)
+    ds_merge = xr.merge([ds_sen2, ds_sen1, da_dem], compat="override")
+    print("Merged dataset: ", ds_merge)
     print(
-        "Time variables (S2, merged): ", ds_sen2.time.values, da_merge.time.values
+        "Time variables (S2, merged): ", ds_sen2.time.values, ds_merge.time.values
     )  # ds_sen1.time.values, da_dem.time.values
-    return da_merge
+    return ds_merge
 
 
 def process(
@@ -437,7 +437,7 @@ def process(
         pixels in Sentinel-2 images.
 
     Returns:
-    - xr.DataArray: Merged xarray DataArray containing processed data.
+    - xr.Dataset: Merged xarray Dataset containing processed data.
     """
 
     date, YEAR, MONTH, DAY, CLOUD = get_conditions(year1, year2, cloud_cover_percentage)
@@ -451,12 +451,12 @@ def process(
 
     dem_items = search_dem(BBOX, catalog, epsg)
 
-    ds_sen2, ds_sen1, da_dem = make_dataarrays(
+    ds_sen2, ds_sen1, da_dem = make_datasets(
         s2_items, s1_items, dem_items, BBOX, resolution, epsg
     )
 
-    da_merge = merge_datarrays(ds_sen2, ds_sen1, da_dem)
-    return da_merge
+    ds_merge = merge_datasets(ds_sen2, ds_sen1, da_dem)
+    return ds_merge
 
 
 if __name__ == "__main__":