Empty collection testing (#53)

* Swap black for ruff format
* Redefine ruff line length
* Modify error handling for empty collections

  It looks like Earth Engine no longer raises a specific error for sampling random points within a single point, so the error handling and testing were modified for the more specific case of an empty sampling region, e.g. a FeatureCollection with zero features. In the process, the sampling error handling was refactored and a new SamplingError class was added. SamplingError inherits from ValueError, which was the previous error type, so this shouldn't affect any downstream error handling.

* Fix pandas deprecation warning
* Update auth mode
* Update auth mode
* Reset auth mode
* Update CI
aazuspan · Mar 8, 2024 · 800ce4f · 800ce4f
1 parent a320ec0
commit 800ce4f
Show file tree

Hide file tree

Showing 9 changed files with 87 additions and 99 deletions.
diff --git a/.github/scripts/make_ee_token.py b/.github/scripts/make_ee_token.py
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,45 @@
+name: tests
+
+on: push
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ]
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install hatch
+    - name: Write persistent EE credentials
+      env:
+        EE_TOKEN: ${{ secrets.EE_TOKEN }}
+      run: |
+        mkdir -p /home/runner/.config/earthengine
+        echo $EE_TOKEN > /home/runner/.config/earthengine/credentials
+    - name: Test with pytest
+      run: |
+        hatch run test:all
+  
+  lint:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install hatch
+
+      - name: Run pre-commit hooks
+        uses: pre-commit/[email protected]
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,12 +1,6 @@
 repos:
-- repo: https://github.com/psf/black
-  rev: 23.3.0
-  hooks:
-  -   id: black
-      args: [--line-length=100]
-
-- repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.0.275
-  hooks:
-    - id: ruff
-      args: [--fix, --line-length=100]
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.1.4
+    hooks:
+      - id: ruff
+      - id: ruff-format
diff --git a/pyproject.toml b/pyproject.toml
@@ -78,4 +78,9 @@ view-coverage = "python -m webbrowser -t htmlcov/index.html"
 [tool.ruff]
 select = ["E", "F", "I", "UP", "B", "PT", "NPY", "PERF", "RUF"]
 exclude = ["docs/"]
-target-version = "py38"
+fix = true
+show-fixes = true
+line-length = 100
+
+[tool.ruff.isort]
+known-first-party = ["sankee"]
diff --git a/sankee/plotting.py b/sankee/plotting.py
@@ -264,7 +264,7 @@ def generate_dataframe(self) -> pd.DataFrame:
             .rename(columns={0: "changed"})
         )
         # Count the total number of source samples in each year
-        df["total"] = df.groupby(["source_year", "source"]).changed.transform(sum)
+        df["total"] = df.groupby(["source_year", "source"]).changed.transform("sum")
         # Calculate what percent of the source samples went into each target class
         df["proportion"] = df["changed"] / df["total"]
 
@@ -312,7 +312,7 @@ def toggle_button(button):
             button.toggle()
 
             class_name = button.tooltip
-            class_id = [key for key in self.labels.keys() if self.labels[key] == class_name][0]
+            class_id = next(key for key in self.labels.keys() if self.labels[key] == class_name)
 
             if not button.state:
                 self.hide.append(class_id)

diff --git a/sankee/sampling.py b/sankee/sampling.py
@@ -6,6 +6,30 @@
 from sankee import utils
 
 
+class SamplingError(ValueError):
+    """Error related to data sampling in Earth Engine."""
+
+
+def handle_sampling_error(e: ee.EEException, band: str, image_list: list[ee.Image]) -> None:
+    """Handle Earth Engine errors that occur during sampling by raising more specific errors."""
+    msg = None
+
+    if band in str(e):
+        shared_bands = utils.get_shared_bands(image_list)
+        msg = f"The band `{band}` was not found in all images. Choose from {shared_bands}."
+
+    elif "Region must not be empty" in str(e):
+        msg = (
+            "The sample region is empty. Make sure to pass a valid geometry, feature, or "
+            "non-empty collection."
+        )
+
+    if msg:
+        raise SamplingError(msg) from None
+
+    raise e
+
+
 def generate_sample_data(
     *,
     image_list: list[ee.Image],
@@ -38,21 +62,13 @@ def extract_values_at_point(pt):
     try:
         features = [feat["properties"] for feat in samples.toList(samples.size()).getInfo()]
     except ee.EEException as e:
-        if band in str(e):
-            shared_bands = utils.get_shared_bands(image_list)
-            raise ValueError(
-                f"The band `{band}` was not found in all images. Choose from " f"{shared_bands}"
-            ) from None
-        elif "'count' must be positive" in str(e):
-            raise ValueError("No points were sampled. Make sure to pass a 2D `region`.") from None
-        else:
-            raise e
+        handle_sampling_error(e, band, image_list)
 
     data = pd.DataFrame.from_dict(features).dropna().astype(int)
 
     for image in image_labels:
         if image not in data.columns:
-            raise ValueError(
+            raise SamplingError(
                 f"Valid samples were not found for image `{image}`. Check that the"
                 " image overlaps the sampling region."
             )

diff --git a/tests/test_sampling.py b/tests/test_sampling.py
@@ -49,13 +49,13 @@ def test_sample_data_bad_region():
         )
 
 
-def test_sample_data_point():
+def test_sample_empty_collection():
     """Test that an error is thrown when sampling occurs on an empty FeatureCollection."""
-    with pytest.raises(ValueError, match="pass a 2D `region`"):
+    with pytest.raises(ValueError, match="region is empty"):
         sankee.sampling.generate_sample_data(
             image_list=TEST_IMAGE_LIST,
             image_labels=TEST_IMAGE_LABELS,
-            region=ee.Geometry.Point([0, 0]),
+            region=ee.FeatureCollection([]),
             band=TEST_DATASET.band,
             scale=100,
         )