Skip to content

Commit

Permalink
Empty collection testing (#53)
Browse files Browse the repository at this point in the history
* Swap black for ruff format

* Redefine ruff line length

* Modify error handling for empty collections

It looks like Earth Engine no longer raises a specific error for
sampling random points within a single point, so the error handling
and testing were modified for the more specific case of an empty
sampling region, e.g. a FeatureCollection with zero features.

In the process, the sampling error handling was refactored and a
new SamplingError class was added. SamplingError inherits from
ValueError, which was the previous error type, so this shouldn't
affect any downstream error handling.

* Fix pandas deprecation warning

* Update auth mode

* Update auth mode

* Reset auth mode

* Update CI
  • Loading branch information
aazuspan authored Mar 8, 2024
1 parent a320ec0 commit 800ce4f
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 99 deletions.
9 changes: 0 additions & 9 deletions .github/scripts/make_ee_token.py

This file was deleted.

45 changes: 45 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# CI workflow: runs on every push and defines two jobs, `test` and `lint`.
name: tests

on: push

jobs:
# `test` runs once per Python version listed in the matrix below.
test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ]

steps:
# NOTE(review): this job pins actions/checkout@v3 while the `lint` job uses @v4 - confirm whether the mismatch is intentional.
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install hatch
# Writes the EE_TOKEN repository secret to the runner's Earth Engine credentials file.
# NOTE(review): `$EE_TOKEN` is expanded unquoted in the script, so shell word splitting and globbing apply - consider quoting it.
- name: Write persistent EE credentials
env:
EE_TOKEN: ${{ secrets.EE_TOKEN }}
run: |
mkdir -p /home/runner/.config/earthengine
echo $EE_TOKEN > /home/runner/.config/earthengine/credentials
- name: Test with pytest
run: |
hatch run test:all
# `lint` runs the repository's pre-commit hooks; it has no explicit Python setup step.
lint:
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install hatch
# NOTE(review): the action reference below looks garbled by e-mail obfuscation (presumably `pre-commit/action@<version>`) - restore the real ref from the repository.
- name: Run pre-commit hooks
uses: pre-commit/[email protected]
33 changes: 0 additions & 33 deletions .github/workflows/coverage.yml

This file was deleted.

30 changes: 0 additions & 30 deletions .github/workflows/test.yml

This file was deleted.

16 changes: 5 additions & 11 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
repos:
- repo: https://github.com/psf/black
rev: 23.3.0
hooks:
- id: black
args: [--line-length=100]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.275
hooks:
- id: ruff
args: [--fix, --line-length=100]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.4
hooks:
- id: ruff
- id: ruff-format
7 changes: 6 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,9 @@ view-coverage = "python -m webbrowser -t htmlcov/index.html"
[tool.ruff]
select = ["E", "F", "I", "UP", "B", "PT", "NPY", "PERF", "RUF"]
exclude = ["docs/"]
target-version = "py38"
fix = true
show-fixes = true
line-length = 100

[tool.ruff.isort]
known-first-party = ["sankee"]
4 changes: 2 additions & 2 deletions sankee/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def generate_dataframe(self) -> pd.DataFrame:
.rename(columns={0: "changed"})
)
# Count the total number of source samples in each year
df["total"] = df.groupby(["source_year", "source"]).changed.transform(sum)
df["total"] = df.groupby(["source_year", "source"]).changed.transform("sum")
# Calculate what percent of the source samples went into each target class
df["proportion"] = df["changed"] / df["total"]

Expand Down Expand Up @@ -312,7 +312,7 @@ def toggle_button(button):
button.toggle()

class_name = button.tooltip
class_id = [key for key in self.labels.keys() if self.labels[key] == class_name][0]
class_id = next(key for key in self.labels.keys() if self.labels[key] == class_name)

if not button.state:
self.hide.append(class_id)
Expand Down
36 changes: 26 additions & 10 deletions sankee/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,30 @@
from sankee import utils


class SamplingError(ValueError):
    """Raised when sampling data from Earth Engine fails.

    Subclasses ``ValueError``, so handlers written for ``ValueError`` also
    catch this error.
    """


def handle_sampling_error(e: ee.EEException, band: str, image_list: list[ee.Image]) -> None:
    """Re-raise an Earth Engine sampling failure as a more descriptive error.

    The text of ``e`` is inspected for known failure signatures. A recognized
    failure is raised as a ``SamplingError`` with the Earth Engine exception
    context suppressed; anything else is re-raised unchanged.

    Args:
        e: The exception raised by Earth Engine while sampling.
        band: The band name that was requested for sampling.
        image_list: The images that were sampled, used to list the bands they share.

    Raises:
        SamplingError: If the error text mentions ``band``, or reports an empty region.
        ee.EEException: The original ``e``, when no known signature matches.
    """
    error_text = str(e)

    # The band check comes first, so it wins when both signatures are present.
    if band in error_text:
        shared_bands = utils.get_shared_bands(image_list)
        raise SamplingError(
            f"The band `{band}` was not found in all images. Choose from {shared_bands}."
        ) from None

    if "Region must not be empty" in error_text:
        raise SamplingError(
            "The sample region is empty. Make sure to pass a valid geometry, feature, or "
            "non-empty collection."
        ) from None

    # Unrecognized failure: surface the original exception as-is.
    raise e


def generate_sample_data(
*,
image_list: list[ee.Image],
Expand Down Expand Up @@ -38,21 +62,13 @@ def extract_values_at_point(pt):
try:
features = [feat["properties"] for feat in samples.toList(samples.size()).getInfo()]
except ee.EEException as e:
if band in str(e):
shared_bands = utils.get_shared_bands(image_list)
raise ValueError(
f"The band `{band}` was not found in all images. Choose from " f"{shared_bands}"
) from None
elif "'count' must be positive" in str(e):
raise ValueError("No points were sampled. Make sure to pass a 2D `region`.") from None
else:
raise e
handle_sampling_error(e, band, image_list)

data = pd.DataFrame.from_dict(features).dropna().astype(int)

for image in image_labels:
if image not in data.columns:
raise ValueError(
raise SamplingError(
f"Valid samples were not found for image `{image}`. Check that the"
" image overlaps the sampling region."
)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@ def test_sample_data_bad_region():
)


def test_sample_data_point():
def test_sample_empty_collection():
"""Test that an error is thrown when sampling occurs on an empty FeatureCollection."""
with pytest.raises(ValueError, match="pass a 2D `region`"):
with pytest.raises(ValueError, match="region is empty"):
sankee.sampling.generate_sample_data(
image_list=TEST_IMAGE_LIST,
image_labels=TEST_IMAGE_LABELS,
region=ee.Geometry.Point([0, 0]),
region=ee.FeatureCollection([]),
band=TEST_DATASET.band,
scale=100,
)

0 comments on commit 800ce4f

Please sign in to comment.