From 800ce4f413a8c2f3b4b4f9a5a219785ed64d10f0 Mon Sep 17 00:00:00 2001 From: Aaron Zuspan <50475791+aazuspan@users.noreply.github.com> Date: Thu, 7 Mar 2024 20:49:22 -0800 Subject: [PATCH] Empty collection testing (#53) * Swap black for ruff format * Redefine ruff line length * Modify error handling for empty collections It looks like Earth Engine no longer raises a specific error for sampling random points within a single point, so the error handling and testing were modified for the more specific case of an empty sampling region, e.g. a FeatureCollection with zero features. In the process, the sampling error handling was refactored and a new SamplingError class was added. SamplingError inherits from ValueError which was the previous error type, so this shouldn't affect any downstream error handling. * Fix pandas deprecation warning * Update auth mode * Update auth mode * Reset auth mode * Update CI --- .github/scripts/make_ee_token.py | 9 ------- .github/workflows/ci.yml | 45 ++++++++++++++++++++++++++++++++ .github/workflows/coverage.yml | 33 ----------------------- .github/workflows/test.yml | 30 --------------------- .pre-commit-config.yaml | 16 ++++-------- pyproject.toml | 7 ++++- sankee/plotting.py | 4 +-- sankee/sampling.py | 36 ++++++++++++++++++------- tests/test_sampling.py | 6 ++--- 9 files changed, 87 insertions(+), 99 deletions(-) delete mode 100644 .github/scripts/make_ee_token.py create mode 100644 .github/workflows/ci.yml delete mode 100644 .github/workflows/coverage.yml delete mode 100644 .github/workflows/test.yml diff --git a/.github/scripts/make_ee_token.py b/.github/scripts/make_ee_token.py deleted file mode 100644 index 6535165..0000000 --- a/.github/scripts/make_ee_token.py +++ /dev/null @@ -1,9 +0,0 @@ -import os - -credentials = f'{{"refresh_token": "{os.environ["EE_TOKEN"]}"}}' - -credential_dir = os.path.expanduser("~/.config/earthengine/") -os.makedirs(credential_dir, exist_ok=True) - -with open(os.path.join(credential_dir, "credentials"), "w") as dst: - dst.write(credentials) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..1b12ee2 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,45 @@ +name: tests + +on: push + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install hatch + - name: Write persistent EE credentials + env: + EE_TOKEN: ${{ secrets.EE_TOKEN }} + run: | + mkdir -p /home/runner/.config/earthengine + echo $EE_TOKEN > /home/runner/.config/earthengine/credentials + - name: Test with pytest + run: | + hatch run test:all + + lint: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install hatch + + - name: Run pre-commit hooks + uses: pre-commit/action@v3.0.0 \ No newline at end of file diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml deleted file mode 100644 index 0da1226..0000000 --- a/.github/workflows/coverage.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: tests-coverage - -on: pull_request - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.11 - uses: actions/setup-python@v4 - with: - python-version: 3.11 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install hatch - - name: Store EE token - run: | - python ./.github/scripts/make_ee_token.py - env: - EE_TOKEN: ${{ secrets.EE_TOKEN }} - - name: Test with pytest - run: | - hatch run test:coverage --cov-report=xml - - name: Upload to Codecov - run: | - bash <(curl -s https://codecov.io/bash) - - uses: actions/upload-artifact@v2 - with: - path: coverage.xml \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index fc97805..0000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: tests - -on: push - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [ '3.8', '3.9', '3.10', '3.11' ] - - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install hatch - - name: Store EE token - run: | - python ./.github/scripts/make_ee_token.py - env: - EE_TOKEN: ${{ secrets.EE_TOKEN }} - - name: Test with pytest - run: | - hatch run test:all \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e319ccb..75d7859 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,6 @@ repos: -- repo: https://github.com/psf/black - rev: 23.3.0 - hooks: - - id: black - args: [--line-length=100] - -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.275 - hooks: - - id: ruff - args: [--fix, --line-length=100] \ No newline at end of file + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.4 + hooks: + - id: ruff + - id: ruff-format \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 990ea9a..009524d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,4 +78,9 @@ view-coverage = "python -m webbrowser -t htmlcov/index.html" [tool.ruff] select = ["E", "F", "I", "UP", "B", "PT", "NPY", "PERF", "RUF"] exclude = ["docs/"] -target-version = "py38" \ No newline at end of file +fix = true +show-fixes = true +line-length = 100 + +[tool.ruff.isort] +known-first-party = ["sankee"] \ No newline at end of file diff --git a/sankee/plotting.py b/sankee/plotting.py index 6f42868..ca1ee68 100644 --- a/sankee/plotting.py +++ b/sankee/plotting.py @@ -264,7 +264,7 @@ def generate_dataframe(self) -> pd.DataFrame: .rename(columns={0: "changed"}) ) # Count the total number of source samples in each year - df["total"] = df.groupby(["source_year", "source"]).changed.transform(sum) + df["total"] = df.groupby(["source_year", "source"]).changed.transform("sum") # Calculate what percent of the source samples went into each target class df["proportion"] = df["changed"] / df["total"] @@ -312,7 +312,7 @@ def toggle_button(button): button.toggle() class_name = button.tooltip - class_id = [key for key in self.labels.keys() if self.labels[key] == class_name][0] + class_id = next(key for key in self.labels.keys() if self.labels[key] == class_name) if not button.state: self.hide.append(class_id) diff --git a/sankee/sampling.py b/sankee/sampling.py index d263ca8..faba539 100644 --- a/sankee/sampling.py +++ b/sankee/sampling.py @@ -6,6 +6,30 @@ from sankee import utils +class SamplingError(ValueError): + """Error related to data sampling in Earth Engine.""" + + +def handle_sampling_error(e: ee.EEException, band: str, image_list: list[ee.Image]) -> None: + """Handle Earth Engine errors that occur during sampling by raising more specific errors.""" + msg = None + + if band in str(e): + shared_bands = utils.get_shared_bands(image_list) + msg = f"The band `{band}` was not found in all images. Choose from {shared_bands}." + + elif "Region must not be empty" in str(e): + msg = ( + "The sample region is empty. Make sure to pass a valid geometry, feature, or " + "non-empty collection." + ) + + if msg: + raise SamplingError(msg) from None + + raise e + + def generate_sample_data( *, image_list: list[ee.Image], @@ -38,21 +62,13 @@ def extract_values_at_point(pt): try: features = [feat["properties"] for feat in samples.toList(samples.size()).getInfo()] except ee.EEException as e: - if band in str(e): - shared_bands = utils.get_shared_bands(image_list) - raise ValueError( - f"The band `{band}` was not found in all images. Choose from " f"{shared_bands}" - ) from None - elif "'count' must be positive" in str(e): - raise ValueError("No points were sampled. Make sure to pass a 2D `region`.") from None - else: - raise e + handle_sampling_error(e, band, image_list) data = pd.DataFrame.from_dict(features).dropna().astype(int) for image in image_labels: if image not in data.columns: - raise ValueError( + raise SamplingError( f"Valid samples were not found for image `{image}`. Check that the" " image overlaps the sampling region." ) diff --git a/tests/test_sampling.py b/tests/test_sampling.py index 72afd0e..ef573e1 100644 --- a/tests/test_sampling.py +++ b/tests/test_sampling.py @@ -49,13 +49,13 @@ def test_sample_data_bad_region(): ) -def test_sample_data_point(): +def test_sample_empty_collection(): """Test that an error is thrown when sampling occurs on an empty FeatureCollection.""" - with pytest.raises(ValueError, match="pass a 2D `region`"): + with pytest.raises(ValueError, match="region is empty"): sankee.sampling.generate_sample_data( image_list=TEST_IMAGE_LIST, image_labels=TEST_IMAGE_LABELS, - region=ee.Geometry.Point([0, 0]), + region=ee.FeatureCollection([]), band=TEST_DATASET.band, scale=100, )