Skip to content

Commit

Permalink
dask 2025.1.0 compatibility (#324)
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger authored Jan 19, 2025
1 parent f629462 commit 105bfd6
Show file tree
Hide file tree
Showing 17 changed files with 125 additions and 1,356 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,9 @@ jobs:
matrix:
os: [ubuntu-latest]
env:
- ci/envs/310-minimal.yaml
- ci/envs/310-no-optional-deps.yaml
- ci/envs/39-minimal.yaml
- ci/envs/311-no-expr.yaml
- ci/envs/311-latest.yaml
- ci/envs/311-latest-no-expr.yaml
- ci/envs/312-latest.yaml

include:
Expand All @@ -52,7 +50,7 @@ jobs:
with:
environment-file: ${{ matrix.env }}
miniforge-version: latest
miniforge-variant: Mambaforge
miniforge-variant: Miniforge3
use-mamba: true

- name: Check and Log Environment
Expand Down
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
Changelog
=========

Version 0.4.3 (January, 2025)
----------------------------------

Packaging:

- `dask>=2025.1.0` is now required.
- `python>=3.10` is now required.


Version 0.4.2 (September 24, 2024)
----------------------------------

Expand Down
12 changes: 6 additions & 6 deletions ci/envs/39-minimal.yaml → ci/envs/310-minimal.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ channels:
- conda-forge
dependencies:
# required dependencies
- python=3.9
- numpy=1.23
- dask=2022.06.0
- distributed=2022.06.0
- geopandas=0.12
- pandas=1.5.3
- python=3.10
- numpy=1.24
- dask=2025.1.0
- distributed=2025.1.0
- geopandas=0.14.3
- pandas=2.0.0
- shapely=2.0
- pyproj=3.4
- packaging
Expand Down
26 changes: 0 additions & 26 deletions ci/envs/311-no-expr.yaml

This file was deleted.

1 change: 0 additions & 1 deletion ci/envs/312-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,3 @@ dependencies:
- git+https://github.com/shapely/shapely.git@main
- git+https://github.com/geopandas/geopandas.git@main
- git+https://github.com/dask/dask.git@main
- git+https://github.com/dask-contrib/dask-expr.git@main
50 changes: 17 additions & 33 deletions dask_geopandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,42 @@
from ._version import get_versions

from . import backends

if backends.QUERY_PLANNING_ON:
from .expr import (
points_from_xy,
from_wkt,
from_wkb,
GeoDataFrame,
GeoSeries,
from_geopandas,
from_dask_dataframe,
)
else:
from .core import (
points_from_xy,
from_wkt,
from_wkb,
GeoDataFrame,
GeoSeries,
from_geopandas,
from_dask_dataframe,
)
from .expr import (
points_from_xy,
from_wkt,
from_wkb,
GeoDataFrame,
GeoSeries,
from_geopandas,
from_dask_dataframe,
)
from .io.file import read_file
from .io.parquet import read_parquet, to_parquet
from .io.arrow import read_feather, to_feather
from .clip import clip
from .sjoin import sjoin
from . import backends as _ # needed to register dispatch functions with dask


__version__ = get_versions()["version"]
del get_versions

__all__ = [
"points_from_xy",
"from_wkt",
"from_wkb",
"GeoDataFrame",
"GeoSeries",
"from_geopandas",
"clip",
"from_dask_dataframe",
"read_file",
"from_geopandas",
"from_wkb",
"from_wkt",
"points_from_xy",
"read_feather",
"read_file",
"read_parquet",
"sjoin",
"to_feather",
"to_parquet",
"clip",
"sjoin",
]

from . import _version

__version__ = _version.get_versions()["version"]

from . import _version

__version__ = _version.get_versions()["version"]
21 changes: 1 addition & 20 deletions dask_geopandas/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,6 @@
import pandas as pd

import dask
from dask import config

# Check if dask-dataframe is using dask-expr (mimix the logic of dask.dataframe
# _dask_expr_enabled() - default of None means True as well if dask-expr is available)
QUERY_PLANNING_ON = config.get("dataframe.query-planning", False)
if QUERY_PLANNING_ON is None:
if Version(pd.__version__).major < 2:
QUERY_PLANNING_ON = False
else:
try:
import dask_expr # noqa: F401
except ImportError:
# dask will raise error or warning depending on the config
QUERY_PLANNING_ON = False
else:
QUERY_PLANNING_ON = True


from dask.base import normalize_token
from dask.dataframe.backends import _nonempty_index, meta_nonempty_dataframe
from dask.dataframe.core import get_parallel_type
Expand All @@ -34,7 +16,7 @@
from geopandas.array import GeometryArray, GeometryDtype, from_shapely
from shapely.geometry.base import BaseGeometry

from .core import GeoDataFrame, GeoSeries
from .expr import GeoDataFrame, GeoSeries

get_parallel_type.register(geopandas.GeoDataFrame, lambda _: GeoDataFrame)
get_parallel_type.register(geopandas.GeoSeries, lambda _: GeoSeries)
Expand Down Expand Up @@ -85,7 +67,6 @@ def tokenize_geometryarray(x):

@pyarrow_schema_dispatch.register((geopandas.GeoDataFrame,))
def get_pyarrow_schema_geopandas(obj):
import pandas as pd
import pyarrow as pa

df = pd.DataFrame(obj.copy())
Expand Down
20 changes: 3 additions & 17 deletions dask_geopandas/clip.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,16 @@
import numpy as np

from dask.base import tokenize
from dask.dataframe import from_graph
from dask.highlevelgraph import HighLevelGraph
from dask.utils import derived_from

import geopandas

from . import backends


@derived_from(geopandas.tools)
def clip(gdf, mask, keep_geom_type=False):

if backends.QUERY_PLANNING_ON:
from .expr import GeoDataFrame, GeoSeries
else:
from .core import GeoDataFrame, GeoSeries
from dask_geopandas import GeoDataFrame, GeoSeries

if isinstance(mask, (GeoDataFrame, GeoSeries)):
raise NotImplementedError("Mask cannot be a Dask GeoDataFrame or GeoSeries.")
Expand Down Expand Up @@ -45,17 +40,8 @@ def clip(gdf, mask, keep_geom_type=False):
}
divisions = [None] * (len(dsk) + 1)
graph = HighLevelGraph.from_collections(name, dsk, dependencies=[gdf])
if backends.QUERY_PLANNING_ON:
from dask_expr import from_graph

result = from_graph(graph, gdf._meta, tuple(divisions), dsk.keys(), "clip")
else:
from .core import GeoDataFrame, GeoSeries

if isinstance(gdf, GeoDataFrame):
result = GeoDataFrame(graph, name, gdf._meta, tuple(divisions))
elif isinstance(gdf, GeoSeries):
result = GeoSeries(graph, name, gdf._meta, tuple(divisions))
result = from_graph(graph, gdf._meta, tuple(divisions), dsk.keys(), "clip")

result.spatial_partitions = new_spatial_partitions
return result
Loading

0 comments on commit 105bfd6

Please sign in to comment.