Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dask 2025.1.0 compatibility #324

Merged
merged 16 commits into from
Jan 19, 2025
6 changes: 2 additions & 4 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,9 @@ jobs:
matrix:
os: [ubuntu-latest]
env:
- ci/envs/310-minimal.yaml
- ci/envs/310-no-optional-deps.yaml
- ci/envs/39-minimal.yaml
- ci/envs/311-no-expr.yaml
- ci/envs/311-latest.yaml
- ci/envs/311-latest-no-expr.yaml
- ci/envs/312-latest.yaml

include:
Expand All @@ -52,7 +50,7 @@ jobs:
with:
environment-file: ${{ matrix.env }}
miniforge-version: latest
miniforge-variant: Mambaforge
miniforge-variant: Miniforge3
use-mamba: true

- name: Check and Log Environment
Expand Down
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
Changelog
=========

Version 0.4.3 (January 19, 2025)
--------------------------------

Packaging:

- `dask>=2025.1.0` is now required.
- `python>=3.10` is now required.


Version 0.4.2 (September 24, 2024)
----------------------------------

Expand Down
12 changes: 6 additions & 6 deletions ci/envs/39-minimal.yaml → ci/envs/310-minimal.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ channels:
- conda-forge
dependencies:
# required dependencies
- python=3.9
- numpy=1.23
- dask=2022.06.0
- distributed=2022.06.0
- geopandas=0.12
- pandas=1.5.3
- python=3.10
- numpy=1.24
- dask=2025.1.0
- distributed=2025.1.0
- geopandas=0.14.3
- pandas=2.0.0
- shapely=2.0
- pyproj=3.4
- packaging
Expand Down
26 changes: 0 additions & 26 deletions ci/envs/311-no-expr.yaml

This file was deleted.

1 change: 0 additions & 1 deletion ci/envs/312-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,3 @@ dependencies:
- git+https://github.com/shapely/shapely.git@main
- git+https://github.com/geopandas/geopandas.git@main
- git+https://github.com/dask/dask.git@main
- git+https://github.com/dask-contrib/dask-expr.git@main
50 changes: 17 additions & 33 deletions dask_geopandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,42 @@
from ._version import get_versions

from . import backends

if backends.QUERY_PLANNING_ON:
from .expr import (
points_from_xy,
from_wkt,
from_wkb,
GeoDataFrame,
GeoSeries,
from_geopandas,
from_dask_dataframe,
)
else:
from .core import (
points_from_xy,
from_wkt,
from_wkb,
GeoDataFrame,
GeoSeries,
from_geopandas,
from_dask_dataframe,
)
from .expr import (
points_from_xy,
from_wkt,
from_wkb,
GeoDataFrame,
GeoSeries,
from_geopandas,
from_dask_dataframe,
)
from .io.file import read_file
from .io.parquet import read_parquet, to_parquet
from .io.arrow import read_feather, to_feather
from .clip import clip
from .sjoin import sjoin
from . import backends as _ # needed to register dispatch functions with dask


__version__ = get_versions()["version"]
del get_versions

__all__ = [
"points_from_xy",
"from_wkt",
"from_wkb",
"GeoDataFrame",
"GeoSeries",
"from_geopandas",
"clip",
"from_dask_dataframe",
"read_file",
"from_geopandas",
"from_wkb",
"from_wkt",
"points_from_xy",
"read_feather",
"read_file",
"read_parquet",
"sjoin",
"to_feather",
"to_parquet",
"clip",
"sjoin",
]

from . import _version

__version__ = _version.get_versions()["version"]

from . import _version

__version__ = _version.get_versions()["version"]
21 changes: 1 addition & 20 deletions dask_geopandas/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,6 @@
import pandas as pd

import dask
from dask import config

# Check if dask-dataframe is using dask-expr (mimic the logic of dask.dataframe
# _dask_expr_enabled() - default of None means True as well if dask-expr is available)
QUERY_PLANNING_ON = config.get("dataframe.query-planning", False)
if QUERY_PLANNING_ON is None:
if Version(pd.__version__).major < 2:
QUERY_PLANNING_ON = False
else:
try:
import dask_expr # noqa: F401
except ImportError:
# dask will raise error or warning depending on the config
QUERY_PLANNING_ON = False
else:
QUERY_PLANNING_ON = True


from dask.base import normalize_token
from dask.dataframe.backends import _nonempty_index, meta_nonempty_dataframe
from dask.dataframe.core import get_parallel_type
Expand All @@ -34,7 +16,7 @@
from geopandas.array import GeometryArray, GeometryDtype, from_shapely
from shapely.geometry.base import BaseGeometry

from .core import GeoDataFrame, GeoSeries
from .expr import GeoDataFrame, GeoSeries

get_parallel_type.register(geopandas.GeoDataFrame, lambda _: GeoDataFrame)
get_parallel_type.register(geopandas.GeoSeries, lambda _: GeoSeries)
Expand Down Expand Up @@ -85,7 +67,6 @@ def tokenize_geometryarray(x):

@pyarrow_schema_dispatch.register((geopandas.GeoDataFrame,))
def get_pyarrow_schema_geopandas(obj):
import pandas as pd
import pyarrow as pa

df = pd.DataFrame(obj.copy())
Expand Down
20 changes: 3 additions & 17 deletions dask_geopandas/clip.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,16 @@
import numpy as np

from dask.base import tokenize
from dask.dataframe import from_graph
from dask.highlevelgraph import HighLevelGraph
from dask.utils import derived_from

import geopandas

from . import backends


@derived_from(geopandas.tools)
def clip(gdf, mask, keep_geom_type=False):

if backends.QUERY_PLANNING_ON:
from .expr import GeoDataFrame, GeoSeries
else:
from .core import GeoDataFrame, GeoSeries
from dask_geopandas import GeoDataFrame, GeoSeries

if isinstance(mask, (GeoDataFrame, GeoSeries)):
raise NotImplementedError("Mask cannot be a Dask GeoDataFrame or GeoSeries.")
Expand Down Expand Up @@ -45,17 +40,8 @@ def clip(gdf, mask, keep_geom_type=False):
}
divisions = [None] * (len(dsk) + 1)
graph = HighLevelGraph.from_collections(name, dsk, dependencies=[gdf])
if backends.QUERY_PLANNING_ON:
from dask_expr import from_graph

result = from_graph(graph, gdf._meta, tuple(divisions), dsk.keys(), "clip")
else:
from .core import GeoDataFrame, GeoSeries

if isinstance(gdf, GeoDataFrame):
result = GeoDataFrame(graph, name, gdf._meta, tuple(divisions))
elif isinstance(gdf, GeoSeries):
result = GeoSeries(graph, name, gdf._meta, tuple(divisions))
result = from_graph(graph, gdf._meta, tuple(divisions), dsk.keys(), "clip")

result.spatial_partitions = new_spatial_partitions
return result
Loading
Loading