Skip to content

Commit

Permalink
DEP: Use Cython 3.0 (pandas-dev#55179)
Browse files: browse the repository at this point in the history
* DEP: Use Cython 3.0

* Cython 3.0.3

* Update to Cython 3.0.4

* Merge pyi updates

* fixup

* Update pyi files and upgrade to Cython 3.0.5

* Remove debug print

* fix typo

---------

Co-authored-by: Thomas Li <[email protected]>
  • Loading branch information
rhshadrach and lithomas1 committed Nov 16, 2023
1 parent 02e2bae commit e5301a8
Show file tree
Hide file tree
Showing 24 changed files with 54 additions and 41 deletions.
2 changes: 1 addition & 1 deletion asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
// pip (with all the conda available packages installed first,
// followed by the pip installed packages).
"matrix": {
"Cython": ["0.29.33"],
"Cython": ["3.0.5"],
"matplotlib": [],
"sqlalchemy": [],
"scipy": [],
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython=0.29.33
- cython=3.0.5
- meson[ninja]=1.2.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/arrays.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class NDArrayBacked:
def size(self) -> int: ...
@property
def nbytes(self) -> int: ...
def copy(self): ...
def copy(self, order=...): ...
def delete(self, loc, axis=...): ...
def swapaxes(self, axis1, axis2): ...
def repeat(self, repeats: int | Sequence[int], axis: int | None = ...): ...
Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/groupby.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def group_fillna_indexer(
labels: np.ndarray, # ndarray[int64_t]
sorted_labels: npt.NDArray[np.intp],
mask: npt.NDArray[np.uint8],
direction: Literal["ffill", "bfill"],
limit: int, # int64_t
dropna: bool,
) -> None: ...
Expand All @@ -55,7 +54,7 @@ def group_any_all(
mask: np.ndarray, # const uint8_t[::1]
val_test: Literal["any", "all"],
skipna: bool,
nullable: bool,
result_mask: np.ndarray | None,
) -> None: ...
def group_sum(
out: np.ndarray, # complexfloatingintuint_t[:, ::1]
Expand Down
9 changes: 5 additions & 4 deletions pandas/_libs/hashtable.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ class Factorizer:
def factorize(
self,
values: np.ndarray,
sort: bool = ...,
na_sentinel=...,
na_value=...,
mask=...,
Expand Down Expand Up @@ -157,9 +156,9 @@ class HashTable:
def __contains__(self, key: Hashable) -> bool: ...
def sizeof(self, deep: bool = ...) -> int: ...
def get_state(self) -> dict[str, int]: ...
# TODO: `item` type is subclass-specific
def get_item(self, item): ... # TODO: return type?
def set_item(self, item, val) -> None: ...
# TODO: the types of `val` and `key` are subclass-specific
def get_item(self, val): ... # TODO: return type?
def set_item(self, key, val) -> None: ...
def get_na(self): ... # TODO: return type?
def set_na(self, val) -> None: ...
def map_locations(
Expand All @@ -185,6 +184,7 @@ class HashTable:
self,
values: np.ndarray, # np.ndarray[subclass-specific]
return_inverse: bool = ...,
mask=...,
) -> (
tuple[
np.ndarray, # np.ndarray[subclass-specific]
Expand All @@ -198,6 +198,7 @@ class HashTable:
na_sentinel: int = ...,
na_value: object = ...,
mask=...,
ignore_na: bool = True,
) -> tuple[np.ndarray, npt.NDArray[np.intp]]: ... # np.ndarray[subclass-specific]

class Complex128HashTable(HashTable): ...
Expand Down
6 changes: 4 additions & 2 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -1239,9 +1239,10 @@ cdef class StringHashTable(HashTable):
na_value=na_value, ignore_na=ignore_na,
return_inverse=True)

# Add unused mask parameter for compat with other signatures
def get_labels(self, ndarray[object] values, ObjectVector uniques,
Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
object na_value=None):
object na_value=None, object mask=None):
# -> np.ndarray[np.intp]
_, labels = self._unique(values, uniques, count_prior=count_prior,
na_sentinel=na_sentinel, na_value=na_value,
Expand Down Expand Up @@ -1496,9 +1497,10 @@ cdef class PyObjectHashTable(HashTable):
na_value=na_value, ignore_na=ignore_na,
return_inverse=True)

# Add unused mask parameter for compat with other signatures
def get_labels(self, ndarray[object] values, ObjectVector uniques,
Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
object na_value=None):
object na_value=None, object mask=None):
# -> np.ndarray[np.intp]
_, labels = self._unique(values, uniques, count_prior=count_prior,
na_sentinel=na_sentinel, na_value=na_value,
Expand Down
22 changes: 12 additions & 10 deletions pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -45,22 +45,24 @@ def is_scalar(val: object) -> bool: ...
def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ...
def is_pyarrow_array(obj: object) -> bool: ...
def is_period(val: object) -> TypeGuard[Period]: ...
def is_interval(val: object) -> TypeGuard[Interval]: ...
def is_decimal(val: object) -> TypeGuard[Decimal]: ...
def is_complex(val: object) -> TypeGuard[complex]: ...
def is_bool(val: object) -> TypeGuard[bool | np.bool_]: ...
def is_integer(val: object) -> TypeGuard[int | np.integer]: ...
def is_interval(obj: object) -> TypeGuard[Interval]: ...
def is_decimal(obj: object) -> TypeGuard[Decimal]: ...
def is_complex(obj: object) -> TypeGuard[complex]: ...
def is_bool(obj: object) -> TypeGuard[bool | np.bool_]: ...
def is_integer(obj: object) -> TypeGuard[int | np.integer]: ...
def is_int_or_none(obj) -> bool: ...
def is_float(val: object) -> TypeGuard[float]: ...
def is_float(obj: object) -> TypeGuard[float]: ...
def is_interval_array(values: np.ndarray) -> bool: ...
def is_datetime64_array(values: np.ndarray) -> bool: ...
def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ...
def is_datetime64_array(values: np.ndarray, skipna: bool = True) -> bool: ...
def is_timedelta_or_timedelta64_array(
values: np.ndarray, skipna: bool = True
) -> bool: ...
def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ...
def is_time_array(values: np.ndarray, skipna: bool = ...): ...
def is_date_array(values: np.ndarray, skipna: bool = ...): ...
def is_datetime_array(values: np.ndarray, skipna: bool = ...): ...
def is_string_array(values: np.ndarray, skipna: bool = ...): ...
def is_float_array(values: np.ndarray, skipna: bool = ...): ...
def is_float_array(values: np.ndarray): ...
def is_integer_array(values: np.ndarray, skipna: bool = ...): ...
def is_bool_array(values: np.ndarray, skipna: bool = ...): ...
def fast_multiget(
Expand Down Expand Up @@ -185,7 +187,7 @@ def count_level_2d(
max_bin: int,
) -> np.ndarray: ... # np.ndarray[np.int64, ndim=2]
def get_level_sorter(
label: np.ndarray, # const int64_t[:]
codes: np.ndarray, # const int64_t[:]
starts: np.ndarray, # const intp_t[:]
) -> np.ndarray: ... # np.ndarray[np.intp, ndim=1]
def generate_bins_dt64(
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/ops.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def vec_binop(
@overload
def maybe_convert_bool(
arr: npt.NDArray[np.object_],
true_values: Iterable = ...,
false_values: Iterable = ...,
true_values: Iterable | None = None,
false_values: Iterable | None = None,
convert_to_masked_nullable: Literal[False] = ...,
) -> tuple[np.ndarray, None]: ...
@overload
Expand Down
4 changes: 4 additions & 0 deletions pandas/_libs/sparse.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ class BlockIndex(SparseIndex):
self, length: int, blocs: np.ndarray, blengths: np.ndarray
) -> None: ...

# Override to have correct parameters
def intersect(self, other: SparseIndex) -> Self: ...
def make_union(self, y: SparseIndex) -> Self: ...

def make_mask_object_ndarray(
arr: npt.NDArray[np.object_], fill_value
) -> npt.NDArray[np.bool_]: ...
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/conversion.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ DT64NS_DTYPE: np.dtype
TD64NS_DTYPE: np.dtype

def precision_from_unit(
in_reso: int, # NPY_DATETIMEUNIT
in_reso: int,
out_reso: int = ...,
) -> tuple[int, int]: ... # (int64_t, _)
def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ from enum import Enum

OFFSET_TO_PERIOD_FREQSTR: dict[str, str]

def periods_per_day(reso: int) -> int: ...
def periods_per_day(reso: int = ...) -> int: ...
def periods_per_second(reso: int) -> int: ...
def is_supported_unit(reso: int) -> bool: ...
def npy_unit_to_abbrev(reso: int) -> str: ...
def npy_unit_to_abbrev(unit: int) -> str: ...
def get_supported_reso(reso: int) -> int: ...
def abbrev_to_npy_unit(abbrev: str) -> int: ...
def freq_to_period_freqstr(freq_n: int, freq_name: str) -> str: ...
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/np_datetime.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class OutOfBoundsTimedelta(ValueError): ...
def py_get_unit_from_dtype(dtype: np.dtype): ...
def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ...
def astype_overflowsafe(
arr: np.ndarray,
values: np.ndarray,
dtype: np.dtype,
copy: bool = ...,
round_ok: bool = ...,
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/period.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class Period(PeriodMixin):
@classmethod
def _from_ordinal(cls, ordinal: int, freq) -> Period: ...
@classmethod
def now(cls, freq: Frequency = ...) -> Period: ...
def now(cls, freq: Frequency) -> Period: ...
def strftime(self, fmt: str | None) -> str: ...
def to_timestamp(
self,
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/strptime.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def array_strptime(
exact: bool = ...,
errors: str = ...,
utc: bool = ...,
creso: int = ..., # NPY_DATETIMEUNIT
) -> tuple[np.ndarray, np.ndarray]: ...

# first ndarray is M8[ns], second is object ndarray of tzinfo | None
8 changes: 5 additions & 3 deletions pandas/_libs/tslibs/timedeltas.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ _S = TypeVar("_S", bound=timedelta)
def get_unit_for_round(freq, creso: int) -> int: ...
def disallow_ambiguous_unit(unit: str | None) -> None: ...
def ints_to_pytimedelta(
arr: npt.NDArray[np.timedelta64],
m8values: npt.NDArray[np.timedelta64],
box: bool = ...,
) -> npt.NDArray[np.object_]: ...
def array_to_timedelta64(
Expand Down Expand Up @@ -165,8 +165,10 @@ class Timedelta(timedelta):
def __gt__(self, other: timedelta) -> bool: ...
def __hash__(self) -> int: ...
def isoformat(self) -> str: ...
def to_numpy(self) -> np.timedelta64: ...
def view(self, dtype: npt.DTypeLike = ...) -> object: ...
def to_numpy(
self, dtype: npt.DTypeLike = ..., copy: bool = False
) -> np.timedelta64: ...
def view(self, dtype: npt.DTypeLike) -> object: ...
@property
def unit(self) -> str: ...
def as_unit(self, unit: str, round_ok: bool = ...) -> Timedelta: ...
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/timestamps.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ class Timestamp(datetime):
def is_year_end(self) -> bool: ...
def to_pydatetime(self, warn: bool = ...) -> datetime: ...
def to_datetime64(self) -> np.datetime64: ...
def to_period(self, freq: BaseOffset | str = ...) -> Period: ...
def to_period(self, freq: BaseOffset | str | None = None) -> Period: ...
def to_julian_date(self) -> np.float64: ...
@property
def asm8(self) -> np.datetime64: ...
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/tzconversion.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ from pandas._typing import npt

# tz_convert_from_utc_single exposed for testing
def tz_convert_from_utc_single(
val: np.int64, tz: tzinfo, creso: int = ...
utc_val: np.int64, tz: tzinfo, creso: int = ...
) -> np.int64: ...
def tz_localize_to_utc(
vals: npt.NDArray[np.int64],
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/vectorized.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def get_resolution(
reso: int = ..., # NPY_DATETIMEUNIT
) -> Resolution: ...
def ints_to_pydatetime(
arr: npt.NDArray[np.int64],
stamps: npt.NDArray[np.int64],
tz: tzinfo | None = ...,
box: str = ...,
reso: int = ..., # NPY_DATETIMEUNIT
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/window/aggregations.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ def ewm(
com: float, # float64_t
adjust: bool,
ignore_na: bool,
deltas: np.ndarray, # const float64_t[:]
normalize: bool,
deltas: np.ndarray | None = None, # const float64_t[:]
normalize: bool = True,
) -> np.ndarray: ... # np.ndarray[np.float64]
def ewmcov(
input_x: np.ndarray, # const float64_t[:]
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -2314,8 +2314,7 @@ def _concat_same_type(
return new_obj

def copy(self, order: str = "C") -> Self:
# error: Unexpected keyword argument "order" for "copy"
new_obj = super().copy(order=order) # type: ignore[call-arg]
new_obj = super().copy(order=order)
new_obj._freq = self.freq
return new_obj

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ requires = [
"meson-python==0.13.1",
"meson==1.2.1",
"wheel",
"Cython>=0.29.33,<3", # Note: sync with setup.py, environment.yml and asv.conf.json
"Cython==3.0.5", # Note: sync with setup.py, environment.yml and asv.conf.json
# Any NumPy version should be fine for compiling. Users are unlikely
# to get a NumPy<1.25 so the result will be compatible with all relevant
# NumPy versions (if not it is presumably compatible with their version).
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

pip
versioneer[toml]
cython==0.29.33
cython==3.0.5
meson[ninja]==1.2.1
meson-python==0.13.1
pytest>=7.3.2
Expand Down
2 changes: 2 additions & 0 deletions scripts/run_stubtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
# stubtest might be too sensitive
"pandas._libs.lib.NoDefault",
"pandas._libs.lib._NoDefault.no_default",
# stubtest/Cython does not recognize the default value for the dtype parameter
"pandas._libs.lib.map_infer_mask",
# internal type alias (should probably be private)
"pandas._libs.lib.ndarray_obj_2d",
# runtime argument "owner" has a default value but stub argument does not
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def is_platform_mac():


# note: sync with pyproject.toml, environment.yml and asv.conf.json
min_cython_ver = "0.29.33"
min_cython_ver = "3.0.5"

try:
from Cython import (
Expand Down

0 comments on commit e5301a8

Please sign in to comment.