Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-25.04' into prevent-pydataframe-serialization
Browse files
Browse the repository at this point in the history
  • Loading branch information
pentschev committed Feb 20, 2025
2 parents 6558690 + 8bef542 commit 9d101c5
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 3 deletions.
9 changes: 6 additions & 3 deletions cpp/include/cudf/strings/string_view.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -159,8 +159,11 @@ __device__ inline string_view::const_iterator::const_iterator(string_view const&

// Pre-increment for string_view::const_iterator: moves byte_pos forward by the
// byte width reported for the lead byte at p[byte_pos] (only while
// byte_pos < bytes), always increments char_pos, and returns *this.
// NOTE(review): this span comes from a rendered diff with no +/- markers. The
// brace-less `if` below appears to be the removed version and the braced `if`
// its replacement -- confirm against the real header before reading the two
// as sequential statements.
__device__ inline string_view::const_iterator& string_view::const_iterator::operator++()
{
if (byte_pos < bytes)
byte_pos += strings::detail::bytes_in_utf8_byte(static_cast<uint8_t>(p[byte_pos]));
if (byte_pos < bytes) {
// max is used to prevent an infinite loop on invalid UTF-8 data
byte_pos +=
cuda::std::max(1, strings::detail::bytes_in_utf8_byte(static_cast<uint8_t>(p[byte_pos])));
}
// char_pos advances unconditionally, even when byte_pos has already reached bytes.
++char_pos;
return *this;
}
Expand Down
2 changes: 2 additions & 0 deletions python/dask_cudf/dask_cudf/_expr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
)
from dask.dataframe.dask_expr._expr import (
Elemwise,
EnforceRuntimeDivisions,
Expr,
RenameAxis,
VarColumns,
Expand Down Expand Up @@ -70,6 +71,7 @@
"DXSeriesGroupBy",
"DecomposableGroupbyAggregation",
"Elemwise",
"EnforceRuntimeDivisions",
"Expr",
"FragmentWrapper",
"FrameBase",
Expand Down
16 changes: 16 additions & 0 deletions python/dask_cudf/dask_cudf/_expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from dask_cudf._expr import (
CumulativeBlockwise,
Elemwise,
EnforceRuntimeDivisions,
Expr,
Reduction,
RenameAxis,
Expand Down Expand Up @@ -202,6 +203,20 @@ def _patched_get_divisions(frame, other, *args, **kwargs):
return _original_get_divisions(frame, other, *args, **kwargs)


_original_erd_divisions = EnforceRuntimeDivisions._divisions


def _patched_erd_divisions(self):
    """Return the original divisions with ``.item()``-capable entries unwrapped."""
    # Workaround required by upstream dask testing
    # (dask/dataframe/tests/test_indexing.py::test_gpu_loc): left alone,
    # a single entry of divisions may come back as a 0-dim cupy array.
    # TODO: Find long-term fix.
    # Maybe update `LocList._layer_information`?
    unwrapped = []
    for boundary in _original_erd_divisions(self):
        # Anything exposing ``item`` is collapsed to the value it returns;
        # every other entry is passed through untouched.
        if hasattr(boundary, "item"):
            boundary = boundary.item()
        unwrapped.append(boundary)
    return tuple(unwrapped)


_PATCHED = False


Expand All @@ -213,4 +228,5 @@ def _patch_dask_expr():
CumulativeBlockwise._kwargs = PatchCumulativeBlockwise._kwargs
Expr.var = _patched_var
_shuffle_module._get_divisions = _patched_get_divisions
EnforceRuntimeDivisions._divisions = _patched_erd_divisions
_PATCHED = True

0 comments on commit 9d101c5

Please sign in to comment.