diff --git a/ci/run_cudf_polars_pytests.sh b/ci/run_cudf_polars_pytests.sh index e881055e9e3..5a1d5f56bf0 100755 --- a/ci/run_cudf_polars_pytests.sh +++ b/ci/run_cudf_polars_pytests.sh @@ -17,5 +17,5 @@ python -m pytest --cache-clear "$@" tests --executor dask-experimental # Test the "dask-experimental" executor with Distributed cluster # Not all tests pass yet, deselecting by name those that are failing. python -m pytest --cache-clear "$@" tests --executor dask-experimental --dask-cluster \ - -k "not test_groupby_maintain_order_random and not test_scan_csv_multi and not test_select_literal_series" \ - --cov-fail-under=89 # Override coverage, Distributed cluster coverage not yet 100% + -k "not test_groupby_maintain_order_random and not test_scan_csv_multi and not test_select_literal_series and not test_can_convert_lists and not test_executor_basics and not test_replace_literal and not test_hconcat_different_heights and not test_join and not test_dataframescan and not test_strip_chars" \ + --cov-fail-under=80 # Override coverage, Distributed cluster coverage not yet 100% diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh index b91748cfc7d..15539c50da9 100644 --- a/cpp/include/cudf/strings/string_view.cuh +++ b/cpp/include/cudf/strings/string_view.cuh @@ -443,10 +443,12 @@ __device__ inline size_type string_view::rfind(char_utf8 chr, size_type pos, siz __device__ inline string_view string_view::substr(size_type pos, size_type count) const { if (pos < 0 || pos >= length()) { return string_view{}; } - auto const itr = begin() + pos; - auto const spos = itr.byte_offset(); - auto const epos = count >= 0 ? (itr + count).byte_offset() : size_bytes(); - return {data() + spos, epos - spos}; + auto const spos = begin() + pos; + auto const epos = count >= 0 ? (spos + count) : const_iterator{*this, _length, size_bytes()}; + auto ss = string_view{data() + spos.byte_offset(), epos.byte_offset() - spos.byte_offset()}; + // this potentially saves redundant character counting downstream + if (_length != UNKNOWN_STRING_LENGTH) { ss._length = epos.position() - spos.position(); } + return ss; } __device__ inline size_type string_view::character_offset(size_type bytepos) const diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 3a20ffbce19..217aff48d5e 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -2226,6 +2226,22 @@ stripe_dictionaries build_dictionaries(orc_table_view& orc_table, std::move(dict_order_owner)}; } +[[nodiscard]] uint32_t find_largest_stream_size(device_2dspan ss, + rmm::cuda_stream_view stream) +{ + auto const longest_stream = thrust::max_element( + rmm::exec_policy(stream), + ss.data(), + ss.data() + ss.count(), + cuda::proclaim_return_type([] __device__(auto const& lhs, auto const& rhs) { + return lhs.stream_size < rhs.stream_size; + })); + + auto const h_longest_stream = cudf::detail::make_host_vector_sync( + device_span{longest_stream, 1}, stream); + return h_longest_stream[0].stream_size; +} + /** * @brief Perform the processing steps needed to convert the input table into the output ORC data * for writing, such as compression and ORC encoding. @@ -2319,7 +2335,9 @@ auto convert_table_to_orc_data(table_view const& input, size_t compressed_bfr_size = 0; size_t num_compressed_blocks = 0; - auto const max_compressed_block_size = max_compressed_size(compression, compression_blocksize); + auto const largest_stream_size = find_largest_stream_size(strm_descs, stream); + auto const max_compressed_block_size = + max_compressed_size(compression, std::min(largest_stream_size, compression_blocksize)); auto const padded_max_compressed_block_size = util::round_up_unsafe(max_compressed_block_size, block_align); auto const padded_block_header_size = diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index 46816604918..fa6f04eed73 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -36,10 +36,10 @@ void set_up_kvikio() cudaFree(nullptr); auto const compat_mode = kvikio::getenv_or("KVIKIO_COMPAT_MODE", kvikio::CompatMode::ON); - kvikio::defaults::compat_mode_reset(compat_mode); + kvikio::defaults::set_compat_mode(compat_mode); auto const nthreads = getenv_or("KVIKIO_NTHREADS", 4u); - kvikio::defaults::thread_pool_nthreads_reset(nthreads); + kvikio::defaults::set_thread_pool_nthreads(nthreads); }); } diff --git a/cpp/src/utilities/host_memory.cpp b/cpp/src/utilities/host_memory.cpp index 94d27d976c3..e41d772a479 100644 --- a/cpp/src/utilities/host_memory.cpp +++ b/cpp/src/utilities/host_memory.cpp @@ -29,6 +29,7 @@ namespace cudf { namespace { + class fixed_pinned_pool_memory_resource { using upstream_mr = rmm::mr::pinned_host_memory_resource; using host_pooled_mr = rmm::mr::pool_memory_resource; diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index cfc6a0dc425..e3ca8b70b87 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -309,7 +309,7 @@ ConfigureTest( ConfigureTest( ORC_TEST io/orc_chunked_reader_test.cu io/orc_test.cpp GPUS 1 - PERCENT 30 + PERCENT 100 ) ConfigureTest( PARQUET_TEST @@ -340,7 +340,7 @@ ConfigureTest(JSON_TREE_CSR io/json/json_tree_csr.cu) ConfigureTest( DATA_CHUNK_SOURCE_TEST io/text/data_chunk_source_test.cpp GPUS 1 - PERCENT 30 + PERCENT 100 ) target_link_libraries(DATA_CHUNK_SOURCE_TEST PRIVATE ZLIB::ZLIB) ConfigureTest(LOGICAL_STACK_TEST io/fst/logical_stack_test.cu) diff --git a/docs/cudf/source/user_guide/cupy-interop.ipynb b/docs/cudf/source/user_guide/cupy-interop.ipynb index 112f0bcfca6..93e62d90c0f 100644 --- a/docs/cudf/source/user_guide/cupy-interop.ipynb +++ b/docs/cudf/source/user_guide/cupy-interop.ipynb @@ -566,7 +566,7 @@ "%%timeit\n", "\n", "fortran_arr = cp.asfortranarray(reshaped_arr)\n", - "reshaped_df = cudf.from_dlpack(fortran_arr.toDlpack())" + "reshaped_df = cudf.from_dlpack(fortran_arr.__dlpack__())" ] }, { @@ -1418,7 +1418,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py index c59a16f99f5..36f9eea0619 100644 --- a/python/cudf/cudf/core/subword_tokenizer.py +++ b/python/cudf/cudf/core/subword_tokenizer.py @@ -20,7 +20,7 @@ def _cast_to_appropriate_type(ar, cast_type): elif cast_type == "tf": from tensorflow.experimental.dlpack import from_dlpack - return from_dlpack(ar.astype(np.dtype(np.int32)).toDlpack()) + return from_dlpack(ar.astype(np.dtype(np.int32)).__dlpack__()) class SubwordTokenizer: diff --git a/python/cudf/cudf/tests/test_dlpack.py b/python/cudf/cudf/tests/test_dlpack.py index 20c24bd7564..187a5524e8e 100644 --- a/python/cudf/cudf/tests/test_dlpack.py +++ b/python/cudf/cudf/tests/test_dlpack.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. import itertools from contextlib import ExitStack as does_not_raise @@ -140,7 +140,7 @@ def test_to_dlpack_cupy_2d(data_2d): def test_from_dlpack_cupy_1d(data_1d): cupy_array = cupy.array(data_1d) cupy_host_array = cupy_array.get() - dlt = cupy_array.toDlpack() + dlt = cupy_array.__dlpack__() gs = cudf.from_dlpack(dlt) cudf_host_array = gs.to_numpy(na_value=np.nan) @@ -151,7 +151,7 @@ def test_from_dlpack_cupy_1d(data_1d): def test_from_dlpack_cupy_2d(data_2d): cupy_array = cupy.array(data_2d, order="F") cupy_host_array = cupy_array.get().flatten() - dlt = cupy_array.toDlpack() + dlt = cupy_array.__dlpack__() gdf = cudf.from_dlpack(dlt) cudf_host_array = np.array(gdf.to_pandas()).flatten()