Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into serialize-columns
Browse files Browse the repository at this point in the history
  • Loading branch information
rjzamora authored Feb 13, 2025
2 parents 30793d1 + 7914858 commit 6226c34
Show file tree
Hide file tree
Showing 67 changed files with 911 additions and 1,384 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -326,13 +326,14 @@ jobs:
third-party-integration-tests-cudf-pandas:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@nvks-runners
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.04
with:
build_type: pull-request
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
node_type: "gpu-l4-latest-1"
continue-on-error: true
container_image: "rapidsai/ci-conda:latest"
run_script: |
ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
Expand Down
14 changes: 1 addition & 13 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ option(CUDA_ENABLE_LINEINFO
option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
# cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
option(CUDA_STATIC_CUFILE "Statically link cuFile" OFF)

set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS)
Expand Down Expand Up @@ -464,7 +463,6 @@ add_library(
src/hash/xxhash_64.cu
src/interop/dlpack.cpp
src/interop/arrow_utilities.cpp
src/interop/decimal_conversion_utilities.cu
src/interop/to_arrow_device.cu
src/interop/to_arrow_host.cu
src/interop/from_arrow_device.cu
Expand Down Expand Up @@ -547,7 +545,6 @@ add_library(
src/io/utilities/data_casting.cu
src/io/utilities/data_sink.cpp
src/io/utilities/datasource.cpp
src/io/utilities/file_io_utilities.cpp
src/io/utilities/row_selection.cpp
src/io/utilities/type_inference.cu
src/io/utilities/trie.cu
Expand Down Expand Up @@ -923,15 +920,6 @@ target_compile_definitions(
# Enable remote IO through KvikIO
target_compile_definitions(cudf PRIVATE $<$<BOOL:${CUDF_KVIKIO_REMOTE_IO}>:CUDF_KVIKIO_REMOTE_IO>)

# Enable cuFile support
set(_cufile_suffix)
if(CUDA_STATIC_CUFILE)
set(_cufile_suffix _static)
endif()
if(TARGET CUDA::cuFile${_cufile_suffix})
target_compile_definitions(cudf PRIVATE CUDF_CUFILE_FOUND)
endif()

# Remove this after upgrading to a CCCL that has a proper CMake option. See
# https://github.com/NVIDIA/cccl/pull/2844
target_compile_definitions(cudf PRIVATE THRUST_FORCE_32_BIT_OFFSET_TYPE=1)
Expand All @@ -944,7 +932,7 @@ target_link_libraries(
cudf
PUBLIC CCCL::CCCL rapids_logger::rapids_logger rmm::rmm $<BUILD_LOCAL_INTERFACE:BS::thread_pool>
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp
kvikio::kvikio $<TARGET_NAME_IF_EXISTS:CUDA::cuFile${_cufile_suffix}> nanoarrow
kvikio::kvikio nanoarrow
)

# Add Conda library, and include paths if specified
Expand Down
21 changes: 3 additions & 18 deletions cpp/include/cudf/io/config_utils.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,22 +19,7 @@

namespace CUDF_EXPORT cudf {
namespace io {
namespace cufile_integration {

/**
* @brief Returns true if cuFile and its compatibility mode are enabled.
*/
bool is_always_enabled();

/**
* @brief Returns true if only direct IO through cuFile is enabled (compatibility mode is disabled).
*/
bool is_gds_enabled();

/**
* @brief Returns true if KvikIO is enabled.
*/
bool is_kvikio_enabled();
namespace kvikio_integration {

/**
* @brief Set KvikIO parameters, including:
Expand All @@ -45,7 +30,7 @@ bool is_kvikio_enabled();
*/
void set_up_kvikio();

} // namespace cufile_integration
} // namespace kvikio_integration

namespace nvcomp_integration {

Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/io/data_sink.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -122,7 +122,7 @@ class data_sink {
*
* In the case where the sink type is itself a memory buffered write, this ends up
* being effectively a second memcpy. So a useful optimization for a "smart"
* custom data_sink is to do it's own internal management of the movement
* custom data_sink is to do its own internal management of the movement
* of data between cpu and gpu; turning the internals of the writer into simply
*
* sink->device_write(device_buffer, size)
Expand Down
27 changes: 16 additions & 11 deletions cpp/include/cudf/transform.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,27 +32,32 @@ namespace CUDF_EXPORT cudf {
*/

/**
* @brief Creates a new column by applying a unary function against every
* element of an input column.
* @brief Creates a new column by applying a transform function against every
* element of the input columns.
*
* Computes:
* `out[i] = F(in[i])`
* `out[i] = F(inputs[i]...)`.
*
* The output null mask is the same is the input null mask so if input[i] is
* null then output[i] is also null
 * Note that for every scalar in `inputs` (a column of size 1), `inputs[i] == inputs[0]`
*
* @param input An immutable view of the input column to transform
* @param unary_udf The PTX/CUDA string of the unary function to apply
* The output null mask is the same as the null mask of the input columns, so if input[i] is
* null then output[i] is also null. The size of the resulting column is the size of the largest
* column.
* All input columns must have equivalent null masks.
*
*
* @param inputs Immutable views of the input columns to transform
* @param transform_udf The PTX/CUDA string of the transform function to apply
* @param output_type The output type that is compatible with the output type in the UDF
* @param is_ptx true: the UDF is treated as PTX code; false: the UDF is treated as CUDA code
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return The column resulting from applying the unary function to
* @return The column resulting from applying the transform function to
* every element of the input
*/
std::unique_ptr<column> transform(
column_view const& input,
std::string const& unary_udf,
std::vector<column_view> const& inputs,
std::string const& transform_udf,
data_type output_type,
bool is_ptx,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
Expand Down
70 changes: 0 additions & 70 deletions cpp/src/interop/decimal_conversion_utilities.cu

This file was deleted.

44 changes: 0 additions & 44 deletions cpp/src/interop/decimal_conversion_utilities.cuh

This file was deleted.

1 change: 0 additions & 1 deletion cpp/src/interop/to_arrow_device.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
*/

#include "arrow_utilities.hpp"
#include "decimal_conversion_utilities.cuh"

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
Expand Down
1 change: 0 additions & 1 deletion cpp/src/interop/to_arrow_host.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
*/

#include "arrow_utilities.hpp"
#include "decimal_conversion_utilities.cuh"

#include <cudf/column/column_view.hpp>
#include <cudf/detail/interop.hpp>
Expand Down
29 changes: 21 additions & 8 deletions cpp/src/io/parquet/arrow_schema_writer.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -250,13 +250,26 @@ struct dispatch_to_flatbuf {
std::enable_if_t<cudf::is_fixed_point<T>(), void> operator()()
{
field_type_id = flatbuf::Type_Decimal;
field_offset = flatbuf::CreateDecimal(fbb,
(col_meta.is_decimal_precision_set())
? col_meta.get_decimal_precision()
: MAX_DECIMAL128_PRECISION,
col->type().scale(),
128)
.Union();

auto const [max_precision, bitwidth] = []() constexpr -> std::pair<int32_t, int32_t> {
if constexpr (std::is_same_v<T, numeric::decimal32>) {
return {MAX_DECIMAL32_PRECISION, 32};
} else if constexpr (std::is_same_v<T, numeric::decimal64>) {
return {MAX_DECIMAL64_PRECISION, 64};
} else if constexpr (std::is_same_v<T, numeric::decimal128>) {
return {MAX_DECIMAL128_PRECISION, 128};
} else {
CUDF_FAIL("Unsupported fixed point type for arrow schema writer");
}
}();

field_offset =
flatbuf::CreateDecimal(
fbb,
(col_meta.is_decimal_precision_set()) ? col_meta.get_decimal_precision() : max_precision,
col->type().scale(),
bitwidth)
.Union();
}

template <typename T>
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/io/parquet/ipc/Schema_generated.h
Original file line number Diff line number Diff line change
Expand Up @@ -1393,9 +1393,9 @@ inline ::flatbuffers::Offset<RunEndEncoded> CreateRunEndEncoded(
}

/// Exact decimal value represented as an integer value in two's
/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers
/// are used. The representation uses the endianness indicated
/// in the Schema.
/// complement. Currently 32-bit (4-byte), 64-bit (8-byte),
/// 128-bit (16-byte) and 256-bit (32-byte) integers are used.
/// The representation uses the endianness indicated in the Schema.
struct Decimal FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
typedef DecimalBuilder Builder;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
Expand All @@ -1407,7 +1407,7 @@ struct Decimal FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
int32_t precision() const { return GetField<int32_t>(VT_PRECISION, 0); }
/// Number of digits after the decimal point "."
int32_t scale() const { return GetField<int32_t>(VT_SCALE, 0); }
/// Number of bits per value. The only accepted widths are 128 and 256.
/// Number of bits per value. The accepted widths are 32, 64, 128 and 256.
/// We use bitWidth for consistency with Int::bitWidth.
int32_t bitWidth() const { return GetField<int32_t>(VT_BITWIDTH, 128); }
bool Verify(::flatbuffers::Verifier& verifier) const
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/io/parquet/ipc/schema/Schema.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
/// Version 1.3 - Add Run-End Encoded.
/// Version 1.4 - Add BinaryView, Utf8View, variadicBufferCounts, ListView, and
/// LargeListView.
/// Version 1.5 - Add 32-bit and 64-bit as allowed bit widths for Decimal

namespace cudf.io.parquet.flatbuf;

Expand Down Expand Up @@ -243,9 +244,9 @@ table RunEndEncoded {
}

/// Exact decimal value represented as an integer value in two's
/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers
/// are used. The representation uses the endianness indicated
/// in the Schema.
/// complement. Currently 32-bit (4-byte), 64-bit (8-byte),
/// 128-bit (16-byte) and 256-bit (32-byte) integers are used.
/// The representation uses the endianness indicated in the Schema.
table Decimal {
/// Total number of decimal digits
precision: int;
Expand Down
Loading

0 comments on commit 6226c34

Please sign in to comment.