Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip][ffi] transaction and append data FFI #550

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ffi/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
cffi-test
cffi-test.o
examples/read-table/.clangd
2 changes: 1 addition & 1 deletion ffi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ arrow-schema = { version = "53.0", default-features = false, features = [
arrow-data = { version = "53.0", default-features = false, features = [
"ffi",
], optional = true }
arrow-array = { version = "53.0", default-features = false, optional = true }
arrow-array = { version = "53.0", default-features = false, features = [ "ffi" ], optional = true }

[build-dependencies]
cbindgen = "0.27.0"
Expand Down
153 changes: 115 additions & 38 deletions ffi/examples/read-table/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,46 +1,123 @@
# target_link_directories() and target_link_options(), both used in this file, were introduced in
# CMake 3.13, so that is the oldest release able to configure this project.
cmake_minimum_required(VERSION 3.13)
project(read_table)

# Options
option(PRINT_DATA "Print out the table data. Requires arrow-glib" ON)
option(VERBOSE "Enable for more diagnostics messages." OFF)
add_executable(read_table read_table.c arrow.c kernel_utils.c)
target_compile_definitions(read_table PUBLIC DEFINE_DEFAULT_ENGINE)
target_include_directories(read_table PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers")
target_link_directories(read_table PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/debug")
target_link_libraries(read_table PUBLIC delta_kernel_ffi)
target_compile_options(read_table PUBLIC)

# Add the test
include(CTest)
set(TestRunner "../../../tests/read-table-testing/run_test.sh")
set(DatPath "../../../../acceptance/tests/dat/out/reader_tests/generated")
set(ExpectedPath "../../../tests/read-table-testing/expected-data")
add_test(NAME read_and_print_all_prim COMMAND ${TestRunner} ${DatPath}/all_primitive_types/delta/ ${ExpectedPath}/all-prim-types.expected)
add_test(NAME read_and_print_basic_partitioned COMMAND ${TestRunner} ${DatPath}/basic_partitioned/delta/ ${ExpectedPath}/basic-partitioned.expected)

if(WIN32)
set(CMAKE_C_FLAGS_DEBUG "/MT")
target_link_libraries(read_table PUBLIC ws2_32 userenv bcrypt ncrypt crypt32 secur32 ntdll RuntimeObject)
endif(WIN32)

if(MSVC)
target_compile_options(read_table PRIVATE /W3 /WX)
else()
# no-strict-prototypes because arrow headers have fn defs without prototypes
target_compile_options(read_table PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes -g -fsanitize=address)
target_link_options(read_table PRIVATE -g -fsanitize=address)
option(VERBOSE "Enable for more diagnostic messages." OFF)

# Paths to directories
# FFI_HEADERS_DIR: target/ffi-headers, three levels above this directory; added to the include
# path of common_lib and of each executable.
set(FFI_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers")
# TARGET_DEBUG_DIR: target/debug, three levels above this directory; used as a link directory
# (presumably where the delta_kernel_ffi library is built -- confirm against the cargo layout).
set(TARGET_DEBUG_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/debug")
# COMMON_DIR: holds the sources (arrow.c, kernel_utils.c) compiled into common_lib.
set(COMMON_DIR "${CMAKE_CURRENT_SOURCE_DIR}/common")

# Locate the GLib-based Arrow and Parquet packages through pkg-config. This is only needed when
# table data is printed (PRINT_DATA). Each pkg_check_modules() call populates the
# <PREFIX>_INCLUDE_DIRS, <PREFIX>_LIBRARY_DIRS, <PREFIX>_LIBRARIES and <PREFIX>_CFLAGS_OTHER
# variables for its prefix.
if(PRINT_DATA)
  # find_package() is the documented entry point for the FindPkgConfig module; with REQUIRED it
  # stops configuration with a clear error when the pkg-config tool itself is missing.
  find_package(PkgConfig REQUIRED)
  pkg_check_modules(GLIB REQUIRED glib-2.0)
  pkg_check_modules(ARROW_GLIB REQUIRED arrow-glib)
  pkg_check_modules(PARQUET_GLIB REQUIRED parquet-glib)
endif()

# Static library holding the Arrow and kernel helper code shared by the example executables.
add_library(common_lib STATIC
  "${COMMON_DIR}/arrow.c"
  "${COMMON_DIR}/kernel_utils.c"
)

# Usage requirements that apply regardless of options. They are collected in lists first so the
# optional arrow-glib / parquet-glib entries can be appended below.
set(read_table_common_include_dirs "${COMMON_DIR}" "${FFI_HEADERS_DIR}")
set(read_table_common_link_dirs "${TARGET_DEBUG_DIR}")
set(read_table_common_link_libs delta_kernel_ffi)

# PRINT_DATA is fixed at configure time, so a plain if() is enough. The pkg-config result
# variables are ;-separated lists; expanding them unquoted inside a generator expression would
# split the expression at every ';' and hand CMake malformed fragments.
if(PRINT_DATA)
  list(APPEND read_table_common_include_dirs
    ${ARROW_GLIB_INCLUDE_DIRS}
    ${PARQUET_GLIB_INCLUDE_DIRS}
    "${CMAKE_CURRENT_SOURCE_DIR}"
  )
  list(APPEND read_table_common_link_dirs
    ${ARROW_GLIB_LIBRARY_DIRS}
    ${PARQUET_GLIB_LIBRARY_DIRS}
  )
  list(APPEND read_table_common_link_libs
    ${ARROW_GLIB_LIBRARIES}
    ${PARQUET_GLIB_LIBRARIES}
  )
endif()

# Everything is PUBLIC so that executables linking common_lib inherit the same include paths,
# link directories and libraries.
target_include_directories(common_lib PUBLIC ${read_table_common_include_dirs})
target_link_directories(common_lib PUBLIC ${read_table_common_link_dirs})
target_link_libraries(common_lib PUBLIC ${read_table_common_link_libs})

# Compile definitions for common_lib
target_compile_definitions(common_lib PUBLIC DEFINE_DEFAULT_ENGINE)

if(VERBOSE)
target_compile_definitions(read_table PUBLIC VERBOSE)
endif(VERBOSE)
target_compile_definitions(common_lib PUBLIC VERBOSE)
endif()

if(PRINT_DATA)
include(FindPkgConfig)
pkg_check_modules(GLIB REQUIRED glib-2.0)
pkg_check_modules(ARROW_GLIB REQUIRED arrow-glib)
target_include_directories(read_table PUBLIC ${ARROW_GLIB_INCLUDE_DIRS})
target_link_directories(read_table PUBLIC ${ARROW_GLIB_LIBRARY_DIRS})
target_link_libraries(read_table PUBLIC ${ARROW_GLIB_LIBRARIES})
target_compile_options(read_table PUBLIC ${ARROW_GLIB_CFLAGS_OTHER})
target_compile_definitions(read_table PUBLIC PRINT_ARROW_DATA)
endif(PRINT_DATA)
target_compile_definitions(common_lib PUBLIC PRINT_ARROW_DATA)
target_compile_options(common_lib PUBLIC ${ARROW_GLIB_CFLAGS_OTHER})
target_compile_options(common_lib PUBLIC ${PARQUET_GLIB_CFLAGS_OTHER})
endif()

# Example programs. Each one is built from the single source file <name>.c and linked against
# common_lib.
set(EXECUTABLES read_table write_table)

foreach(example IN LISTS EXECUTABLES)
  add_executable(${example} ${example}.c)

  # Headers: this directory plus the FFI headers.
  target_include_directories(${example} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}" ${FFI_HEADERS_DIR})

  # Link against the shared helper library; the link directory is where delta_kernel_ffi is searched.
  target_link_directories(${example} PUBLIC ${TARGET_DEBUG_DIR})
  target_link_libraries(${example} PUBLIC common_lib)

  # Option-dependent definitions and flags.
  if(VERBOSE)
    target_compile_definitions(${example} PUBLIC VERBOSE)
  endif()

  if(PRINT_DATA)
    target_compile_definitions(${example} PUBLIC PRINT_ARROW_DATA)
    target_compile_options(${example} PUBLIC
      ${ARROW_GLIB_CFLAGS_OTHER}
      ${PARQUET_GLIB_CFLAGS_OTHER}
    )
  endif()

  # Warnings are errors on every toolchain; non-MSVC builds also get debug info and
  # AddressSanitizer at both compile and link time.
  if(NOT MSVC)
    # -Wno-strict-prototypes: the arrow headers declare functions without prototypes.
    target_compile_options(${example} PRIVATE
      -Wall
      -Wextra
      -Wpedantic
      -Werror
      -Wno-strict-prototypes
      -g
      -fsanitize=address
    )
    target_link_options(${example} PRIVATE -g -fsanitize=address)
  else()
    target_compile_options(${example} PRIVATE /W3 /WX)
  endif()

  # Windows: set the debug C flags to /MT and add the system libraries to the link line.
  if(WIN32)
    set(CMAKE_C_FLAGS_DEBUG "/MT")
    target_link_libraries(${example} PUBLIC
      ws2_32
      userenv
      bcrypt
      ncrypt
      crypt32
      secur32
      ntdll
      RuntimeObject
    )
  endif()
endforeach()

# Register the CTest cases. Each case runs the test runner script with a table directory and the
# file holding the expected output for that table.
include(CTest)

# NOTE(review): these paths are relative and are passed to the test command as-is, so they
# presumably resolve against the directory the tests run from -- confirm the expected build
# directory layout.
set(TestRunner "../../../tests/read-table-testing/run_test.sh")
set(DatPath "../../../../acceptance/tests/dat/out/reader_tests/generated")
set(ExpectedPath "../../../tests/read-table-testing/expected-data")

# read_table_add_print_test(<test_name> <table_name> <expected_file>)
#   test_name     - name under which the test is registered with CTest
#   table_name    - directory below ${DatPath}; its delta/ subdirectory is passed to the runner
#   expected_file - file name below ${ExpectedPath} that is passed to the runner
function(read_table_add_print_test test_name table_name expected_file)
  add_test(NAME ${test_name}
    COMMAND ${TestRunner} ${DatPath}/${table_name}/delta/ ${ExpectedPath}/${expected_file}
  )
endfunction()

read_table_add_print_test(read_and_print_all_prim all_primitive_types all-prim-types.expected)
read_table_add_print_test(read_and_print_basic_partitioned basic_partitioned basic-partitioned.expected)
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <delta_kernel_ffi.h>
#include <stdio.h>
#include <string.h>

#include "delta_kernel_ffi.h"
#include "kernel_utils.h"

// some diagnostic functions
Expand Down Expand Up @@ -61,7 +62,7 @@ void* allocate_string(const KernelStringSlice slice)
}

// utility function to convert key/val into slices and set them on a builder
void set_builder_opt(EngineBuilder* engine_builder, char* key, char* val)
void set_builder_opt(struct EngineBuilder* engine_builder, char* key, char* val)
{
KernelStringSlice key_slice = { key, strlen(key) };
KernelStringSlice val_slice = { val, strlen(val) };
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once

#include <delta_kernel_ffi.h>
#include "delta_kernel_ffi.h"

// This is how we represent our errors. The kernel will ask us to construct this struct whenever it
// encounters an error, and then return the constructed EngineError to us
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"commitInfo":{"timestamp":1678020185201,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"10","numOutputBytes":"548"},"engineInfo":"Apache-Spark/3.3.0 Delta-Lake/2.3.0rc1","txnId":"07c0f996-3854-4456-b68b-d1e35e3888cd"}}
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":[],"writerFeatures":[]}}
{"metaData":{"id":"6524c99f-9a76-4ea1-8ad4-e428a7e065d7","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1678020184802}}
{"add":{"path":"part-00000-517f5d32-9c95-48e8-82b4-0229cc194867-c000.snappy.parquet","partitionValues":{},"size":548,"modificationTime":1678020185157,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":9},\"nullCount\":{\"value\":0}}"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"commitInfo":{"timestamp":1732749680780,"operation":"UNKNOWN","operationParameters":{},"kernelVersion":"v0.5.0","engineCommitInfo":{"engineInfo":"default engine C FFI"}}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"commitInfo":{"timestamp":1732749716500,"operation":"UNKNOWN","operationParameters":{},"kernelVersion":"v0.5.0","engineCommitInfo":{"engineInfo":"default engine C FFI"}}}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"commitInfo":{"timestamp":1678020185201,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"10","numOutputBytes":"548"},"engineInfo":"Apache-Spark/3.3.0 Delta-Lake/2.3.0rc1","txnId":"07c0f996-3854-4456-b68b-d1e35e3888cd"}}
{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
{"metaData":{"id":"6524c99f-9a76-4ea1-8ad4-e428a7e065d7","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1678020184802}}
{"add":{"path":"part-00000-517f5d32-9c95-48e8-82b4-0229cc194867-c000.snappy.parquet","partitionValues":{},"size":548,"modificationTime":1678020185157,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":9},\"nullCount\":{\"value\":0}}"}}
Binary file not shown.
2 changes: 1 addition & 1 deletion ffi/examples/read-table/read_table.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once

#include <delta_kernel_ffi.h>
#include "delta_kernel_ffi.h"

// A list of partition column names
typedef struct PartitionList
Expand Down
Loading
Loading