Skip to content

FFI initial implementation #12920

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
4bb3d58
Initial commit of FFI table provider code
timsaucer Oct 10, 2024
4d31722
Add table type
timsaucer Oct 10, 2024
d5f541e
Make struct pub
timsaucer Oct 10, 2024
afeb439
Implementing supports_filters_pushdown
timsaucer Oct 10, 2024
7fb77da
Move plan properties over to its own file
timsaucer Oct 10, 2024
3ab0f9f
Adding release function
timsaucer Oct 10, 2024
c616227
Adding release functions to additional structs
timsaucer Oct 10, 2024
461b4a9
Resolve memory leaks
timsaucer Oct 10, 2024
0408a9b
Rename ForeignExecutionPlan for consistency
timsaucer Oct 10, 2024
7922d8f
Resolving memory leak issues
timsaucer Oct 14, 2024
afd06be
Remove debug statements. Create runtime for block_on operations
timsaucer Oct 15, 2024
dafc982
Switching over to stable abi and async-ffi
timsaucer Oct 16, 2024
ff4d2e4
Make consistent the use of Foreign and FFI on struct names
timsaucer Oct 16, 2024
1dbca58
Apply prettier
timsaucer Oct 17, 2024
7761c84
Format for linter
timsaucer Oct 17, 2024
d0f8f88
Add doc-comment
timsaucer Oct 17, 2024
6b5227e
Add option to specify table provider does not support pushdown filter…
timsaucer Oct 21, 2024
400f45a
Remove setting default features in cargo file
timsaucer Oct 21, 2024
8b220cd
Tokio only needed for unit tests
timsaucer Oct 21, 2024
8d0f86d
Provide log errors rather than failing silently on schema requests
timsaucer Oct 21, 2024
9c01f75
Set default features for datafusion to false in ffi crate
timsaucer Oct 21, 2024
61f44ae
Using TryFrom or From instead of implementing new when there is only …
timsaucer Oct 26, 2024
1576520
Move arrow wrappers into their own file
timsaucer Oct 26, 2024
790f454
Add documentation
timsaucer Oct 26, 2024
bf626f4
Small adjustment to documentation
timsaucer Oct 27, 2024
bb47819
Add license text
timsaucer Oct 30, 2024
71ae880
Fix unnecessary qualification
timsaucer Oct 30, 2024
011340c
taplo format
timsaucer Oct 30, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ members = [
"datafusion/expr",
"datafusion/expr-common",
"datafusion/execution",
"datafusion/ffi",
"datafusion/functions",
"datafusion/functions-aggregate",
"datafusion/functions-aggregate-common",
Expand Down Expand Up @@ -99,6 +100,7 @@ datafusion-common-runtime = { path = "datafusion/common-runtime", version = "42.
datafusion-execution = { path = "datafusion/execution", version = "42.1.0" }
datafusion-expr = { path = "datafusion/expr", version = "42.1.0" }
datafusion-expr-common = { path = "datafusion/expr-common", version = "42.1.0" }
datafusion-ffi = { path = "datafusion/ffi", version = "42.1.0" }
datafusion-functions = { path = "datafusion/functions", version = "42.1.0" }
datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "42.1.0" }
datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "42.1.0" }
Expand Down
51 changes: 51 additions & 0 deletions datafusion/ffi/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

[package]
name = "datafusion-ffi"
description = "Foreign Function Interface implementation for DataFusion"
readme = "README.md"
version = { workspace = true }
edition = { workspace = true }
homepage = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
authors = { workspace = true }
# Specify MSRV here as `cargo msrv` doesn't support workspace version
rust-version = "1.76"

[lints]
workspace = true

[lib]
name = "datafusion_ffi"
path = "src/lib.rs"

[dependencies]
abi_stable = "0.11.3"
arrow = { workspace = true, features = ["ffi"] }
async-ffi = { version = "0.5.0", features = ["abi_stable"] }
async-trait = { workspace = true }
datafusion = { workspace = true, default-features = false }
datafusion-proto = { workspace = true }
doc-comment = { workspace = true }
futures = { workspace = true }
log = { workspace = true }
prost = { workspace = true }

[dev-dependencies]
tokio = { workspace = true }
81 changes: 81 additions & 0 deletions datafusion/ffi/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
<!---
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# `datafusion-ffi`: Apache DataFusion Foreign Function Interface

This crate contains code to allow interoperability of Apache [DataFusion]
with functions from other languages using a stable interface.
Comment on lines +22 to +23
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may also be good to point out that the ffi interface allows different versions of datafusion to interact with each other over stable interfaces in the intro

Suggested change
This crate contains code to allow interoperability of Apache [DataFusion]
with functions from other languages using a stable interface.
This crate contains code to allow interoperability of Apache [DataFusion]
with functions from other languages and/or versions using a stable interface.


See [API Docs] for details and examples.

We expect this crate may be used by both sides of the FFI. This allows users
to create modules that can interoperate without the necessity of using the same
version of DataFusion. The driving use case has been the `datafusion-python`
Comment on lines +27 to +29
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this text is backwards -- it is "without the necessity". Maybe we could rephrase like

Suggested change
We expect this crate may be used by both sides of the FFI. This allows users
to create modules that can interoperate with the necessity of using the same
version of DataFusion. The driving use case has been the `datafusion-python`
We expect this crate may be used by both sides of the FFI. This allows users
to create modules that can interoperate using different
versions of DataFusion. The driving use case has been the `datafusion-python`

repository, but many other use cases may exist. We envision at least two
use cases.

1. `datafusion-python` which will use the FFI to provide external services such
as a `TableProvider` without needing to re-export the entire `datafusion-python`
code base. With `datafusion-ffi` these packages do not need `datafusion-python`
as a dependency at all.
2. Users may want to create a modular interface that allows runtime loading of
libraries.

## Struct Layout
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest we move this discussion of code layout / struct naming into the rust code so it stays closer to the code and has less chance to get out of date

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I'll probably move this over to TableProvider which is the most complex and have the other structs refer to it for more information.


In this crate we have a variety of structs which closely mimic the behavior of
their internal counterparts. In the following example, we will refer to the
`TableProvider`, but the same pattern exists for other structs.

Each of the exposed structs in this crate is provided with a variant prefixed
with `Foreign`. This variant is designed to be used by the consumer of the
foreign code. The `Foreign` structs should _never_ access the `private_data`
fields. Instead they should only access the data returned through the function
calls defined on the `FFI_` structs. The second purpose of the `Foreign`
structs is to contain additional data that may be needed by the traits that
are implemented on them. Some of these traits require borrowing data which
can be far more convenient to store locally.

For example, we have a struct `FFI_TableProvider` to give access to the
`TableProvider` functions like `table_type()` and `scan()`. If we write a
library that wishes to expose its `TableProvider`, then we can access the
private data that contains the Arc reference to the `TableProvider` via
`FFI_TableProvider`. This data is local to the library.

If we have a program that accesses a `TableProvider` via FFI, then it
will use `ForeignTableProvider`. When using `ForeignTableProvider` we **must**
not attempt to access the `private_data` field in `FFI_TableProvider`. If a
user is testing locally, you may be able to successfully access this field, but
it will only work if you are building against the exact same version of
`DataFusion` for both libraries **and** the same compiler. It will not work
in general.

It is worth noting that which library is the `local` and which is `foreign`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should name this "DataFusion C API" or something instead of using the rust FFI term 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was originally pushing for a C style API but based on some feedback on the discord decided to go with this crate that focuses on rust-rust interface with a very nice interface. I'll see what it would be like to try using C with it as a small test.

depends on which interface we are considering. For example, suppose we have a
Python library called `my_provider` that exposes a `TableProvider` called
`MyProvider` via `FFI_TableProvider`. Within the library `my_provider` we can
access the `private_data` via `FFI_TableProvider`. We connect this to
`datafusion-python`, where we access it as a `ForeignTableProvider`. Now when
we call `scan()` on this interface, we have to pass it a `FFI_SessionConfig`.
The `SessionConfig` is local to `datafusion-python` and **not** `my_provider`.
It is important to be careful when expanding these functions to be certain which
side of the interface each object refers to.

[datafusion]: https://datafusion.apache.org
[api docs]: http://docs.rs/datafusion-ffi/latest
70 changes: 70 additions & 0 deletions datafusion/ffi/src/arrow_wrappers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::sync::Arc;

use abi_stable::StableAbi;
use arrow::{
datatypes::{Schema, SchemaRef},
ffi::{FFI_ArrowArray, FFI_ArrowSchema},
};
use log::error;

/// Newtype wrapper around [`FFI_ArrowSchema`].
///
/// The wrapper exists simply to indicate to the `StableAbi` derive macro
/// that the underlying struct is FFI safe: the inner field is annotated with
/// `#[sabi(unsafe_opaque_field)]`, and the struct itself is `#[repr(C)]`.
///
/// The wrapped schema is exposed as the public tuple field `.0`.
#[repr(C)]
#[derive(Debug, StableAbi)]
pub struct WrappedSchema(#[sabi(unsafe_opaque_field)] pub FFI_ArrowSchema);

impl From<SchemaRef> for WrappedSchema {
fn from(value: SchemaRef) -> Self {
let ffi_schema = match FFI_ArrowSchema::try_from(value.as_ref()) {
Ok(s) => s,
Err(e) => {
error!("Unable to convert DataFusion Schema to FFI_ArrowSchema in FFI_PlanProperties. {}", e);
FFI_ArrowSchema::empty()
}
};

WrappedSchema(ffi_schema)
}
}

impl From<WrappedSchema> for SchemaRef {
fn from(value: WrappedSchema) -> Self {
let schema = match Schema::try_from(&value.0) {
Ok(s) => s,
Err(e) => {
error!("Unable to convert from FFI_ArrowSchema to DataFusion Schema in FFI_PlanProperties. {}", e);
Schema::empty()
}
};
Arc::new(schema)
}
}

/// Wrapper struct pairing an [`FFI_ArrowArray`] with its schema.
///
/// The wrapper indicates to the `StableAbi` derive macro that the struct is
/// FFI safe. For convenience, it also carries the schema needed to create a
/// record batch from the array.
#[repr(C)]
#[derive(Debug, StableAbi)]
pub struct WrappedArray {
/// The Arrow array in its C Data Interface form; marked as an opaque field
/// for the `StableAbi` derive.
#[sabi(unsafe_opaque_field)]
pub array: FFI_ArrowArray,

/// Schema describing `array`, wrapped so it is also accepted by `StableAbi`.
pub schema: WrappedSchema,
}
Loading