Skip to content

Commit b40a298

Browse files
authored
feat(logical-types): add NativeType and LogicalType (#12853)
* [logical-types] add NativeType and LogicalType * Add license header * Add NativeField and derivatives * Support TypeSignatures * Fix doc * Add documentation * Fix doc tests * Remove dummy test * From NativeField to LogicalField * Add default_cast_for * Add type order with can_cast_types * Rename NativeType Utf8 to String * NativeType from &DataType * Add builtin types * From LazyLock to OnceLock
1 parent 89e96b4 commit b40a298

File tree

6 files changed

+717
-0
lines changed

6 files changed

+717
-0
lines changed

datafusion/common/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ pub mod scalar;
4444
pub mod stats;
4545
pub mod test_util;
4646
pub mod tree_node;
47+
pub mod types;
4748
pub mod utils;
4849

4950
/// Reexport arrow crate
+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::types::{LogicalTypeRef, NativeType};
19+
use std::sync::{Arc, OnceLock};
20+
21+
/// Declares a lazily initialised singleton for one built-in logical type,
/// together with its public getter.
///
/// * `$name` - identifier of the private `static` cell holding the instance.
/// * `$getter` - name of the generated public accessor function.
/// * `$ty` - the [`NativeType`] variant wrapped by the singleton.
macro_rules! singleton {
    ($name:ident, $getter:ident, $ty:ident) => {
        // TODO: Use LazyLock instead of getter function when MSRV gets bumped
        static $name: OnceLock<LogicalTypeRef> = OnceLock::new();

        #[doc = "Getter for singleton instance of a logical type representing"]
        #[doc = concat!("[`NativeType::", stringify!($ty), "`].")]
        pub fn $getter() -> LogicalTypeRef {
            // The cell is filled on first access; every later call only
            // bumps the reference count of the stored instance.
            let instance = $name.get_or_init(|| Arc::new(NativeType::$ty));
            Arc::clone(instance)
        }
    };
}
33+
34+
singleton!(LOGICAL_NULL, logical_null, Null);
35+
singleton!(LOGICAL_BOOLEAN, logical_boolean, Boolean);
36+
singleton!(LOGICAL_INT8, logical_int8, Int8);
37+
singleton!(LOGICAL_INT16, logical_int16, Int16);
38+
singleton!(LOGICAL_INT32, logical_int32, Int32);
39+
singleton!(LOGICAL_INT64, logical_int64, Int64);
40+
singleton!(LOGICAL_UINT8, logical_uint8, UInt8);
41+
singleton!(LOGICAL_UINT16, logical_uint16, UInt16);
42+
singleton!(LOGICAL_UINT32, logical_uint32, UInt32);
43+
singleton!(LOGICAL_UINT64, logical_uint64, UInt64);
44+
singleton!(LOGICAL_FLOAT16, logical_float16, Float16);
45+
singleton!(LOGICAL_FLOAT32, logical_float32, Float32);
46+
singleton!(LOGICAL_FLOAT64, logical_float64, Float64);
47+
singleton!(LOGICAL_DATE, logical_date, Date);
48+
singleton!(LOGICAL_BINARY, logical_binary, Binary);
49+
singleton!(LOGICAL_STRING, logical_string, String);

datafusion/common/src/types/field.rs

+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow_schema::{Field, Fields, UnionFields};
19+
use std::hash::{Hash, Hasher};
20+
use std::{ops::Deref, sync::Arc};
21+
22+
use super::{LogicalTypeRef, NativeType};
23+
24+
/// A record of a logical type, its name and its nullability.
25+
#[derive(Debug, Clone, Eq, PartialOrd, Ord)]
26+
pub struct LogicalField {
27+
pub name: String,
28+
pub logical_type: LogicalTypeRef,
29+
pub nullable: bool,
30+
}
31+
32+
impl PartialEq for LogicalField {
33+
fn eq(&self, other: &Self) -> bool {
34+
self.name == other.name
35+
&& self.logical_type.eq(&other.logical_type)
36+
&& self.nullable == other.nullable
37+
}
38+
}
39+
40+
impl Hash for LogicalField {
41+
fn hash<H: Hasher>(&self, state: &mut H) {
42+
self.name.hash(state);
43+
self.logical_type.hash(state);
44+
self.nullable.hash(state);
45+
}
46+
}
47+
48+
impl From<&Field> for LogicalField {
49+
fn from(value: &Field) -> Self {
50+
Self {
51+
name: value.name().clone(),
52+
logical_type: Arc::new(NativeType::from(value.data_type().clone())),
53+
nullable: value.is_nullable(),
54+
}
55+
}
56+
}
57+
58+
/// A reference counted [`LogicalField`].
59+
pub type LogicalFieldRef = Arc<LogicalField>;
60+
61+
/// A cheaply cloneable, owned collection of [`LogicalFieldRef`].
62+
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
63+
pub struct LogicalFields(Arc<[LogicalFieldRef]>);
64+
65+
impl Deref for LogicalFields {
66+
type Target = [LogicalFieldRef];
67+
68+
fn deref(&self) -> &Self::Target {
69+
self.0.as_ref()
70+
}
71+
}
72+
73+
impl From<&Fields> for LogicalFields {
74+
fn from(value: &Fields) -> Self {
75+
value
76+
.iter()
77+
.map(|field| Arc::new(LogicalField::from(field.as_ref())))
78+
.collect()
79+
}
80+
}
81+
82+
impl FromIterator<LogicalFieldRef> for LogicalFields {
83+
fn from_iter<T: IntoIterator<Item = LogicalFieldRef>>(iter: T) -> Self {
84+
Self(iter.into_iter().collect())
85+
}
86+
}
87+
88+
/// A cheaply cloneable, owned collection of [`LogicalFieldRef`] and their
89+
/// corresponding type ids.
90+
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
91+
pub struct LogicalUnionFields(Arc<[(i8, LogicalFieldRef)]>);
92+
93+
impl Deref for LogicalUnionFields {
94+
type Target = [(i8, LogicalFieldRef)];
95+
96+
fn deref(&self) -> &Self::Target {
97+
self.0.as_ref()
98+
}
99+
}
100+
101+
impl From<&UnionFields> for LogicalUnionFields {
102+
fn from(value: &UnionFields) -> Self {
103+
value
104+
.iter()
105+
.map(|(i, field)| (i, Arc::new(LogicalField::from(field.as_ref()))))
106+
.collect()
107+
}
108+
}
109+
110+
impl FromIterator<(i8, LogicalFieldRef)> for LogicalUnionFields {
111+
fn from_iter<T: IntoIterator<Item = (i8, LogicalFieldRef)>>(iter: T) -> Self {
112+
Self(iter.into_iter().collect())
113+
}
114+
}
+128
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use super::NativeType;
19+
use crate::error::Result;
20+
use arrow_schema::DataType;
21+
use core::fmt;
22+
use std::{cmp::Ordering, hash::Hash, sync::Arc};
23+
24+
/// Signature that uniquely identifies a type among other types.
25+
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
26+
pub enum TypeSignature<'a> {
27+
/// Represents a built-in native type.
28+
Native(&'a NativeType),
29+
/// Represents an arrow-compatible extension type.
30+
/// (<https://arrow.apache.org/docs/format/Columnar.html#extension-types>)
31+
///
32+
/// The `name` should contain the same value as 'ARROW:extension:name'.
33+
Extension {
34+
name: &'a str,
35+
parameters: &'a [TypeParameter<'a>],
36+
},
37+
}
38+
39+
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
40+
pub enum TypeParameter<'a> {
41+
Type(TypeSignature<'a>),
42+
Number(i128),
43+
}
44+
45+
/// A reference counted [`LogicalType`].
46+
pub type LogicalTypeRef = Arc<dyn LogicalType>;
47+
48+
/// Representation of a logical type with its signature and its native backing
49+
/// type.
50+
///
51+
/// The logical type is meant to be used during the DataFusion logical planning
52+
/// phase in order to reason about logical types without worrying about their
53+
/// underlying physical implementation.
54+
///
55+
/// ### Extension types
56+
///
57+
/// [`LogicalType`] is a trait in order to allow the possibility of declaring
58+
/// extension types:
59+
///
60+
/// ```
61+
/// use datafusion_common::types::{LogicalType, NativeType, TypeSignature};
62+
///
63+
/// struct JSON {}
64+
///
65+
/// impl LogicalType for JSON {
66+
/// fn native(&self) -> &NativeType {
67+
/// &NativeType::String
68+
/// }
69+
///
70+
/// fn signature(&self) -> TypeSignature<'_> {
71+
/// TypeSignature::Extension {
72+
/// name: "JSON",
73+
/// parameters: &[],
74+
/// }
75+
/// }
76+
/// }
77+
/// ```
78+
pub trait LogicalType: Sync + Send {
79+
/// Get the native backing type of this logical type.
80+
fn native(&self) -> &NativeType;
81+
/// Get the unique type signature for this logical type. Logical types with identical
82+
/// signatures are considered equal.
83+
fn signature(&self) -> TypeSignature<'_>;
84+
85+
/// Get the default physical type to cast `origin` to in order to obtain a physical type
86+
/// that is logically compatible with this logical type.
87+
fn default_cast_for(&self, origin: &DataType) -> Result<DataType> {
88+
self.native().default_cast_for(origin)
89+
}
90+
}
91+
92+
impl fmt::Debug for dyn LogicalType {
93+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
94+
f.debug_tuple("LogicalType")
95+
.field(&self.signature())
96+
.field(&self.native())
97+
.finish()
98+
}
99+
}
100+
101+
impl PartialEq for dyn LogicalType {
102+
fn eq(&self, other: &Self) -> bool {
103+
self.signature().eq(&other.signature())
104+
}
105+
}
106+
107+
impl Eq for dyn LogicalType {}
108+
109+
impl PartialOrd for dyn LogicalType {
110+
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
111+
Some(self.cmp(other))
112+
}
113+
}
114+
115+
impl Ord for dyn LogicalType {
116+
fn cmp(&self, other: &Self) -> Ordering {
117+
self.signature()
118+
.cmp(&other.signature())
119+
.then(self.native().cmp(other.native()))
120+
}
121+
}
122+
123+
impl Hash for dyn LogicalType {
124+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
125+
self.signature().hash(state);
126+
self.native().hash(state);
127+
}
128+
}

datafusion/common/src/types/mod.rs

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
mod builtin;
19+
mod field;
20+
mod logical;
21+
mod native;
22+
23+
pub use builtin::*;
24+
pub use field::*;
25+
pub use logical::*;
26+
pub use native::*;

0 commit comments

Comments
 (0)