Skip to content

Commit 4673d76

Browse files
committed
Add additional documentation and examples to DataType
1 parent 1f0b000 commit 4673d76

File tree

1 file changed

+60
-8
lines changed

1 file changed

+60
-8
lines changed

arrow-schema/src/datatype.rs

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,64 @@ use std::sync::Arc;
2121

2222
use crate::{ArrowError, Field, FieldRef, Fields, UnionFields};
2323

24-
/// The set of datatypes that are supported by this implementation of Apache Arrow.
24+
/// Datatypes supported by this implementation of Apache Arrow.
2525
///
26-
/// The Arrow specification on data types includes some more types.
27-
/// See also [`Schema.fbs`](https://github.com/apache/arrow/blob/main/format/Schema.fbs)
28-
/// for Arrow's specification.
26+
/// The variants of this enum include primitive fixed size types as well as
27+
/// parametric or nested types. See [`Schema.fbs`] for Arrow's specification.
2928
///
30-
/// The variants of this enum include primitive fixed size types as well as parametric or
31-
/// nested types.
32-
/// Currently the Rust implementation supports the following nested types:
29+
/// # Examples
30+
///
31+
/// Primitive Types
32+
/// ```
33+
/// # use arrow_schema::DataType;
34+
/// // create a new 32-bit signed integer type
35+
/// let data_type = DataType::Int32;
36+
/// ```
37+
///
38+
/// Nested Types
39+
/// ```
40+
/// # use arrow_schema::{DataType, Field};
41+
/// # use std::sync::Arc;
42+
/// // create a new list of 32-bit signed integers directly
43+
/// let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
44+
/// // create the same list type using the `new_list` constructor
45+
/// let list_data_type2 = DataType::new_list(DataType::Int32, true);
46+
/// assert_eq!(list_data_type, list_data_type2);
47+
/// ```
48+
///
49+
/// Dictionary Types
50+
/// ```
51+
/// # use arrow_schema::{DataType};
52+
/// // String Dictionary (key type Int32 and value type Utf8)
53+
/// let data_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
54+
/// ```
55+
///
56+
/// Timestamp Types
57+
/// ```
58+
/// # use arrow_schema::{DataType, TimeUnit};
59+
/// // timestamp with millisecond precision without timezone specified
60+
/// let data_type = DataType::Timestamp(TimeUnit::Millisecond, None);
61+
/// // timestamp with nanosecond precision in UTC timezone
62+
/// let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()));
63+
/// ```
64+
///
65+
/// # Display and FromStr
66+
///
67+
/// The `Display` and `FromStr` implementations for `DataType` are
68+
/// human-readable, parseable, and reversible.
69+
///
70+
/// ```
71+
/// # use arrow_schema::DataType;
72+
/// let data_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
73+
/// let data_type_string = data_type.to_string();
74+
/// assert_eq!(data_type_string, "Dictionary(Int32, Utf8)");
75+
/// // display can be parsed back into the original type
76+
/// let parsed_data_type: DataType = data_type.to_string().parse().unwrap();
77+
/// assert_eq!(data_type, parsed_data_type);
78+
/// ```
79+
///
80+
/// # Nested Support
81+
/// Currently, the Rust implementation supports the following nested types:
3382
/// - `List<T>`
3483
/// - `LargeList<T>`
3584
/// - `FixedSizeList<T>`
@@ -39,7 +88,10 @@ use crate::{ArrowError, Field, FieldRef, Fields, UnionFields};
3988
///
4089
/// Nested types can themselves be nested within other arrays.
4190
/// For more information on these types please see
42-
/// [the physical memory layout of Apache Arrow](https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout).
91+
/// [the physical memory layout of Apache Arrow].
92+
///
93+
/// [`Schema.fbs`]: https://github.com/apache/arrow/blob/main/format/Schema.fbs
94+
/// [the physical memory layout of Apache Arrow]: https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout
4395
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
4496
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
4597
pub enum DataType {

0 commit comments

Comments
 (0)