From e2f91a8078667f0c9ad034783486ed2104641c3e Mon Sep 17 00:00:00 2001
From: Luka Peschke
Date: Fri, 9 Feb 2024 16:53:34 +0100
Subject: [PATCH] feat: add support for int columns

Signed-off-by: Luka Peschke
---
 src/utils/arrow.rs | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/utils/arrow.rs b/src/utils/arrow.rs
index e56c36d..12b2df9 100644
--- a/src/utils/arrow.rs
+++ b/src/utils/arrow.rs
@@ -37,6 +37,7 @@ fn get_cell_type(data: &Range<CalData>, row: usize, col: usize) -> Result<ArrowDataType> {
 }
 
 static FLOAT_TYPES_CELL: OnceLock<HashSet<ArrowDataType>> = OnceLock::new();
+static INT_TYPES_CELL: OnceLock<HashSet<ArrowDataType>> = OnceLock::new();
 static STRING_TYPES_CELL: OnceLock<HashSet<ArrowDataType>> = OnceLock::new();
 
 fn float_types() -> &'static HashSet<ArrowDataType> {
@@ -49,6 +50,10 @@ fn float_types() -> &'static HashSet<ArrowDataType> {
     })
 }
 
+fn int_types() -> &'static HashSet<ArrowDataType> {
+    INT_TYPES_CELL.get_or_init(|| HashSet::from([ArrowDataType::Int64, ArrowDataType::Boolean]))
+}
+
 fn string_types() -> &'static HashSet<ArrowDataType> {
     STRING_TYPES_CELL.get_or_init(|| {
         HashSet::from([
@@ -78,6 +83,9 @@ fn get_arrow_column_type(
     } else if column_types.len() == 1 {
         // If a single non-null type was found, return it
         Ok(column_types.into_iter().next().unwrap())
+    } else if column_types.is_subset(int_types()) {
+        // If every cell in the column can be converted to an int, return int64
+        Ok(ArrowDataType::Int64)
     } else if column_types.is_subset(float_types()) {
         // If every cell in the column can be converted to a float, return Float64
         Ok(ArrowDataType::Float64)
@@ -171,6 +179,8 @@ mod tests {
     #[case(5, 8, ArrowDataType::Float64)]
     // int + float + bool + null
     #[case(5, 9, ArrowDataType::Float64)]
+    // int + bool
+    #[case(8, 10, ArrowDataType::Int64)]
     fn get_arrow_column_type_multi_dtype_ok(
         range: Range<CalData>,
         #[case] start_row: usize,