From 6d76a1958ef98a7096916292b95f79298627c2f3 Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Sun, 25 Feb 2024 19:37:38 +0100 Subject: [PATCH] feat: implemented custom errors (#186) * feat: implemented custom errors This adds custom exception types, which should make error management by users easier closes #45 Signed-off-by: Luka Peschke * replace match with .map_err Signed-off-by: Luka Peschke * added docstrings to exception types Signed-off-by: Luka Peschke * reorder __all__ to be more logical in html docs Signed-off-by: Luka Peschke * ci: added a docs check job Signed-off-by: Luka Peschke --------- Signed-off-by: Luka Peschke --- .github/workflows/CI.yml | 28 ++++ Cargo.lock | 8 -- Cargo.toml | 3 +- Makefile | 1 + python/fastexcel/__init__.py | 29 ++++- python/fastexcel/_fastexcel.pyi | 10 ++ python/tests/test_errors.py | 60 +++++++++ python/tests/test_fastexcel.py | 4 +- src/error.rs | 221 ++++++++++++++++++++++++++++++++ src/lib.rs | 43 ++++++- src/types/excelreader.rs | 36 ++++-- src/types/excelsheet.rs | 43 +++++-- src/utils/arrow.rs | 23 ++-- 13 files changed, 456 insertions(+), 53 deletions(-) create mode 100644 python/tests/test_errors.py create mode 100644 src/error.rs diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index eb3a09e..732fdc2 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -110,3 +110,31 @@ jobs: command: build args: "-o dist --interpreter python${{ matrix.python-version }}" target: ${{ steps.target.outputs.target }} + + check-docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Set up rust toolchain + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: | + git config user.name github-actions + git config user.email github-actions@github.com + + # venv required by maturin + python3 -m venv .venv + source .venv/bin/activate + + make 
install-test-requirements + make install-doc-requirements + # Required for pdoc to be able to import the sources + make dev-install + make doc diff --git a/Cargo.lock b/Cargo.lock index e6317d1..8c44037 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -45,12 +45,6 @@ dependencies = [ "libc", ] -[[package]] -name = "anyhow" -version = "1.0.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" - [[package]] name = "arrow" version = "50.0.0" @@ -363,7 +357,6 @@ dependencies = [ name = "fastexcel" version = "0.9.1" dependencies = [ - "anyhow", "arrow", "calamine", "chrono", @@ -706,7 +699,6 @@ version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53bdbb96d49157e65d45cc287af5f32ffadd5f4761438b527b055fb0d4bb8233" dependencies = [ - "anyhow", "cfg-if", "indoc", "libc", diff --git a/Cargo.toml b/Cargo.toml index 0e86451..c62376d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,10 +9,9 @@ name = "fastexcel" crate-type = ["cdylib"] [dependencies] -anyhow = "1.0.80" calamine = { version = "0.24.0", features = ["dates"] } chrono = { version = "0.4.34", default-features = false } -pyo3 = { version = "0.20.3", features = ["extension-module", "anyhow", "abi3-py38"] } +pyo3 = { version = "0.20.3", features = ["extension-module", "abi3-py38"] } [dependencies.arrow] version = "50.0.0" diff --git a/Makefile b/Makefile index 5bee4ed..d4e2ee7 100644 --- a/Makefile +++ b/Makefile @@ -22,6 +22,7 @@ format: $(ruff) --fix $(format) $(fmt) + $(clippy) --fix --lib -p fastexcel --allow-dirty --allow-staged install-test-requirements: pip install -U -r test-requirements.txt -r build-requirements.txt diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py index 46265d3..fb00d9b 100644 --- a/python/fastexcel/__init__.py +++ b/python/fastexcel/__init__.py @@ -12,7 +12,19 @@ import pyarrow as pa -from ._fastexcel import __version__, _ExcelReader, 
_ExcelSheet +from ._fastexcel import ( + ArrowError, + CalamineCellError, + CalamineError, + CannotRetrieveCellDataError, + FastExcelError, + InvalidParametersError, + SheetNotFoundError, + UnsupportedColumnTypeCombinationError, + __version__, + _ExcelReader, + _ExcelSheet, +) from ._fastexcel import read_excel as _read_excel @@ -202,4 +214,17 @@ def read_excel(path: Path | str) -> ExcelReader: return ExcelReader(_read_excel(expanduser(path))) -__all__ = ("ExcelReader", "ExcelSheet", "read_excel", "__version__") +__all__ = ( + "__version__", + "read_excel", + "ExcelReader", + "ExcelSheet", + "FastExcelError", + "CannotRetrieveCellDataError", + "CalamineCellError", + "CalamineError", + "SheetNotFoundError", + "ArrowError", + "InvalidParametersError", + "UnsupportedColumnTypeCombinationError", +) diff --git a/python/fastexcel/_fastexcel.pyi b/python/fastexcel/_fastexcel.pyi index 26e1841..865f4e7 100644 --- a/python/fastexcel/_fastexcel.pyi +++ b/python/fastexcel/_fastexcel.pyi @@ -61,3 +61,13 @@ def read_excel(path: str) -> _ExcelReader: """Reads an excel file and returns an ExcelReader""" __version__: str + +# Exceptions +class FastExcelError(Exception): ... +class UnsupportedColumnTypeCombinationError(FastExcelError): ... +class CannotRetrieveCellDataError(FastExcelError): ... +class CalamineCellError(FastExcelError): ... +class CalamineError(FastExcelError): ... +class SheetNotFoundError(FastExcelError): ... +class ArrowError(FastExcelError): ... +class InvalidParametersError(FastExcelError): ... 
diff --git a/python/tests/test_errors.py b/python/tests/test_errors.py new file mode 100644 index 0000000..1b1c33e --- /dev/null +++ b/python/tests/test_errors.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +import fastexcel +import pytest +from utils import path_for_fixture + + +def test_does_not_exist() -> None: + expected_message = """calamine error: Cannot detect file format +Context: + 0: Could not open workbook at path_does_not_exist.nope + 1: could not load excel file at path_does_not_exist.nope""" + + with pytest.raises(fastexcel.CalamineError, match=expected_message) as exc_info: + fastexcel.read_excel("path_does_not_exist.nope") + + assert exc_info.value.__doc__ == "Generic calamine error" + + # Should also work with the base error type + with pytest.raises(fastexcel.FastExcelError, match=expected_message): + fastexcel.read_excel("path_does_not_exist.nope") + + +def test_sheet_not_found_error() -> None: + excel_reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx")) + expected_message = """sheet at index 42 not found +Context: + 0: Sheet index 42 is out of range. 
File has 1 sheets""" + + with pytest.raises(fastexcel.SheetNotFoundError, match=expected_message) as exc_info: + excel_reader.load_sheet(42) + + assert exc_info.value.__doc__ == "Sheet was not found" + + # Should also work with the base error type + with pytest.raises(fastexcel.FastExcelError, match=expected_message): + excel_reader.load_sheet(42) + + +@pytest.mark.parametrize( + "exc_class, expected_docstring", + [ + (fastexcel.FastExcelError, "The base class for all fastexcel errors"), + ( + fastexcel.UnsupportedColumnTypeCombinationError, + "Column contains an unsupported type combination", + ), + (fastexcel.CannotRetrieveCellDataError, "Data for a given cell cannot be retrieved"), + ( + fastexcel.CalamineCellError, + "calamine returned an error regarding the content of the cell", + ), + (fastexcel.CalamineError, "Generic calamine error"), + (fastexcel.SheetNotFoundError, "Sheet was not found"), + (fastexcel.ArrowError, "Generic arrow error"), + (fastexcel.InvalidParametersError, "Provided parameters are invalid"), + ], +) +def test_docstrings(exc_class: type[Exception], expected_docstring: str) -> None: + assert exc_class.__doc__ == expected_docstring diff --git a/python/tests/test_fastexcel.py b/python/tests/test_fastexcel.py index baf6999..f59caaf 100644 --- a/python/tests/test_fastexcel.py +++ b/python/tests/test_fastexcel.py @@ -387,7 +387,9 @@ def test_sheet_with_pagination_out_of_bound(): excel_reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet-with-types.xlsx")) assert excel_reader.sheet_names == ["Sheet1"] - with pytest.raises(RuntimeError, match="To many rows skipped. Max height is 4"): + with pytest.raises( + fastexcel.InvalidParametersError, match="Too many rows skipped. 
Max height is 4" + ): excel_reader.load_sheet( 0, skip_rows=1000000, diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..4c6de09 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,221 @@ +use std::{error::Error, fmt::Display}; + +#[derive(Debug)] +pub(crate) enum SheetIdxOrName { + Idx(usize), + // Leaving this variant if someday we want to check if a name exists before calling worksheet_range + #[allow(dead_code)] + Name(String), +} + +#[derive(Debug)] +pub(crate) enum FastExcelErrorKind { + UnsupportedColumnTypeCombination(String), + CannotRetrieveCellData(usize, usize), + CalamineCellError(calamine::CellErrorType), + CalamineError(calamine::Error), + SheetNotFound(SheetIdxOrName), + // Arrow errors can be of several different types (arrow::error::Error, PyError), and having + // the actual type has not much value for us, so we just store a string context + ArrowError(String), + InvalidParameters(String), +} + +impl Display for FastExcelErrorKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + FastExcelErrorKind::UnsupportedColumnTypeCombination(detail) => { + write!(f, "unsupported column type combination: {detail}") + } + FastExcelErrorKind::CannotRetrieveCellData(row, col) => { + write!(f, "cannot retrieve cell data at ({row}, {col})") + } + FastExcelErrorKind::CalamineCellError(calamine_error) => { + write!(f, "calamine cell error: {calamine_error}") + } + FastExcelErrorKind::CalamineError(calamine_error) => { + write!(f, "calamine error: {calamine_error}") + } + FastExcelErrorKind::SheetNotFound(idx_or_name) => { + let message = { + match idx_or_name { + SheetIdxOrName::Idx(idx) => format!("at index {idx}"), + SheetIdxOrName::Name(name) => format!("with name \"{name}\""), + } + }; + write!(f, "sheet {message} not found") + } + FastExcelErrorKind::ArrowError(err) => write!(f, "arrow error: {err}"), + FastExcelErrorKind::InvalidParameters(err) => write!(f, "invalid parameters: {err}"), + } + 
} +} + +#[derive(Debug)] +pub(crate) struct FastExcelError { + kind: FastExcelErrorKind, + context: Vec<String>, +} + +pub(crate) trait ErrorContext { + fn with_context<S: ToString, F>(self, ctx_fn: F) -> Self + where + F: FnOnce() -> S; +} + +impl FastExcelError { + pub(crate) fn new(kind: FastExcelErrorKind) -> Self { + Self { + kind, + context: vec![], + } + } +} + +impl Display for FastExcelError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{kind}", kind = self.kind)?; + if !self.context.is_empty() { + writeln!(f, "\nContext:")?; + + self.context + .iter() + .enumerate() + .try_for_each(|(idx, ctx_value)| writeln!(f, " {idx}: {ctx_value}"))?; + } + Ok(()) + } +} + +impl Error for FastExcelError {} + +impl ErrorContext for FastExcelError { + fn with_context<S: ToString, F>(mut self, ctx_fn: F) -> Self + where + F: FnOnce() -> S, + { + self.context.push(ctx_fn().to_string()); + self + } +} + +impl From<FastExcelErrorKind> for FastExcelError { + fn from(kind: FastExcelErrorKind) -> Self { + FastExcelError::new(kind) + } +} + +pub(crate) type FastExcelResult<T> = Result<T, FastExcelError>; + +impl<T> ErrorContext for FastExcelResult<T> { + fn with_context<S: ToString, F>(self, ctx_fn: F) -> Self + where + F: FnOnce() -> S, + { + match self { + Ok(_) => self, + Err(e) => Err(e.with_context(ctx_fn)), + } + } +} + +/// Contains Python versions of our custom errors +pub(crate) mod py_errors { + use super::FastExcelErrorKind; + use pyo3::{create_exception, exceptions::PyException, PyResult}; + + // Base fastexcel error + create_exception!( + _fastexcel, + FastExcelError, + PyException, + "The base class for all fastexcel errors" + ); + // Unsupported column type + create_exception!( + _fastexcel, + UnsupportedColumnTypeCombinationError, + FastExcelError, + "Column contains an unsupported type combination" + ); + // Cannot retrieve cell data + create_exception!( + _fastexcel, + CannotRetrieveCellDataError, + FastExcelError, + "Data for a given cell cannot be retrieved" + ); + // Calamine cell error + create_exception!( + _fastexcel, 
+ CalamineCellError, + FastExcelError, + "calamine returned an error regarding the content of the cell" + ); + // Calamine error + create_exception!( + _fastexcel, + CalamineError, + FastExcelError, + "Generic calamine error" + ); + // Sheet not found + create_exception!( + _fastexcel, + SheetNotFoundError, + FastExcelError, + "Sheet was not found" + ); + // Arrow error + create_exception!( + _fastexcel, + ArrowError, + FastExcelError, + "Generic arrow error" + ); + // Invalid parameters + create_exception!( + _fastexcel, + InvalidParametersError, + FastExcelError, + "Provided parameters are invalid" + ); + + pub(crate) trait IntoPyResult { + type Inner; + + fn into_pyresult(self) -> PyResult<Self::Inner>; + } + + impl<T> IntoPyResult for super::FastExcelResult<T> { + type Inner = T; + + fn into_pyresult(self) -> PyResult<Self::Inner> { + match self { + Ok(ok) => Ok(ok), + Err(err) => { + let message = err.to_string(); + Err(match err.kind { + FastExcelErrorKind::UnsupportedColumnTypeCombination(_) => { + UnsupportedColumnTypeCombinationError::new_err(message) + } + FastExcelErrorKind::CannotRetrieveCellData(_, _) => { + CannotRetrieveCellDataError::new_err(message) + } + FastExcelErrorKind::CalamineCellError(_) => { + CalamineCellError::new_err(message) + } + FastExcelErrorKind::CalamineError(_) => CalamineError::new_err(message), + FastExcelErrorKind::SheetNotFound(_) => { + SheetNotFoundError::new_err(message) + } + FastExcelErrorKind::ArrowError(_) => ArrowError::new_err(message), + FastExcelErrorKind::InvalidParameters(_) => { + InvalidParametersError::new_err(message) + } + }) + } + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 1f5df1e..7740754 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,14 +1,19 @@ +mod error; mod types; mod utils; -use anyhow::Result; +use error::{py_errors, ErrorContext}; use pyo3::prelude::*; use types::{ExcelReader, ExcelSheet}; /// Reads an excel file and returns an object allowing to access its sheets and a bit of metadata #[pyfunction] -fn 
read_excel(path: &str) -> Result { - Ok(ExcelReader::try_from_path(path).unwrap()) +fn read_excel(path: &str) -> PyResult { + use py_errors::IntoPyResult; + + ExcelReader::try_from_path(path) + .with_context(|| format!("could not load excel file at {path}")) + .into_pyresult() } // Taken from pydantic-core: @@ -24,10 +29,38 @@ fn get_version() -> String { } #[pymodule] -fn _fastexcel(_py: Python, m: &PyModule) -> PyResult<()> { +fn _fastexcel(py: Python, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(read_excel, m)?)?; m.add_class::()?; m.add_class::()?; m.add("__version__", get_version())?; - Ok(()) + + // errors + [ + ("FastExcelError", py.get_type::()), + ( + "UnsupportedColumnTypeCombinationError", + py.get_type::(), + ), + ( + "CannotRetrieveCellDataError", + py.get_type::(), + ), + ( + "CalamineCellError", + py.get_type::(), + ), + ("CalamineError", py.get_type::()), + ( + "SheetNotFoundError", + py.get_type::(), + ), + ("ArrowError", py.get_type::()), + ( + "InvalidParametersError", + py.get_type::(), + ), + ] + .into_iter() + .try_for_each(|(exc_name, exc_type)| m.add(exc_name, exc_type)) } diff --git a/src/types/excelreader.rs b/src/types/excelreader.rs index 0c9aadd..6424db1 100644 --- a/src/types/excelreader.rs +++ b/src/types/excelreader.rs @@ -1,8 +1,11 @@ use std::{fs::File, io::BufReader}; -use anyhow::{Context, Result}; use calamine::{open_workbook_auto, Reader, Sheets}; -use pyo3::{pyclass, pymethods}; +use pyo3::{pyclass, pymethods, PyResult}; + +use crate::error::{ + py_errors::IntoPyResult, ErrorContext, FastExcelErrorKind, FastExcelResult, SheetIdxOrName, +}; use super::{ excelsheet::{Header, Pagination}, @@ -20,8 +23,9 @@ pub(crate) struct ExcelReader { impl ExcelReader { // NOTE: Not implementing TryFrom here, because we're aren't building the file from the passed // string, but rather from the file pointed by it. 
Semantically, try_from_path is clearer - pub(crate) fn try_from_path(path: &str) -> Result { + pub(crate) fn try_from_path(path: &str) -> FastExcelResult { let sheets = open_workbook_auto(path) + .map_err(|err| FastExcelErrorKind::CalamineError(err).into()) .with_context(|| format!("Could not open workbook at {path}"))?; let sheet_names = sheets.sheet_names().to_owned(); Ok(Self { @@ -55,14 +59,16 @@ impl ExcelReader { skip_rows: usize, n_rows: Option, schema_sample_rows: Option, - ) -> Result { + ) -> PyResult { let range = self .sheets .worksheet_range(&name) - .with_context(|| format!("Error while loading sheet {name}"))?; + .map_err(|err| FastExcelErrorKind::CalamineError(err).into()) + .with_context(|| format!("Error while loading sheet {name}")) + .into_pyresult()?; let header = Header::new(header_row, column_names); - let pagination = Pagination::new(skip_rows, n_rows, &range)?; + let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?; Ok(ExcelSheet::new( name, range, @@ -89,25 +95,33 @@ impl ExcelReader { skip_rows: usize, n_rows: Option, schema_sample_rows: Option, - ) -> Result { + ) -> PyResult { let name = self .sheet_names .get(idx) + .ok_or_else(|| FastExcelErrorKind::SheetNotFound(SheetIdxOrName::Idx(idx)).into()) .with_context(|| { format!( "Sheet index {idx} is out of range. File has {} sheets", self.sheet_names.len() ) - })? + }) + .into_pyresult()? .to_owned(); + let range = self .sheets .worksheet_range_at(idx) - .with_context(|| format!("Sheet at idx {idx} not found"))? - .with_context(|| format!("Error while loading sheet at idx {idx}"))?; + // Returns Option, Self::Error>>, so we convert the Option into a + // SheetNotFoundError and unwrap it + .ok_or_else(|| FastExcelErrorKind::SheetNotFound(SheetIdxOrName::Idx(idx)).into()) + .into_pyresult()? 
+ // And here, we convert the calamine error in an owned error and unwrap it + .map_err(|err| FastExcelErrorKind::CalamineError(err).into()) + .into_pyresult()?; let header = Header::new(header_row, column_names); - let pagination = Pagination::new(skip_rows, n_rows, &range)?; + let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?; Ok(ExcelSheet::new( name, range, diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs index 38fcc5d..d950eec 100644 --- a/src/types/excelsheet.rs +++ b/src/types/excelsheet.rs @@ -1,6 +1,9 @@ use std::sync::Arc; -use anyhow::{anyhow, bail, Context, Result}; +use crate::error::{ + py_errors::IntoPyResult, ErrorContext, FastExcelError, FastExcelErrorKind, FastExcelResult, +}; + use arrow::{ array::{ Array, BooleanArray, Date32Array, DurationMillisecondArray, Float64Array, Int64Array, @@ -13,7 +16,10 @@ use arrow::{ use calamine::{Data as CalData, DataType, Range}; use chrono::NaiveDate; -use pyo3::prelude::{pyclass, pymethods, PyObject, Python}; +use pyo3::{ + prelude::{pyclass, pymethods, PyObject, Python}, + PyResult, +}; use crate::utils::arrow::arrow_schema_from_column_names_and_range; @@ -53,12 +59,16 @@ impl Pagination { skip_rows: usize, n_rows: Option, range: &Range, - ) -> Result { + ) -> FastExcelResult { let max_height = range.height(); if max_height < skip_rows { - bail!("To many rows skipped. Max height is {max_height}"); + Err(FastExcelErrorKind::InvalidParameters(format!( + "Too many rows skipped. Max height is {max_height}" + )) + .into()) + } else { + Ok(Self { skip_rows, n_rows }) } - Ok(Self { skip_rows, n_rows }) } pub(crate) fn offset(&self) -> usize { @@ -244,7 +254,7 @@ fn create_duration_array( } impl TryFrom<&ExcelSheet> for Schema { - type Error = anyhow::Error; + type Error = FastExcelError; fn try_from(sheet: &ExcelSheet) -> Result { // Checking how many rows we want to use to determine the dtype for a column. 
If sample_rows is @@ -262,9 +272,9 @@ impl TryFrom<&ExcelSheet> for Schema { } impl TryFrom<&ExcelSheet> for RecordBatch { - type Error = anyhow::Error; + type Error = FastExcelError; - fn try_from(sheet: &ExcelSheet) -> Result { + fn try_from(sheet: &ExcelSheet) -> FastExcelResult { let offset = sheet.offset(); let limit = sheet.limit(); let schema = Schema::try_from(sheet) @@ -309,6 +319,7 @@ impl TryFrom<&ExcelSheet> for RecordBatch { Ok(RecordBatch::new_empty(Arc::new(schema))) } else { RecordBatch::try_from_iter(iter) + .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into()) .with_context(|| format!("Could not convert sheet {} to RecordBatch", sheet.name)) } } @@ -348,16 +359,20 @@ impl ExcelSheet { self.header.offset() + self.pagination.offset() } - pub fn to_arrow(&self, py: Python<'_>) -> Result { + pub fn to_arrow(&self, py: Python<'_>) -> PyResult { RecordBatch::try_from(self) .with_context(|| format!("Could not create RecordBatch from sheet {}", self.name)) - .and_then(|rb| match rb.to_pyarrow(py) { - Err(e) => Err(anyhow!( - "Could not convert RecordBatch to pyarrow for sheet {}: {e}", + .and_then(|rb| { + rb.to_pyarrow(py) + .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into()) + }) + .with_context(|| { + format!( + "Could not convert RecordBatch to pyarrow for sheet {}", self.name - )), - Ok(pyobj) => Ok(pyobj), + ) }) + .into_pyresult() } pub fn __repr__(&self) -> String { diff --git a/src/utils/arrow.rs b/src/utils/arrow.rs index 807d7d0..33501d9 100644 --- a/src/utils/arrow.rs +++ b/src/utils/arrow.rs @@ -1,19 +1,21 @@ use std::{collections::HashSet, sync::OnceLock}; -use anyhow::{anyhow, Context, Result}; use arrow::datatypes::{DataType as ArrowDataType, Field, Schema, TimeUnit}; use calamine::{CellErrorType, Data as CalData, DataType, Range}; +use crate::error::{FastExcelErrorKind, FastExcelResult}; + /// All the possible string values that should be considered as NULL const NULL_STRING_VALUES: [&str; 19] = [ "", 
"#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND", "1.#QNAN", "", "N/A", "NA", "NULL", "NaN", "None", "n/a", "nan", "null", ]; -fn get_cell_type(data: &Range, row: usize, col: usize) -> Result { +fn get_cell_type(data: &Range, row: usize, col: usize) -> FastExcelResult { let cell = data .get((row, col)) - .with_context(|| format!("Could not retrieve data at ({row},{col})"))?; + .ok_or_else(|| FastExcelErrorKind::CannotRetrieveCellData(row, col))?; + match cell { CalData::Int(_) => Ok(ArrowDataType::Int64), CalData::Float(_) => Ok(ArrowDataType::Float64), @@ -42,7 +44,7 @@ fn get_cell_type(data: &Range, row: usize, col: usize) -> Result match err { CellErrorType::NA => Ok(ArrowDataType::Null), - _ => Err(anyhow!("Error in calamine cell: {err:?}")), + _ => Err(FastExcelErrorKind::CalamineCellError(err.to_owned()).into()), }, CalData::Empty => Ok(ArrowDataType::Null), } @@ -81,10 +83,10 @@ fn get_arrow_column_type( start_row: usize, end_row: usize, col: usize, -) -> Result { +) -> FastExcelResult { let mut column_types = (start_row..end_row) .map(|row| get_cell_type(data, row, col)) - .collect::>>()?; + .collect::>>()?; // All columns are nullable anyway so we're not taking Null into account here column_types.remove(&ArrowDataType::Null); @@ -106,9 +108,10 @@ fn get_arrow_column_type( Ok(ArrowDataType::Utf8) } else { // NOTE: Not being too smart about multi-types columns for now - Err(anyhow!( - "could not figure out column type for following type combination: {column_types:?}" - )) + Err( + FastExcelErrorKind::UnsupportedColumnTypeCombination(format!("{column_types:?}")) + .into(), + ) } } @@ -133,7 +136,7 @@ pub(crate) fn arrow_schema_from_column_names_and_range( column_names: &[String], row_idx: usize, row_limit: usize, -) -> Result { +) -> FastExcelResult { let mut fields = Vec::with_capacity(column_names.len()); for (col_idx, name) in column_names.iter().enumerate() {