diff --git a/python/tests/test_column_selection.py b/python/tests/test_column_selection.py index cf82de5..a8d6ea0 100644 --- a/python/tests/test_column_selection.py +++ b/python/tests/test_column_selection.py @@ -418,19 +418,23 @@ def test_use_columns_with_callable() -> None: excel_reader = fastexcel.read_excel(path_for_fixture("fixture-multi-sheet.xlsx")) sheet = excel_reader.load_sheet(2) - assert [(c.name, c.dtype) for c in sheet.available_columns] == [ - ("col1", "float"), - ("__UNNAMED__1", "float"), - ("col3", "string"), - ("__UNNAMED__3", "float"), - ("col5", "string"), - ] + assert ( + [(c.name, c.dtype) for c in sheet.available_columns] + == [(c.name, c.dtype) for c in sheet.selected_columns] + == [ + ("col1", "float"), + ("__UNNAMED__1", "float"), + ("col3", "string"), + ("__UNNAMED__3", "float"), + ("col5", "string"), + ] + ) sheet = excel_reader.load_sheet( 2, use_columns=lambda col: col.name.startswith("col"), ) - assert [(c.name, c.dtype) for c in sheet.available_columns] == [ + assert [(c.name, c.dtype) for c in sheet.selected_columns] == [ ("col1", "float"), ("col3", "string"), ("col5", "string"), @@ -440,7 +444,7 @@ def test_use_columns_with_callable() -> None: 2, use_columns=lambda col: col.index % 2 == 1, ) - assert [(c.name, c.dtype) for c in sheet.available_columns] == [ + assert [(c.name, c.dtype) for c in sheet.selected_columns] == [ ("__UNNAMED__1", "float"), ("__UNNAMED__3", "float"), ] @@ -449,7 +453,29 @@ def test_use_columns_with_callable() -> None: 2, use_columns=lambda col: col.dtype == "string", ) - assert [(c.name, c.dtype) for c in sheet.available_columns] == [ + assert [(c.name, c.dtype) for c in sheet.selected_columns] == [ ("col3", "string"), ("col5", "string"), ] + + +def test_use_columns_with_bad_callable() -> None: + excel_reader = fastexcel.read_excel(path_for_fixture("fixture-multi-sheet.xlsx")) + with pytest.raises( + fastexcel.InvalidParametersError, + match=re.escape( + "`use_columns` callable could not be called 
(TypeError: test_use_columns_with_bad_callable.<locals>.<lambda>() takes 0 positional arguments but 1 was given)", + ), + ): + excel_reader.load_sheet( + 2, + use_columns=lambda: True, # type: ignore + ) + + with pytest.raises( + fastexcel.InvalidParametersError, match="`use_columns` callable should return a boolean" + ): + excel_reader.load_sheet( + 2, + use_columns=lambda _: 42, # type: ignore + ) diff --git a/src/types/python/excelsheet/mod.rs b/src/types/python/excelsheet/mod.rs index 4792891..a1c9a61 100644 --- a/src/types/python/excelsheet/mod.rs +++ b/src/types/python/excelsheet/mod.rs @@ -104,27 +104,53 @@ impl TryFrom<&PyList> for SelectedColumns { } } -#[derive(Debug, PartialEq)] pub(crate) enum SelectedColumns { All, Selection(Vec<IdxOrName>), + DynamicSelection(PyObject), +} + +impl std::fmt::Debug for SelectedColumns { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::All => write!(f, "All"), + Self::Selection(selection) => write!(f, "Selection({selection:?})"), + Self::DynamicSelection(func) => { + let addr = func as *const _ as usize; + write!(f, "DynamicSelection({addr})") + } + } + } +} + +impl PartialEq for SelectedColumns { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::All, Self::All) => true, + (Self::Selection(selection), Self::Selection(other_selection)) => { + selection == other_selection + } + (Self::DynamicSelection(f1), Self::DynamicSelection(f2)) => std::ptr::eq(f1, f2), + _ => false, + } + } } impl SelectedColumns { pub(super) fn select_columns( &self, - column_info: &[ColumnInfo], + available_columns: &[ColumnInfo], ) -> FastExcelResult<Vec<ColumnInfo>> { match self { - SelectedColumns::All => Ok(column_info.to_vec()), + SelectedColumns::All => Ok(available_columns.to_vec()), SelectedColumns::Selection(selection) => selection .iter() .map(|selected_column| { match selected_column { - IdxOrName::Idx(index) => column_info + IdxOrName::Idx(index) => available_columns .iter() .find(|col_info| &col_info.index() == 
index), - IdxOrName::Name(name) => column_info + IdxOrName::Name(name) => available_columns .iter() .find(|col_info| col_info.name() == name.as_str()), } @@ -132,9 +158,28 @@ impl SelectedColumns { FastExcelErrorKind::ColumnNotFound(selected_column.clone()).into() }) .cloned() - .with_context(|| format!("available columns are: {column_info:?}")) + .with_context(|| format!("available columns are: {available_columns:?}")) }) .collect(), + SelectedColumns::DynamicSelection(use_col_func) => Python::with_gil(|py| { + Ok(available_columns + .iter() + .filter_map( + |col_info| match use_col_func.call1(py, (col_info.clone(),)) { + Err(err) => Some(Err(FastExcelErrorKind::InvalidParameters(format!( + "`use_columns` callable could not be called ({err})" + )))), + Ok(should_use_col) => match should_use_col.extract::<bool>(py) { + Err(_) => Some(Err(FastExcelErrorKind::InvalidParameters( + "`use_columns` callable should return a boolean".to_string(), + ))), + Ok(true) => Some(Ok(col_info.clone())), + Ok(false) => None, + }, + }, + ) + .collect::<Result<Vec<_>, _>>()?) + }), } } const ALPHABET: [char; 26] = [ @@ -261,6 +306,8 @@ impl TryFrom<Option<&PyAny>> for SelectedColumns { .parse() } else if let Ok(py_list) = py_any.downcast::<PyList>() { py_list.try_into() + } else if let Ok(py_function) = py_any.extract::<PyObject>() { + Ok(Self::DynamicSelection(py_function)) } else { Err(FastExcelErrorKind::InvalidParameters(format!( "unsupported object type {object_type}",