From 246f33a4db3e8ef018cd1cb4b71690294478e828 Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Tue, 27 Feb 2024 15:05:23 +0100 Subject: [PATCH 1/6] feat: support column ranges in string format in use_columns Signed-off-by: Luka Peschke --- python/fastexcel/__init__.py | 18 ++- python/fastexcel/_fastexcel.pyi | 4 +- python/tests/test_column_selection.py | 26 +++- src/types/excelreader.rs | 11 +- src/types/excelsheet.rs | 211 +++++++++++++++++++++++--- 5 files changed, 233 insertions(+), 37 deletions(-) diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py index b57f8f4..b043245 100644 --- a/python/fastexcel/__init__.py +++ b/python/fastexcel/__init__.py @@ -112,7 +112,7 @@ def load_sheet_by_name( skip_rows: int = 0, n_rows: int | None = None, schema_sample_rows: int | None = 1_000, - use_columns: list[str] | list[int] | None = None, + use_columns: list[str] | list[int] | str | None = None, ) -> ExcelSheet: """Loads a sheet by name. @@ -129,8 +129,10 @@ def load_sheet_by_name( :param schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. If `None`, all rows will be used. - :param use_columns: Specifies the columns to use. Can either be a list of column names, or - a list of column indices (starting at 0). + :param use_columns: Specifies the columns to use. Can either be a list of column names, a + a list of column indices (starting at 0), or a list of columns and + ranges as column names indices in excel form. For example, `"A,C:E,F"` + will select columns `A,C,D,F`. If `None`, all columns will be used. """ return ExcelSheet( @@ -154,7 +156,7 @@ def load_sheet_by_idx( skip_rows: int = 0, n_rows: int | None = None, schema_sample_rows: int | None = 1_000, - use_columns: list[str] | list[int] | None = None, + use_columns: list[str] | list[int] | str | None = None, ) -> ExcelSheet: """Loads a sheet by index. @@ -171,8 +173,10 @@ def load_sheet_by_idx( :param schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. If `None`, all rows will be used. - :param use_columns: Specifies the columns to use. Can either be a list of column names, or - a list of column indices (starting at 0). + :param use_columns: Specifies the columns to use. Can either be a list of column names, a + a list of column indices (starting at 0), or a list of columns and + ranges as column names indices in excel form. For example, `"A,C:E,F"` + will select columns `A,C,D,F`. If `None`, all columns will be used. """ if idx < 0: @@ -198,7 +202,7 @@ def load_sheet( skip_rows: int = 0, n_rows: int | None = None, schema_sample_rows: int | None = 1_000, - use_columns: list[str] | list[int] | None = None, + use_columns: list[str] | list[int] | str | None = None, ) -> ExcelSheet: """Loads a sheet by name if a string is passed or by index if an integer is passed. diff --git a/python/fastexcel/_fastexcel.pyi b/python/fastexcel/_fastexcel.pyi index b4e2c36..01fd90d 100644 --- a/python/fastexcel/_fastexcel.pyi +++ b/python/fastexcel/_fastexcel.pyi @@ -39,7 +39,7 @@ class _ExcelReader: skip_rows: int = 0, n_rows: int | None = None, schema_sample_rows: int | None = 1_000, - use_columns: list[str] | list[int] | None = None, + use_columns: list[str] | list[int] | str | None = None, ) -> _ExcelSheet: ... def load_sheet_by_idx( self, @@ -50,7 +50,7 @@ class _ExcelReader: skip_rows: int = 0, n_rows: int | None = None, schema_sample_rows: int | None = 1_000, - use_columns: list[str] | list[int] | None = None, + use_columns: list[str] | list[int] | str | None = None, ) -> _ExcelSheet: ... @property def sheet_names(self) -> list[str]: ... diff --git a/python/tests/test_column_selection.py b/python/tests/test_column_selection.py index 8fa363c..7f57bb0 100644 --- a/python/tests/test_column_selection.py +++ b/python/tests/test_column_selection.py @@ -222,12 +222,33 @@ def test_single_sheet_with_unnamed_columns_and_pagination_and_column_names( pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected_first_row_skipped)) +def test_single_sheet_with_unnamed_columns_and_str_range( + excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader, + single_sheet_with_unnamed_columns_expected: dict[str, list[Any]], +) -> None: + use_columns_str = "A,C:E" + use_columns_idx = [0, 2, 3] + expected = { + k: v + for k, v in single_sheet_with_unnamed_columns_expected.items() + if k in ["col1", "col3", "__UNNAMED__3"] + } + sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet( + "With unnamed columns", use_columns=use_columns_str + ) + assert sheet.selected_columns == use_columns_idx + assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"] + pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected)) + pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected)) + + def test_single_sheet_invalid_column_indices_negative_integer( excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader, ) -> None: expected_message = """invalid parameters: expected list[int] | list[str], got [-2] Context: - 0: expected selected columns to be list[str] | list[int] | None, got Some([-2]) + 0: could not determine selected columns from provided object: [-2] + 1: expected selected columns to be list[str] | list[int] | str | None, got Some([-2]) """ with pytest.raises(fastexcel.InvalidParametersError, match=re.escape(expected_message)): excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[-2]) @@ -238,7 +259,8 @@ def test_single_sheet_invalid_column_indices_empty_list( ) -> None: expected_message = """invalid parameters: list of selected columns is empty Context: - 0: expected selected columns to be list[str] | list[int] | None, got Some([]) + 0: could not determine selected columns from provided object: [] + 1: expected selected columns to be list[str] | list[int] | str | None, got Some([]) """ with pytest.raises(fastexcel.InvalidParametersError, match=re.escape(expected_message)): excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[]) diff --git a/src/types/excelreader.rs b/src/types/excelreader.rs index f198061..6a15ce5 100644 --- a/src/types/excelreader.rs +++ b/src/types/excelreader.rs @@ -1,7 +1,7 @@ use std::{fs::File, io::BufReader}; use calamine::{open_workbook_auto, Reader, Sheets}; -use pyo3::{pyclass, pymethods, types::PyList, PyResult}; +use pyo3::{pyclass, pymethods, PyAny, PyResult}; use crate::error::{ py_errors::IntoPyResult, ErrorContext, FastExcelErrorKind, FastExcelResult, IdxOrName, @@ -61,7 +61,8 @@ impl ExcelReader { skip_rows: usize, n_rows: Option, schema_sample_rows: Option, - use_columns: Option<&PyList>, + // pyo3 forces us to take an Option in case the default value is None + use_columns: Option<&PyAny>, ) -> PyResult { let range = self .sheets @@ -72,7 +73,7 @@ impl ExcelReader { let header = Header::new(header_row, column_names); let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?; - let selected_columns = use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | None, got {use_columns:?}")).into_pyresult()?; + let selected_columns = use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | str | None, got {use_columns:?}")).into_pyresult()?; ExcelSheet::try_new( name, range, @@ -103,7 +104,7 @@ impl ExcelReader { skip_rows: usize, n_rows: Option, schema_sample_rows: Option, - use_columns: Option<&PyList>, + use_columns: Option<&PyAny>, ) -> PyResult { let name = self .sheet_names @@ -131,7 +132,7 @@ impl ExcelReader { let header = Header::new(header_row, column_names); let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?; - let selected_columns = use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | None, got {use_columns:?}")).into_pyresult()?; + let selected_columns = use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | str | None, got {use_columns:?}")).into_pyresult()?; ExcelSheet::try_new( name, range, diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs index 5f42dff..cd0a0d6 100644 --- a/src/types/excelsheet.rs +++ b/src/types/excelsheet.rs @@ -1,4 +1,4 @@ -use std::sync::Arc; +use std::{cmp, collections::HashSet, str::FromStr, sync::Arc}; use crate::error::{ py_errors::IntoPyResult, ErrorContext, FastExcelError, FastExcelErrorKind, FastExcelResult, @@ -19,8 +19,8 @@ use chrono::NaiveDate; use pyo3::{ prelude::{pyclass, pymethods, PyObject, Python}, - types::PyList, - PyResult, + types::{PyList, PyString}, + PyAny, PyResult, }; use crate::utils::arrow::arrow_schema_from_column_names_and_range; @@ -149,28 +149,146 @@ impl SelectedColumns { } } -impl TryFrom> for SelectedColumns { +impl TryFrom<&PyList> for SelectedColumns { type Error = FastExcelError; - fn try_from(value: Option<&PyList>) -> FastExcelResult { + fn try_from(py_list: &PyList) -> FastExcelResult { use FastExcelErrorKind::InvalidParameters; - match value { + if py_list.is_empty() { + Err(InvalidParameters("list of selected columns is empty".to_string()).into()) + } else if let Ok(name_vec) = py_list.extract::>() { + Ok(Self::ByName(name_vec)) + } else if let Ok(index_vec) = py_list.extract::>() { + Ok(Self::ByIndex(index_vec)) + } else { + Err( + InvalidParameters(format!("expected list[int] | list[str], got {py_list:?}")) + .into(), + ) + } + } +} + +impl SelectedColumns { + const ALPHABET: [char; 26] = [ + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', + 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + ]; + + fn col_idx_for_col_as_letter(col: &str) -> FastExcelResult { + use FastExcelErrorKind::InvalidParameters; + + if col.len() != 1 { + return Err(InvalidParameters(format!( + "a column should be exactly one character, got {n_chars}", + n_chars = col.len() + )) + .into()); + } + + col.chars() + .next() + .ok_or_else(|| { + InvalidParameters(format!("could not get first char of column \"{col}\"")).into() + }) + .and_then(|col_name| { + Self::ALPHABET + .iter() + .position(|chr| chr == &col_name) + .ok_or_else(|| { + InvalidParameters(format!("Char is not a valid column name: {col_name}")) + .into() + }) + }) + } + + fn col_indices_for_letter_range(col_range: &str) -> FastExcelResult> { + use FastExcelErrorKind::InvalidParameters; + + let col_elements = col_range.split(':').collect::>(); + if col_elements.len() == 2 { + let start = Self::col_idx_for_col_as_letter(col_elements[0]) + .with_context(|| format!("invalid start element for range \"{col_range}\""))?; + let end = Self::col_idx_for_col_as_letter(col_elements[1]) + .with_context(|| format!("invalid end element for range \"{col_range}\""))?; + + match start.cmp(&end) { + cmp::Ordering::Less => Ok((start..end).collect()), + cmp::Ordering::Greater => Err(InvalidParameters(format!( + "end of range is before start: \"{col_range}\"" + )) + .into()), + cmp::Ordering::Equal => { + Err(InvalidParameters(format!("empty range: \"{col_range}\"")).into()) + } + } + } else { + Err(InvalidParameters(format!( + "expected range to contain exactly 2 elements, got {n_elements}: \"{col_range}\"", + n_elements = col_elements.len() + )) + .into()) + } + } +} + +// Does not support columns beyond 26 for now +impl FromStr for SelectedColumns { + type Err = FastExcelError; + + fn from_str(s: &str) -> FastExcelResult { + let unique_col_indices: HashSet = s + .to_uppercase() + .split(',') + .map(|col_or_range| { + if col_or_range.contains(':') { + Self::col_indices_for_letter_range(col_or_range) + } else { + Self::col_idx_for_col_as_letter(col_or_range).map(|idx| vec![idx]) + } + }) + .collect::>>()? + .into_iter() + .flatten() + .collect(); + let mut sorted_col_indices: Vec = unique_col_indices.into_iter().collect(); + sorted_col_indices.sort(); + Ok(Self::ByIndex(sorted_col_indices)) + } +} + +impl TryFrom> for SelectedColumns { + type Error = FastExcelError; + + fn try_from(py_any_opt: Option<&PyAny>) -> FastExcelResult { + match py_any_opt { None => Ok(Self::All), - Some(py_list) => { - if py_list.is_empty() { - Err(InvalidParameters("list of selected columns is empty".to_string()).into()) - } else if let Ok(name_vec) = py_list.extract::>() { - Ok(Self::ByName(name_vec)) - } else if let Ok(index_vec) = py_list.extract::>() { - Ok(Self::ByIndex(index_vec)) + Some(py_any) => { + // Not trying to downcast to PyNone here as we assume that this would result in + // py_any_opt being None + if let Ok(py_str) = py_any.downcast::() { + py_str + .to_str() + .map_err(|err| { + FastExcelErrorKind::InvalidParameters(format!( + "provided string is not valid unicode: {err}" + )) + })? + .parse() + } else if let Ok(py_list) = py_any.downcast::() { + py_list.try_into() } else { - Err(InvalidParameters(format!( - "expected list[int] | list[str], got {py_list:?}" + Err(FastExcelErrorKind::InvalidParameters(format!( + "unsupported object type {object_type}", + object_type = py_any.get_type() )) .into()) } } + .with_context(|| { + format!("could not determine selected columns from provided object: {py_any}") + }), } } } @@ -386,7 +504,7 @@ impl TryFrom<&ExcelSheet> for Schema { &sheet.available_columns, sheet.offset(), // If sample_rows is higher than the sheet's limit, use the limit instead - std::cmp::min(sample_rows, sheet.limit()), + cmp::min(sample_rows, sheet.limit()), &sheet.selected_columns, ) } @@ -536,6 +654,7 @@ impl ExcelSheet { mod tests { use super::*; use pretty_assertions::assert_eq; + use rstest::rstest; #[test] fn selected_columns_from_none() { @@ -548,7 +667,7 @@ mod tests { #[test] fn selected_columns_from_list_of_valid_ints() { Python::with_gil(|py| { - let py_list = PyList::new(py, vec![0, 1, 2]); + let py_list = PyList::new(py, vec![0, 1, 2]).as_ref(); assert_eq!( TryInto::::try_into(Some(py_list)).unwrap(), SelectedColumns::ByIndex(vec![0, 1, 2]) @@ -559,7 +678,7 @@ mod tests { #[test] fn selected_columns_from_list_of_valid_strings() { Python::with_gil(|py| { - let py_list = PyList::new(py, vec!["foo", "bar"]); + let py_list = PyList::new(py, vec!["foo", "bar"]).as_ref(); assert_eq!( TryInto::::try_into(Some(py_list)).unwrap(), SelectedColumns::ByName(vec!["foo".to_string(), "bar".to_string()]) @@ -570,7 +689,7 @@ mod tests { #[test] fn selected_columns_from_invalid_ints() { Python::with_gil(|py| { - let py_list = PyList::new(py, vec![0, 2, -1]); + let py_list = PyList::new(py, vec![0, 2, -1]).as_ref(); let err = TryInto::::try_into(Some(py_list)).unwrap_err(); assert!(matches!(err.kind, FastExcelErrorKind::InvalidParameters(_))); @@ -580,7 +699,7 @@ mod tests { #[test] fn selected_columns_from_empty_int_list() { Python::with_gil(|py| { - let py_list = PyList::new(py, Vec::::new()); + let py_list = PyList::new(py, Vec::::new()).as_ref(); let err = TryInto::::try_into(Some(py_list)).unwrap_err(); assert!(matches!(err.kind, FastExcelErrorKind::InvalidParameters(_))); @@ -590,10 +709,60 @@ mod tests { #[test] fn selected_columns_from_empty_string_list() { Python::with_gil(|py| { - let py_list = PyList::new(py, Vec::::new()); + let py_list = PyList::new(py, Vec::::new()).as_ref(); let err = TryInto::::try_into(Some(py_list)).unwrap_err(); assert!(matches!(err.kind, FastExcelErrorKind::InvalidParameters(_))); }); } + + #[rstest] + // Standard unique columns + #[case("A,B,D", vec![0, 1, 3])] + // Standard unique columns + range + #[case("A,B:E,Y", vec![0, 1, 2, 3, 24])] + // Standard unique column + ranges with mixed case + #[case("A:c,b:E,w,Y:z", vec![0, 1, 2, 3, 22, 24])] + fn selected_columns_from_valid_ranges(#[case] raw: &str, #[case] expected: Vec) { + Python::with_gil(|py| { + let expected_range = SelectedColumns::ByIndex(expected); + let input = PyString::new(py, raw).as_ref(); + + let range = TryInto::::try_into(Some(input)) + .expect("expected a valid column selection"); + + assert_eq!(range, expected_range) + }) + } + + #[rstest] + // Standard unique columns + #[case("", "exactly one character")] + // empty range + #[case("a:a,b:d,e", "empty range")] + // end before start + #[case("b:a", "end of range is before start")] + // no start + #[case(":a", "exactly one character, got 0")] + // no end + #[case("a:", "exactly one character, got 0")] + // too many elements + #[case("a:b:e", "exactly 2 elements, got 3")] + fn selected_columns_from_invalid_ranges(#[case] raw: &str, #[case] message: &str) { + Python::with_gil(|py| { + let input = PyString::new(py, raw).as_ref(); + + let err = + TryInto::::try_into(Some(input)).expect_err("expected an error"); + + match err.kind { + FastExcelErrorKind::InvalidParameters(detail) => { + if !detail.contains(message) { + panic!("expected \"{detail}\" to contain \"{message}\"") + } + } + _ => panic!("Expected error to be InvalidParameters, got {err:?}"), + } + }) + } } From 976f9cb7424a32509ee893bd64b883435676f4f0 Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Tue, 27 Feb 2024 16:34:34 +0100 Subject: [PATCH 2/6] support ranges beyond Z Signed-off-by: Luka Peschke --- src/types/excelsheet.rs | 55 +++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs index cd0a0d6..497d143 100644 --- a/src/types/excelsheet.rs +++ b/src/types/excelsheet.rs @@ -179,27 +179,43 @@ impl SelectedColumns { fn col_idx_for_col_as_letter(col: &str) -> FastExcelResult { use FastExcelErrorKind::InvalidParameters; - if col.len() != 1 { - return Err(InvalidParameters(format!( - "a column should be exactly one character, got {n_chars}", - n_chars = col.len() - )) + if col.is_empty() { + return Err(InvalidParameters( + "a column should have at least one character, got none".to_string(), + ) .into()); } col.chars() - .next() - .ok_or_else(|| { - InvalidParameters(format!("could not get first char of column \"{col}\"")).into() - }) - .and_then(|col_name| { - Self::ALPHABET + // iterating over all chars reversed, to have a power based on their rank + .rev() + .enumerate() + // Parses every char, checks its position and returns its numeric equivalent based on + // its rank. For example, AB becomes 27 (26 + 1) + .map(|(idx, col_chr)| { + let pos_in_alphabet = Self::ALPHABET .iter() - .position(|chr| chr == &col_name) + .position(|chr| chr == &col_chr) .ok_or_else(|| { - InvalidParameters(format!("Char is not a valid column name: {col_name}")) - .into() - }) + FastExcelError::from(InvalidParameters(format!( + "Char is not a valid column name: {col_chr}" + ))) + })?; + + Ok(match idx { + // in case it's the last char, just return its position + 0 => pos_in_alphabet, + // otherwise, 26^idx * (position + 1) + // For example, CBA is 2081: + // A -> 0 + // B -> 26 (53^1 * (1 + 1)) + // C -> 2028 (26^2 * (2 + 1)) + _ => 26usize.pow(idx as u32) * (pos_in_alphabet + 1), + }) + }) + // Sums all previously obtained ranks + .try_fold(0usize, |acc, elem_result| { + elem_result.map(|elem| acc + elem) }) } @@ -723,6 +739,9 @@ mod tests { #[case("A,B:E,Y", vec![0, 1, 2, 3, 24])] // Standard unique column + ranges with mixed case #[case("A:c,b:E,w,Y:z", vec![0, 1, 2, 3, 22, 24])] + // Ranges beyond Z + #[case("A,y:AB", vec![0, 24, 25, 26])] + #[case("BB:BE,DDC:DDF", vec![53, 54, 55, 2810, 2811, 2812])] fn selected_columns_from_valid_ranges(#[case] raw: &str, #[case] expected: Vec) { Python::with_gil(|py| { let expected_range = SelectedColumns::ByIndex(expected); @@ -737,15 +756,15 @@ mod tests { #[rstest] // Standard unique columns - #[case("", "exactly one character")] + #[case("", "at least one character")] // empty range #[case("a:a,b:d,e", "empty range")] // end before start #[case("b:a", "end of range is before start")] // no start - #[case(":a", "exactly one character, got 0")] + #[case(":a", "at least one character, got none")] // no end - #[case("a:", "exactly one character, got 0")] + #[case("a:", "at least one character, got none")] // too many elements #[case("a:b:e", "exactly 2 elements, got 3")] fn selected_columns_from_invalid_ranges(#[case] raw: &str, #[case] message: &str) { From 47b815dff58a149bd635de5bb66251ec3ca4c867 Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Tue, 27 Feb 2024 17:47:04 +0100 Subject: [PATCH 3/6] adapt docstrings Signed-off-by: Luka Peschke --- python/fastexcel/__init__.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py index b043245..4c519a4 100644 --- a/python/fastexcel/__init__.py +++ b/python/fastexcel/__init__.py @@ -129,11 +129,13 @@ def load_sheet_by_name( :param schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. If `None`, all rows will be used. - :param use_columns: Specifies the columns to use. Can either be a list of column names, a - a list of column indices (starting at 0), or a list of columns and - ranges as column names indices in excel form. For example, `"A,C:E,F"` - will select columns `A,C,D,F`. - If `None`, all columns will be used. + :param use_columns: Specifies the columns to use. Can either be: + - `None` to select all columns + - a list of strings, the column names + - a list of ints, the column indices (starting at 0) + - a string, a comma separated list of Excel column letters and column + ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in + `A,B,C,D,E` and `A,C,E,F`) """ return ExcelSheet( self._reader.load_sheet_by_name( @@ -173,11 +175,13 @@ def load_sheet_by_idx( :param schema_sample_rows: Specifies how many rows should be used to determine the dtype of a column. If `None`, all rows will be used. - :param use_columns: Specifies the columns to use. Can either be a list of column names, a - a list of column indices (starting at 0), or a list of columns and - ranges as column names indices in excel form. For example, `"A,C:E,F"` - will select columns `A,C,D,F`. - If `None`, all columns will be used. + :param use_columns: Specifies the columns to use. Can either be: + - `None` to select all columns + - a list of strings, the column names + - a list of ints, the column indices (starting at 0) + - a string, a comma separated list of Excel column letters and column + ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in + `A,B,C,D,E` and `A,C,E,F`) """ if idx < 0: raise ValueError(f"Expected idx to be > 0, got {idx}") From bb18d942e67419893cda1c5840b0d801017b3896 Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Tue, 27 Feb 2024 17:47:57 +0100 Subject: [PATCH 4/6] remove outdated comment Signed-off-by: Luka Peschke --- src/types/excelsheet.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs index 497d143..008b0f9 100644 --- a/src/types/excelsheet.rs +++ b/src/types/excelsheet.rs @@ -249,7 +249,6 @@ impl SelectedColumns { } } -// Does not support columns beyond 26 for now impl FromStr for SelectedColumns { type Err = FastExcelError; From 248acdd799d0e8ecbaf039307ed639167e6243ba Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Tue, 27 Feb 2024 17:50:08 +0100 Subject: [PATCH 5/6] refactor: make end of range inclusive Signed-off-by: Luka Peschke --- src/types/excelsheet.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs index 008b0f9..2596dd1 100644 --- a/src/types/excelsheet.rs +++ b/src/types/excelsheet.rs @@ -230,7 +230,7 @@ impl SelectedColumns { .with_context(|| format!("invalid end element for range \"{col_range}\""))?; match start.cmp(&end) { - cmp::Ordering::Less => Ok((start..end).collect()), + cmp::Ordering::Less => Ok((start..=end).collect()), cmp::Ordering::Greater => Err(InvalidParameters(format!( "end of range is before start: \"{col_range}\"" )) @@ -735,12 +735,12 @@ mod tests { // Standard unique columns #[case("A,B,D", vec![0, 1, 3])] // Standard unique columns + range - #[case("A,B:E,Y", vec![0, 1, 2, 3, 24])] + #[case("A,B:E,Y", vec![0, 1, 2, 3, 4, 24])] // Standard unique column + ranges with mixed case - #[case("A:c,b:E,w,Y:z", vec![0, 1, 2, 3, 22, 24])] + #[case("A:c,b:E,w,Y:z", vec![0, 1, 2, 3, 4, 22, 24, 25])] // Ranges beyond Z - #[case("A,y:AB", vec![0, 24, 25, 26])] - #[case("BB:BE,DDC:DDF", vec![53, 54, 55, 2810, 2811, 2812])] + #[case("A,y:AB", vec![0, 24, 25, 26, 27])] + #[case("BB:BE,DDC:DDF", vec![53, 54, 55, 56, 2810, 2811, 2812, 2813])] fn selected_columns_from_valid_ranges(#[case] raw: &str, #[case] expected: Vec) { Python::with_gil(|py| { let expected_range = SelectedColumns::ByIndex(expected); From 8dba9fd5de4a1c2fb8b2d2863fe8217f64ede88a Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Tue, 27 Feb 2024 17:56:01 +0100 Subject: [PATCH 6/6] fix python test Signed-off-by: Luka Peschke --- python/tests/test_column_selection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tests/test_column_selection.py b/python/tests/test_column_selection.py index 7f57bb0..a4cade3 100644 --- a/python/tests/test_column_selection.py +++ b/python/tests/test_column_selection.py @@ -227,11 +227,11 @@ def test_single_sheet_with_unnamed_columns_and_str_range( single_sheet_with_unnamed_columns_expected: dict[str, list[Any]], ) -> None: use_columns_str = "A,C:E" - use_columns_idx = [0, 2, 3] + use_columns_idx = [0, 2, 3, 4] expected = { k: v for k, v in single_sheet_with_unnamed_columns_expected.items() - if k in ["col1", "col3", "__UNNAMED__3"] + if k in ["col1", "col3", "__UNNAMED__3", "col5"] } sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet( "With unnamed columns", use_columns=use_columns_str