From 246f33a4db3e8ef018cd1cb4b71690294478e828 Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Tue, 27 Feb 2024 15:05:23 +0100
Subject: [PATCH 1/6] feat: support column ranges in string format in
 use_columns

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 python/fastexcel/__init__.py          |  18 ++-
 python/fastexcel/_fastexcel.pyi       |   4 +-
 python/tests/test_column_selection.py |  26 +++-
 src/types/excelreader.rs              |  11 +-
 src/types/excelsheet.rs               | 211 +++++++++++++++++++++++---
 5 files changed, 233 insertions(+), 37 deletions(-)

diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py
index b57f8f4..b043245 100644
--- a/python/fastexcel/__init__.py
+++ b/python/fastexcel/__init__.py
@@ -112,7 +112,7 @@ def load_sheet_by_name(
         skip_rows: int = 0,
         n_rows: int | None = None,
         schema_sample_rows: int | None = 1_000,
-        use_columns: list[str] | list[int] | None = None,
+        use_columns: list[str] | list[int] | str | None = None,
     ) -> ExcelSheet:
         """Loads a sheet by name.
 
@@ -129,8 +129,10 @@ def load_sheet_by_name(
         :param schema_sample_rows: Specifies how many rows should be used to determine
                                    the dtype of a column.
                                    If `None`, all rows will be used.
-        :param use_columns: Specifies the columns to use. Can either be a list of column names, or
-                            a list of column indices (starting at 0).
+        :param use_columns: Specifies the columns to use. Can either be a list of column names, a
+                            a list of column indices (starting at 0), or a list of columns and
+                            ranges as column names indices in excel form. For example, `"A,C:E,F"`
+                            will select columns `A,C,D,F`.
                             If `None`, all columns will be used.
         """
         return ExcelSheet(
@@ -154,7 +156,7 @@ def load_sheet_by_idx(
         skip_rows: int = 0,
         n_rows: int | None = None,
         schema_sample_rows: int | None = 1_000,
-        use_columns: list[str] | list[int] | None = None,
+        use_columns: list[str] | list[int] | str | None = None,
     ) -> ExcelSheet:
         """Loads a sheet by index.
 
@@ -171,8 +173,10 @@ def load_sheet_by_idx(
         :param schema_sample_rows: Specifies how many rows should be used to determine
                                    the dtype of a column.
                                    If `None`, all rows will be used.
-        :param use_columns: Specifies the columns to use. Can either be a list of column names, or
-                            a list of column indices (starting at 0).
+        :param use_columns: Specifies the columns to use. Can either be a list of column names, a
+                            a list of column indices (starting at 0), or a list of columns and
+                            ranges as column names indices in excel form. For example, `"A,C:E,F"`
+                            will select columns `A,C,D,F`.
                             If `None`, all columns will be used.
         """
         if idx < 0:
@@ -198,7 +202,7 @@ def load_sheet(
         skip_rows: int = 0,
         n_rows: int | None = None,
         schema_sample_rows: int | None = 1_000,
-        use_columns: list[str] | list[int] | None = None,
+        use_columns: list[str] | list[int] | str | None = None,
     ) -> ExcelSheet:
         """Loads a sheet by name if a string is passed or by index if an integer is passed.
 
diff --git a/python/fastexcel/_fastexcel.pyi b/python/fastexcel/_fastexcel.pyi
index b4e2c36..01fd90d 100644
--- a/python/fastexcel/_fastexcel.pyi
+++ b/python/fastexcel/_fastexcel.pyi
@@ -39,7 +39,7 @@ class _ExcelReader:
         skip_rows: int = 0,
         n_rows: int | None = None,
         schema_sample_rows: int | None = 1_000,
-        use_columns: list[str] | list[int] | None = None,
+        use_columns: list[str] | list[int] | str | None = None,
     ) -> _ExcelSheet: ...
     def load_sheet_by_idx(
         self,
@@ -50,7 +50,7 @@ class _ExcelReader:
         skip_rows: int = 0,
         n_rows: int | None = None,
         schema_sample_rows: int | None = 1_000,
-        use_columns: list[str] | list[int] | None = None,
+        use_columns: list[str] | list[int] | str | None = None,
     ) -> _ExcelSheet: ...
     @property
     def sheet_names(self) -> list[str]: ...
diff --git a/python/tests/test_column_selection.py b/python/tests/test_column_selection.py
index 8fa363c..7f57bb0 100644
--- a/python/tests/test_column_selection.py
+++ b/python/tests/test_column_selection.py
@@ -222,12 +222,33 @@ def test_single_sheet_with_unnamed_columns_and_pagination_and_column_names(
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected_first_row_skipped))
 
 
+def test_single_sheet_with_unnamed_columns_and_str_range(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
+) -> None:
+    use_columns_str = "A,C:E"
+    use_columns_idx = [0, 2, 3]
+    expected = {
+        k: v
+        for k, v in single_sheet_with_unnamed_columns_expected.items()
+        if k in ["col1", "col3", "__UNNAMED__3"]
+    }
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_str
+    )
+    assert sheet.selected_columns == use_columns_idx
+    assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+
 def test_single_sheet_invalid_column_indices_negative_integer(
     excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
 ) -> None:
     expected_message = """invalid parameters: expected list[int] | list[str], got [-2]
 Context:
-    0: expected selected columns to be list[str] | list[int] | None, got Some([-2])
+    0: could not determine selected columns from provided object: [-2]
+    1: expected selected columns to be list[str] | list[int] | str | None, got Some([-2])
 """
     with pytest.raises(fastexcel.InvalidParametersError, match=re.escape(expected_message)):
         excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[-2])
@@ -238,7 +259,8 @@ def test_single_sheet_invalid_column_indices_empty_list(
 ) -> None:
     expected_message = """invalid parameters: list of selected columns is empty
 Context:
-    0: expected selected columns to be list[str] | list[int] | None, got Some([])
+    0: could not determine selected columns from provided object: []
+    1: expected selected columns to be list[str] | list[int] | str | None, got Some([])
 """
     with pytest.raises(fastexcel.InvalidParametersError, match=re.escape(expected_message)):
         excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[])
diff --git a/src/types/excelreader.rs b/src/types/excelreader.rs
index f198061..6a15ce5 100644
--- a/src/types/excelreader.rs
+++ b/src/types/excelreader.rs
@@ -1,7 +1,7 @@
 use std::{fs::File, io::BufReader};
 
 use calamine::{open_workbook_auto, Reader, Sheets};
-use pyo3::{pyclass, pymethods, types::PyList, PyResult};
+use pyo3::{pyclass, pymethods, PyAny, PyResult};
 
 use crate::error::{
     py_errors::IntoPyResult, ErrorContext, FastExcelErrorKind, FastExcelResult, IdxOrName,
@@ -61,7 +61,8 @@ impl ExcelReader {
         skip_rows: usize,
         n_rows: Option<usize>,
         schema_sample_rows: Option<usize>,
-        use_columns: Option<&PyList>,
+        // pyo3 forces us to take an Option in case the default value is None
+        use_columns: Option<&PyAny>,
     ) -> PyResult<ExcelSheet> {
         let range = self
             .sheets
@@ -72,7 +73,7 @@ impl ExcelReader {
 
         let header = Header::new(header_row, column_names);
         let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?;
-        let selected_columns = use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | None, got {use_columns:?}")).into_pyresult()?;
+        let selected_columns = use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | str | None, got {use_columns:?}")).into_pyresult()?;
         ExcelSheet::try_new(
             name,
             range,
@@ -103,7 +104,7 @@ impl ExcelReader {
         skip_rows: usize,
         n_rows: Option<usize>,
         schema_sample_rows: Option<usize>,
-        use_columns: Option<&PyList>,
+        use_columns: Option<&PyAny>,
     ) -> PyResult<ExcelSheet> {
         let name = self
             .sheet_names
@@ -131,7 +132,7 @@ impl ExcelReader {
 
         let header = Header::new(header_row, column_names);
         let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?;
-        let selected_columns = use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | None, got {use_columns:?}")).into_pyresult()?;
+        let selected_columns = use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | str | None, got {use_columns:?}")).into_pyresult()?;
         ExcelSheet::try_new(
             name,
             range,
diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs
index 5f42dff..cd0a0d6 100644
--- a/src/types/excelsheet.rs
+++ b/src/types/excelsheet.rs
@@ -1,4 +1,4 @@
-use std::sync::Arc;
+use std::{cmp, collections::HashSet, str::FromStr, sync::Arc};
 
 use crate::error::{
     py_errors::IntoPyResult, ErrorContext, FastExcelError, FastExcelErrorKind, FastExcelResult,
@@ -19,8 +19,8 @@ use chrono::NaiveDate;
 
 use pyo3::{
     prelude::{pyclass, pymethods, PyObject, Python},
-    types::PyList,
-    PyResult,
+    types::{PyList, PyString},
+    PyAny, PyResult,
 };
 
 use crate::utils::arrow::arrow_schema_from_column_names_and_range;
@@ -149,28 +149,146 @@ impl SelectedColumns {
     }
 }
 
-impl TryFrom<Option<&PyList>> for SelectedColumns {
+impl TryFrom<&PyList> for SelectedColumns {
     type Error = FastExcelError;
 
-    fn try_from(value: Option<&PyList>) -> FastExcelResult<Self> {
+    fn try_from(py_list: &PyList) -> FastExcelResult<Self> {
         use FastExcelErrorKind::InvalidParameters;
 
-        match value {
+        if py_list.is_empty() {
+            Err(InvalidParameters("list of selected columns is empty".to_string()).into())
+        } else if let Ok(name_vec) = py_list.extract::<Vec<String>>() {
+            Ok(Self::ByName(name_vec))
+        } else if let Ok(index_vec) = py_list.extract::<Vec<usize>>() {
+            Ok(Self::ByIndex(index_vec))
+        } else {
+            Err(
+                InvalidParameters(format!("expected list[int] | list[str], got {py_list:?}"))
+                    .into(),
+            )
+        }
+    }
+}
+
+impl SelectedColumns {
+    const ALPHABET: [char; 26] = [
+        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
+        'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+    ];
+
+    fn col_idx_for_col_as_letter(col: &str) -> FastExcelResult<usize> {
+        use FastExcelErrorKind::InvalidParameters;
+
+        if col.len() != 1 {
+            return Err(InvalidParameters(format!(
+                "a column should be exactly one character, got {n_chars}",
+                n_chars = col.len()
+            ))
+            .into());
+        }
+
+        col.chars()
+            .next()
+            .ok_or_else(|| {
+                InvalidParameters(format!("could not get first char of column \"{col}\"")).into()
+            })
+            .and_then(|col_name| {
+                Self::ALPHABET
+                    .iter()
+                    .position(|chr| chr == &col_name)
+                    .ok_or_else(|| {
+                        InvalidParameters(format!("Char is not a valid column name: {col_name}"))
+                            .into()
+                    })
+            })
+    }
+
+    fn col_indices_for_letter_range(col_range: &str) -> FastExcelResult<Vec<usize>> {
+        use FastExcelErrorKind::InvalidParameters;
+
+        let col_elements = col_range.split(':').collect::<Vec<_>>();
+        if col_elements.len() == 2 {
+            let start = Self::col_idx_for_col_as_letter(col_elements[0])
+                .with_context(|| format!("invalid start element for range \"{col_range}\""))?;
+            let end = Self::col_idx_for_col_as_letter(col_elements[1])
+                .with_context(|| format!("invalid end element for range \"{col_range}\""))?;
+
+            match start.cmp(&end) {
+                cmp::Ordering::Less => Ok((start..end).collect()),
+                cmp::Ordering::Greater => Err(InvalidParameters(format!(
+                    "end of range is before start: \"{col_range}\""
+                ))
+                .into()),
+                cmp::Ordering::Equal => {
+                    Err(InvalidParameters(format!("empty range: \"{col_range}\"")).into())
+                }
+            }
+        } else {
+            Err(InvalidParameters(format!(
+                "expected range to contain exactly 2 elements, got {n_elements}: \"{col_range}\"",
+                n_elements = col_elements.len()
+            ))
+            .into())
+        }
+    }
+}
+
+// Does not support columns beyond 26 for now
+impl FromStr for SelectedColumns {
+    type Err = FastExcelError;
+
+    fn from_str(s: &str) -> FastExcelResult<Self> {
+        let unique_col_indices: HashSet<usize> = s
+            .to_uppercase()
+            .split(',')
+            .map(|col_or_range| {
+                if col_or_range.contains(':') {
+                    Self::col_indices_for_letter_range(col_or_range)
+                } else {
+                    Self::col_idx_for_col_as_letter(col_or_range).map(|idx| vec![idx])
+                }
+            })
+            .collect::<FastExcelResult<Vec<_>>>()?
+            .into_iter()
+            .flatten()
+            .collect();
+        let mut sorted_col_indices: Vec<usize> = unique_col_indices.into_iter().collect();
+        sorted_col_indices.sort();
+        Ok(Self::ByIndex(sorted_col_indices))
+    }
+}
+
+impl TryFrom<Option<&PyAny>> for SelectedColumns {
+    type Error = FastExcelError;
+
+    fn try_from(py_any_opt: Option<&PyAny>) -> FastExcelResult<Self> {
+        match py_any_opt {
             None => Ok(Self::All),
-            Some(py_list) => {
-                if py_list.is_empty() {
-                    Err(InvalidParameters("list of selected columns is empty".to_string()).into())
-                } else if let Ok(name_vec) = py_list.extract::<Vec<String>>() {
-                    Ok(Self::ByName(name_vec))
-                } else if let Ok(index_vec) = py_list.extract::<Vec<usize>>() {
-                    Ok(Self::ByIndex(index_vec))
+            Some(py_any) => {
+                // Not trying to downcast to PyNone here as we assume that this would result in
+                // py_any_opt being None
+                if let Ok(py_str) = py_any.downcast::<PyString>() {
+                    py_str
+                        .to_str()
+                        .map_err(|err| {
+                            FastExcelErrorKind::InvalidParameters(format!(
+                                "provided string is not valid unicode: {err}"
+                            ))
+                        })?
+                        .parse()
+                } else if let Ok(py_list) = py_any.downcast::<PyList>() {
+                    py_list.try_into()
                 } else {
-                    Err(InvalidParameters(format!(
-                        "expected list[int] | list[str], got {py_list:?}"
+                    Err(FastExcelErrorKind::InvalidParameters(format!(
+                        "unsupported object type {object_type}",
+                        object_type = py_any.get_type()
                     ))
                     .into())
                 }
             }
+            .with_context(|| {
+                format!("could not determine selected columns from provided object: {py_any}")
+            }),
         }
     }
 }
@@ -386,7 +504,7 @@ impl TryFrom<&ExcelSheet> for Schema {
             &sheet.available_columns,
             sheet.offset(),
             // If sample_rows is higher than the sheet's limit, use the limit instead
-            std::cmp::min(sample_rows, sheet.limit()),
+            cmp::min(sample_rows, sheet.limit()),
             &sheet.selected_columns,
         )
     }
@@ -536,6 +654,7 @@ impl ExcelSheet {
 mod tests {
     use super::*;
     use pretty_assertions::assert_eq;
+    use rstest::rstest;
 
     #[test]
     fn selected_columns_from_none() {
@@ -548,7 +667,7 @@ mod tests {
     #[test]
     fn selected_columns_from_list_of_valid_ints() {
         Python::with_gil(|py| {
-            let py_list = PyList::new(py, vec![0, 1, 2]);
+            let py_list = PyList::new(py, vec![0, 1, 2]).as_ref();
             assert_eq!(
                 TryInto::<SelectedColumns>::try_into(Some(py_list)).unwrap(),
                 SelectedColumns::ByIndex(vec![0, 1, 2])
@@ -559,7 +678,7 @@ mod tests {
     #[test]
     fn selected_columns_from_list_of_valid_strings() {
         Python::with_gil(|py| {
-            let py_list = PyList::new(py, vec!["foo", "bar"]);
+            let py_list = PyList::new(py, vec!["foo", "bar"]).as_ref();
             assert_eq!(
                 TryInto::<SelectedColumns>::try_into(Some(py_list)).unwrap(),
                 SelectedColumns::ByName(vec!["foo".to_string(), "bar".to_string()])
@@ -570,7 +689,7 @@ mod tests {
     #[test]
     fn selected_columns_from_invalid_ints() {
         Python::with_gil(|py| {
-            let py_list = PyList::new(py, vec![0, 2, -1]);
+            let py_list = PyList::new(py, vec![0, 2, -1]).as_ref();
             let err = TryInto::<SelectedColumns>::try_into(Some(py_list)).unwrap_err();
 
             assert!(matches!(err.kind, FastExcelErrorKind::InvalidParameters(_)));
@@ -580,7 +699,7 @@ mod tests {
     #[test]
     fn selected_columns_from_empty_int_list() {
         Python::with_gil(|py| {
-            let py_list = PyList::new(py, Vec::<usize>::new());
+            let py_list = PyList::new(py, Vec::<usize>::new()).as_ref();
             let err = TryInto::<SelectedColumns>::try_into(Some(py_list)).unwrap_err();
 
             assert!(matches!(err.kind, FastExcelErrorKind::InvalidParameters(_)));
@@ -590,10 +709,60 @@ mod tests {
     #[test]
     fn selected_columns_from_empty_string_list() {
         Python::with_gil(|py| {
-            let py_list = PyList::new(py, Vec::<String>::new());
+            let py_list = PyList::new(py, Vec::<String>::new()).as_ref();
             let err = TryInto::<SelectedColumns>::try_into(Some(py_list)).unwrap_err();
 
             assert!(matches!(err.kind, FastExcelErrorKind::InvalidParameters(_)));
         });
     }
+
+    #[rstest]
+    // Standard unique columns
+    #[case("A,B,D", vec![0, 1, 3])]
+    // Standard unique columns + range
+    #[case("A,B:E,Y", vec![0, 1, 2, 3, 24])]
+    // Standard unique column + ranges with mixed case
+    #[case("A:c,b:E,w,Y:z", vec![0, 1, 2, 3, 22, 24])]
+    fn selected_columns_from_valid_ranges(#[case] raw: &str, #[case] expected: Vec<usize>) {
+        Python::with_gil(|py| {
+            let expected_range = SelectedColumns::ByIndex(expected);
+            let input = PyString::new(py, raw).as_ref();
+
+            let range = TryInto::<SelectedColumns>::try_into(Some(input))
+                .expect("expected a valid column selection");
+
+            assert_eq!(range, expected_range)
+        })
+    }
+
+    #[rstest]
+    // Standard unique columns
+    #[case("", "exactly one character")]
+    // empty range
+    #[case("a:a,b:d,e", "empty range")]
+    // end before start
+    #[case("b:a", "end of range is before start")]
+    // no start
+    #[case(":a", "exactly one character, got 0")]
+    // no end
+    #[case("a:", "exactly one character, got 0")]
+    // too many elements
+    #[case("a:b:e", "exactly 2 elements, got 3")]
+    fn selected_columns_from_invalid_ranges(#[case] raw: &str, #[case] message: &str) {
+        Python::with_gil(|py| {
+            let input = PyString::new(py, raw).as_ref();
+
+            let err =
+                TryInto::<SelectedColumns>::try_into(Some(input)).expect_err("expected an error");
+
+            match err.kind {
+                FastExcelErrorKind::InvalidParameters(detail) => {
+                    if !detail.contains(message) {
+                        panic!("expected \"{detail}\" to contain \"{message}\"")
+                    }
+                }
+                _ => panic!("Expected error to be InvalidParameters, got {err:?}"),
+            }
+        })
+    }
 }

From 976f9cb7424a32509ee893bd64b883435676f4f0 Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Tue, 27 Feb 2024 16:34:34 +0100
Subject: [PATCH 2/6] support ranges beyond Z

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 src/types/excelsheet.rs | 55 +++++++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs
index cd0a0d6..497d143 100644
--- a/src/types/excelsheet.rs
+++ b/src/types/excelsheet.rs
@@ -179,27 +179,43 @@ impl SelectedColumns {
     fn col_idx_for_col_as_letter(col: &str) -> FastExcelResult<usize> {
         use FastExcelErrorKind::InvalidParameters;
 
-        if col.len() != 1 {
-            return Err(InvalidParameters(format!(
-                "a column should be exactly one character, got {n_chars}",
-                n_chars = col.len()
-            ))
+        if col.is_empty() {
+            return Err(InvalidParameters(
+                "a column should have at least one character, got none".to_string(),
+            )
             .into());
         }
 
         col.chars()
-            .next()
-            .ok_or_else(|| {
-                InvalidParameters(format!("could not get first char of column \"{col}\"")).into()
-            })
-            .and_then(|col_name| {
-                Self::ALPHABET
+            //  iterating over all chars reversed, to have a power based on their rank
+            .rev()
+            .enumerate()
+            //  Parses every char, checks its position and returns its numeric equivalent based on
+            //  its rank. For example, AB becomes 27 (26 + 1)
+            .map(|(idx, col_chr)| {
+                let pos_in_alphabet = Self::ALPHABET
                     .iter()
-                    .position(|chr| chr == &col_name)
+                    .position(|chr| chr == &col_chr)
                     .ok_or_else(|| {
-                        InvalidParameters(format!("Char is not a valid column name: {col_name}"))
-                            .into()
-                    })
+                        FastExcelError::from(InvalidParameters(format!(
+                            "Char is not a valid column name: {col_chr}"
+                        )))
+                    })?;
+
+                Ok(match idx {
+                    // in case it's the last char, just return its position
+                    0 => pos_in_alphabet,
+                    // otherwise, 26^idx * (position + 1)
+                    // For example, CBA is 2081:
+                    // A -> 0
+                    // B -> 26 (53^1 * (1 + 1))
+                    // C -> 2028 (26^2 * (2 + 1))
+                    _ => 26usize.pow(idx as u32) * (pos_in_alphabet + 1),
+                })
+            })
+            // Sums all previously obtained ranks
+            .try_fold(0usize, |acc, elem_result| {
+                elem_result.map(|elem| acc + elem)
             })
     }
 
@@ -723,6 +739,9 @@ mod tests {
     #[case("A,B:E,Y", vec![0, 1, 2, 3, 24])]
     // Standard unique column + ranges with mixed case
     #[case("A:c,b:E,w,Y:z", vec![0, 1, 2, 3, 22, 24])]
+    // Ranges beyond Z
+    #[case("A,y:AB", vec![0, 24, 25, 26])]
+    #[case("BB:BE,DDC:DDF", vec![53, 54, 55, 2810, 2811, 2812])]
     fn selected_columns_from_valid_ranges(#[case] raw: &str, #[case] expected: Vec<usize>) {
         Python::with_gil(|py| {
             let expected_range = SelectedColumns::ByIndex(expected);
@@ -737,15 +756,15 @@ mod tests {
 
     #[rstest]
     // Standard unique columns
-    #[case("", "exactly one character")]
+    #[case("", "at least one character")]
     // empty range
     #[case("a:a,b:d,e", "empty range")]
     // end before start
     #[case("b:a", "end of range is before start")]
     // no start
-    #[case(":a", "exactly one character, got 0")]
+    #[case(":a", "at least one character, got none")]
     // no end
-    #[case("a:", "exactly one character, got 0")]
+    #[case("a:", "at least one character, got none")]
     // too many elements
     #[case("a:b:e", "exactly 2 elements, got 3")]
     fn selected_columns_from_invalid_ranges(#[case] raw: &str, #[case] message: &str) {

From 47b815dff58a149bd635de5bb66251ec3ca4c867 Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Tue, 27 Feb 2024 17:47:04 +0100
Subject: [PATCH 3/6] adapt docstrings

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 python/fastexcel/__init__.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py
index b043245..4c519a4 100644
--- a/python/fastexcel/__init__.py
+++ b/python/fastexcel/__init__.py
@@ -129,11 +129,13 @@ def load_sheet_by_name(
         :param schema_sample_rows: Specifies how many rows should be used to determine
                                    the dtype of a column.
                                    If `None`, all rows will be used.
-        :param use_columns: Specifies the columns to use. Can either be a list of column names, a
-                            a list of column indices (starting at 0), or a list of columns and
-                            ranges as column names indices in excel form. For example, `"A,C:E,F"`
-                            will select columns `A,C,D,F`.
-                            If `None`, all columns will be used.
+        :param use_columns: Specifies the columns to use. Can either be:
+                            - `None` to select all columns
+                            - a list of strings, the column names
+                            - a list of ints, the column indices (starting at 0)
+                            - a string, a comma separated list of Excel column letters and column
+                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
+                              `A,B,C,D,E` and `A,C,E,F`)
         """
         return ExcelSheet(
             self._reader.load_sheet_by_name(
@@ -173,11 +175,13 @@ def load_sheet_by_idx(
         :param schema_sample_rows: Specifies how many rows should be used to determine
                                    the dtype of a column.
                                    If `None`, all rows will be used.
-        :param use_columns: Specifies the columns to use. Can either be a list of column names, a
-                            a list of column indices (starting at 0), or a list of columns and
-                            ranges as column names indices in excel form. For example, `"A,C:E,F"`
-                            will select columns `A,C,D,F`.
-                            If `None`, all columns will be used.
+        :param use_columns: Specifies the columns to use. Can either be:
+                            - `None` to select all columns
+                            - a list of strings, the column names
+                            - a list of ints, the column indices (starting at 0)
+                            - a string, a comma separated list of Excel column letters and column
+                              ranges (e.g. `“A:E”` or `“A,C,E:F”`, which would result in
+                              `A,B,C,D,E` and `A,C,E,F`)
         """
         if idx < 0:
             raise ValueError(f"Expected idx to be > 0, got {idx}")

From bb18d942e67419893cda1c5840b0d801017b3896 Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Tue, 27 Feb 2024 17:47:57 +0100
Subject: [PATCH 4/6] remove outdated comment

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 src/types/excelsheet.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs
index 497d143..008b0f9 100644
--- a/src/types/excelsheet.rs
+++ b/src/types/excelsheet.rs
@@ -249,7 +249,6 @@ impl SelectedColumns {
     }
 }
 
-// Does not support columns beyond 26 for now
 impl FromStr for SelectedColumns {
     type Err = FastExcelError;
 

From 248acdd799d0e8ecbaf039307ed639167e6243ba Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Tue, 27 Feb 2024 17:50:08 +0100
Subject: [PATCH 5/6] refactor: make end of range inclusive

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 src/types/excelsheet.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs
index 008b0f9..2596dd1 100644
--- a/src/types/excelsheet.rs
+++ b/src/types/excelsheet.rs
@@ -230,7 +230,7 @@ impl SelectedColumns {
                 .with_context(|| format!("invalid end element for range \"{col_range}\""))?;
 
             match start.cmp(&end) {
-                cmp::Ordering::Less => Ok((start..end).collect()),
+                cmp::Ordering::Less => Ok((start..=end).collect()),
                 cmp::Ordering::Greater => Err(InvalidParameters(format!(
                     "end of range is before start: \"{col_range}\""
                 ))
@@ -735,12 +735,12 @@ mod tests {
     // Standard unique columns
     #[case("A,B,D", vec![0, 1, 3])]
     // Standard unique columns + range
-    #[case("A,B:E,Y", vec![0, 1, 2, 3, 24])]
+    #[case("A,B:E,Y", vec![0, 1, 2, 3, 4, 24])]
     // Standard unique column + ranges with mixed case
-    #[case("A:c,b:E,w,Y:z", vec![0, 1, 2, 3, 22, 24])]
+    #[case("A:c,b:E,w,Y:z", vec![0, 1, 2, 3, 4, 22, 24, 25])]
     // Ranges beyond Z
-    #[case("A,y:AB", vec![0, 24, 25, 26])]
-    #[case("BB:BE,DDC:DDF", vec![53, 54, 55, 2810, 2811, 2812])]
+    #[case("A,y:AB", vec![0, 24, 25, 26, 27])]
+    #[case("BB:BE,DDC:DDF", vec![53, 54, 55, 56, 2810, 2811, 2812, 2813])]
     fn selected_columns_from_valid_ranges(#[case] raw: &str, #[case] expected: Vec<usize>) {
         Python::with_gil(|py| {
             let expected_range = SelectedColumns::ByIndex(expected);

From 8dba9fd5de4a1c2fb8b2d2863fe8217f64ede88a Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Tue, 27 Feb 2024 17:56:01 +0100
Subject: [PATCH 6/6] fix python test

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 python/tests/test_column_selection.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/tests/test_column_selection.py b/python/tests/test_column_selection.py
index 7f57bb0..a4cade3 100644
--- a/python/tests/test_column_selection.py
+++ b/python/tests/test_column_selection.py
@@ -227,11 +227,11 @@ def test_single_sheet_with_unnamed_columns_and_str_range(
     single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
 ) -> None:
     use_columns_str = "A,C:E"
-    use_columns_idx = [0, 2, 3]
+    use_columns_idx = [0, 2, 3, 4]
     expected = {
         k: v
         for k, v in single_sheet_with_unnamed_columns_expected.items()
-        if k in ["col1", "col3", "__UNNAMED__3"]
+        if k in ["col1", "col3", "__UNNAMED__3", "col5"]
     }
     sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
         "With unnamed columns", use_columns=use_columns_str