From 39491322b5be423cd25c386d8e2e707cc3f0f28a Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Sun, 25 Feb 2024 13:31:02 +0100
Subject: [PATCH 01/14] feat(Makefile): split test and lint targets between
 rust and python

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 Makefile | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index d4e2ee7..defe771 100644
--- a/Makefile
+++ b/Makefile
@@ -13,17 +13,26 @@ cargo-test	= cargo test
 ## Docs
 pdoc	= pdoc -o docs python/fastexcel
 
-lint:
+lint-python:  ## Python-side checks, including mypy type checking
 	$(ruff)
 	$(format)  --check --diff
 	$(mypy)
+
+lint-rust:  ## Rust-side checks (clippy only)
 	$(clippy)
-format:
+
+lint: lint-rust lint-python
+
+format-python:
 	$(ruff) --fix
 	$(format)
+
+format-rust:
 	$(fmt)
 	$(clippy) --fix --lib -p fastexcel --allow-dirty --allow-staged
 
+format: format-rust format-python
+
 install-test-requirements:
 	pip install -U -r test-requirements.txt -r build-requirements.txt
 
@@ -39,10 +48,14 @@ dev-install:
 prod-install:
 	./prod_install.sh
 
-test:
+test-rust:
 	$(cargo-test)
+
+test-python:
 	$(pytest)
 
+test: test-rust test-python
+
 doc:
 	$(pdoc)
 

From 048405d07b53c0813f92f3d2122cad34707e0731 Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Sun, 25 Feb 2024 13:31:27 +0100
Subject: [PATCH 02/14] fix(build): allow tests in pyo3 modules to be run

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 Cargo.toml | 9 ++++++++-
 Makefile   | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index c62376d..62d12c4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,7 +11,8 @@ crate-type = ["cdylib"]
 [dependencies]
 calamine = { version = "0.24.0", features = ["dates"] }
 chrono = { version = "0.4.34", default-features = false }
-pyo3 = { version = "0.20.3", features = ["extension-module", "abi3-py38"] }
+# NOTE: "extension-module" is actually required, see comments on features below
+pyo3 = { version = "0.20.3", features = ["abi3-py38"] }
 
 [dependencies.arrow]
 version = "50.0.0"
@@ -21,3 +22,9 @@ features = ["pyarrow"]
 
 [dev-dependencies]
 rstest = { version = "0.18.2", default-features = false }
+
+# NOTE: This is a hack to bypass pyo3 limitations when testing:
+# https://pyo3.rs/v0.20.3/faq.html#i-cant-run-cargo-test-or-i-cant-build-in-a-cargo-workspace-im-having-linker-issues-like-symbol-not-found-or-undefined-reference-to-_pyexc_systemerror
+[features]
+extension-module = ["pyo3/extension-module"]
+default = ["extension-module"]
diff --git a/Makefile b/Makefile
index defe771..eb4295d 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ pytest	= pytest -v
 ## Rust
 clippy		= cargo clippy
 fmt		= cargo fmt
-cargo-test	= cargo test
+cargo-test	= cargo test --no-default-features
 ## Docs
 pdoc	= pdoc -o docs python/fastexcel
 

From 5e0bd762e430a3bbc59353658ca43ec4a1a682f5 Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Sun, 25 Feb 2024 13:40:39 +0100
Subject: [PATCH 03/14] refactor(build): Allow rust tests to spin up python
 interpreters

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 Cargo.toml | 3 +++
 Makefile   | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 62d12c4..f2f85e1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -28,3 +28,6 @@ rstest = { version = "0.18.2", default-features = false }
 [features]
 extension-module = ["pyo3/extension-module"]
 default = ["extension-module"]
+# feature for tests only. This makes Python::with_gil auto-initialize Python
+# interpreters, which allows us to instantiate Python objects in tests
+tests = ["pyo3/auto-initialize"]
diff --git a/Makefile b/Makefile
index eb4295d..46ad571 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ pytest	= pytest -v
 ## Rust
 clippy		= cargo clippy
 fmt		= cargo fmt
-cargo-test	= cargo test --no-default-features
+cargo-test	= cargo test --no-default-features --features tests
 ## Docs
 pdoc	= pdoc -o docs python/fastexcel
 

From f044a51c9964459fe371838f1e900cf5818565ec Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Sun, 25 Feb 2024 14:14:17 +0100
Subject: [PATCH 04/14] feat: introduced a ColumnNotFoundError exception

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 python/fastexcel/__init__.py    |  2 ++
 python/fastexcel/_fastexcel.pyi |  1 +
 python/tests/test_errors.py     |  1 +
 src/error.rs                    | 41 +++++++++++++++++++++++++--------
 src/lib.rs                      |  4 ++++
 src/types/excelreader.rs        |  6 ++---
 6 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py
index fb00d9b..232ce4f 100644
--- a/python/fastexcel/__init__.py
+++ b/python/fastexcel/__init__.py
@@ -17,6 +17,7 @@
     CalamineCellError,
     CalamineError,
     CannotRetrieveCellDataError,
+    ColumnNotFoundError,
     FastExcelError,
     InvalidParametersError,
     SheetNotFoundError,
@@ -224,6 +225,7 @@ def read_excel(path: Path | str) -> ExcelReader:
     "CalamineCellError",
     "CalamineError",
     "SheetNotFoundError",
+    "ColumnNotFoundError",
     "ArrowError",
     "InvalidParametersError",
     "UnsupportedColumnTypeCombinationError",
diff --git a/python/fastexcel/_fastexcel.pyi b/python/fastexcel/_fastexcel.pyi
index 865f4e7..6524559 100644
--- a/python/fastexcel/_fastexcel.pyi
+++ b/python/fastexcel/_fastexcel.pyi
@@ -69,5 +69,6 @@ class CannotRetrieveCellDataError(FastExcelError): ...
 class CalamineCellError(FastExcelError): ...
 class CalamineError(FastExcelError): ...
 class SheetNotFoundError(FastExcelError): ...
+class ColumnNotFoundError(FastExcelError): ...
 class ArrowError(FastExcelError): ...
 class InvalidParametersError(FastExcelError): ...
diff --git a/python/tests/test_errors.py b/python/tests/test_errors.py
index 1b1c33e..4897158 100644
--- a/python/tests/test_errors.py
+++ b/python/tests/test_errors.py
@@ -51,6 +51,7 @@ def test_sheet_not_found_error() -> None:
             "calamine returned an error regarding the content of the cell",
         ),
         (fastexcel.CalamineError, "Generic calamine error"),
+        (fastexcel.ColumnNotFoundError, "Column was not found"),
         (fastexcel.SheetNotFoundError, "Sheet was not found"),
         (fastexcel.ArrowError, "Generic arrow error"),
         (fastexcel.InvalidParametersError, "Provided parameters are invalid"),
diff --git a/src/error.rs b/src/error.rs
index 4c6de09..2f056f7 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -1,20 +1,28 @@
 use std::{error::Error, fmt::Display};
 
 #[derive(Debug)]
-pub(crate) enum SheetIdxOrName {
+pub(crate) enum IdxOrName {
     Idx(usize),
-    // Leaving this variant if someday we want to check if a name exists before calling worksheet_range
-    #[allow(dead_code)]
     Name(String),
 }
 
+impl IdxOrName {
+    pub(super) fn format_message(&self) -> String {
+        match self {
+            Self::Idx(idx) => format!("at index {idx}"),
+            Self::Name(name) => format!("with name \"{name}\" not found"),
+        }
+    }
+}
+
 #[derive(Debug)]
 pub(crate) enum FastExcelErrorKind {
     UnsupportedColumnTypeCombination(String),
     CannotRetrieveCellData(usize, usize),
     CalamineCellError(calamine::CellErrorType),
     CalamineError(calamine::Error),
-    SheetNotFound(SheetIdxOrName),
+    SheetNotFound(IdxOrName),
+    ColumnNotFound(IdxOrName),
     // Arrow errors can be of several different types (arrow::error::Error, PyError), and having
     // the actual type has not much value for us, so we just store a string context
     ArrowError(String),
@@ -37,14 +45,13 @@ impl Display for FastExcelErrorKind {
                 write!(f, "calamine error: {calamine_error}")
             }
             FastExcelErrorKind::SheetNotFound(idx_or_name) => {
-                let message = {
-                    match idx_or_name {
-                        SheetIdxOrName::Idx(idx) => format!("at index {idx}"),
-                        SheetIdxOrName::Name(name) => format!("with name \"{name}\" not found"),
-                    }
-                };
+                let message = idx_or_name.format_message();
                 write!(f, "sheet {message} not found")
             }
+            FastExcelErrorKind::ColumnNotFound(idx_or_name) => {
+                let message = idx_or_name.format_message();
+                write!(f, "column {message} not found")
+            }
             FastExcelErrorKind::ArrowError(err) => write!(f, "arrow error: {err}"),
             FastExcelErrorKind::InvalidParameters(err) => write!(f, "invalid parameters: {err}"),
         }
@@ -70,6 +77,10 @@ impl FastExcelError {
             context: vec![],
         }
     }
+
+    pub(crate) fn kind(&self) -> &FastExcelErrorKind {
+        &self.kind
+    }
 }
 
 impl Display for FastExcelError {
@@ -166,6 +177,13 @@ pub(crate) mod py_errors {
         FastExcelError,
         "Sheet was not found"
     );
+    // Column not found
+    create_exception!(
+        _fastexcel,
+        ColumnNotFoundError,
+        FastExcelError,
+        "Column was not found"
+    );
     // Arrow error
     create_exception!(
         _fastexcel,
@@ -209,6 +227,9 @@ pub(crate) mod py_errors {
                         FastExcelErrorKind::SheetNotFound(_) => {
                             SheetNotFoundError::new_err(message)
                         }
+                        FastExcelErrorKind::ColumnNotFound(_) => {
+                            ColumnNotFoundError::new_err(message)
+                        }
                         FastExcelErrorKind::ArrowError(_) => ArrowError::new_err(message),
                         FastExcelErrorKind::InvalidParameters(_) => {
                             InvalidParametersError::new_err(message)
diff --git a/src/lib.rs b/src/lib.rs
index 7740754..1537816 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -55,6 +55,10 @@ fn _fastexcel(py: Python, m: &PyModule) -> PyResult<()> {
             "SheetNotFoundError",
             py.get_type::<py_errors::SheetNotFoundError>(),
         ),
+        (
+            "ColumnNotFoundError",
+            py.get_type::<py_errors::ColumnNotFoundError>(),
+        ),
         ("ArrowError", py.get_type::<py_errors::ArrowError>()),
         (
             "InvalidParametersError",
diff --git a/src/types/excelreader.rs b/src/types/excelreader.rs
index 6424db1..976bddb 100644
--- a/src/types/excelreader.rs
+++ b/src/types/excelreader.rs
@@ -4,7 +4,7 @@ use calamine::{open_workbook_auto, Reader, Sheets};
 use pyo3::{pyclass, pymethods, PyResult};
 
 use crate::error::{
-    py_errors::IntoPyResult, ErrorContext, FastExcelErrorKind, FastExcelResult, SheetIdxOrName,
+    py_errors::IntoPyResult, ErrorContext, FastExcelErrorKind, FastExcelResult, IdxOrName,
 };
 
 use super::{
@@ -99,7 +99,7 @@ impl ExcelReader {
         let name = self
             .sheet_names
             .get(idx)
-            .ok_or_else(|| FastExcelErrorKind::SheetNotFound(SheetIdxOrName::Idx(idx)).into())
+            .ok_or_else(|| FastExcelErrorKind::SheetNotFound(IdxOrName::Idx(idx)).into())
             .with_context(|| {
                 format!(
                     "Sheet index {idx} is out of range. File has {} sheets",
@@ -114,7 +114,7 @@ impl ExcelReader {
             .worksheet_range_at(idx)
             // Returns Option<Result<Range<Data>, Self::Error>>, so we convert the Option into a
             // SheetNotFoundError and unwrap it
-            .ok_or_else(|| FastExcelErrorKind::SheetNotFound(SheetIdxOrName::Idx(idx)).into())
+            .ok_or_else(|| FastExcelErrorKind::SheetNotFound(IdxOrName::Idx(idx)).into())
             .into_pyresult()?
             // And here, we convert the calamine error in an owned error and unwrap it
             .map_err(|err| FastExcelErrorKind::CalamineError(err).into())

From c5e6f682af22cacf6b20c719ff443d76f41e8617 Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Sun, 25 Feb 2024 17:19:35 +0100
Subject: [PATCH 05/14] feat: allow to select a subset of columns

closes #172

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 Cargo.lock                            |  23 +++
 Cargo.toml                            |   1 +
 python/fastexcel/__init__.py          |   7 +
 python/fastexcel/_fastexcel.pyi       |  12 +-
 python/tests/test_column_selection.py | 251 ++++++++++++++++++++++++
 src/error.rs                          |   8 +-
 src/types/excelreader.rs              |  12 +-
 src/types/excelsheet.rs               | 263 ++++++++++++++++++++++----
 src/utils/arrow.rs                    |  19 +-
 9 files changed, 540 insertions(+), 56 deletions(-)
 create mode 100644 python/tests/test_column_selection.py

diff --git a/Cargo.lock b/Cargo.lock
index 8c44037..e7c0053 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -344,6 +344,12 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
 
+[[package]]
+name = "diff"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
+
 [[package]]
 name = "encoding_rs"
 version = "0.8.31"
@@ -360,6 +366,7 @@ dependencies = [
  "arrow",
  "calamine",
  "chrono",
+ "pretty_assertions",
  "pyo3",
  "rstest",
 ]
@@ -678,6 +685,16 @@ version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
 
+[[package]]
+name = "pretty_assertions"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
+dependencies = [
+ "diff",
+ "yansi",
+]
+
 [[package]]
 name = "proc-macro-hack"
 version = "0.5.19"
@@ -1122,6 +1139,12 @@ version = "0.52.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
 
+[[package]]
+name = "yansi"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
+
 [[package]]
 name = "zip"
 version = "0.6.3"
diff --git a/Cargo.toml b/Cargo.toml
index f2f85e1..96e8512 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,6 +21,7 @@ default-features = false
 features = ["pyarrow"]
 
 [dev-dependencies]
+pretty_assertions = "1.4.0"
 rstest = { version = "0.18.2", default-features = false }
 
 # NOTE: This is a hack to bypass pyo3 limitations when testing:
diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py
index 232ce4f..4776c21 100644
--- a/python/fastexcel/__init__.py
+++ b/python/fastexcel/__init__.py
@@ -102,6 +102,7 @@ def load_sheet_by_name(
         skip_rows: int = 0,
         n_rows: int | None = None,
         schema_sample_rows: int | None = 1_000,
+        use_columns: list[str] | list[int] | None = None,
     ) -> ExcelSheet:
         """Loads a sheet by name.
 
@@ -127,6 +128,7 @@ def load_sheet_by_name(
                 skip_rows=skip_rows,
                 n_rows=n_rows,
                 schema_sample_rows=schema_sample_rows,
+                use_columns=use_columns,
             )
         )
 
@@ -139,6 +141,7 @@ def load_sheet_by_idx(
         skip_rows: int = 0,
         n_rows: int | None = None,
         schema_sample_rows: int | None = 1_000,
+        use_columns: list[str] | list[int] | None = None,
     ) -> ExcelSheet:
         """Loads a sheet by index.
 
@@ -166,6 +169,7 @@ def load_sheet_by_idx(
                 skip_rows=skip_rows,
                 n_rows=n_rows,
                 schema_sample_rows=schema_sample_rows,
+                use_columns=use_columns,
             )
         )
 
@@ -178,6 +182,7 @@ def load_sheet(
         skip_rows: int = 0,
         n_rows: int | None = None,
         schema_sample_rows: int | None = 1_000,
+        use_columns: list[str] | list[int] | None = None,
     ) -> ExcelSheet:
         """Loads a sheet by name if a string is passed or by index if an integer is passed.
 
@@ -191,6 +196,7 @@ def load_sheet(
                 skip_rows=skip_rows,
                 n_rows=n_rows,
                 schema_sample_rows=schema_sample_rows,
+                use_columns=use_columns,
             )
             if isinstance(idx_or_name, int)
             else self.load_sheet_by_name(
@@ -200,6 +206,7 @@ def load_sheet(
                 skip_rows=skip_rows,
                 n_rows=n_rows,
                 schema_sample_rows=schema_sample_rows,
+                use_columns=use_columns,
             )
         )
 
diff --git a/python/fastexcel/_fastexcel.pyi b/python/fastexcel/_fastexcel.pyi
index 6524559..7db8493 100644
--- a/python/fastexcel/_fastexcel.pyi
+++ b/python/fastexcel/_fastexcel.pyi
@@ -33,6 +33,7 @@ class _ExcelReader:
         skip_rows: int = 0,
         n_rows: int | None = None,
         schema_sample_rows: int | None = 1_000,
+        use_columns: list[str] | list[int] | None = None,
     ) -> _ExcelSheet: ...
     def load_sheet_by_idx(
         self,
@@ -43,16 +44,7 @@ class _ExcelReader:
         skip_rows: int = 0,
         n_rows: int | None = None,
         schema_sample_rows: int | None = 1_000,
-    ) -> _ExcelSheet: ...
-    def load_sheet(
-        self,
-        idx_or_name: int | str,
-        *,
-        header_row: int | None = 0,
-        column_names: list[str] | None = None,
-        skip_rows: int = 0,
-        n_rows: int | None = None,
-        schema_sample_rows: int | None = 1_000,
+        use_columns: list[str] | list[int] | None = None,
     ) -> _ExcelSheet: ...
     @property
     def sheet_names(self) -> list[str]: ...
diff --git a/python/tests/test_column_selection.py b/python/tests/test_column_selection.py
new file mode 100644
index 0000000..4107e07
--- /dev/null
+++ b/python/tests/test_column_selection.py
@@ -0,0 +1,251 @@
+from __future__ import annotations
+
+import re
+from typing import Any
+
+import fastexcel
+import pandas as pd
+import polars as pl
+import pytest
+from pandas.testing import assert_frame_equal as pd_assert_frame_equal
+from polars.testing import assert_frame_equal as pl_assert_frame_equal
+from utils import path_for_fixture
+
+
+@pytest.fixture
+def excel_reader_single_sheet() -> fastexcel.ExcelReader:
+    return fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
+
+
+def test_single_sheet_all_columns(excel_reader_single_sheet: fastexcel.ExcelReader) -> None:
+    sheet = excel_reader_single_sheet.load_sheet(0)
+
+    sheet_explicit_arg = excel_reader_single_sheet.load_sheet(0, use_columns=None)
+
+    expected = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}
+    expected_pd_df = pd.DataFrame(expected)
+    expected_pl_df = pl.DataFrame(expected)
+
+    pd_df = sheet.to_pandas()
+    pd_assert_frame_equal(pd_df, expected_pd_df)
+    pd_df_explicit_arg = sheet_explicit_arg.to_pandas()
+    pd_assert_frame_equal(pd_df_explicit_arg, expected_pd_df)
+
+    pl_df = sheet.to_polars()
+    pl_assert_frame_equal(pl_df, expected_pl_df)
+    pl_df_explicit_arg = sheet_explicit_arg.to_polars()
+    pl_assert_frame_equal(pl_df_explicit_arg, expected_pl_df)
+
+
+def test_single_sheet_subset_by_str(excel_reader_single_sheet: fastexcel.ExcelReader) -> None:
+    expected = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}
+    sheets: list[int | str] = [0, "January"]  # same sheet by index, then (presumably) by name
+    for sheet_name_or_idx in sheets:
+        for col in ["Month", "Year"]:
+            sheet = excel_reader_single_sheet.load_sheet(sheet_name_or_idx, use_columns=[col])
+
+            pd_df = sheet.to_pandas()
+            pd_assert_frame_equal(pd_df, pd.DataFrame({col: expected[col]}))
+
+            pl_df = sheet.to_polars()
+            pl_assert_frame_equal(pl_df, pl.DataFrame({col: expected[col]}))
+
+
+def test_single_sheet_subset_by_index(excel_reader_single_sheet: fastexcel.ExcelReader) -> None:
+    expected = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}
+    sheets: list[int | str] = [0, "January"]  # same sheet by index, then (presumably) by name
+    for sheet_name_or_idx in sheets:
+        for idx, col_name in enumerate(["Month", "Year"]):
+            sheet = excel_reader_single_sheet.load_sheet(sheet_name_or_idx, use_columns=[idx])
+
+            pd_df = sheet.to_pandas()
+            pd_assert_frame_equal(pd_df, pd.DataFrame({col_name: expected[col_name]}))
+
+            pl_df = sheet.to_polars()
+            pl_assert_frame_equal(pl_df, pl.DataFrame({col_name: expected[col_name]}))
+
+
+@pytest.fixture
+def excel_reader_single_sheet_with_unnamed_columns() -> fastexcel.ExcelReader:
+    return fastexcel.read_excel(path_for_fixture("fixture-multi-sheet.xlsx"))
+
+
+@pytest.fixture
+def single_sheet_with_unnamed_columns_expected() -> dict[str, list[Any]]:
+    return {
+        "col1": [2.0, 3.0],
+        "__UNNAMED__1": [1.5, 2.5],
+        "col3": ["hello", "world"],
+        "__UNNAMED__3": [-5.0, -6.0],
+        "col5": ["a", "b"],
+    }
+
+
+def test_single_sheet_with_unnamed_columns(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
+) -> None:
+    use_columns_str = ["col1", "col3", "__UNNAMED__3"]
+    use_columns_idx = [0, 2, 3]
+    expected = {
+        k: v for k, v in single_sheet_with_unnamed_columns_expected.items() if k in use_columns_str
+    }
+
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_str
+    )
+
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_idx
+    )
+
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+
+def test_single_sheet_with_unnamed_columns_and_pagination(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
+) -> None:
+    use_columns_str = ["col1", "col3", "__UNNAMED__3"]
+    use_columns_idx = [0, 2, 3]
+
+    # first row only
+    expected = {
+        k: v[:1]
+        for k, v in single_sheet_with_unnamed_columns_expected.items()
+        if k in use_columns_str
+    }
+
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_str, n_rows=1
+    )
+
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_idx, n_rows=1
+    )
+
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+    # second row
+    expected = {
+        k: v[1:]
+        for k, v in single_sheet_with_unnamed_columns_expected.items()
+        if k in use_columns_str
+    }
+
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_str, skip_rows=1
+    )
+
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_idx, skip_rows=1
+    )
+
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+
+def test_single_sheet_with_unnamed_columns_and_pagination_and_column_names(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+) -> None:
+    use_columns_str = ["col0", "col2", "col3"]
+    use_columns_idx = [0, 2, 3]
+    expected: dict[str, list[Any]] = {
+        "col0": [2.0, 3.0],
+        "col2": ["hello", "world"],
+        "col3": [-5.0, -6.0],
+    }
+    column_names = [f"col{i}" for i in range(5)]
+
+    # skipping the header row only
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_str, skip_rows=1, column_names=column_names
+    )
+
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_idx, skip_rows=1, column_names=column_names
+    )
+
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
+
+    # skipping the header row + first data row
+    expected_first_row_skipped = {k: v[1:] for k, v in expected.items()}
+
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_str, skip_rows=2, column_names=column_names
+    )
+
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected_first_row_skipped))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected_first_row_skipped))
+
+    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+        "With unnamed columns", use_columns=use_columns_idx, skip_rows=2, column_names=column_names
+    )
+
+    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected_first_row_skipped))
+    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected_first_row_skipped))
+
+
+def test_single_sheet_invalid_column_indices_negative_integer(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+) -> None:
+    expected_message = """invalid parameters: expected list[int] | list[str], got [-2]
+Context:
+    0: expected selected columns to be list[str] | list[int] | None, got Some([-2])
+"""
+    with pytest.raises(fastexcel.InvalidParametersError, match=re.escape(expected_message)):
+        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[-2])
+
+
+def test_single_sheet_invalid_column_indices_empty_list(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+) -> None:
+    expected_message = """invalid parameters: list of select columns is empty
+Context:
+    0: expected selected columns to be list[str] | list[int] | None, got Some([])
+"""
+    with pytest.raises(fastexcel.InvalidParametersError, match=re.escape(expected_message)):
+        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[])
+
+
+def test_single_sheet_invalid_column_indices_column_does_not_exist_str(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+) -> None:
+    expected_message = """column with name "nope" not found
+Context:
+    0: selected columns are invalid
+    1: could not create RecordBatch from sheet "January"
+    2: could not convert RecordBatch to pyarrow for sheet "January"
+"""
+    with pytest.raises(fastexcel.ColumnNotFoundError, match=re.escape(expected_message)):
+        excel_reader_single_sheet_with_unnamed_columns.load_sheet(
+            0, use_columns=["nope"]
+        ).to_arrow()
+
+
+def test_single_sheet_invalid_column_indices_column_does_not_exist_int(
+    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
+) -> None:
+    expected_message = """column at index 42 not found
+Context:
+    0: selected columns are invalid
+    1: could not create RecordBatch from sheet "January"
+    2: could not convert RecordBatch to pyarrow for sheet "January"
+"""
+    with pytest.raises(fastexcel.ColumnNotFoundError, match=re.escape(expected_message)):
+        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[42]).to_arrow()
diff --git a/src/error.rs b/src/error.rs
index 2f056f7..48e6bdb 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -10,7 +10,7 @@ impl IdxOrName {
     pub(super) fn format_message(&self) -> String {
         match self {
             Self::Idx(idx) => format!("at index {idx}"),
-            Self::Name(name) => format!("with name \"{name}\" not found"),
+            Self::Name(name) => format!("with name \"{name}\""),
         }
     }
 }
@@ -60,7 +60,7 @@ impl Display for FastExcelErrorKind {
 
 #[derive(Debug)]
 pub(crate) struct FastExcelError {
-    kind: FastExcelErrorKind,
+    pub kind: FastExcelErrorKind,
     context: Vec<String>,
 }
 
@@ -77,10 +77,6 @@ impl FastExcelError {
             context: vec![],
         }
     }
-
-    pub(crate) fn kind(&self) -> &FastExcelErrorKind {
-        &self.kind
-    }
 }
 
 impl Display for FastExcelError {
diff --git a/src/types/excelreader.rs b/src/types/excelreader.rs
index 976bddb..e6986da 100644
--- a/src/types/excelreader.rs
+++ b/src/types/excelreader.rs
@@ -1,7 +1,7 @@
 use std::{fs::File, io::BufReader};
 
 use calamine::{open_workbook_auto, Reader, Sheets};
-use pyo3::{pyclass, pymethods, PyResult};
+use pyo3::{pyclass, pymethods, types::PyList, PyResult};
 
 use crate::error::{
     py_errors::IntoPyResult, ErrorContext, FastExcelErrorKind, FastExcelResult, IdxOrName,
@@ -50,7 +50,9 @@ impl ExcelReader {
         skip_rows = 0,
         n_rows = None,
         schema_sample_rows = 1_000,
+        use_columns = None
     ))]
+    #[allow(clippy::too_many_arguments)]
     pub fn load_sheet_by_name(
         &mut self,
         name: String,
@@ -59,6 +61,7 @@ impl ExcelReader {
         skip_rows: usize,
         n_rows: Option<usize>,
         schema_sample_rows: Option<usize>,
+        use_columns: Option<&PyList>,
     ) -> PyResult<ExcelSheet> {
         let range = self
             .sheets
@@ -69,12 +72,14 @@ impl ExcelReader {
 
         let header = Header::new(header_row, column_names);
         let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?;
+        let selected_columns = use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | None, got {use_columns:?}")).into_pyresult()?;
         Ok(ExcelSheet::new(
             name,
             range,
             header,
             pagination,
             schema_sample_rows,
+            selected_columns,
         ))
     }
 
@@ -86,7 +91,9 @@ impl ExcelReader {
         skip_rows = 0,
         n_rows = None,
         schema_sample_rows = 1_000,
+        use_columns = None
     ))]
+    #[allow(clippy::too_many_arguments)]
     pub fn load_sheet_by_idx(
         &mut self,
         idx: usize,
@@ -95,6 +102,7 @@ impl ExcelReader {
         skip_rows: usize,
         n_rows: Option<usize>,
         schema_sample_rows: Option<usize>,
+        use_columns: Option<&PyList>,
     ) -> PyResult<ExcelSheet> {
         let name = self
             .sheet_names
@@ -122,12 +130,14 @@ impl ExcelReader {
 
         let header = Header::new(header_row, column_names);
         let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?;
+        let selected_columns = use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | None, got {use_columns:?}")).into_pyresult()?;
         Ok(ExcelSheet::new(
             name,
             range,
             header,
             pagination,
             schema_sample_rows,
+            selected_columns,
         ))
     }
 }
diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs
index d950eec..964f07a 100644
--- a/src/types/excelsheet.rs
+++ b/src/types/excelsheet.rs
@@ -2,6 +2,7 @@ use std::sync::Arc;
 
 use crate::error::{
     py_errors::IntoPyResult, ErrorContext, FastExcelError, FastExcelErrorKind, FastExcelResult,
+    IdxOrName,
 };
 
 use arrow::{
@@ -18,11 +19,13 @@ use chrono::NaiveDate;
 
 use pyo3::{
     prelude::{pyclass, pymethods, PyObject, Python},
+    types::PyList,
     PyResult,
 };
 
 use crate::utils::arrow::arrow_schema_from_column_names_and_range;
 
+#[derive(Debug)]
 pub(crate) enum Header {
     None,
     At(usize),
@@ -76,6 +79,100 @@ impl Pagination {
     }
 }
 
+#[derive(Debug, PartialEq)]
+pub(crate) enum SelectedColumns {
+    All,
+    ByIndex(Vec<usize>),
+    ByName(Vec<String>),
+}
+
+impl SelectedColumns {
+    pub(crate) fn validate_columns(&self, column_names: &[String]) -> FastExcelResult<()> {
+        match self {
+            SelectedColumns::All => Ok(()),
+            // If no selected indice is >= to the len of column_names, we're good
+            SelectedColumns::ByIndex(indices) => indices.iter().try_for_each(|idx| {
+                if idx >= &column_names.len() {
+                    Err(FastExcelErrorKind::ColumnNotFound(IdxOrName::Idx(*idx)).into())
+                } else {
+                    Ok(())
+                }
+            }),
+            // Every selected column must be in the provided column_names
+            SelectedColumns::ByName(selected_names) => {
+                selected_names.iter().try_for_each(|selected_name| {
+                    if column_names.contains(selected_name) {
+                        Ok(())
+                    } else {
+                        Err(FastExcelErrorKind::ColumnNotFound(IdxOrName::Name(
+                            selected_name.to_string(),
+                        ))
+                        .into())
+                    }
+                })
+            }
+        }
+    }
+
+    pub(crate) fn idx_for_column(
+        &self,
+        col_names: &[String],
+        col_name: &str,
+        col_idx: usize,
+    ) -> Option<usize> {
+        match self {
+            SelectedColumns::All => None,
+            SelectedColumns::ByIndex(indices) => {
+                if indices.contains(&col_idx) {
+                    Some(col_idx)
+                } else {
+                    None
+                }
+            }
+            SelectedColumns::ByName(names) => {
+                // cannot use .contains() because we have &String and &str
+                if names.iter().any(|name| name == col_name) {
+                    col_names.iter().position(|name| name == col_name)
+                } else {
+                    None
+                }
+            }
+        }
+    }
+}
+
+impl TryFrom<Option<&PyList>> for SelectedColumns {
+    type Error = FastExcelError;
+
+    fn try_from(value: Option<&PyList>) -> FastExcelResult<Self> {
+        use FastExcelErrorKind::InvalidParameters;
+
+        match value {
+            None => Ok(Self::All),
+            Some(py_list) => {
+                if let Ok(name_vec) = py_list.extract::<Vec<String>>() {
+                    if name_vec.is_empty() {
+                        Err(InvalidParameters("list of select columns is empty".to_string()).into())
+                    } else {
+                        Ok(Self::ByName(name_vec))
+                    }
+                } else if let Ok(index_vec) = py_list.extract::<Vec<usize>>() {
+                    if index_vec.is_empty() {
+                        Err(InvalidParameters("list of select columns is empty".to_string()).into())
+                    } else {
+                        Ok(Self::ByIndex(index_vec))
+                    }
+                } else {
+                    Err(InvalidParameters(format!(
+                        "expected list[int] | list[str], got {py_list:?}"
+                    ))
+                    .into())
+                }
+            }
+        }
+    }
+}
+
 #[pyclass(name = "_ExcelSheet")]
 pub(crate) struct ExcelSheet {
     #[pyo3(get)]
@@ -87,6 +184,7 @@ pub(crate) struct ExcelSheet {
     total_height: Option<usize>,
     width: Option<usize>,
     schema_sample_rows: Option<usize>,
+    selected_columns: SelectedColumns,
 }
 
 impl ExcelSheet {
@@ -100,6 +198,7 @@ impl ExcelSheet {
         header: Header,
         pagination: Pagination,
         schema_sample_rows: Option<usize>,
+        selected_columns: SelectedColumns,
     ) -> Self {
         ExcelSheet {
             name,
@@ -107,6 +206,7 @@ impl ExcelSheet {
             pagination,
             data,
             schema_sample_rows,
+            selected_columns,
             height: None,
             total_height: None,
             width: None,
@@ -267,6 +367,7 @@ impl TryFrom<&ExcelSheet> for Schema {
             sheet.offset(),
             // If sample_rows is higher than the sheet's limit, use the limit instead
             std::cmp::min(sample_rows, sheet.limit()),
+            &sheet.selected_columns,
         )
     }
 }
@@ -277,50 +378,76 @@ impl TryFrom<&ExcelSheet> for RecordBatch {
     fn try_from(sheet: &ExcelSheet) -> FastExcelResult<Self> {
         let offset = sheet.offset();
         let limit = sheet.limit();
+
+        let column_names = sheet.column_names();
+
+        // Ensuring selected columns are valid
+        sheet
+            .selected_columns
+            .validate_columns(&column_names)
+            .with_context(|| "selected columns are invalid")?;
+
         let schema = Schema::try_from(sheet)
-            .with_context(|| format!("Could not build schema for sheet {}", sheet.name))?;
-        let mut iter = schema
-            .fields()
+            .with_context(|| format!("could not build schema for sheet {}", sheet.name))?;
+
+        let mut iter = column_names
             .iter()
             .enumerate()
-            .map(|(col_idx, field)| {
-                (
-                    field.name(),
-                    match field.data_type() {
-                        ArrowDataType::Boolean => {
-                            create_boolean_array(sheet.data(), col_idx, offset, limit)
-                        }
-                        ArrowDataType::Int64 => {
-                            create_int_array(sheet.data(), col_idx, offset, limit)
-                        }
-                        ArrowDataType::Float64 => {
-                            create_float_array(sheet.data(), col_idx, offset, limit)
-                        }
-                        ArrowDataType::Utf8 => {
-                            create_string_array(sheet.data(), col_idx, offset, limit)
-                        }
-                        ArrowDataType::Timestamp(TimeUnit::Millisecond, None) => {
-                            create_datetime_array(sheet.data(), col_idx, offset, limit)
-                        }
-                        ArrowDataType::Date32 => {
-                            create_date_array(sheet.data(), col_idx, offset, limit)
-                        }
-                        ArrowDataType::Duration(TimeUnit::Millisecond) => {
-                            create_duration_array(sheet.data(), col_idx, offset, limit)
-                        }
-                        ArrowDataType::Null => Arc::new(NullArray::new(limit - offset)),
-                        _ => unreachable!(),
-                    },
-                )
+            .filter_map(|(idx, column_name)| {
+                // checking if the current column has been selected
+                if let Some(col_idx) = match sheet.selected_columns {
+                    // All columns selected, return the current index
+                    SelectedColumns::All => Some(idx),
+                    // Otherwise, return its index. If None is found, it means the column was not
+                    // selected, and we will just continue
+                    _ => sheet
+                        .selected_columns
+                        .idx_for_column(&column_names, column_name, idx),
+                } {
+                    // At this point, we know for sure that the column is in the schema so we can
+                    // safely unwrap
+                    let field = schema.field_with_name(column_name).unwrap();
+                    Some((
+                        field.name(),
+                        match field.data_type() {
+                            ArrowDataType::Boolean => {
+                                create_boolean_array(sheet.data(), col_idx, offset, limit)
+                            }
+                            ArrowDataType::Int64 => {
+                                create_int_array(sheet.data(), col_idx, offset, limit)
+                            }
+                            ArrowDataType::Float64 => {
+                                create_float_array(sheet.data(), col_idx, offset, limit)
+                            }
+                            ArrowDataType::Utf8 => {
+                                create_string_array(sheet.data(), col_idx, offset, limit)
+                            }
+                            ArrowDataType::Timestamp(TimeUnit::Millisecond, None) => {
+                                create_datetime_array(sheet.data(), col_idx, offset, limit)
+                            }
+                            ArrowDataType::Date32 => {
+                                create_date_array(sheet.data(), col_idx, offset, limit)
+                            }
+                            ArrowDataType::Duration(TimeUnit::Millisecond) => {
+                                create_duration_array(sheet.data(), col_idx, offset, limit)
+                            }
+                            ArrowDataType::Null => Arc::new(NullArray::new(limit - offset)),
+                            _ => unreachable!(),
+                        },
+                    ))
+                } else {
+                    None
+                }
             })
             .peekable();
+
         // If the iterable is empty, try_from_iter returns an Err
         if iter.peek().is_none() {
             Ok(RecordBatch::new_empty(Arc::new(schema)))
         } else {
             RecordBatch::try_from_iter(iter)
                 .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())
-                .with_context(|| format!("Could not convert sheet {} to RecordBatch", sheet.name))
+                .with_context(|| format!("could not convert sheet {} to RecordBatch", sheet.name))
         }
     }
 }
@@ -361,14 +488,14 @@ impl ExcelSheet {
 
     pub fn to_arrow(&self, py: Python<'_>) -> PyResult<PyObject> {
         RecordBatch::try_from(self)
-            .with_context(|| format!("Could not create RecordBatch from sheet {}", self.name))
+            .with_context(|| format!("could not create RecordBatch from sheet \"{}\"", &self.name))
             .and_then(|rb| {
                 rb.to_pyarrow(py)
                     .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())
             })
             .with_context(|| {
                 format!(
-                    "Could not convert RecordBatch to pyarrow for sheet {}",
+                    "could not convert RecordBatch to pyarrow for sheet \"{}\"",
                     self.name
                 )
             })
@@ -379,3 +506,69 @@ impl ExcelSheet {
         format!("ExcelSheet<{}>", self.name)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn selected_columns_from_none() {
+        assert_eq!(
+            TryInto::<SelectedColumns>::try_into(None).unwrap(),
+            SelectedColumns::All
+        )
+    }
+
+    #[test]
+    fn selected_columns_from_list_of_valid_ints() {
+        Python::with_gil(|py| {
+            let py_list = PyList::new(py, vec![0, 1, 2]);
+            assert_eq!(
+                TryInto::<SelectedColumns>::try_into(Some(py_list)).unwrap(),
+                SelectedColumns::ByIndex(vec![0, 1, 2])
+            )
+        });
+    }
+
+    #[test]
+    fn selected_columns_from_list_of_valid_strings() {
+        Python::with_gil(|py| {
+            let py_list = PyList::new(py, vec!["foo", "bar"]);
+            assert_eq!(
+                TryInto::<SelectedColumns>::try_into(Some(py_list)).unwrap(),
+                SelectedColumns::ByName(vec!["foo".to_string(), "bar".to_string()])
+            )
+        });
+    }
+
+    #[test]
+    fn selected_columns_from_invalid_ints() {
+        Python::with_gil(|py| {
+            let py_list = PyList::new(py, vec![0, 2, -1]);
+            let err = TryInto::<SelectedColumns>::try_into(Some(py_list)).unwrap_err();
+
+            assert!(matches!(err.kind, FastExcelErrorKind::InvalidParameters(_)));
+        });
+    }
+
+    #[test]
+    fn selected_columns_from_empty_int_list() {
+        Python::with_gil(|py| {
+            let py_list = PyList::new(py, Vec::<usize>::new());
+            let err = TryInto::<SelectedColumns>::try_into(Some(py_list)).unwrap_err();
+
+            assert!(matches!(err.kind, FastExcelErrorKind::InvalidParameters(_)));
+        });
+    }
+
+    #[test]
+    fn selected_columns_from_empty_string_list() {
+        Python::with_gil(|py| {
+            let py_list = PyList::new(py, Vec::<String>::new());
+            let err = TryInto::<SelectedColumns>::try_into(Some(py_list)).unwrap_err();
+
+            assert!(matches!(err.kind, FastExcelErrorKind::InvalidParameters(_)));
+        });
+    }
+}
diff --git a/src/utils/arrow.rs b/src/utils/arrow.rs
index 33501d9..7da209b 100644
--- a/src/utils/arrow.rs
+++ b/src/utils/arrow.rs
@@ -3,7 +3,10 @@ use std::{collections::HashSet, sync::OnceLock};
 use arrow::datatypes::{DataType as ArrowDataType, Field, Schema, TimeUnit};
 use calamine::{CellErrorType, Data as CalData, DataType, Range};
 
-use crate::error::{FastExcelErrorKind, FastExcelResult};
+use crate::{
+    error::{FastExcelErrorKind, FastExcelResult},
+    types::excelsheet::SelectedColumns,
+};
 
 /// All the possible string values that should be considered as NULL
 const NULL_STRING_VALUES: [&str; 19] = [
@@ -136,12 +139,20 @@ pub(crate) fn arrow_schema_from_column_names_and_range(
     column_names: &[String],
     row_idx: usize,
     row_limit: usize,
+    selected_columns: &SelectedColumns,
 ) -> FastExcelResult<Schema> {
     let mut fields = Vec::with_capacity(column_names.len());
 
-    for (col_idx, name) in column_names.iter().enumerate() {
-        let col_type = get_arrow_column_type(range, row_idx, row_limit, col_idx)?;
-        fields.push(Field::new(&alias_for_name(name, &fields), col_type, true));
+    for (idx, name) in column_names.iter().enumerate() {
+        // If we have an index for the given column, extract it and add it to the schema. Otherwise,
+        // just ignore it
+        if let Some(col_idx) = match selected_columns {
+            SelectedColumns::All => Some(idx),
+            _ => selected_columns.idx_for_column(column_names, name, idx),
+        } {
+            let col_type = get_arrow_column_type(range, row_idx, row_limit, col_idx)?;
+            fields.push(Field::new(&alias_for_name(name, &fields), col_type, true));
+        }
     }
 
     Ok(Schema::new(fields))

From 309f8b43732f17c47a57d6be636e9fcaf1fe377f Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Mon, 26 Feb 2024 11:49:27 +0100
Subject: [PATCH 06/14] fix: mypy should be in lint-python make target

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 46ad571..f7c29fe 100644
--- a/Makefile
+++ b/Makefile
@@ -16,9 +16,9 @@ pdoc	= pdoc -o docs python/fastexcel
 lint-python:
 	$(ruff)
 	$(format)  --check --diff
+	$(mypy)
 
 lint-rust:
-	$(mypy)
 	$(clippy)
 
 lint: lint-rust lint-python

From 3acd8c81a8d8ab158981c633ce894b4da0bafd8e Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Mon, 26 Feb 2024 11:50:21 +0100
Subject: [PATCH 07/14] docs(Cargo.toml): fix typo in tests feature comment

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 96e8512..1210ccf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,5 +30,5 @@ rstest = { version = "0.18.2", default-features = false }
 extension-module = ["pyo3/extension-module"]
 default = ["extension-module"]
 # feature for tests only. This makes Python::with_gil auto-initialize Python
-# interpreters, which allows us ot instantiate Python objects in tests
+# interpreters, which allows us to instantiate Python objects in tests
 tests = ["pyo3/auto-initialize"]

From 8ff4192db63953845b904a4a92fae47d0ced7787 Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Mon, 26 Feb 2024 11:54:16 +0100
Subject: [PATCH 08/14] fix(tests): use sheet_name_or_idx

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 python/tests/test_column_selection.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/python/tests/test_column_selection.py b/python/tests/test_column_selection.py
index 4107e07..230872c 100644
--- a/python/tests/test_column_selection.py
+++ b/python/tests/test_column_selection.py
@@ -40,9 +40,11 @@ def test_single_sheet_all_columns(excel_reader_single_sheet: fastexcel.ExcelRead
 def test_single_sheet_subset_by_str(excel_reader_single_sheet: fastexcel.ExcelReader) -> None:
     expected = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}
 
-    for sheet_name_or_idx in [0, "January"]:
+    # explicit annotation needed: mypy 1.8 does not infer list[str | int] for this literal
+    sheets: list[str | int] = [0, "January"]
+    for sheet_name_or_idx in sheets:
         for col in ["Month", "Year"]:
-            sheet = excel_reader_single_sheet.load_sheet(0, use_columns=[col])
+            sheet = excel_reader_single_sheet.load_sheet(sheet_name_or_idx, use_columns=[col])
 
             pd_df = sheet.to_pandas()
             pd_assert_frame_equal(pd_df, pd.DataFrame({col: expected[col]}))
@@ -54,9 +56,10 @@ def test_single_sheet_subset_by_str(excel_reader_single_sheet: fastexcel.ExcelRe
 def test_single_sheet_subset_by_index(excel_reader_single_sheet: fastexcel.ExcelReader) -> None:
     expected = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}
 
-    for sheet_name_or_idx in [0, "January"]:
+    sheets: list[str | int] = [0, "January"]
+    for sheet_name_or_idx in sheets:
         for idx, col_name in enumerate(["Month", "Year"]):
-            sheet = excel_reader_single_sheet.load_sheet(0, use_columns=[idx])
+            sheet = excel_reader_single_sheet.load_sheet(sheet_name_or_idx, use_columns=[idx])
 
             pd_df = sheet.to_pandas()
             pd_assert_frame_equal(pd_df, pd.DataFrame({col_name: expected[col_name]}))

From bbb35423ec6e13ea855747ca7c73abbe80978ce8 Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Mon, 26 Feb 2024 12:11:56 +0100
Subject: [PATCH 09/14] refactor: only check for py_list length once

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 python/tests/test_column_selection.py |  2 +-
 src/types/excelsheet.rs               | 16 +++++-----------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/python/tests/test_column_selection.py b/python/tests/test_column_selection.py
index 230872c..eff55f0 100644
--- a/python/tests/test_column_selection.py
+++ b/python/tests/test_column_selection.py
@@ -218,7 +218,7 @@ def test_single_sheet_invalid_column_indices_negative_integer(
 def test_single_sheet_invalid_column_indices_empty_list(
     excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
 ) -> None:
-    expected_message = """invalid parameters: list of select columns is empty
+    expected_message = """invalid parameters: list of selected columns is empty
 Context:
     0: expected selected columns to be list[str] | list[int] | None, got Some([])
 """
diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs
index 964f07a..8da0c32 100644
--- a/src/types/excelsheet.rs
+++ b/src/types/excelsheet.rs
@@ -150,18 +150,12 @@ impl TryFrom<Option<&PyList>> for SelectedColumns {
         match value {
             None => Ok(Self::All),
             Some(py_list) => {
-                if let Ok(name_vec) = py_list.extract::<Vec<String>>() {
-                    if name_vec.is_empty() {
-                        Err(InvalidParameters("list of select columns is empty".to_string()).into())
-                    } else {
-                        Ok(Self::ByName(name_vec))
-                    }
+                if py_list.is_empty() {
+                    Err(InvalidParameters("list of selected columns is empty".to_string()).into())
+                } else if let Ok(name_vec) = py_list.extract::<Vec<String>>() {
+                    Ok(Self::ByName(name_vec))
                 } else if let Ok(index_vec) = py_list.extract::<Vec<usize>>() {
-                    if index_vec.is_empty() {
-                        Err(InvalidParameters("list of select columns is empty".to_string()).into())
-                    } else {
-                        Ok(Self::ByIndex(index_vec))
-                    }
+                    Ok(Self::ByIndex(index_vec))
                 } else {
                     Err(InvalidParameters(format!(
                         "expected list[int] | list[str], got {py_list:?}"

From 36c054d59c2a3d81c1c6888b54c27d02c8d4f6e6 Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Mon, 26 Feb 2024 12:15:19 +0100
Subject: [PATCH 10/14] docs: documented use_columns param

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 python/fastexcel/__init__.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py
index 4776c21..3dab411 100644
--- a/python/fastexcel/__init__.py
+++ b/python/fastexcel/__init__.py
@@ -119,6 +119,9 @@ def load_sheet_by_name(
         :param schema_sample_rows: Specifies how many rows should be used to determine
                                    the dtype of a column.
                                    If `None`, all rows will be used.
+        :param use_columns: Specifies the columns to use. Can either be a list of column names, or
+                            a list of column indices (starting at 0).
+                            If `None`, all columns will be used.
         """
         return ExcelSheet(
             self._reader.load_sheet_by_name(
@@ -158,6 +161,9 @@ def load_sheet_by_idx(
         :param schema_sample_rows: Specifies how many rows should be used to determine
                                    the dtype of a column.
                                    If `None`, all rows will be used.
+        :param use_columns: Specifies the columns to use. Can either be a list of column names, or
+                            a list of column indices (starting at 0).
+                            If `None`, all columns will be used.
         """
         if idx < 0:
             raise ValueError(f"Expected idx to be > 0, got {idx}")

From f5a8d4dba834f53a53e4983cc457fd133801217d Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Mon, 26 Feb 2024 12:16:19 +0100
Subject: [PATCH 11/14] ci: move check-docs jobs higher in the CI file to get
 the result on top in PR previews

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 .github/workflows/CI.yml | 56 ++++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 732fdc2..f937e1f 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -46,6 +46,34 @@ jobs:
         source .venv/bin/activate
         make lint
 
+  check-docs:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Set up rust toolchain
+        uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+      - run: |
+          git config user.name github-actions
+          git config user.email github-actions@github.com
+
+          # venv required by maturin
+          python3 -m venv .venv
+          source .venv/bin/activate
+
+          make install-test-requirements
+          make install-doc-requirements
+          # Required for pdoc to be able to import the sources
+          make dev-install
+          make doc
+
   # GitHub provides only x86_64 runners, so we cannot test on arm architecture
   test:
     runs-on: ${{ matrix.os }}
@@ -110,31 +138,3 @@ jobs:
         command: build
         args: "-o dist --interpreter python${{ matrix.python-version }}"
         target: ${{ steps.target.outputs.target }}
-
-  check-docs:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-      - name: Set up rust toolchain
-        uses: actions-rs/toolchain@v1
-        with:
-          profile: minimal
-          toolchain: stable
-          override: true
-      - run: |
-          git config user.name github-actions
-          git config user.email github-actions@github.com
-
-          # venv required by maturin
-          python3 -m venv .venv
-          source .venv/bin/activate
-
-          make install-test-requirements
-          make install-doc-requirements
-          # Required for pdoc to be able to import the sources
-          make dev-install
-          make doc

From 045a11428f1dfa2a011b719436b07cf7b05fa91c Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Mon, 26 Feb 2024 12:30:49 +0100
Subject: [PATCH 12/14] feat: added a selected_columns property to ExcelSheet

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 python/fastexcel/__init__.py          |  5 +++++
 python/fastexcel/_fastexcel.pyi       |  3 +++
 python/tests/test_column_selection.py |  5 +++++
 src/types/excelsheet.rs               | 13 +++++++++++++
 4 files changed, 26 insertions(+)

diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py
index 3dab411..18d4992 100644
--- a/python/fastexcel/__init__.py
+++ b/python/fastexcel/__init__.py
@@ -55,6 +55,11 @@ def total_height(self) -> int:
         """The sheet's total height"""
         return self._sheet.total_height
 
+    @property
+    def selected_columns(self) -> list[str] | list[int] | None:
+        """The sheet's selected columns"""
+        return self._sheet.selected_columns
+
     def to_arrow(self) -> pa.RecordBatch:
         """Converts the sheet to a pyarrow `RecordBatch`"""
         return self._sheet.to_arrow()
diff --git a/python/fastexcel/_fastexcel.pyi b/python/fastexcel/_fastexcel.pyi
index 7db8493..df7ead0 100644
--- a/python/fastexcel/_fastexcel.pyi
+++ b/python/fastexcel/_fastexcel.pyi
@@ -18,6 +18,9 @@ class _ExcelSheet:
     @property
     def offset(self) -> int:
         """The sheet's offset before data starts"""
+    @property
+    def selected_columns(self) -> list[str] | list[int] | None:
+        """The sheet's selected columns"""
     def to_arrow(self) -> pa.RecordBatch:
         """Converts the sheet to a pyarrow `RecordBatch`"""
 
diff --git a/python/tests/test_column_selection.py b/python/tests/test_column_selection.py
index eff55f0..ab0cd07 100644
--- a/python/tests/test_column_selection.py
+++ b/python/tests/test_column_selection.py
@@ -21,6 +21,7 @@ def test_single_sheet_all_columns(excel_reader_single_sheet: fastexcel.ExcelRead
     sheet = excel_reader_single_sheet.load_sheet(0)
 
     sheet_explicit_arg = excel_reader_single_sheet.load_sheet(0, use_columns=None)
+    assert sheet.selected_columns is None
 
     expected = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}
     expected_pd_df = pd.DataFrame(expected)
@@ -45,6 +46,7 @@ def test_single_sheet_subset_by_str(excel_reader_single_sheet: fastexcel.ExcelRe
     for sheet_name_or_idx in sheets:
         for col in ["Month", "Year"]:
             sheet = excel_reader_single_sheet.load_sheet(sheet_name_or_idx, use_columns=[col])
+            assert sheet.selected_columns == [col]
 
             pd_df = sheet.to_pandas()
             pd_assert_frame_equal(pd_df, pd.DataFrame({col: expected[col]}))
@@ -60,6 +62,7 @@ def test_single_sheet_subset_by_index(excel_reader_single_sheet: fastexcel.Excel
     for sheet_name_or_idx in sheets:
         for idx, col_name in enumerate(["Month", "Year"]):
             sheet = excel_reader_single_sheet.load_sheet(sheet_name_or_idx, use_columns=[idx])
+            assert sheet.selected_columns == [idx]
 
             pd_df = sheet.to_pandas()
             pd_assert_frame_equal(pd_df, pd.DataFrame({col_name: expected[col_name]}))
@@ -97,6 +100,7 @@ def test_single_sheet_with_unnamed_columns(
     sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
         "With unnamed columns", use_columns=use_columns_str
     )
+    assert sheet.selected_columns == use_columns_str
 
     pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
@@ -104,6 +108,7 @@ def test_single_sheet_with_unnamed_columns(
     sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
         "With unnamed columns", use_columns=use_columns_idx
     )
+    assert sheet.selected_columns == use_columns_idx
 
     pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs
index 8da0c32..b765306 100644
--- a/src/types/excelsheet.rs
+++ b/src/types/excelsheet.rs
@@ -139,6 +139,14 @@ impl SelectedColumns {
             }
         }
     }
+
+    pub(crate) fn to_python<'p>(&self, py: Python<'p>) -> Option<&'p PyList> {
+        match self {
+            SelectedColumns::All => None,
+            SelectedColumns::ByIndex(idx_vec) => Some(PyList::new(py, idx_vec)),
+            SelectedColumns::ByName(name_vec) => Some(PyList::new(py, name_vec)),
+        }
+    }
 }
 
 impl TryFrom<Option<&PyList>> for SelectedColumns {
@@ -480,6 +488,11 @@ impl ExcelSheet {
         self.header.offset() + self.pagination.offset()
     }
 
+    #[getter]
+    pub fn selected_columns<'p>(&'p self, py: Python<'p>) -> Option<&PyList> {
+        self.selected_columns.to_python(py)
+    }
+
     pub fn to_arrow(&self, py: Python<'_>) -> PyResult<PyObject> {
         RecordBatch::try_from(self)
             .with_context(|| format!("could not create RecordBatch from sheet \"{}\"", &self.name))

From 1edc77a6af945613ab3790d431dd5be6eee4bd9c Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Mon, 26 Feb 2024 14:27:39 +0100
Subject: [PATCH 13/14] refactor: validate columns on sheet instantiation

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 python/tests/test_column_selection.py | 14 +++-----
 src/types/excelreader.rs              | 10 +++---
 src/types/excelsheet.rs               | 49 +++++++++++++++++----------
 test.py                               |  5 ++-
 4 files changed, 45 insertions(+), 33 deletions(-)

diff --git a/python/tests/test_column_selection.py b/python/tests/test_column_selection.py
index ab0cd07..2129173 100644
--- a/python/tests/test_column_selection.py
+++ b/python/tests/test_column_selection.py
@@ -236,14 +236,10 @@ def test_single_sheet_invalid_column_indices_column_does_not_exist_str(
 ) -> None:
     expected_message = """column with name "nope" not found
 Context:
-    0: selected columns are invalid
-    1: could not create RecordBatch from sheet "January"
-    2: could not convert RecordBatch to pyarrow for sheet "January"
+    0: selected columns are invalid, available columns are: ["Month", "Year"]
 """
     with pytest.raises(fastexcel.ColumnNotFoundError, match=re.escape(expected_message)):
-        excel_reader_single_sheet_with_unnamed_columns.load_sheet(
-            0, use_columns=["nope"]
-        ).to_arrow()
+        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=["nope"])
 
 
 def test_single_sheet_invalid_column_indices_column_does_not_exist_int(
@@ -251,9 +247,7 @@ def test_single_sheet_invalid_column_indices_column_does_not_exist_int(
 ) -> None:
     expected_message = """column at index 42 not found
 Context:
-    0: selected columns are invalid
-    1: could not create RecordBatch from sheet "January"
-    2: could not convert RecordBatch to pyarrow for sheet "January"
+    0: selected columns are invalid, available columns are: ["Month", "Year"]
 """
     with pytest.raises(fastexcel.ColumnNotFoundError, match=re.escape(expected_message)):
-        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[42]).to_arrow()
+        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[42])
diff --git a/src/types/excelreader.rs b/src/types/excelreader.rs
index e6986da..f198061 100644
--- a/src/types/excelreader.rs
+++ b/src/types/excelreader.rs
@@ -73,14 +73,15 @@ impl ExcelReader {
         let header = Header::new(header_row, column_names);
         let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?;
         let selected_columns = use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | None, got {use_columns:?}")).into_pyresult()?;
-        Ok(ExcelSheet::new(
+        ExcelSheet::try_new(
             name,
             range,
             header,
             pagination,
             schema_sample_rows,
             selected_columns,
-        ))
+        )
+        .into_pyresult()
     }
 
     #[pyo3(signature = (
@@ -131,13 +132,14 @@ impl ExcelReader {
         let header = Header::new(header_row, column_names);
         let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?;
         let selected_columns = use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | None, got {use_columns:?}")).into_pyresult()?;
-        Ok(ExcelSheet::new(
+        ExcelSheet::try_new(
             name,
             range,
             header,
             pagination,
             schema_sample_rows,
             selected_columns,
-        ))
+        )
+        .into_pyresult()
     }
 }
diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs
index b765306..6da29fa 100644
--- a/src/types/excelsheet.rs
+++ b/src/types/excelsheet.rs
@@ -187,6 +187,7 @@ pub(crate) struct ExcelSheet {
     width: Option<usize>,
     schema_sample_rows: Option<usize>,
     selected_columns: SelectedColumns,
+    available_columns: Vec<String>,
 }
 
 impl ExcelSheet {
@@ -194,15 +195,15 @@ impl ExcelSheet {
         &self.data
     }
 
-    pub(crate) fn new(
+    pub(crate) fn try_new(
         name: String,
         data: Range<CalData>,
         header: Header,
         pagination: Pagination,
         schema_sample_rows: Option<usize>,
         selected_columns: SelectedColumns,
-    ) -> Self {
-        ExcelSheet {
+    ) -> FastExcelResult<Self> {
+        let mut sheet = ExcelSheet {
             name,
             header,
             pagination,
@@ -212,10 +213,27 @@ impl ExcelSheet {
             height: None,
             total_height: None,
             width: None,
-        }
+            // Placeholder, overwritten below once the selected columns are validated
+            available_columns: Vec::with_capacity(0),
+        };
+
+        let available_columns = sheet.get_available_columns();
+
+        // Ensuring selected columns are valid
+        sheet
+            .selected_columns
+            .validate_columns(&available_columns)
+            .with_context(|| {
+                format!(
+                    "selected columns are invalid, available columns are: {available_columns:?}"
+                )
+            })?;
+
+        sheet.available_columns = available_columns;
+        Ok(sheet)
     }
 
-    pub(crate) fn column_names(&self) -> Vec<String> {
+    fn get_available_columns(&self) -> Vec<String> {
         let width = self.data.width();
         match &self.header {
             Header::None => (0..width)
@@ -365,7 +383,7 @@ impl TryFrom<&ExcelSheet> for Schema {
 
         arrow_schema_from_column_names_and_range(
             sheet.data(),
-            &sheet.column_names(),
+            &sheet.available_columns,
             sheet.offset(),
             // If sample_rows is higher than the sheet's limit, use the limit instead
             std::cmp::min(sample_rows, sheet.limit()),
@@ -381,18 +399,11 @@ impl TryFrom<&ExcelSheet> for RecordBatch {
         let offset = sheet.offset();
         let limit = sheet.limit();
 
-        let column_names = sheet.column_names();
-
-        // Ensuring selected columns are valid
-        sheet
-            .selected_columns
-            .validate_columns(&column_names)
-            .with_context(|| "selected columns are invalid")?;
-
         let schema = Schema::try_from(sheet)
             .with_context(|| format!("could not build schema for sheet {}", sheet.name))?;
 
-        let mut iter = column_names
+        let mut iter = sheet
+            .available_columns
             .iter()
             .enumerate()
             .filter_map(|(idx, column_name)| {
@@ -402,9 +413,11 @@ impl TryFrom<&ExcelSheet> for RecordBatch {
                     SelectedColumns::All => Some(idx),
                     // Otherwise, return its index. If None is found, it means the column was not
                     // selected, and we will just continue
-                    _ => sheet
-                        .selected_columns
-                        .idx_for_column(&column_names, column_name, idx),
+                    _ => sheet.selected_columns.idx_for_column(
+                        &sheet.available_columns,
+                        column_name,
+                        idx,
+                    ),
                 } {
                     // At this point, we know for sure that the column is in the schema so we can
                     // safely unwrap
diff --git a/test.py b/test.py
index 163f028..7ce0f28 100644
--- a/test.py
+++ b/test.py
@@ -6,14 +6,17 @@
 def get_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser()
     parser.add_argument("file")
+    parser.add_argument("-c", "--column", type=str, nargs="+", help="the columns to use")
     return parser.parse_args()
 
 
 def main():
     args = get_args()
     excel_file = fastexcel.read_excel(args.file)
+    use_columns = args.column or None
+
     for sheet_name in excel_file.sheet_names:
-        excel_file.load_sheet_by_name(sheet_name).to_pandas()
+        excel_file.load_sheet_by_name(sheet_name, use_columns=use_columns).to_arrow()
 
 
 if __name__ == "__main__":

From 61cfc3936b29cb4a7908c593ec9feadaa70911bf Mon Sep 17 00:00:00 2001
From: Luka Peschke <luka.peschke@toucantoco.com>
Date: Mon, 26 Feb 2024 18:06:58 +0100
Subject: [PATCH 14/14] feat: added an available_columns property

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---
 python/fastexcel/__init__.py          |  5 +++++
 python/fastexcel/_fastexcel.pyi       |  3 +++
 python/tests/test_column_selection.py | 13 +++++++++++++
 src/types/excelsheet.rs               |  5 +++++
 4 files changed, 26 insertions(+)

diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py
index 18d4992..b57f8f4 100644
--- a/python/fastexcel/__init__.py
+++ b/python/fastexcel/__init__.py
@@ -60,6 +60,11 @@ def selected_columns(self) -> list[str] | list[int] | None:
         """The sheet's selected columns"""
         return self._sheet.selected_columns
 
+    @property
+    def available_columns(self) -> list[str]:
+        """The columns available for the given sheet"""
+        return self._sheet.available_columns
+
     def to_arrow(self) -> pa.RecordBatch:
         """Converts the sheet to a pyarrow `RecordBatch`"""
         return self._sheet.to_arrow()
diff --git a/python/fastexcel/_fastexcel.pyi b/python/fastexcel/_fastexcel.pyi
index df7ead0..b4e2c36 100644
--- a/python/fastexcel/_fastexcel.pyi
+++ b/python/fastexcel/_fastexcel.pyi
@@ -21,6 +21,9 @@ class _ExcelSheet:
     @property
     def selected_columns(self) -> list[str] | list[int] | None:
         """The sheet's selected columns"""
+    @property
+    def available_columns(self) -> list[str]:
+        """The columns available for the given sheet"""
     def to_arrow(self) -> pa.RecordBatch:
         """Converts the sheet to a pyarrow `RecordBatch`"""
 
diff --git a/python/tests/test_column_selection.py b/python/tests/test_column_selection.py
index 2129173..8fa363c 100644
--- a/python/tests/test_column_selection.py
+++ b/python/tests/test_column_selection.py
@@ -22,6 +22,7 @@ def test_single_sheet_all_columns(excel_reader_single_sheet: fastexcel.ExcelRead
 
     sheet_explicit_arg = excel_reader_single_sheet.load_sheet(0, use_columns=None)
     assert sheet.selected_columns is None
+    assert sheet.available_columns == ["Month", "Year"]
 
     expected = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}
     expected_pd_df = pd.DataFrame(expected)
@@ -47,6 +48,7 @@ def test_single_sheet_subset_by_str(excel_reader_single_sheet: fastexcel.ExcelRe
         for col in ["Month", "Year"]:
             sheet = excel_reader_single_sheet.load_sheet(sheet_name_or_idx, use_columns=[col])
             assert sheet.selected_columns == [col]
+            assert sheet.available_columns == ["Month", "Year"]
 
             pd_df = sheet.to_pandas()
             pd_assert_frame_equal(pd_df, pd.DataFrame({col: expected[col]}))
@@ -63,6 +65,7 @@ def test_single_sheet_subset_by_index(excel_reader_single_sheet: fastexcel.Excel
         for idx, col_name in enumerate(["Month", "Year"]):
             sheet = excel_reader_single_sheet.load_sheet(sheet_name_or_idx, use_columns=[idx])
             assert sheet.selected_columns == [idx]
+            assert sheet.available_columns == ["Month", "Year"]
 
             pd_df = sheet.to_pandas()
             pd_assert_frame_equal(pd_df, pd.DataFrame({col_name: expected[col_name]}))
@@ -101,6 +104,7 @@ def test_single_sheet_with_unnamed_columns(
         "With unnamed columns", use_columns=use_columns_str
     )
     assert sheet.selected_columns == use_columns_str
+    assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]
 
     pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
@@ -109,6 +113,7 @@ def test_single_sheet_with_unnamed_columns(
         "With unnamed columns", use_columns=use_columns_idx
     )
     assert sheet.selected_columns == use_columns_idx
+    assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]
 
     pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
@@ -131,6 +136,7 @@ def test_single_sheet_with_unnamed_columns_and_pagination(
     sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
         "With unnamed columns", use_columns=use_columns_str, n_rows=1
     )
+    assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]
 
     pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
@@ -138,6 +144,7 @@ def test_single_sheet_with_unnamed_columns_and_pagination(
     sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
         "With unnamed columns", use_columns=use_columns_idx, n_rows=1
     )
+    assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]
 
     pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
@@ -152,6 +159,7 @@ def test_single_sheet_with_unnamed_columns_and_pagination(
     sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
         "With unnamed columns", use_columns=use_columns_str, skip_rows=1
     )
+    assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]
 
     pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
@@ -159,6 +167,7 @@ def test_single_sheet_with_unnamed_columns_and_pagination(
     sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
         "With unnamed columns", use_columns=use_columns_idx, skip_rows=1
     )
+    assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]
 
     pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
@@ -180,6 +189,7 @@ def test_single_sheet_with_unnamed_columns_and_pagination_and_column_names(
     sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
         "With unnamed columns", use_columns=use_columns_str, skip_rows=1, column_names=column_names
     )
+    assert sheet.available_columns == column_names
 
     pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
@@ -187,6 +197,7 @@ def test_single_sheet_with_unnamed_columns_and_pagination_and_column_names(
     sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
         "With unnamed columns", use_columns=use_columns_idx, skip_rows=1, column_names=column_names
     )
+    assert sheet.available_columns == column_names
 
     pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
@@ -197,6 +208,7 @@ def test_single_sheet_with_unnamed_columns_and_pagination_and_column_names(
     sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
         "With unnamed columns", use_columns=use_columns_str, skip_rows=2, column_names=column_names
     )
+    assert sheet.available_columns == column_names
 
     pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected_first_row_skipped))
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected_first_row_skipped))
@@ -204,6 +216,7 @@ def test_single_sheet_with_unnamed_columns_and_pagination_and_column_names(
     sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
         "With unnamed columns", use_columns=use_columns_idx, skip_rows=2, column_names=column_names
     )
+    assert sheet.available_columns == column_names
 
     pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected_first_row_skipped))
     pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected_first_row_skipped))
diff --git a/src/types/excelsheet.rs b/src/types/excelsheet.rs
index 6da29fa..5f42dff 100644
--- a/src/types/excelsheet.rs
+++ b/src/types/excelsheet.rs
@@ -506,6 +506,11 @@ impl ExcelSheet {
         self.selected_columns.to_python(py)
     }
 
+    #[getter]
+    pub fn available_columns<'p>(&'p self, py: Python<'p>) -> &PyList {
+        PyList::new(py, &self.available_columns)
+    }
+
     pub fn to_arrow(&self, py: Python<'_>) -> PyResult<PyObject> {
         RecordBatch::try_from(self)
             .with_context(|| format!("could not create RecordBatch from sheet \"{}\"", &self.name))