Skip to content

Commit

Permalink
feat: prepare API and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
PrettyWood committed Jun 30, 2024
1 parent 162cde3 commit 1a3990b
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 9 deletions.
10 changes: 6 additions & 4 deletions python/fastexcel/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import sys
from typing import TYPE_CHECKING, Literal
from typing import TYPE_CHECKING, Callable, Literal

if sys.version_info < (3, 10):
from typing_extensions import TypeAlias
Expand Down Expand Up @@ -128,7 +128,7 @@ def load_sheet(
skip_rows: int = 0,
n_rows: int | None = None,
schema_sample_rows: int | None = 1_000,
use_columns: list[str] | list[int] | str | None = None,
use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
dtypes: DTypeMap | None = None,
) -> ExcelSheet:
"""Loads a sheet by index or name.
Expand All @@ -153,6 +153,8 @@ def load_sheet(
- A string, a comma separated list of Excel column letters and column
ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
`A,B,C,D,E` and `A,C,E,F`)
- A callable, a function that takes a `ColumnInfo` and returns a boolean
indicating whether the column should be used
:param dtypes: An optional dict of dtypes. Keys can be column indices or names
"""
return ExcelSheet(
Expand All @@ -177,7 +179,7 @@ def load_sheet_by_name(
skip_rows: int = 0,
n_rows: int | None = None,
schema_sample_rows: int | None = 1_000,
use_columns: list[str] | list[int] | str | None = None,
use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
dtypes: DTypeMap | None = None,
) -> ExcelSheet:
"""Loads a sheet by name.
Expand Down Expand Up @@ -206,7 +208,7 @@ def load_sheet_by_idx(
skip_rows: int = 0,
n_rows: int | None = None,
schema_sample_rows: int | None = 1_000,
use_columns: list[str] | list[int] | str | None = None,
use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
dtypes: DTypeMap | None = None,
) -> ExcelSheet:
"""Loads a sheet by index.
Expand Down
4 changes: 2 additions & 2 deletions python/fastexcel/_fastexcel.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Literal
from typing import Callable, Literal

import pyarrow as pa

Expand Down Expand Up @@ -70,7 +70,7 @@ class _ExcelReader:
skip_rows: int = 0,
n_rows: int | None = None,
schema_sample_rows: int | None = 1_000,
use_columns: list[str] | list[int] | str | None = None,
use_columns: list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None = None,
dtypes: DTypeMap | None = None,
) -> _ExcelSheet: ...
@property
Expand Down
46 changes: 44 additions & 2 deletions python/tests/test_column_selection.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# ruff: noqa: E501
from __future__ import annotations

import re
Expand Down Expand Up @@ -307,7 +308,7 @@ def test_single_sheet_invalid_column_indices_negative_integer(
expected_message = """invalid parameters: expected list[int] | list[str], got [-2]
Context:
0: could not determine selected columns from provided object: [-2]
1: expected selected columns to be list[str] | list[int] | str | None, got Some([-2])
1: expected selected columns to be list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None, got Some([-2])
"""
with pytest.raises(fastexcel.InvalidParametersError, match=re.escape(expected_message)):
excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[-2])
Expand All @@ -319,7 +320,7 @@ def test_single_sheet_invalid_column_indices_empty_list(
expected_message = """invalid parameters: list of selected columns is empty
Context:
0: could not determine selected columns from provided object: []
1: expected selected columns to be list[str] | list[int] | str | None, got Some([])
1: expected selected columns to be list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None, got Some([])
"""
with pytest.raises(fastexcel.InvalidParametersError, match=re.escape(expected_message)):
excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[])
Expand Down Expand Up @@ -411,3 +412,44 @@ def test_use_columns_with_column_names() -> None:
pl.col("dates_renamed").str.strptime(pl.Datetime, "%F %T").dt.cast_time_unit("ms")
),
)


def test_use_columns_with_callable() -> None:
    """Exercise `use_columns` when it is given a predicate instead of a list or string.

    The sheet at index 2 of the multi-sheet fixture is loaded once without any
    selection and then once per predicate (by name prefix, by index parity and
    by dtype). Each time, the `(name, dtype)` pairs reported by
    `available_columns` are compared against the expected columns.
    """
    # NOTE(review): every assertion reads `available_columns`; confirm that this
    # attribute is the one expected to reflect the `use_columns` filter.
    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-sheet.xlsx"))

    def described_columns(loaded_sheet) -> list[tuple[str, str]]:
        # Reduce each column info to the two fields the assertions care about.
        return [(info.name, info.dtype) for info in loaded_sheet.available_columns]

    # Baseline: no selection at all.
    unfiltered = reader.load_sheet(2)
    assert described_columns(unfiltered) == [
        ("col1", "float"),
        ("__UNNAMED__1", "float"),
        ("col3", "string"),
        ("__UNNAMED__3", "float"),
        ("col5", "string"),
    ]

    # Predicate on the column name.
    named_only = reader.load_sheet(2, use_columns=lambda column: column.name.startswith("col"))
    assert described_columns(named_only) == [
        ("col1", "float"),
        ("col3", "string"),
        ("col5", "string"),
    ]

    # Predicate on the column index: keep odd indices only.
    odd_indexed = reader.load_sheet(2, use_columns=lambda column: column.index % 2 == 1)
    assert described_columns(odd_indexed) == [
        ("__UNNAMED__1", "float"),
        ("__UNNAMED__3", "float"),
    ]

    # Predicate on the column dtype.
    strings_only = reader.load_sheet(2, use_columns=lambda column: column.dtype == "string")
    assert described_columns(strings_only) == [
        ("col3", "string"),
        ("col5", "string"),
    ]
2 changes: 1 addition & 1 deletion src/types/python/excelreader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ impl ExcelReader {
}

fn build_selected_columns(use_columns: Option<&PyAny>) -> FastExcelResult<SelectedColumns> {
use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | str | None, got {use_columns:?}"))
use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None, got {use_columns:?}"))
}

#[allow(clippy::too_many_arguments)]
Expand Down

0 comments on commit 1a3990b

Please sign in to comment.