Skip to content

Commit

Permalink
feat: added an available_columns property
Browse files (browse the repository at this point in the history)
Signed-off-by: Luka Peschke <[email protected]>
  • Loading branch information
lukapeschke committed Feb 26, 2024
1 parent 1edc77a commit 61cfc39
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 0 deletions.
5 changes: 5 additions & 0 deletions python/fastexcel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ def selected_columns(self) -> list[str] | list[int] | None:
"""The sheet's selected columns"""
return self._sheet.selected_columns

@property
def available_columns(self) -> list[str]:
"""The columns available for the given sheet.

This is the full list of column names of the sheet, independent of any
`use_columns` selection (see `selected_columns` for the selection itself).
Columns without a header show up under a generated `__UNNAMED__<index>` name.
"""
# Plain delegation to the underlying sheet object.
return self._sheet.available_columns

def to_arrow(self) -> pa.RecordBatch:
"""Converts the sheet to a pyarrow `RecordBatch`"""
return self._sheet.to_arrow()
Expand Down
3 changes: 3 additions & 0 deletions python/fastexcel/_fastexcel.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ class _ExcelSheet:
@property
def selected_columns(self) -> list[str] | list[int] | None:
"""The sheet's selected columns"""
@property
def available_columns(self) -> list[str]:
"""The columns available for the given sheet.

This is the full list of column names of the sheet, independent of any
`use_columns` selection (see `selected_columns` for the selection itself).
"""
def to_arrow(self) -> pa.RecordBatch:
"""Converts the sheet to a pyarrow `RecordBatch`"""

Expand Down
13 changes: 13 additions & 0 deletions python/tests/test_column_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def test_single_sheet_all_columns(excel_reader_single_sheet: fastexcel.ExcelRead

sheet_explicit_arg = excel_reader_single_sheet.load_sheet(0, use_columns=None)
assert sheet.selected_columns is None
assert sheet.available_columns == ["Month", "Year"]

expected = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}
expected_pd_df = pd.DataFrame(expected)
Expand All @@ -47,6 +48,7 @@ def test_single_sheet_subset_by_str(excel_reader_single_sheet: fastexcel.ExcelRe
for col in ["Month", "Year"]:
sheet = excel_reader_single_sheet.load_sheet(sheet_name_or_idx, use_columns=[col])
assert sheet.selected_columns == [col]
assert sheet.available_columns == ["Month", "Year"]

pd_df = sheet.to_pandas()
pd_assert_frame_equal(pd_df, pd.DataFrame({col: expected[col]}))
Expand All @@ -63,6 +65,7 @@ def test_single_sheet_subset_by_index(excel_reader_single_sheet: fastexcel.Excel
for idx, col_name in enumerate(["Month", "Year"]):
sheet = excel_reader_single_sheet.load_sheet(sheet_name_or_idx, use_columns=[idx])
assert sheet.selected_columns == [idx]
assert sheet.available_columns == ["Month", "Year"]

pd_df = sheet.to_pandas()
pd_assert_frame_equal(pd_df, pd.DataFrame({col_name: expected[col_name]}))
Expand Down Expand Up @@ -101,6 +104,7 @@ def test_single_sheet_with_unnamed_columns(
"With unnamed columns", use_columns=use_columns_str
)
assert sheet.selected_columns == use_columns_str
assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]

pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
Expand All @@ -109,6 +113,7 @@ def test_single_sheet_with_unnamed_columns(
"With unnamed columns", use_columns=use_columns_idx
)
assert sheet.selected_columns == use_columns_idx
assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]

pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
Expand All @@ -131,13 +136,15 @@ def test_single_sheet_with_unnamed_columns_and_pagination(
sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
"With unnamed columns", use_columns=use_columns_str, n_rows=1
)
assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]

pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))

sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
"With unnamed columns", use_columns=use_columns_idx, n_rows=1
)
assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]

pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
Expand All @@ -152,13 +159,15 @@ def test_single_sheet_with_unnamed_columns_and_pagination(
sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
"With unnamed columns", use_columns=use_columns_str, skip_rows=1
)
assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]

pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))

sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
"With unnamed columns", use_columns=use_columns_idx, skip_rows=1
)
assert sheet.available_columns == ["col1", "__UNNAMED__1", "col3", "__UNNAMED__3", "col5"]

pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
Expand All @@ -180,13 +189,15 @@ def test_single_sheet_with_unnamed_columns_and_pagination_and_column_names(
sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
"With unnamed columns", use_columns=use_columns_str, skip_rows=1, column_names=column_names
)
assert sheet.available_columns == column_names

pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))

sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
"With unnamed columns", use_columns=use_columns_idx, skip_rows=1, column_names=column_names
)
assert sheet.available_columns == column_names

pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
Expand All @@ -197,13 +208,15 @@ def test_single_sheet_with_unnamed_columns_and_pagination_and_column_names(
sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
"With unnamed columns", use_columns=use_columns_str, skip_rows=2, column_names=column_names
)
assert sheet.available_columns == column_names

pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected_first_row_skipped))
pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected_first_row_skipped))

sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
"With unnamed columns", use_columns=use_columns_idx, skip_rows=2, column_names=column_names
)
assert sheet.available_columns == column_names

pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected_first_row_skipped))
pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected_first_row_skipped))
Expand Down
5 changes: 5 additions & 0 deletions src/types/excelsheet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,11 @@ impl ExcelSheet {
self.selected_columns.to_python(py)
}

// Getter exposed to Python as the `available_columns` property (via `#[getter]`).
//
// Builds a new Python list from `self.available_columns` on each access.
// The returned `&PyList` shares the `'p` lifetime with both `self` and the `py` token.
#[getter]
pub fn available_columns<'p>(&'p self, py: Python<'p>) -> &PyList {
// `PyList::new` creates the list from the borrowed field; the field itself is not moved.
PyList::new(py, &self.available_columns)
}

pub fn to_arrow(&self, py: Python<'_>) -> PyResult<PyObject> {
RecordBatch::try_from(self)
.with_context(|| format!("could not create RecordBatch from sheet \"{}\"", &self.name))
Expand Down

0 comments on commit 61cfc39

Please sign in to comment.