Skip to content

Commit

Permalink
improve IO docs ordering
Browse files: browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Mar 7, 2024
1 parent a5332bc commit cac54df
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 9 deletions.
6 changes: 3 additions & 3 deletions py-polars/docs/source/reference/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ Delta Lake
.. autosummary::
:toctree: api/

scan_delta
read_delta
scan_delta
DataFrame.write_delta

Excel / ODS
Expand All @@ -64,9 +64,9 @@ Feather / IPC
:toctree: api/

read_ipc
read_ipc_schema
read_ipc_stream
scan_ipc
read_ipc_schema
DataFrame.write_ipc
DataFrame.write_ipc_stream
LazyFrame.sink_ipc
Expand Down Expand Up @@ -96,8 +96,8 @@ Parquet
:toctree: api/

read_parquet
scan_parquet
read_parquet_schema
scan_parquet
DataFrame.write_parquet
LazyFrame.sink_parquet

Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
"read_delta",
"read_excel",
"read_ipc",
"read_ipc_stream",
"read_ipc_schema",
"read_ipc_stream",
"read_json",
"read_ndjson",
"read_ods",
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/io/csv/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from polars.io.csv.batched_reader import BatchedCsvReader
from polars.io.csv.functions import read_csv, read_csv_batched, scan_csv

__all__ = [
"BatchedCsvReader",
"read_csv",
"read_csv_batched",
"scan_csv",
Expand Down
6 changes: 2 additions & 4 deletions py-polars/polars/io/csv/batched_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,12 @@ def next_batches(self, n: int) -> list[DataFrame] | None:
"""
Read `n` batches from the reader.
The `n` chunks will be parallelized over the
available threads.
These batches will be parallelized over the available threads.
Parameters
----------
n
Number of chunks to fetch.
This is ideally >= number of threads
Number of chunks to fetch; ideally this is >= number of threads.
Examples
--------
Expand Down
6 changes: 5 additions & 1 deletion py-polars/tests/unit/io/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import polars as pl
from polars._utils.various import normalize_filepath
from polars.exceptions import ComputeError, NoDataError
from polars.io.csv import BatchedCsvReader
from polars.testing import assert_frame_equal, assert_series_equal

if TYPE_CHECKING:
Expand Down Expand Up @@ -1414,8 +1415,9 @@ def test_csv_categorical_categorical_merge() -> None:

def test_batched_csv_reader(foods_file_path: Path) -> None:
reader = pl.read_csv_batched(foods_file_path, batch_size=4)
batches = reader.next_batches(5)
assert isinstance(reader, BatchedCsvReader)

batches = reader.next_batches(5)
assert batches is not None
assert len(batches) == 5
assert batches[0].to_dict(as_series=False) == {
Expand All @@ -1431,10 +1433,12 @@ def test_batched_csv_reader(foods_file_path: Path) -> None:
"sugars_g": [25, 0, 5, 11],
}
assert_frame_equal(pl.concat(batches), pl.read_csv(foods_file_path))

# the final batch of the low-memory variant is different
reader = pl.read_csv_batched(foods_file_path, batch_size=4, low_memory=True)
batches = reader.next_batches(5)
assert len(batches) == 5 # type: ignore[arg-type]

batches += reader.next_batches(5) # type: ignore[operator]
assert_frame_equal(pl.concat(batches), pl.read_csv(foods_file_path))

Expand Down

0 comments on commit cac54df

Please sign in to comment.