Skip to content

Commit

Permalink
feat: change default to write parquet statistics
Browse files (browse the repository at this point in the history)
  • Loading branch information
deanm0000 committed Apr 11, 2024
1 parent 31df06d commit 9df2ca1
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion crates/polars-io/src/parquet/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ where
ParquetWriter {
writer,
compression: ParquetCompression::default().into(),
- statistics: false,
+ statistics: true,
row_group_size: None,
data_page_size: None,
parallel: true,
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3302,7 +3302,7 @@ def write_parquet(
*,
compression: ParquetCompression = "zstd",
compression_level: int | None = None,
- statistics: bool = False,
+ statistics: bool = True,
row_group_size: int | None = None,
data_page_size: int | None = None,
use_pyarrow: bool = False,
Expand All @@ -3329,7 +3329,7 @@ def write_parquet(
- "zstd" : min-level: 1, max-level: 22.
statistics
- Write statistics to the parquet headers. This requires extra compute.
+ Write statistics to the parquet headers. This is the default behavior.
row_group_size
Size of the row groups in number of rows. Defaults to 512^2 rows.
data_page_size
Expand Down
6 changes: 3 additions & 3 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2116,7 +2116,7 @@ def sink_parquet(
*,
compression: str = "zstd",
compression_level: int | None = None,
- statistics: bool = False,
+ statistics: bool = True,
row_group_size: int | None = None,
data_pagesize_limit: int | None = None,
maintain_order: bool = True,
Expand All @@ -2126,7 +2126,7 @@ def sink_parquet(
simplify_expression: bool = True,
slice_pushdown: bool = True,
no_optimization: bool = False,
- ) -> DataFrame:
+ ) -> None:
"""
Evaluate the query in streaming mode and write to a Parquet file.
Expand All @@ -2153,7 +2153,7 @@ def sink_parquet(
- "brotli" : min-level: 0, max-level: 11.
- "zstd" : min-level: 1, max-level: 22.
statistics
- Write statistics to the parquet headers. This requires extra compute.
+ Write statistics to the parquet headers. This is the default behavior.
row_group_size
Size of the row groups in number of rows.
If None (default), the chunks of the `DataFrame` are
Expand Down

0 comments on commit 9df2ca1

Please sign in to comment.