diff --git a/crates/polars-io/src/parquet/write.rs b/crates/polars-io/src/parquet/write.rs index 8149f7d5e128..c3aa6e85fb6f 100644 --- a/crates/polars-io/src/parquet/write.rs +++ b/crates/polars-io/src/parquet/write.rs @@ -121,7 +121,7 @@ where ParquetWriter { writer, compression: ParquetCompression::default().into(), - statistics: false, + statistics: true, row_group_size: None, data_page_size: None, parallel: true, diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 7a64f597558c..4c7bf0d9a6f6 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -2851,7 +2851,7 @@ def write_parquet( *, compression: ParquetCompression = "zstd", compression_level: int | None = None, - statistics: bool = False, + statistics: bool = True, row_group_size: int | None = None, data_page_size: int | None = None, use_pyarrow: bool = False, @@ -2878,7 +2878,7 @@ def write_parquet( - "zstd" : min-level: 1, max-level: 22. statistics - Write statistics to the parquet headers. This requires extra compute. + Write statistics to the parquet headers. This is the default behavior. row_group_size Size of the row groups in number of rows. Defaults to 512^2 rows. data_page_size diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index b420aaed80a2..bbdf3b605405 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -1854,7 +1854,7 @@ def sink_parquet( *, compression: str = "zstd", compression_level: int | None = None, - statistics: bool = False, + statistics: bool = True, row_group_size: int | None = None, data_pagesize_limit: int | None = None, maintain_order: bool = True, @@ -1864,7 +1864,7 @@ def sink_parquet( simplify_expression: bool = True, slice_pushdown: bool = True, no_optimization: bool = False, - ) -> DataFrame: + ) -> None: """ Evaluate the query in streaming mode and write to a Parquet file. @@ -1891,7 +1891,7 @@ def sink_parquet( - "brotli" : min-level: 0, max-level: 11. - "zstd" : min-level: 1, max-level: 22. statistics - Write statistics to the parquet headers. This requires extra compute. + Write statistics to the parquet headers. This is the default behavior. row_group_size Size of the row groups in number of rows. If None (default), the chunks of the `DataFrame` are