From 9df2ca13737532b460cc422a637dbb48e381a7b3 Mon Sep 17 00:00:00 2001
From: Dean MacGregor <powertrading121@gmail.com>
Date: Thu, 11 Apr 2024 13:02:29 -0400
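Subject: [PATCH] feat: change default to write parquet statistics

Flip the default of `statistics` from false to true in the Rust
`ParquetWriter` (crates/polars-io) and in the Python `write_parquet` and
`sink_parquet` signatures, and update both docstrings to say that writing
statistics is now the default behavior.

Also change the return annotation of `sink_parquet` from `DataFrame` to
`None`.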
---
 crates/polars-io/src/parquet/write.rs | 2 +-
 py-polars/polars/dataframe/frame.py   | 4 ++--
 py-polars/polars/lazyframe/frame.py   | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/crates/polars-io/src/parquet/write.rs b/crates/polars-io/src/parquet/write.rs
index 8149f7d5e128..c3aa6e85fb6f 100644
--- a/crates/polars-io/src/parquet/write.rs
+++ b/crates/polars-io/src/parquet/write.rs
@@ -121,7 +121,7 @@ where
         ParquetWriter {
             writer,
             compression: ParquetCompression::default().into(),
-            statistics: false,
+            statistics: true,
             row_group_size: None,
             data_page_size: None,
             parallel: true,
diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
index 8b9caf5a3a85..a2aeb5b05b30 100644
--- a/py-polars/polars/dataframe/frame.py
+++ b/py-polars/polars/dataframe/frame.py
@@ -3302,7 +3302,7 @@ def write_parquet(
         *,
         compression: ParquetCompression = "zstd",
         compression_level: int | None = None,
-        statistics: bool = False,
+        statistics: bool = True,
         row_group_size: int | None = None,
         data_page_size: int | None = None,
         use_pyarrow: bool = False,
@@ -3329,7 +3329,7 @@ def write_parquet(
             - "zstd" : min-level: 1, max-level: 22.
 
         statistics
-            Write statistics to the parquet headers. This requires extra compute.
+            Write statistics to the parquet headers. This is the default behavior.
         row_group_size
             Size of the row groups in number of rows. Defaults to 512^2 rows.
         data_page_size
diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py
index f96adfd0bbc4..687af4a10d69 100644
--- a/py-polars/polars/lazyframe/frame.py
+++ b/py-polars/polars/lazyframe/frame.py
@@ -2116,7 +2116,7 @@ def sink_parquet(
         *,
         compression: str = "zstd",
         compression_level: int | None = None,
-        statistics: bool = False,
+        statistics: bool = True,
         row_group_size: int | None = None,
         data_pagesize_limit: int | None = None,
         maintain_order: bool = True,
@@ -2126,7 +2126,7 @@ def sink_parquet(
         simplify_expression: bool = True,
         slice_pushdown: bool = True,
         no_optimization: bool = False,
-    ) -> DataFrame:
+    ) -> None:
         """
         Evaluate the query in streaming mode and write to a Parquet file.
 
@@ -2153,7 +2153,7 @@ def sink_parquet(
             - "brotli" : min-level: 0, max-level: 11.
             - "zstd" : min-level: 1, max-level: 22.
         statistics
-            Write statistics to the parquet headers. This requires extra compute.
+            Write statistics to the parquet headers. This is the default behavior.
         row_group_size
             Size of the row groups in number of rows.
             If None (default), the chunks of the `DataFrame` are