From 448b0ec3c0d6b648d18e821dea851658698d6611 Mon Sep 17 00:00:00 2001
From: Alexander Rafferty
Date: Sun, 28 Jul 2024 18:46:04 +1000
Subject: [PATCH 1/2] Fix #11687: Replace all uses of `OnceLock` with
 `LazyLock`

---
 Cargo.toml                                    |  2 +-
 datafusion-cli/src/main.rs                    | 20 +++---
 datafusion/core/Cargo.toml                    |  2 +-
 .../physical_plan/parquet/access_plan.rs      | 58 +++++++--------
 datafusion/core/tests/expr_api/mod.rs         | 72 +++++++++----------
 datafusion/core/tests/memory_limit/mod.rs     | 22 +++---
 .../tests/parquet/external_access_plan.rs     | 66 ++++++++---------
 datafusion/expr/src/test/function_stub.rs     | 16 ++---
 datafusion/functions-aggregate/src/macros.rs  | 16 ++---
 datafusion/functions-nested/src/macros.rs     | 19 ++---
 datafusion/functions/src/macros.rs            | 22 +++---
 .../functions/src/regex/regexpreplace.rs      | 15 ++--
 .../physical-expr/src/utils/guarantee.rs      | 22 +++---
 .../engines/datafusion_engine/normalize.rs    | 50 ++++++-------
 14 files changed, 177 insertions(+), 225 deletions(-)
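
Note for reviewers: every hunk below applies the same mechanical pattern.
A minimal standalone sketch of the before/after shape, with illustrative
names (nothing in it is taken from the DataFusion codebase):

    // Before: a OnceLock static only reserves the slot, so call sites go
    // through an accessor fn that runs get_or_init on first use.
    mod before {
        use std::sync::OnceLock;

        pub fn cache() -> &'static Vec<u32> {
            static CACHE: OnceLock<Vec<u32>> = OnceLock::new();
            CACHE.get_or_init(|| (0..10).collect())
        }
    }

    // After: LazyLock carries its init closure in the static itself and
    // runs it on first deref, so the accessor fn can be deleted.
    mod after {
        use std::sync::LazyLock;

        pub static CACHE: LazyLock<Vec<u32>> = LazyLock::new(|| (0..10).collect());
    }

    fn main() {
        assert_eq!(before::cache().len(), after::CACHE.len());
    }

`LazyLock` is stable only since Rust 1.80, which is why the MSRV entries
are bumped from 1.76 alongside the refactor.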
diff --git a/Cargo.toml b/Cargo.toml
index cdf3d2f93b93..6d8f28380f66 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -54,7 +54,7 @@ homepage = "https://datafusion.apache.org"
 license = "Apache-2.0"
 readme = "README.md"
 repository = "https://github.com/apache/datafusion"
-rust-version = "1.76"
+rust-version = "1.80"
 version = "40.0.0"
 
 [workspace.dependencies]
diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs
index 6266ae6f561a..a2b52eb701a1 100644
--- a/datafusion-cli/src/main.rs
+++ b/datafusion-cli/src/main.rs
@@ -19,7 +19,7 @@ use std::collections::HashMap;
 use std::env;
 use std::path::Path;
 use std::process::ExitCode;
-use std::sync::{Arc, OnceLock};
+use std::sync::{Arc, LazyLock};
 
 use datafusion::error::{DataFusionError, Result};
 use datafusion::execution::context::SessionConfig;
@@ -295,9 +295,8 @@ impl ByteUnit {
 }
 
 fn extract_memory_pool_size(size: &str) -> Result<usize, String> {
-    fn byte_suffixes() -> &'static HashMap<&'static str, ByteUnit> {
-        static BYTE_SUFFIXES: OnceLock<HashMap<&'static str, ByteUnit>> = OnceLock::new();
-        BYTE_SUFFIXES.get_or_init(|| {
+    static BYTE_SUFFIXES: LazyLock<HashMap<&'static str, ByteUnit>> =
+        LazyLock::new(|| {
             let mut m = HashMap::new();
             m.insert("b", ByteUnit::Byte);
             m.insert("k", ByteUnit::KiB);
@@ -309,23 +308,20 @@ fn extract_memory_pool_size(size: &str) -> Result<usize, String> {
             m.insert("t", ByteUnit::TiB);
             m.insert("tb", ByteUnit::TiB);
             m
-        })
-    }
+        });
 
-    fn suffix_re() -> &'static regex::Regex {
-        static SUFFIX_REGEX: OnceLock<regex::Regex> = OnceLock::new();
-        SUFFIX_REGEX.get_or_init(|| regex::Regex::new(r"^(-?[0-9]+)([a-z]+)?$").unwrap())
-    }
+    static SUFFIX_REGEX: LazyLock<regex::Regex> =
+        LazyLock::new(|| regex::Regex::new(r"^(-?[0-9]+)([a-z]+)?$").unwrap());
 
     let lower = size.to_lowercase();
-    if let Some(caps) = suffix_re().captures(&lower) {
+    if let Some(caps) = SUFFIX_REGEX.captures(&lower) {
         let num_str = caps.get(1).unwrap().as_str();
         let num = num_str.parse::<usize>().map_err(|_| {
             format!("Invalid numeric value in memory pool size '{}'", size)
         })?;
 
         let suffix = caps.get(2).map(|m| m.as_str()).unwrap_or("b");
-        let unit = byte_suffixes()
+        let unit = BYTE_SUFFIXES
             .get(suffix)
             .ok_or_else(|| format!("Invalid memory pool size '{}'", size))?;
         let memory_pool_size = usize::try_from(unit.multiplier())
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 09b90a56d2aa..44d7563dba8a 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -30,7 +30,7 @@ authors = { workspace = true }
 # Specify MSRV here as `cargo msrv` doesn't support workspace version and fails with
 # "Unable to find key 'package.rust-version' (or 'package.metadata.msrv') in 'arrow-datafusion/Cargo.toml'"
 # https://github.com/foresterre/cargo-msrv/issues/590
-rust-version = "1.76"
+rust-version = "1.80"
 
 [lints]
 workspace = true
diff --git a/datafusion/core/src/datasource/physical_plan/parquet/access_plan.rs b/datafusion/core/src/datasource/physical_plan/parquet/access_plan.rs
index ea3030664b7b..0d77a99699bd 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/access_plan.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/access_plan.rs
@@ -345,7 +345,7 @@ mod test {
     use parquet::basic::LogicalType;
     use parquet::file::metadata::ColumnChunkMetaData;
     use parquet::schema::types::{SchemaDescPtr, SchemaDescriptor};
-    use std::sync::{Arc, OnceLock};
+    use std::sync::{Arc, LazyLock};
 
     #[test]
     fn test_only_scans() {
@@ -358,7 +358,7 @@
 
         let row_group_indexes = access_plan.row_group_indexes();
         let row_selection = access_plan
-            .into_overall_row_selection(row_group_metadata())
+            .into_overall_row_selection(&ROW_GROUP_METADATA)
             .unwrap();
 
         // scan all row groups, no selection
@@ -377,7 +377,7 @@
 
         let row_group_indexes = access_plan.row_group_indexes();
         let row_selection = access_plan
-            .into_overall_row_selection(row_group_metadata())
+            .into_overall_row_selection(&ROW_GROUP_METADATA)
             .unwrap();
 
         // skip all row groups, no selection
@@ -403,7 +403,7 @@
 
         let row_group_indexes = access_plan.row_group_indexes();
         let row_selection = access_plan
-            .into_overall_row_selection(row_group_metadata())
+            .into_overall_row_selection(&ROW_GROUP_METADATA)
             .unwrap();
 
         assert_eq!(row_group_indexes, vec![0, 1]);
@@ -442,7 +442,7 @@
 
         let row_group_indexes = access_plan.row_group_indexes();
         let row_selection = access_plan
-            .into_overall_row_selection(row_group_metadata())
+            .into_overall_row_selection(&ROW_GROUP_METADATA)
             .unwrap();
 
         assert_eq!(row_group_indexes, vec![1, 2, 3]);
@@ -478,7 +478,7 @@
 
         let row_group_indexes = access_plan.row_group_indexes();
         let err = access_plan
-            .into_overall_row_selection(row_group_metadata())
+            .into_overall_row_selection(&ROW_GROUP_METADATA)
             .unwrap_err()
             .to_string();
         assert_eq!(row_group_indexes, vec![0, 1, 2, 3]);
@@ -504,39 +504,35 @@
 
         let row_group_indexes = access_plan.row_group_indexes();
         let err = access_plan
-            .into_overall_row_selection(row_group_metadata())
+            .into_overall_row_selection(&ROW_GROUP_METADATA)
             .unwrap_err()
             .to_string();
         assert_eq!(row_group_indexes, vec![0, 1, 2, 3]);
         assert_contains!(err, "Invalid ParquetAccessPlan Selection. Row group 1 has 20 rows but selection only specifies 22 rows");
     }
 
-    static ROW_GROUP_METADATA: OnceLock<Vec<RowGroupMetaData>> = OnceLock::new();
-
     /// [`RowGroupMetaData`] that returns 4 row groups with 10, 20, 30, 40 rows
     /// respectively
-    fn row_group_metadata() -> &'static [RowGroupMetaData] {
-        ROW_GROUP_METADATA.get_or_init(|| {
-            let schema_descr = get_test_schema_descr();
-            let row_counts = [10, 20, 30, 40];
-
-            row_counts
-                .into_iter()
-                .map(|num_rows| {
-                    let column = ColumnChunkMetaData::builder(schema_descr.column(0))
-                        .set_num_values(num_rows)
-                        .build()
-                        .unwrap();
-
-                    RowGroupMetaData::builder(schema_descr.clone())
-                        .set_num_rows(num_rows)
-                        .set_column_metadata(vec![column])
-                        .build()
-                        .unwrap()
-                })
-                .collect()
-        })
-    }
+    static ROW_GROUP_METADATA: LazyLock<Vec<RowGroupMetaData>> = LazyLock::new(|| {
+        let schema_descr = get_test_schema_descr();
+        let row_counts = [10, 20, 30, 40];
+
+        row_counts
+            .into_iter()
+            .map(|num_rows| {
+                let column = ColumnChunkMetaData::builder(schema_descr.column(0))
+                    .set_num_values(num_rows)
+                    .build()
+                    .unwrap();
+
+                RowGroupMetaData::builder(schema_descr.clone())
+                    .set_num_rows(num_rows)
+                    .set_column_metadata(vec![column])
+                    .build()
+                    .unwrap()
+            })
+            .collect()
+    });
 
     /// Single column schema with a single column named "a" of type `BYTE_ARRAY`/`String`
     fn get_test_schema_descr() -> SchemaDescPtr {
diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs
index 051d65652633..cc0f0af9a9aa 100644
--- a/datafusion/core/tests/expr_api/mod.rs
+++ b/datafusion/core/tests/expr_api/mod.rs
@@ -28,7 +28,7 @@ use datafusion_functions_aggregate::sum::sum_udaf;
 use datafusion_functions_nested::expr_ext::{IndexAccessor, SliceAccessor};
 use sqlparser::ast::NullTreatment;
 /// Tests of using and evaluating `Expr`s outside the context of a LogicalPlan
-use std::sync::{Arc, OnceLock};
+use std::sync::{Arc, LazyLock};
 
 mod parse_sql_expr;
 mod simplification;
@@ -320,7 +320,7 @@ async fn test_aggregate_ext_null_treatment() {
 /// Evaluates the specified expr as an aggregate and compares the result to the
 /// expected result.
 async fn evaluate_agg_test(expr: Expr, expected_lines: Vec<&str>) {
-    let batch = test_batch();
+    let batch = TEST_BATCH.clone();
     let ctx = SessionContext::new();
 
     let group_expr = vec![];
@@ -347,7 +347,7 @@ async fn evaluate_agg_test(expr: Expr, expected_lines: Vec<&str>) {
 /// Converts the `Expr` to a `PhysicalExpr`, evaluates it against the provided
 /// `RecordBatch` and compares the result to the expected result.
 fn evaluate_expr_test(expr: Expr, expected_lines: Vec<&str>) {
-    let batch = test_batch();
+    let batch = TEST_BATCH.clone();
     let df_schema = DFSchema::try_from(batch.schema()).unwrap();
     let physical_expr = SessionContext::new()
         .create_physical_expr(expr, &df_schema)
@@ -365,39 +365,33 @@ fn evaluate_expr_test(expr: Expr, expected_lines: Vec<&str>) {
     );
 }
 
-static TEST_BATCH: OnceLock<RecordBatch> = OnceLock::new();
-
-fn test_batch() -> RecordBatch {
-    TEST_BATCH
-        .get_or_init(|| {
-            let string_array: ArrayRef = Arc::new(StringArray::from(vec!["1", "2", "3"]));
-            let int_array: ArrayRef =
-                Arc::new(Int64Array::from_iter(vec![Some(10), None, Some(5)]));
-
-            // { a: "2021-02-01" } { a: "2021-02-02" } { a: "2021-02-03" }
-            let struct_array: ArrayRef = Arc::from(StructArray::from(vec![(
-                Arc::new(Field::new("a", DataType::Utf8, false)),
-                Arc::new(StringArray::from(vec![
-                    "2021-02-01",
-                    "2021-02-02",
-                    "2021-02-03",
-                ])) as _,
-            )]));
-
-            // ["one"] ["two", "three", "four"] ["five"]
-            let mut builder = ListBuilder::new(StringBuilder::new());
-            builder.append_value([Some("one")]);
-            builder.append_value([Some("two"), Some("three"), Some("four")]);
-            builder.append_value([Some("five")]);
-            let list_array: ArrayRef = Arc::new(builder.finish());
-
-            RecordBatch::try_from_iter(vec![
-                ("id", string_array),
-                ("i", int_array),
-                ("props", struct_array),
-                ("list", list_array),
-            ])
-            .unwrap()
-        })
-        .clone()
-}
+static TEST_BATCH: LazyLock<RecordBatch> = LazyLock::new(|| {
+    let string_array: ArrayRef = Arc::new(StringArray::from(vec!["1", "2", "3"]));
+    let int_array: ArrayRef =
+        Arc::new(Int64Array::from_iter(vec![Some(10), None, Some(5)]));
+
+    // { a: "2021-02-01" } { a: "2021-02-02" } { a: "2021-02-03" }
+    let struct_array: ArrayRef = Arc::from(StructArray::from(vec![(
+        Arc::new(Field::new("a", DataType::Utf8, false)),
+        Arc::new(StringArray::from(vec![
+            "2021-02-01",
+            "2021-02-02",
+            "2021-02-03",
+        ])) as _,
+    )]));
+
+    // ["one"] ["two", "three", "four"] ["five"]
+    let mut builder = ListBuilder::new(StringBuilder::new());
+    builder.append_value([Some("one")]);
+    builder.append_value([Some("two"), Some("three"), Some("four")]);
+    builder.append_value([Some("five")]);
+    let list_array: ArrayRef = Arc::new(builder.finish());
+
+    RecordBatch::try_from_iter(vec![
+        ("id", string_array),
+        ("i", int_array),
+        ("props", struct_array),
+        ("list", list_array),
+    ])
+    .unwrap()
+});
diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs
index f62a019eb960..a775e3458912 100644
--- a/datafusion/core/tests/memory_limit/mod.rs
+++ b/datafusion/core/tests/memory_limit/mod.rs
@@ -30,7 +30,7 @@ use datafusion_expr::{Expr, TableType};
 use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr};
 use futures::StreamExt;
 use std::any::Any;
-use std::sync::{Arc, OnceLock};
+use std::sync::{Arc, LazyLock};
 use tokio::fs::File;
 
 use datafusion::datasource::streaming::StreamingTable;
@@ -568,7 +568,10 @@ impl Scenario {
             single_row_batches, ..
         } = self
         {
-            batches_byte_size(&maybe_split_batches(dict_batches(), *single_row_batches))
+            batches_byte_size(&maybe_split_batches(
+                DICT_BATCHES.clone(),
+                *single_row_batches,
+            ))
         } else {
             panic!("Scenario does not support partition size");
         }
@@ -604,7 +607,7 @@ impl Scenario {
             } => {
                 use datafusion::physical_expr::expressions::col;
                 let batches: Vec<Vec<RecordBatch>> = std::iter::repeat(maybe_split_batches(
-                    dict_batches(),
+                    DICT_BATCHES.clone(),
                     *single_row_batches,
                 ))
                 .take(*partitions)
@@ -686,18 +689,11 @@ fn maybe_split_batches(
         .collect()
 }
 
-static DICT_BATCHES: OnceLock<Vec<RecordBatch>> = OnceLock::new();
-
-/// Returns 5 sorted string dictionary batches each with 50 rows with
-/// this schema.
+/// 5 sorted string dictionary batches each with 50 rows with this schema.
 ///
 /// a: Dictionary<Int32, Utf8>,
 /// b: Dictionary<Int32, Utf8>,
-fn dict_batches() -> Vec<RecordBatch> {
-    DICT_BATCHES.get_or_init(make_dict_batches).clone()
-}
-
-fn make_dict_batches() -> Vec<RecordBatch> {
+static DICT_BATCHES: LazyLock<Vec<RecordBatch>> = LazyLock::new(|| {
     let batch_size = 50;
 
     let mut i = 0;
@@ -731,7 +727,7 @@ fn make_dict_batches() -> Vec<RecordBatch> {
     });
 
     batches
-}
+});
 
 // How many bytes does the memory from dict_batches consume?
 fn batches_byte_size(batches: &[RecordBatch]) -> usize {
diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs
index 03afc858dfca..e6dc02fac556 100644
--- a/datafusion/core/tests/parquet/external_access_plan.rs
+++ b/datafusion/core/tests/parquet/external_access_plan.rs
@@ -33,7 +33,7 @@ use datafusion_physical_plan::ExecutionPlan;
 use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
 use parquet::arrow::ArrowWriter;
 use parquet::file::properties::WriterProperties;
-use std::sync::{Arc, OnceLock};
+use std::sync::{Arc, LazyLock};
 use tempfile::NamedTempFile;
 
 #[tokio::test]
@@ -317,7 +317,7 @@ impl TestFull {
             schema,
             file_name,
             file_size,
-        } = get_test_data();
+        } = TEST_DATA;
 
         let mut partitioned_file = PartitionedFile::new(file_name, *file_size);
 
@@ -369,46 +369,42 @@ struct TestData {
     file_size: u64,
 }
 
-static TEST_DATA: OnceLock<TestData> = OnceLock::new();
+/// A parquet file with 2 row groups each with 5 rows
+static TEST_DATA: LazyLock<TestData> = LazyLock::new(|| {
+    let scenario = Scenario::UTF8;
+    let row_per_group = 5;
 
-/// Return a parquet file with 2 row groups each with 5 rows
-fn get_test_data() -> &'static TestData {
-    TEST_DATA.get_or_init(|| {
-        let scenario = Scenario::UTF8;
-        let row_per_group = 5;
+    let mut temp_file = tempfile::Builder::new()
+        .prefix("user_access_plan")
+        .suffix(".parquet")
+        .tempfile()
+        .expect("tempfile creation");
 
-        let mut temp_file = tempfile::Builder::new()
-            .prefix("user_access_plan")
-            .suffix(".parquet")
-            .tempfile()
-            .expect("tempfile creation");
+    let props = WriterProperties::builder()
+        .set_max_row_group_size(row_per_group)
+        .build();
 
-        let props = WriterProperties::builder()
-            .set_max_row_group_size(row_per_group)
-            .build();
+    let batches = create_data_batch(scenario);
+    let schema = batches[0].schema();
 
-        let batches = create_data_batch(scenario);
-        let schema = batches[0].schema();
+    let mut writer =
+        ArrowWriter::try_new(&mut temp_file, schema.clone(), Some(props)).unwrap();
 
-        let mut writer =
-            ArrowWriter::try_new(&mut temp_file, schema.clone(), Some(props)).unwrap();
-
-        for batch in batches {
-            writer.write(&batch).expect("writing batch");
-        }
-        writer.close().unwrap();
+    for batch in batches {
+        writer.write(&batch).expect("writing batch");
+    }
+    writer.close().unwrap();
 
-        let file_name = temp_file.path().to_string_lossy().to_string();
-        let file_size = temp_file.path().metadata().unwrap().len();
+    let file_name = temp_file.path().to_string_lossy().to_string();
+    let file_size = temp_file.path().metadata().unwrap().len();
 
-        TestData {
-            temp_file,
-            schema,
-            file_name,
-            file_size,
-        }
-    })
-}
+    TestData {
+        temp_file,
+        schema,
+        file_name,
+        file_size,
+    }
+});
 
 /// Return the total value of the specified metric name
 fn metric_value(parquet_metrics: &MetricsSet, metric_name: &str) -> Option<usize> {
diff --git a/datafusion/expr/src/test/function_stub.rs b/datafusion/expr/src/test/function_stub.rs
index 14a6522ebe91..a6c7f1d19396 100644
--- a/datafusion/expr/src/test/function_stub.rs
+++ b/datafusion/expr/src/test/function_stub.rs
@@ -20,6 +20,7 @@
 //! These are used to avoid a dependence on `datafusion-functions-aggregate` which live in a different crate
 
 use std::any::Any;
+use std::sync::{Arc, LazyLock};
 
 use arrow::datatypes::{
     DataType, Field, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION,
@@ -43,18 +44,13 @@ macro_rules! create_func {
             /// Singleton instance of [$UDAF], ensures the UDAF is only created once
             /// named STATIC_$(UDAF). For example `STATIC_FirstValue`
             #[allow(non_upper_case_globals)]
-            static [< STATIC_ $UDAF >]: std::sync::OnceLock<std::sync::Arc<crate::AggregateUDF>> =
-                std::sync::OnceLock::new();
+            static [< STATIC_ $UDAF >]: LazyLock<Arc<crate::AggregateUDF>> = LazyLock::new(|| {
+                Arc::new(crate::AggregateUDF::from(<$UDAF>::default()))
+            });
 
-            /// AggregateFunction that returns a [AggregateUDF] for [$UDAF]
-            ///
-            /// [AggregateUDF]: crate::AggregateUDF
+            #[doc = concat!("AggregateFunction that returns a [AggregateUDF](crate::AggregateUDF) for [`", stringify!($UDAF), "`]")]
             pub fn $AGGREGATE_UDF_FN() -> std::sync::Arc<crate::AggregateUDF> {
-                [< STATIC_ $UDAF >]
-                    .get_or_init(|| {
-                        std::sync::Arc::new(crate::AggregateUDF::from(<$UDAF>::default()))
-                    })
-                    .clone()
+                [< STATIC_ $UDAF >].clone()
             }
         }
     }
diff --git a/datafusion/functions-aggregate/src/macros.rs b/datafusion/functions-aggregate/src/macros.rs
index cae72cf35223..233b3983df9d 100644
--- a/datafusion/functions-aggregate/src/macros.rs
+++ b/datafusion/functions-aggregate/src/macros.rs
@@ -85,18 +85,14 @@ macro_rules! create_func {
             /// Singleton instance of [$UDAF], ensures the UDAF is only created once
             /// named STATIC_$(UDAF). For example `STATIC_FirstValue`
             #[allow(non_upper_case_globals)]
-            static [< STATIC_ $UDAF >]: std::sync::OnceLock<std::sync::Arc<datafusion_expr::AggregateUDF>> =
-                std::sync::OnceLock::new();
+            static [< STATIC_ $UDAF >]: std::sync::LazyLock<std::sync::Arc<datafusion_expr::AggregateUDF>> =
+                std::sync::LazyLock::new(|| {
+                    std::sync::Arc::new(datafusion_expr::AggregateUDF::from($CREATE))
+                });
 
-            /// AggregateFunction that returns a [AggregateUDF] for [$UDAF]
-            ///
-            /// [AggregateUDF]: datafusion_expr::AggregateUDF
+            #[doc = concat!("AggregateFunction that returns a [`AggregateUDF`](datafusion_expr::AggregateUDF) for [`", stringify!($UDAF), "`]")]
             pub fn $AGGREGATE_UDF_FN() -> std::sync::Arc<datafusion_expr::AggregateUDF> {
-                [< STATIC_ $UDAF >]
-                    .get_or_init(|| {
-                        std::sync::Arc::new(datafusion_expr::AggregateUDF::from($CREATE))
-                    })
-                    .clone()
+                [< STATIC_ $UDAF >].clone()
             }
         }
     }
diff --git a/datafusion/functions-nested/src/macros.rs b/datafusion/functions-nested/src/macros.rs
index a6e0c2ee62be..3042cee344d8 100644
--- a/datafusion/functions-nested/src/macros.rs
+++ b/datafusion/functions-nested/src/macros.rs
@@ -88,19 +88,14 @@ macro_rules! create_func {
             /// Singleton instance of [`$UDF`], ensures the UDF is only created once
             /// named STATIC_$(UDF). For example `STATIC_ArrayToString`
             #[allow(non_upper_case_globals)]
-            static [< STATIC_ $UDF >]: std::sync::OnceLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
-                std::sync::OnceLock::new();
-            /// ScalarFunction that returns a [`ScalarUDF`] for [`$UDF`]
-            ///
-            /// [`ScalarUDF`]: datafusion_expr::ScalarUDF
+            static [< STATIC_ $UDF >]: std::sync::LazyLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
+                std::sync::LazyLock::new(|| {
+                    std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(<$UDF>::new()))
+                });
+
+            #[doc = concat!("ScalarFunction that returns a [`ScalarUDF`](datafusion_expr::ScalarUDF) for [`", stringify!($UDF), "`]")]
             pub fn $SCALAR_UDF_FN() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
-                [< STATIC_ $UDF >]
-                    .get_or_init(|| {
-                        std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
-                            <$UDF>::new(),
-                        ))
-                    })
-                    .clone()
+                [< STATIC_ $UDF >].clone()
             }
         }
     };
diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs
index e26c94e1bb79..bb538d88b5b5 100644
--- a/datafusion/functions/src/macros.rs
+++ b/datafusion/functions/src/macros.rs
@@ -72,20 +72,16 @@ macro_rules! export_functions {
 macro_rules! make_udf_function {
     ($UDF:ty, $GNAME:ident, $NAME:ident) => {
         /// Singleton instance of the function
-        static $GNAME: std::sync::OnceLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
-            std::sync::OnceLock::new();
-
-        /// Return a [`ScalarUDF`] for [`$UDF`]
-        ///
-        /// [`ScalarUDF`]: datafusion_expr::ScalarUDF
+        static $GNAME: std::sync::LazyLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
+            std::sync::LazyLock::new(|| {
+                std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
+                    <$UDF>::new(),
+                ))
+            });
+
+        #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) for [`", stringify!($UDF), "`]")]
         pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
-            $GNAME
-                .get_or_init(|| {
-                    std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
-                        <$UDF>::new(),
-                    ))
-                })
-                .clone()
+            $GNAME.clone()
        }
    };
}
diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs
index d820f991be18..aed117b79eb1 100644
--- a/datafusion/functions/src/regex/regexpreplace.rs
+++ b/datafusion/functions/src/regex/regexpreplace.rs
@@ -36,7 +36,7 @@ use regex::Regex;
 use std::any::Any;
 use std::collections::HashMap;
 use std::sync::Arc;
-use std::sync::OnceLock;
+use std::sync::LazyLock;
 #[derive(Debug)]
 pub struct RegexpReplaceFunc {
     signature: Signature,
@@ -118,6 +118,7 @@ impl ScalarUDFImpl for RegexpReplaceFunc {
         }
     }
 }
+
 fn regexp_replace_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
     match args[0].data_type() {
         DataType::Utf8 => specialize_regexp_replace::<i32>(args),
@@ -127,14 +128,14 @@ fn regexp_replace_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
         }
     }
 }
-/// replace POSIX capture groups (like \1) with Rust Regex group (like ${1})
+
+/// Replace POSIX capture groups (like \1) with Rust Regex group (like ${1})
 /// used by regexp_replace
 fn regex_replace_posix_groups(replacement: &str) -> String {
-    fn capture_groups_re() -> &'static Regex {
-        static CAPTURE_GROUPS_RE_LOCK: OnceLock<Regex> = OnceLock::new();
-        CAPTURE_GROUPS_RE_LOCK.get_or_init(|| Regex::new(r"(\\)(\d*)").unwrap())
-    }
-    capture_groups_re()
+    static CAPTURE_GROUPS_RE_LOCK: LazyLock<Regex> =
+        LazyLock::new(|| Regex::new(r"(\\)(\d*)").unwrap());
+
+    CAPTURE_GROUPS_RE_LOCK
         .replace_all(replacement, "$${$2}")
         .into_owned()
 }
diff --git a/datafusion/physical-expr/src/utils/guarantee.rs b/datafusion/physical-expr/src/utils/guarantee.rs
index 4385066529e7..e1a104423727 100644
--- a/datafusion/physical-expr/src/utils/guarantee.rs
+++ b/datafusion/physical-expr/src/utils/guarantee.rs
@@ -420,7 +420,7 @@ impl<'a> ColOpLit<'a> {
 
 #[cfg(test)]
 mod test {
-    use std::sync::OnceLock;
+    use std::sync::LazyLock;
 
     use super::*;
     use crate::planner::logical2physical;
@@ -835,8 +835,7 @@ mod test {
     /// Tests that analyzing expr results in the expected guarantees
     fn test_analyze(expr: Expr, expected: Vec<LiteralGuarantee>) {
         println!("Begin analyze of {expr}");
-        let schema = schema();
-        let physical_expr = logical2physical(&expr, &schema);
+        let physical_expr = logical2physical(&expr, &SCHEMA);
 
         let actual = LiteralGuarantee::analyze(&physical_expr);
         assert_eq!(
@@ -869,15 +868,10 @@ mod test {
         LiteralGuarantee::try_new(column, Guarantee::NotIn, literals.iter()).unwrap()
     }
 
-    // Schema for testing
-    fn schema() -> SchemaRef {
-        Arc::clone(SCHEMA.get_or_init(|| {
-            Arc::new(Schema::new(vec![
-                Field::new("a", DataType::Utf8, false),
-                Field::new("b", DataType::Int32, false),
-            ]))
-        }))
-    }
-
-    static SCHEMA: OnceLock<SchemaRef> = OnceLock::new();
+    // Schema for testing
+    static SCHEMA: LazyLock<SchemaRef> = LazyLock::new(|| {
+        Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Utf8, false),
+            Field::new("b", DataType::Int32, false),
+        ]))
+    });
 }
diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
index 520b6b53b32d..f0b3b0354346 100644
--- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
+++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
@@ -21,7 +21,7 @@ use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBatch};
 use datafusion_common::format::DEFAULT_FORMAT_OPTIONS;
 use datafusion_common::DataFusionError;
 use std::path::PathBuf;
-use std::sync::OnceLock;
+use std::sync::LazyLock;
 
 use crate::engines::output::DFColumnType;
 
@@ -130,7 +130,7 @@ fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
 /// ```
 fn normalize_paths(mut row: Vec<String>) -> Vec<String> {
     row.iter_mut().for_each(|s| {
-        let workspace_root: &str = workspace_root().as_ref();
+        let workspace_root: &str = WORKSPACE_ROOT.as_ref();
         if s.contains(workspace_root) {
             *s = s.replace(workspace_root, "WORKSPACE_ROOT");
         }
@@ -138,34 +138,30 @@ fn normalize_paths(mut row: Vec<String>) -> Vec<String> {
     row
 }
 
-/// return the location of the datafusion checkout
-fn workspace_root() -> &'static object_store::path::Path {
-    static WORKSPACE_ROOT_LOCK: OnceLock<object_store::path::Path> = OnceLock::new();
-    WORKSPACE_ROOT_LOCK.get_or_init(|| {
-        // e.g. /Software/datafusion/datafusion/core
-        let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+/// The location of the datafusion checkout
+static WORKSPACE_ROOT: LazyLock<object_store::path::Path> = LazyLock::new(|| {
+    // e.g. /Software/datafusion/datafusion/core
+    let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
 
-        // e.g. /Software/datafusion/datafusion
-        let workspace_root = dir
-            .parent()
-            .expect("Can not find parent of datafusion/core")
-            // e.g. /Software/datafusion
-            .parent()
-            .expect("parent of datafusion")
-            .to_string_lossy();
+    // e.g. /Software/datafusion/datafusion
+    let workspace_root = dir
+        .parent()
+        .expect("Can not find parent of datafusion/core")
+        // e.g. /Software/datafusion
+        .parent()
+        .expect("parent of datafusion")
+        .to_string_lossy();
 
-        let sanitized_workplace_root = if cfg!(windows) {
-            // Object store paths are delimited with `/`, e.g. `/datafusion/datafusion/testing/data/csv/aggregate_test_100.csv`.
-            // The default windows delimiter is `\`, so the workplace path is `datafusion\datafusion`.
-            workspace_root
-                .replace(std::path::MAIN_SEPARATOR, object_store::path::DELIMITER)
-        } else {
-            workspace_root.to_string()
-        };
+    let sanitized_workplace_root = if cfg!(windows) {
+        // Object store paths are delimited with `/`, e.g. `/datafusion/datafusion/testing/data/csv/aggregate_test_100.csv`.
+        // The default windows delimiter is `\`, so the workplace path is `datafusion\datafusion`.
+        workspace_root.replace(std::path::MAIN_SEPARATOR, object_store::path::DELIMITER)
+    } else {
+        workspace_root.to_string()
+    };
 
-        object_store::path::Path::parse(sanitized_workplace_root).unwrap()
-    })
-}
+    object_store::path::Path::parse(sanitized_workplace_root).unwrap()
+});
 
 /// Convert a single batch to a `Vec<Vec<String>>` for comparison
 fn convert_batch(batch: RecordBatch) -> Result<Vec<Vec<String>>> {

From ac6962256c976a1d307bf5ad7b5e6071f74b7f79 Mon Sep 17 00:00:00 2001
From: Alexander Rafferty
Date: Sun, 28 Jul 2024 21:03:58 +1000
Subject: [PATCH 2/2] Fix bug in `external_access_plan.rs` and update
 remaining MSRVs to 1.80.

---
 datafusion-cli/Cargo.toml                             | 2 +-
 datafusion/core/tests/parquet/external_access_plan.rs | 2 +-
 datafusion/proto-common/Cargo.toml                    | 2 +-
 datafusion/proto-common/gen/Cargo.toml                | 2 +-
 datafusion/proto/Cargo.toml                           | 2 +-
 datafusion/proto/gen/Cargo.toml                       | 2 +-
 datafusion/substrait/Cargo.toml                       | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)
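
Note for reviewers: the `external_access_plan.rs` change appears to fix a
compile error from PATCH 1/2, where a struct pattern was destructured
straight out of a `LazyLock`. A minimal sketch with illustrative types
(not the real `TestData`):

    use std::sync::LazyLock;

    struct TestData {
        file_name: String,
        file_size: u64,
    }

    static TEST_DATA: LazyLock<TestData> = LazyLock::new(|| TestData {
        file_name: "example.parquet".to_string(),
        file_size: 1024,
    });

    fn main() {
        // let TestData { file_name, file_size } = TEST_DATA;
        //   ^ mismatched types: the pattern `TestData { .. }` cannot match
        //     a `LazyLock<TestData>`; deref coercion does not apply here.
        let TestData { file_name, file_size } = &*TEST_DATA;
        println!("{file_name}: {file_size} bytes");
    }

`&*TEST_DATA` runs the `Deref` impl (initializing the value on first use)
and reborrows it as `&TestData`, so the fields bind by reference; that is
also why the call site still passes `*file_size`.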
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index 860dc123fa94..9583492c4113 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -26,7 +26,7 @@ license = "Apache-2.0"
 homepage = "https://datafusion.apache.org"
 repository = "https://github.com/apache/datafusion"
 # Specify MSRV here as `cargo msrv` doesn't support workspace version
-rust-version = "1.76"
+rust-version = "1.80"
 readme = "README.md"
 
 [dependencies]
diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs
index e6dc02fac556..cce9c66f1e71 100644
--- a/datafusion/core/tests/parquet/external_access_plan.rs
+++ b/datafusion/core/tests/parquet/external_access_plan.rs
@@ -317,7 +317,7 @@ impl TestFull {
             schema,
             file_name,
             file_size,
-        } = TEST_DATA;
+        } = &*TEST_DATA;
 
         let mut partitioned_file = PartitionedFile::new(file_name, *file_size);
 
diff --git a/datafusion/proto-common/Cargo.toml b/datafusion/proto-common/Cargo.toml
index e5d65827cdec..9b2f15a9a710 100644
--- a/datafusion/proto-common/Cargo.toml
+++ b/datafusion/proto-common/Cargo.toml
@@ -26,7 +26,7 @@ homepage = { workspace = true }
 repository = { workspace = true }
 license = { workspace = true }
 authors = { workspace = true }
-rust-version = "1.76"
+rust-version = "1.80"
 
 # Exclude proto files so crates.io consumers don't need protoc
 exclude = ["*.proto"]
diff --git a/datafusion/proto-common/gen/Cargo.toml b/datafusion/proto-common/gen/Cargo.toml
index 54ec0e44694b..bb03208b2b70 100644
--- a/datafusion/proto-common/gen/Cargo.toml
+++ b/datafusion/proto-common/gen/Cargo.toml
@@ -20,7 +20,7 @@ name = "gen-common"
 description = "Code generation for proto"
 version = "0.1.0"
 edition = { workspace = true }
-rust-version = "1.76"
+rust-version = "1.80"
 authors = { workspace = true }
 homepage = { workspace = true }
 repository = { workspace = true }
diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml
index 95d9e6700a50..4203bd7a28c0 100644
--- a/datafusion/proto/Cargo.toml
+++ b/datafusion/proto/Cargo.toml
@@ -27,7 +27,7 @@ repository = { workspace = true }
 license = { workspace = true }
 authors = { workspace = true }
 # Specify MSRV here as `cargo msrv` doesn't support workspace version
-rust-version = "1.76"
+rust-version = "1.80"
 # Exclude proto files so crates.io consumers don't need protoc
 exclude = ["*.proto"]
diff --git a/datafusion/proto/gen/Cargo.toml b/datafusion/proto/gen/Cargo.toml
index 401c51c94563..e69282540cb2 100644
--- a/datafusion/proto/gen/Cargo.toml
+++ b/datafusion/proto/gen/Cargo.toml
@@ -20,7 +20,7 @@ name = "gen"
 description = "Code generation for proto"
 version = "0.1.0"
 edition = { workspace = true }
-rust-version = "1.76"
+rust-version = "1.80"
 authors = { workspace = true }
 homepage = { workspace = true }
 repository = { workspace = true }
diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml
index 9e7ef9632ad3..0647263225c4 100644
--- a/datafusion/substrait/Cargo.toml
+++ b/datafusion/substrait/Cargo.toml
@@ -26,7 +26,7 @@ repository = { workspace = true }
 license = { workspace = true }
 authors = { workspace = true }
 # Specify MSRV here as `cargo msrv` doesn't support workspace version
-rust-version = "1.76"
+rust-version = "1.80"
 
 [lints]
 workspace = true