apache · pan3793 · Feb 14, 2025 · Feb 14, 2025 · Feb 15, 2025 · Feb 17, 2025
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
@@ -21,6 +21,10 @@ on:
   schedule:
     - cron: 0 4 * * *
 
+  pull_request:
+    branches:
+      - master
+
 jobs:
   build:
     name: Build

diff --git a/...k-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala b/...k-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala
@@ -23,7 +23,7 @@ import java.time.ZoneId
 import org.apache.spark.kyuubi.{SparkProgressMonitor, SQLOperationListener}
 import org.apache.spark.kyuubi.SparkUtilsHelper.redact
 import org.apache.spark.sql.{DataFrame, Row, SparkSession}
-import org.apache.spark.sql.execution.SQLExecution
+import org.apache.spark.sql.execution.SparkSQLExecutionHelper
 import org.apache.spark.sql.types.{BinaryType, StructField, StructType}
 import org.apache.spark.ui.SparkUIUtils.formatDuration
 
@@ -155,7 +155,7 @@ abstract class SparkOperation(session: Session)
     spark.sparkContext.setLocalProperty
 
   protected def withLocalProperties[T](f: => T): T = {
-    SQLExecution.withSQLConfPropagated(spark) {
+    SparkSQLExecutionHelper.withSQLConfPropagated(spark) {
       val originalSession = SparkSession.getActiveSession
       try {
         SparkSession.setActiveSession(spark)

diff --git a/...rk-sql-engine/src/main/scala/org/apache/spark/sql/execution/SparkSQLExecutionHelper.scala b/...rk-sql-engine/src/main/scala/org/apache/spark/sql/execution/SparkSQLExecutionHelper.scala
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+
+import org.apache.spark.sql.SparkSession
+
+import org.apache.kyuubi.util.reflect.{DynClasses, DynMethods}
+
+object SparkSQLExecutionHelper { // reflective bridge to SQLExecution.withSQLConfPropagated
+
+  private val sparkSessionClz = DynClasses.builder() // parameter type for the lookup below
+    .impl("org.apache.spark.sql.classic.SparkSession") // SPARK-49700 (4.0.0)
+    .impl("org.apache.spark.sql.SparkSession") // presumably the pre-4.0 location
+    .build()
+
+  private val withSQLConfPropagatedMethod =
+    DynMethods.builder("withSQLConfPropagated")
+      .impl(SQLExecution.getClass, sparkSessionClz, classOf[() => Any]) // Function0 param type
+      .buildChecked(SQLExecution) // presumably binds the SQLExecution singleton as receiver
+
+  def withSQLConfPropagated[T](sparkSession: SparkSession)(body: => T): T = {
+    withSQLConfPropagatedMethod.invokeChecked[T](sparkSession, () => body) // body as a thunk
+  }
+}
diff --git a/...uubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala b/...uubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.types._
 import org.apache.kyuubi.engine.spark.KyuubiSparkUtil
 import org.apache.kyuubi.engine.spark.schema.RowSet
 import org.apache.kyuubi.engine.spark.util.SparkCatalogUtils.quoteIfNeeded
-import org.apache.kyuubi.util.reflect.DynMethods
+import org.apache.kyuubi.util.reflect.{DynClasses, DynMethods}
 
 object SparkDatasetHelper extends Logging {
 
@@ -63,8 +63,18 @@ object SparkDatasetHelper extends Logging {
       toArrowBatchRdd(plan).collect()
   }
 
+  private val datasetClz = DynClasses.builder() // class searched for toArrowBatchRdd
+    .impl("org.apache.spark.sql.classic.Dataset") // SPARK-49700 (4.0.0)
+    .impl("org.apache.spark.sql.Dataset") // presumably the pre-4.0 location
+    .build()
+
+  private val toArrowBatchRddMethod =
+    DynMethods.builder("toArrowBatchRdd")
+      .impl(datasetClz) // looked up with no parameter types
+      .buildChecked()
+
   def toArrowBatchRdd[T](ds: Dataset[T]): RDD[Array[Byte]] = {
-    ds.toArrowBatchRdd
+    toArrowBatchRddMethod.bind(ds).invoke() // reflective form of the removed ds.toArrowBatchRdd
   }
 
   /**

diff --git a/...c/test/scala/org/apache/kyuubi/engine/spark/operation/SparkArrowbasedOperationSuite.scala b/...c/test/scala/org/apache/kyuubi/engine/spark/operation/SparkArrowbasedOperationSuite.scala
@@ -27,7 +27,7 @@ import org.apache.spark.sql.{QueryTest, Row, SparkSession}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.plans.logical.Project
 import org.apache.spark.sql.execution.{CollectLimitExec, LocalTableScanExec, QueryExecution, SparkPlan}
-import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
+import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, QueryStageExec}
 import org.apache.spark.sql.execution.exchange.Exchange
 import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec}
 import org.apache.spark.sql.execution.metric.SparkMetricsTestUtils
@@ -163,6 +163,7 @@ class SparkArrowbasedOperationSuite extends WithSparkSQLEngine with SparkDataTyp
         sparkPlan.schema,
         "",
         true,
+        true, // spark.sql.execution.arrow.useLargeVarTypes
         KyuubiSparkContextHelper.dummyTaskContext())
       assert(rows.size == expectSize)
     }
@@ -247,7 +248,11 @@ class SparkArrowbasedOperationSuite extends WithSparkSQLEngine with SparkDataTyp
           |) LIMIT 1
           |""".stripMargin)
       val smj = plan.collect { case smj: SortMergeJoinExec => smj }
-      val bhj = adaptivePlan.collect { case bhj: BroadcastHashJoinExec => bhj }
+      val bhj = (adaptivePlan match {
+        // SPARK-51008 (4.0.0) adds ResultQueryStageExec
+        case queryStage: QueryStageExec => queryStage.plan
+        case plan => plan
+      }).collect { case bhj: BroadcastHashJoinExec => bhj }
       assert(smj.size == 1)
       assert(bhj.size == 1)
     }
@@ -505,33 +510,49 @@ class SparkArrowbasedOperationSuite extends WithSparkSQLEngine with SparkDataTyp
     }
   }
 
-  // the signature of function [[ArrowConverters.fromBatchIterator]] is changed in SPARK-43528
-  // (since Spark 3.5)
   private lazy val fromBatchIteratorMethod = DynMethods.builder("fromBatchIterator")
     .hiddenImpl( // for Spark 3.4 or previous
       "org.apache.spark.sql.execution.arrow.ArrowConverters$",
       classOf[Iterator[Array[Byte]]],
       classOf[StructType],
       classOf[String],
       classOf[TaskContext])
-    .hiddenImpl( // for Spark 3.5 or later
+    .hiddenImpl( // SPARK-43528: Spark 3.5, one Boolean before TaskContext
       "org.apache.spark.sql.execution.arrow.ArrowConverters$",
       classOf[Iterator[Array[Byte]]],
       classOf[StructType],
       classOf[String],
       classOf[Boolean],
       classOf[TaskContext])
+    .hiddenImpl( // SPARK-51079: Spark 4.0 or later, two Booleans before TaskContext
+      "org.apache.spark.sql.execution.arrow.ArrowConverters$",
+      classOf[Iterator[Array[Byte]]],
+      classOf[StructType],
+      classOf[String],
+      classOf[Boolean],
+      classOf[Boolean],
+      classOf[TaskContext])
     .build()
 
   def fromBatchIterator(
       arrowBatchIter: Iterator[Array[Byte]],
       schema: StructType,
       timeZoneId: String,
       errorOnDuplicatedFieldNames: JBoolean,
+      largeVarTypes: Boolean,
       context: TaskContext): Iterator[InternalRow] = {
     val className = "org.apache.spark.sql.execution.arrow.ArrowConverters$"
     val instance = DynFields.builder().impl(className, "MODULE$").build[Object]().get(null)
-    if (SPARK_ENGINE_RUNTIME_VERSION >= "3.5") {
+    if (SPARK_ENGINE_RUNTIME_VERSION >= "4.0") {
+      fromBatchIteratorMethod.invoke[Iterator[InternalRow]](
+        instance,
+        arrowBatchIter,
+        schema,
+        timeZoneId,
+        errorOnDuplicatedFieldNames,
+        largeVarTypes,
+        context)
+    } else if (SPARK_ENGINE_RUNTIME_VERSION === "3.5") {
       fromBatchIteratorMethod.invoke[Iterator[InternalRow]](
         instance,
         arrowBatchIter,