From a49ea784639bcedaee61172918627bc4ea2389c7 Mon Sep 17 00:00:00 2001
From: Satya Kommula
Date: Mon, 17 Jan 2022 18:35:47 +0530
Subject: [PATCH 1/3] bump spark version "3.0.0" -> "3.2.0"

---
 build.sbt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build.sbt b/build.sbt
index 2303e62d..68e31d3c 100644
--- a/build.sbt
+++ b/build.sbt
@@ -14,7 +14,7 @@ sparkPackageName := "databricks/spark-sql-perf"
 // All Spark Packages need a license
 licenses := Seq("Apache-2.0" -> url("http://opensource.org/licenses/Apache-2.0"))
 
-sparkVersion := "3.0.0"
+sparkVersion := "3.2.0"
 
 sparkComponents ++= Seq("sql", "hive", "mllib")
 

From 229b5fff9078c8733d9301e58c5d71352904c2df Mon Sep 17 00:00:00 2001
From: Satya Kommula
Date: Tue, 3 Dec 2024 15:49:48 +0530
Subject: [PATCH 2/3] Upgrade Spark to version 3.5.1, update dependencies, and
 replace the Bintray URL

---
Notes (not part of the commit message):
 - The sbt distribution zip is unpacked into build/sbt-dist with `unzip -o -q`
   so repeated downloads never prompt for overwrite and nothing is extracted
   into the repository root. It must not be unpacked directly into build/,
   because build/sbt is the launcher script and the zip's top-level directory
   is also named sbt.

 bin/run                                       |  2 +-
 build.sbt                                     |  6 +++---
 build/sbt-launch-lib.bash                     | 17 +++++++++--------
 project/plugins.sbt                           | 19 +++++++++++--------
 .../databricks/spark/sql/perf/Benchmark.scala |  3 ++-
 .../spark/sql/perf/Benchmarkable.scala        |  8 +++++---
 .../com/databricks/spark/sql/perf/Query.scala |  3 ++-
 .../mllib/MLPipelineStageBenchmarkable.scala  |  3 ++-
 version.sbt                                   |  2 +-
 9 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/bin/run b/bin/run
index 7d28227c..f8923ffc 100755
--- a/bin/run
+++ b/bin/run
@@ -3,4 +3,4 @@
 # runs spark-sql-perf from the current directory
 
 ARGS="runBenchmark $@"
-build/sbt "$ARGS"
\ No newline at end of file
+sbt "$ARGS"
\ No newline at end of file
diff --git a/build.sbt b/build.sbt
index 68e31d3c..1a2b09f2 100644
--- a/build.sbt
+++ b/build.sbt
@@ -5,16 +5,16 @@ name := "spark-sql-perf"
 
 organization := "com.databricks"
 
-scalaVersion := "2.12.10"
+scalaVersion := "2.12.18"
 
-crossScalaVersions := Seq("2.12.10")
+crossScalaVersions := Seq("2.12.18")
 
 sparkPackageName := "databricks/spark-sql-perf"
 
 // All Spark Packages need a license
 licenses := Seq("Apache-2.0" -> url("http://opensource.org/licenses/Apache-2.0"))
 
-sparkVersion := "3.2.0"
+sparkVersion := "3.5.1"
 
 sparkComponents ++= Seq("sql", "hive", "mllib")
 
diff --git a/build/sbt-launch-lib.bash b/build/sbt-launch-lib.bash
index 2a399365..707f70ef 100755
--- a/build/sbt-launch-lib.bash
+++ b/build/sbt-launch-lib.bash
@@ -45,9 +45,8 @@ dlog () {
 
 acquire_sbt_jar () {
   SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties`
-  URL1=https://dl.bintray.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
+  URL1=https://github.com/sbt/sbt/releases/download/v${SBT_VERSION}/sbt-${SBT_VERSION}.zip
   JAR=build/sbt-launch-${SBT_VERSION}.jar
-
   sbt_jar=$JAR
 
   if [[ ! -f "$sbt_jar" ]]; then
@@ -55,13 +54,15 @@ acquire_sbt_jar () {
     if [ ! -f "${JAR}" ]; then
     # Download
     printf "Attempting to fetch sbt\n"
-    JAR_DL="${JAR}.part"
+    COMPLETE_SBT="build/sbt.zip"
     if [ $(command -v curl) ]; then
-      curl --fail --location --silent ${URL1} > "${JAR_DL}" &&\
-        mv "${JAR_DL}" "${JAR}"
+      curl --fail --location --silent ${URL1} > "${COMPLETE_SBT}" &&\
+        unzip -o -q "${COMPLETE_SBT}" -d build/sbt-dist &&\
+        cp "build/sbt-dist/sbt/bin/sbt-launch.jar" "${JAR}"
     elif [ $(command -v wget) ]; then
-      wget --quiet ${URL1} -O "${JAR_DL}" &&\
-        mv "${JAR_DL}" "${JAR}"
+      wget --quiet ${URL1} -O "${COMPLETE_SBT}" &&\
+        unzip -o -q "${COMPLETE_SBT}" -d build/sbt-dist &&\
+        cp "build/sbt-dist/sbt/bin/sbt-launch.jar" "${JAR}"
     else
       printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n"
       exit -1
@@ -195,4 +196,4 @@ run() {
     -jar "$sbt_jar" \
     "${sbt_commands[@]}" \
     "${residual_args[@]}"
-}
+}
\ No newline at end of file
diff --git a/project/plugins.sbt b/project/plugins.sbt
index d2473b61..c76851f6 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -1,17 +1,20 @@
 // You may use this file to add plugin dependencies for sbt.
 
-resolvers += "Spark Packages repo" at "https://repos.spark-packages.org/"
+resolvers ++= Seq(
+  Resolver.mavenLocal,
+  Resolver.sonatypeRepo("releases"),
+  "Maven Central" at "https://repo1.maven.org/maven2/",
+  "Spark Packages Repo" at "https://repos.spark-packages.org/"
+)
 
-resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/"
-
-addSbtPlugin("org.spark-packages" %% "sbt-spark-package" % "0.1.1")
+addSbtPlugin("org.spark-packages" % "sbt-spark-package" % "0.2.3")
 
 addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.6.0")
 
-addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.0")
+addSbtPlugin("com.github.sbt" % "sbt-release" % "1.0.15")
 
-addSbtPlugin("com.databricks" %% "sbt-databricks" % "0.1.3")
+addSbtPlugin("com.databricks" %% "sbt-databricks" % "0.1.5")
 
-addSbtPlugin("me.lessis" % "bintray-sbt" % "0.3.0")
+addSbtPlugin("org.foundweekends" % "sbt-bintray" % "0.5.6")
 
-addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.0.0")
+addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.1.2")
diff --git a/src/main/scala/com/databricks/spark/sql/perf/Benchmark.scala b/src/main/scala/com/databricks/spark/sql/perf/Benchmark.scala
index ebb49353..6098f353 100644
--- a/src/main/scala/com/databricks/spark/sql/perf/Benchmark.scala
+++ b/src/main/scala/com/databricks/spark/sql/perf/Benchmark.scala
@@ -240,7 +240,8 @@ abstract class Benchmark(
     protected override def doBenchmark(
         includeBreakdown: Boolean,
         description: String = "",
-        messages: ArrayBuffer[String]): BenchmarkResult = {
+        messages: ArrayBuffer[String],
+        iteration: Int = 1): BenchmarkResult = {
       try {
         val timeMs = measureTimeMs(run())
         BenchmarkResult(
diff --git a/src/main/scala/com/databricks/spark/sql/perf/Benchmarkable.scala b/src/main/scala/com/databricks/spark/sql/perf/Benchmarkable.scala
index 24efef70..b36850fc 100644
--- a/src/main/scala/com/databricks/spark/sql/perf/Benchmarkable.scala
+++ b/src/main/scala/com/databricks/spark/sql/perf/Benchmarkable.scala
@@ -43,14 +43,15 @@ trait Benchmarkable {
       description: String = "",
       messages: ArrayBuffer[String],
       timeout: Long,
-      forkThread: Boolean = true): BenchmarkResult = {
+      forkThread: Boolean = true,
+      iteration: Int = 1): BenchmarkResult = {
     logger.info(s"$this: benchmark")
     sparkContext.setJobDescription(s"Execution: $name, $description")
     beforeBenchmark()
     val result = if (forkThread) {
       runBenchmarkForked(includeBreakdown, description, messages, timeout)
     } else {
-      doBenchmark(includeBreakdown, description, messages)
+      doBenchmark(includeBreakdown, description, messages, iteration)
     }
     afterBenchmark(sqlContext.sparkContext)
     result
@@ -107,7 +108,8 @@ trait Benchmarkable {
   protected def doBenchmark(
       includeBreakdown: Boolean,
       description: String = "",
-      messages: ArrayBuffer[String]): BenchmarkResult
+      messages: ArrayBuffer[String],
+      iteration: Int = 1): BenchmarkResult
 
   protected def measureTimeMs[A](f: => A): Double = {
     val startTime = System.nanoTime()
diff --git a/src/main/scala/com/databricks/spark/sql/perf/Query.scala b/src/main/scala/com/databricks/spark/sql/perf/Query.scala
index babc63f0..48c0e880 100644
--- a/src/main/scala/com/databricks/spark/sql/perf/Query.scala
+++ b/src/main/scala/com/databricks/spark/sql/perf/Query.scala
@@ -62,7 +62,8 @@ class Query(
   protected override def doBenchmark(
       includeBreakdown: Boolean,
       description: String = "",
-      messages: ArrayBuffer[String]): BenchmarkResult = {
+      messages: ArrayBuffer[String],
+      iteration: Int = 1): BenchmarkResult = {
     try {
       val dataFrame = buildDataFrame
       val queryExecution = dataFrame.queryExecution
diff --git a/src/main/scala/com/databricks/spark/sql/perf/mllib/MLPipelineStageBenchmarkable.scala b/src/main/scala/com/databricks/spark/sql/perf/mllib/MLPipelineStageBenchmarkable.scala
index 8296f46b..58b58919 100644
--- a/src/main/scala/com/databricks/spark/sql/perf/mllib/MLPipelineStageBenchmarkable.scala
+++ b/src/main/scala/com/databricks/spark/sql/perf/mllib/MLPipelineStageBenchmarkable.scala
@@ -45,7 +45,8 @@ class MLPipelineStageBenchmarkable(
   override protected def doBenchmark(
       includeBreakdown: Boolean,
       description: String,
-      messages: ArrayBuffer[String]): BenchmarkResult = {
+      messages: ArrayBuffer[String],
+      iteration: Int = 1): BenchmarkResult = {
     try {
       val (trainingTime, model: Transformer) = measureTime {
         logger.info(s"$this: train: trainingSet=${trainingData.schema}")
diff --git a/version.sbt b/version.sbt
index 7338ce76..f9436171 100644
--- a/version.sbt
+++ b/version.sbt
@@ -1 +1 @@
-version in ThisBuild := "0.5.1-SNAPSHOT"
+version in ThisBuild := "0.5.2-SNAPSHOT"

From 9b8d6531dab1044148421d78d733488a33c3064d Mon Sep 17 00:00:00 2001
From: Satya Kommula
Date: Tue, 3 Dec 2024 17:24:42 +0530
Subject: [PATCH 3/3] initial commit for github workflows

---
Notes (not part of the commit message):
 - Each `run:` step starts a fresh shell, so the SDKMAN-installed sbt is
   exported through $GITHUB_PATH for the later build/package steps.
 - checkout/setup-java are on v4 and the JDK comes from the `temurin`
   distribution, the successor of the legacy `adopt` one.
 - The cache key hashes every *.sbt file plus project/build.properties so
   plugin or sbt-version changes invalidate the dependency cache.

 .github/workflows/scala.yml | 60 +++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 .github/workflows/scala.yml

diff --git a/.github/workflows/scala.yml b/.github/workflows/scala.yml
new file mode 100644
index 00000000..c1d767bc
--- /dev/null
+++ b/.github/workflows/scala.yml
@@ -0,0 +1,60 @@
+name: Build Spark sql perf
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  build:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up JDK 11
+        uses: actions/setup-java@v4
+        with:
+          java-version: '11'
+          distribution: 'temurin'
+
+      - name: Install SDKMAN! and sbt 0.13.18
+        run: |
+          curl -s "https://get.sdkman.io" | bash
+          source "$HOME/.sdkman/bin/sdkman-init.sh"
+          sdk install sbt 0.13.18
+          # Each step runs in a fresh shell; export the sbt bin dir so later steps find it.
+          echo "$HOME/.sdkman/candidates/sbt/0.13.18/bin" >> "$GITHUB_PATH"
+
+      - name: Cache sbt
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.ivy2/cache
+            ~/.sbt
+            ~/.coursier
+          key: ${{ runner.os }}-sbt-${{ hashFiles('**/*.sbt', 'project/build.properties') }}
+          restore-keys: |
+            ${{ runner.os }}-sbt-
+
+      - name: Build with sbt
+        run: sbt compile
+
+      - name: Package with sbt
+        run: sbt package
+
+      - name: Extract version
+        id: extract_version
+        run: |
+          version=$(cat version.sbt | grep 'version in ThisBuild :=' | awk -F'\"' '{print $2}')
+          echo "version=$version" >> "$GITHUB_ENV"
+
+      - name: Upload JAR artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: spark-sql-perf_2.12-${{ env.version }}.jar
+          path: target/scala-2.12/*.jar