bump Spark version "3.0.0" -> "3.2.0" #212

Open · wants to merge 3 commits into master
58 changes: 58 additions & 0 deletions .github/workflows/scala.yml
@@ -0,0 +1,58 @@
name: Build Spark sql perf

on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
  build:
    runs-on: ubuntu-22.04

    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Set up JDK 11
        uses: actions/setup-java@v3
        with:
          java-version: '11'
          distribution: 'adopt'

      - name: Install SDKMAN! and sbt 0.13.18
        run: |
          curl -s "https://get.sdkman.io" | bash
          source "$HOME/.sdkman/bin/sdkman-init.sh"
          sdk install sbt 0.13.18

      - name: Cache sbt
        uses: actions/cache@v4
        with:
          path: |
            ~/.ivy2/cache
            ~/.sbt
            ~/.coursier
          key: ${{ runner.os }}-sbt-${{ hashFiles('**/build.sbt') }}
          restore-keys: |
            ${{ runner.os }}-sbt-

      - name: Build with sbt
        run: sbt compile

      - name: Package with sbt
        run: sbt package

      - name: Extract version
        id: extract_version
        run: |
          version=$(cat version.sbt | grep 'version in ThisBuild :=' | awk -F'\"' '{print $2}')
          echo "version=$version" >> $GITHUB_ENV

      - name: Upload JAR artifact
        uses: actions/upload-artifact@v4
        with:
          name: spark-sql-perf_2.12-${{ env.version }}.jar
          path: target/scala-2.12/*.jar
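
For reference, the "Extract version" step only pulls the quoted version string out of version.sbt. A minimal Scala sketch of the same parse (an illustrative stand-alone helper, not part of this PR; splitting on '"' mirrors the awk -F'\"' '{print $2}' above):

import scala.io.Source

// Illustrative only: reproduce the workflow's version extraction in Scala.
// Given version.sbt containing: version in ThisBuild := "0.5.2-SNAPSHOT"
// this prints: 0.5.2-SNAPSHOT
object ExtractVersion extends App {
  val version = Source.fromFile("version.sbt").getLines()
    .find(_.contains("version in ThisBuild :="))  // same filter as the grep
    .map(_.split('"')(1))                         // field 2 when split on '"', like awk -F'"'
    .getOrElse(sys.error("version not found in version.sbt"))
  println(version)
}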
2 changes: 1 addition & 1 deletion bin/run
@@ -3,4 +3,4 @@
 # runs spark-sql-perf from the current directory

 ARGS="runBenchmark $@"
-build/sbt "$ARGS"
+sbt "$ARGS"
6 changes: 3 additions & 3 deletions build.sbt
@@ -5,16 +5,16 @@ name := "spark-sql-perf"

 organization := "com.databricks"

-scalaVersion := "2.12.10"
+scalaVersion := "2.12.18"

-crossScalaVersions := Seq("2.12.10")
+crossScalaVersions := Seq("2.12.18")

 sparkPackageName := "databricks/spark-sql-perf"

 // All Spark Packages need a license
 licenses := Seq("Apache-2.0" -> url("http://opensource.org/licenses/Apache-2.0"))

-sparkVersion := "3.0.0"
+sparkVersion := "3.5.1"

 sparkComponents ++= Seq("sql", "hive", "mllib")
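
For context, sparkVersion and sparkComponents come from the sbt-spark-package plugin, which resolves the matching Spark artifacts for the build. Roughly what the bumped settings pull in, expressed as plain sbt dependencies (a sketch of the plugin's usual expansion, assuming provided scope; not part of the diff):

// Approximately what sparkVersion := "3.5.1" plus
// sparkComponents ++= Seq("sql", "hive", "mllib") resolve to:
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"  % "3.5.1" % "provided",
  "org.apache.spark" %% "spark-sql"   % "3.5.1" % "provided",
  "org.apache.spark" %% "spark-hive"  % "3.5.1" % "provided",
  "org.apache.spark" %% "spark-mllib" % "3.5.1" % "provided"
)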
17 changes: 9 additions & 8 deletions build/sbt-launch-lib.bash
@@ -45,23 +45,24 @@ dlog () {

 acquire_sbt_jar () {
   SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties`
-  URL1=https://dl.bintray.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
+  URL1=https://github.com/sbt/sbt/releases/download/v${SBT_VERSION}/sbt-${SBT_VERSION}.zip
   JAR=build/sbt-launch-${SBT_VERSION}.jar

   sbt_jar=$JAR

   if [[ ! -f "$sbt_jar" ]]; then
     # Download sbt launch jar if it hasn't been downloaded yet
     if [ ! -f "${JAR}" ]; then
     # Download
       printf "Attempting to fetch sbt\n"
       JAR_DL="${JAR}.part"
+      COMPLETE_SBT="build/sbt.zip"
       if [ $(command -v curl) ]; then
-        curl --fail --location --silent ${URL1} > "${JAR_DL}" &&\
-          mv "${JAR_DL}" "${JAR}"
+        curl --fail --location --silent ${URL1} > "${COMPLETE_SBT}" &&\
+          unzip ${COMPLETE_SBT} &&\
+          cp "sbt/bin/sbt-launch.jar" "${JAR}"
       elif [ $(command -v wget) ]; then
-        wget --quiet ${URL1} -O "${JAR_DL}" &&\
-          mv "${JAR_DL}" "${JAR}"
+        wget --quiet ${URL1} -O "${COMPLETE_SBT}" &&\
+          unzip ${COMPLETE_SBT} &&\
+          cp "sbt/bin/sbt-launch.jar" "${JAR}"
       else
         printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n"
         exit -1

@@ -195,4 +196,4 @@ run() {
     -jar "$sbt_jar" \
     "${sbt_commands[@]}" \
     "${residual_args[@]}"
-}
+}
19 changes: 11 additions & 8 deletions project/plugins.sbt
@@ -1,17 +1,20 @@
 // You may use this file to add plugin dependencies for sbt.

-resolvers += "Spark Packages repo" at "https://repos.spark-packages.org/"
-
-resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/"
+resolvers ++= Seq(
+  Resolver.mavenLocal,
+  Resolver.sonatypeRepo("releases"),
+  "Maven Central" at "https://repo1.maven.org/maven2/",
+  "Spark Packages Repo" at "https://repos.spark-packages.org/"
+)

-addSbtPlugin("org.spark-packages" %% "sbt-spark-package" % "0.1.1")
+addSbtPlugin("org.spark-packages" % "sbt-spark-package" % "0.2.3")

 addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.6.0")

-addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.0")
+addSbtPlugin("com.github.sbt" % "sbt-release" % "1.0.15")

-addSbtPlugin("com.databricks" %% "sbt-databricks" % "0.1.3")
+addSbtPlugin("com.databricks" %% "sbt-databricks" % "0.1.5")

-addSbtPlugin("me.lessis" % "bintray-sbt" % "0.3.0")
+addSbtPlugin("org.foundweekends" % "sbt-bintray" % "0.5.6")

-addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.0.0")
+addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.1.2")
3 changes: 2 additions & 1 deletion src/main/scala/com/databricks/spark/sql/perf/Benchmark.scala
@@ -240,7 +240,8 @@ abstract class Benchmark(
   protected override def doBenchmark(
       includeBreakdown: Boolean,
       description: String = "",
-      messages: ArrayBuffer[String]): BenchmarkResult = {
+      messages: ArrayBuffer[String],
+      iteration: Int = 1): BenchmarkResult = {
     try {
       val timeMs = measureTimeMs(run())
       BenchmarkResult(
src/main/scala/com/databricks/spark/sql/perf/Benchmarkable.scala
@@ -43,14 +43,15 @@ trait Benchmarkable {
       description: String = "",
       messages: ArrayBuffer[String],
       timeout: Long,
-      forkThread: Boolean = true): BenchmarkResult = {
+      forkThread: Boolean = true,
+      iteration: Int = 1): BenchmarkResult = {
     logger.info(s"$this: benchmark")
     sparkContext.setJobDescription(s"Execution: $name, $description")
     beforeBenchmark()
     val result = if (forkThread) {
       runBenchmarkForked(includeBreakdown, description, messages, timeout)
     } else {
-      doBenchmark(includeBreakdown, description, messages)
+      doBenchmark(includeBreakdown, description, messages, iteration)
     }
     afterBenchmark(sqlContext.sparkContext)
     result

@@ -107,7 +108,8 @@ trait Benchmarkable {
   protected def doBenchmark(
       includeBreakdown: Boolean,
       description: String = "",
-      messages: ArrayBuffer[String]): BenchmarkResult
+      messages: ArrayBuffer[String],
+      iteration: Int = 1): BenchmarkResult

   protected def measureTimeMs[A](f: => A): Double = {
     val startTime = System.nanoTime()
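
The effect of these signature changes: benchmark() now accepts the current iteration number and forwards it to doBenchmark, so implementations can tag results per iteration (note the forked path, runBenchmarkForked, does not yet forward it in this diff). A stand-alone sketch of the pattern, using simplified stand-in types rather than the project's real ones:

import scala.collection.mutable.ArrayBuffer

// Simplified stand-ins for the project's types, for illustration only.
case class BenchmarkResult(name: String, iteration: Int, executionTimeMs: Double)

trait Benchmarkable {
  def name: String
  protected def doBenchmark(
      includeBreakdown: Boolean,
      description: String = "",
      messages: ArrayBuffer[String],
      iteration: Int = 1): BenchmarkResult
}

// A toy workload that uses the new parameter to label its log messages.
class SleepWorkload(val name: String) extends Benchmarkable {
  protected override def doBenchmark(
      includeBreakdown: Boolean,
      description: String = "",
      messages: ArrayBuffer[String],
      iteration: Int = 1): BenchmarkResult = {
    val start = System.nanoTime()
    Thread.sleep(10) // stand-in for real work
    val timeMs = (System.nanoTime() - start) / 1e6
    messages += s"$name: iteration $iteration finished in $timeMs ms"
    BenchmarkResult(name, iteration, timeMs)
  }
}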
3 changes: 2 additions & 1 deletion src/main/scala/com/databricks/spark/sql/perf/Query.scala
@@ -62,7 +62,8 @@ class Query(
   protected override def doBenchmark(
       includeBreakdown: Boolean,
       description: String = "",
-      messages: ArrayBuffer[String]): BenchmarkResult = {
+      messages: ArrayBuffer[String],
+      iteration: Int = 1): BenchmarkResult = {
     try {
       val dataFrame = buildDataFrame
       val queryExecution = dataFrame.queryExecution
src/main/scala/com/databricks/spark/sql/perf/mllib/MLPipelineStageBenchmarkable.scala
@@ -45,7 +45,8 @@ class MLPipelineStageBenchmarkable(
   override protected def doBenchmark(
       includeBreakdown: Boolean,
       description: String,
-      messages: ArrayBuffer[String]): BenchmarkResult = {
+      messages: ArrayBuffer[String],
+      iteration: Int = 1): BenchmarkResult = {
     try {
       val (trainingTime, model: Transformer) = measureTime {
         logger.info(s"$this: train: trainingSet=${trainingData.schema}")
2 changes: 1 addition & 1 deletion version.sbt
@@ -1 +1 @@
-version in ThisBuild := "0.5.1-SNAPSHOT"
+version in ThisBuild := "0.5.2-SNAPSHOT"