diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2ddcb9d..744b8c6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,3 +17,5 @@ jobs:
     - uses: olafurpg/setup-scala@v10
     - name: Test
       run: sbt -Dspark.testVersion=${{ matrix.spark }} +test
+    - name: Benchmark
+      run: sbt -Dspark.testVersion=${{ matrix.spark }} +benchmarks/Jmh/run
diff --git a/benchmarks/src/main/scala/com/github/mrpowers/spark/fast/tests/MyBenchmark.scala b/benchmarks/src/main/scala/com/github/mrpowers/spark/fast/tests/MyBenchmark.scala
--- a/benchmarks/src/main/scala/com/github/mrpowers/spark/fast/tests/MyBenchmark.scala
+++ b/benchmarks/src/main/scala/com/github/mrpowers/spark/fast/tests/MyBenchmark.scala
@@ -1,59 +1,69 @@
 package com.github.mrpowers.spark.fast.tests
 
-import org.apache.spark.sql.Row
+import org.apache.spark.sql.{DataFrame, SparkSession}
 import org.openjdk.jmh.annotations._
 import org.openjdk.jmh.infra.Blackhole
 
-import java.time.Instant
 import java.util.concurrent.TimeUnit
+import scala.util.Try
 
-// TODO: move this to separate benchmark project
-private class MyBenchmark {
-
+/**
+ * JMH benchmark for `assertApproximateDataFrameEquality`.
+ *
+ * The SparkSession and both input DataFrames are built once per trial in `setup()` so that the
+ * measured method times only the comparison, not session start-up or DataFrame construction.
+ */
+@State(Scope.Benchmark)
+private class MyBenchmark extends DataFrameComparer {
+  // Assigned in setup(); vars are needed because @Setup runs after construction.
+  private var spark: SparkSession = _
+  private var ds1: DataFrame = _
+  private var ds2: DataFrame = _
+
+  /** Starts a local SparkSession and builds the two DataFrames under comparison. */
+  @Setup(Level.Trial)
+  def setup(): Unit = {
+    val session = SparkSession
+      .builder()
+      .master("local")
+      .appName("spark session")
+      .config("spark.sql.shuffle.partitions", "1")
+      .getOrCreate()
+    spark = session
+    session.sparkContext.setLogLevel("ERROR")
+
+    // `import` needs a stable identifier, hence the local val rather than the `spark` var.
+    import session.implicits._
+    ds1 = Seq(
+      ("1", "10/01/2019", 26.762499999999996),
+      ("1", "11/01/2019", 26.762499999999996)
+    ).toDF("col_B", "col_C", "col_A")
+
+    ds2 = Seq(
+      ("1", "10/01/2019", 26.762499999999946),
+      ("1", "11/01/2019", 26.76249999999991)
+    ).toDF("col_B", "col_C", "col_A")
+  }
+
+  /** Stops the SparkSession created by `setup()`, if any. */
+  @TearDown(Level.Trial)
+  def tearDown(): Unit = Option(spark).foreach(_.stop())
+
+  /**
+   * Runs an unordered approximate comparison of `ds1` and `ds2` with precision 0.0000001.
+   *
+   * @return `true` if the assertion passed, `false` if it threw
+   */
   @Benchmark
-  @BenchmarkMode(Array(Mode.AverageTime))
+  @BenchmarkMode(Array(Mode.AverageTime, Mode.SingleShotTime))
   @Fork(value = 2)
   @Warmup(iterations = 10)
   @Measurement(iterations = 10)
   @OutputTimeUnit(TimeUnit.NANOSECONDS)
-  def testMethod(blackHole: Blackhole): Boolean = {
-    val r1 = Row(
-      "a",
-      Row(
-        1,
-        Row(
-          2.0,
-          Row(
-            null,
-            Row(
-              Seq(Row("c"), Row("d")),
-              BigDecimal.decimal(1.0),
-              Row(Instant.EPOCH)
-            )
-          )
-        )
-      )
-    )
+  def assertApproximateDataFrameEqualityWithPrecision(blackHole: Blackhole): Boolean = {
+    val result = Try(assertApproximateDataFrameEquality(ds1, ds2, precision = 0.0000001, orderedComparison = false))
 
-    val r2 = Row(
-      "a",
-      Row(
-        1,
-        Row(
-          2.0,
-          Row(
-            null,
-            Row(
-              Seq(Row("c"), Row("d")),
-              BigDecimal.decimal(1.0),
-              Row(Instant.EPOCH)
-            )
-          )
-        )
-      )
-    )
-    val bool = RowComparer.areRowsEqual(r1, r2)
-    blackHole.consume(bool)
-    bool
+    blackHole.consume(result)
+    result.isSuccess
   }
 }