Skip to content

Commit

Permalink
Demo CI Benchmark assertApproximateDataFrameEquality
Browse files Browse the repository at this point in the history
  • Loading branch information
zeotuan committed Sep 3, 2024
1 parent 2921f3c commit db60e09
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 44 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ jobs:
- uses: olafurpg/setup-scala@v10
- name: Test
run: sbt -Dspark.testVersion=${{ matrix.spark }} +test
- name: Benchmark
run: sbt -Dspark.testVersion=${{ matrix.spark }} +benchmarks/Jmh/run
Original file line number Diff line number Diff line change
@@ -1,59 +1,41 @@
package com.github.mrpowers.spark.fast.tests

import org.apache.spark.sql.Row
import org.apache.spark.sql.SparkSession
import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole

import java.time.Instant
import java.util.concurrent.TimeUnit
import scala.util.Try

// TODO: move this to a separate benchmark project
// NOTE(review): this span is a diff rendered without +/- markers, so two versions of the
// benchmark are interleaved and it does not compile as a single unit:
//   * a Row-comparison benchmark (`testMethod`, which calls RowComparer.areRowsEqual), and
//   * a DataFrame benchmark (`assertApproximateDataFrameEqualityWithPrecision`).
// Presumably the DataFrame version is the added side of the commit -- confirm against the repo.
private class MyBenchmark {

private class MyBenchmark extends DataFrameComparer {
// JMH configuration visible below: 2 forks, 10 warmup and 10 measurement iterations,
// results reported in nanoseconds.
@Benchmark
@BenchmarkMode(Array(Mode.AverageTime))
@BenchmarkMode(Array(Mode.AverageTime, Mode.SingleShotTime))
@Fork(value = 2)
@Warmup(iterations = 10)
@Measurement(iterations = 10)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
// Row-comparison variant: builds two structurally identical nested Rows (r1 here, r2 further
// down) and compares them with RowComparer.areRowsEqual.
def testMethod(blackHole: Blackhole): Boolean = {
val r1 = Row(
"a",
Row(
1,
Row(
2.0,
Row(
null,
Row(
Seq(Row("c"), Row("d")),
BigDecimal.decimal(1.0),
Row(Instant.EPOCH)
)
)
)
)
)
// DataFrame variant: runs assertApproximateDataFrameEquality on two small two-row DataFrames
// and returns whether the assertion completed without throwing.
def assertApproximateDataFrameEqualityWithPrecision(blackHole: Blackhole): Boolean = {
// NOTE(review): the SparkSession builder call and the DataFrame construction below sit inside
// the @Benchmark method, so they are part of every measured invocation. Consider moving them
// to a JMH @State fixture with @Setup if only the comparison should be timed.
val spark = SparkSession
.builder()
.master("local")
.appName("spark session")
.config("spark.sql.shuffle.partitions", "1")
.getOrCreate()
spark.sparkContext.setLogLevel("ERROR")

import spark.implicits._
// ds1 and ds2 have identical col_B / col_C values; col_A differs only in the trailing digits
// (differences below 1e-13), far smaller than the precision of 0.0000001 passed below.
val ds1 = Seq(
("1", "10/01/2019", 26.762499999999996),
("1", "11/01/2019", 26.762499999999996)
).toDF("col_B", "col_C", "col_A")

val ds2 = Seq(
("1", "10/01/2019", 26.762499999999946),
("1", "11/01/2019", 26.76249999999991)
).toDF("col_B", "col_C", "col_A")
// Try captures any non-fatal exception thrown by the assertion, so a mismatch surfaces as a
// `false` return value (via result.isSuccess) rather than as a failed benchmark run.
// NOTE(review): that means a regression in the comparer would not fail the CI step -- confirm
// this is intended.
val result = Try(assertApproximateDataFrameEquality(ds1, ds2, precision = 0.0000001, orderedComparison = false))

// Second operand of the Row-comparison variant; same literal structure as r1.
val r2 = Row(
"a",
Row(
1,
Row(
2.0,
Row(
null,
Row(
Seq(Row("c"), Row("d")),
BigDecimal.decimal(1.0),
Row(Instant.EPOCH)
)
)
)
)
)
// Tail of the Row-comparison variant: hand the result to the Blackhole and return it.
val bool = RowComparer.areRowsEqual(r1, r2)
blackHole.consume(bool)
bool
// Tail of the DataFrame variant: hand the Try to the Blackhole and return its success flag.
blackHole.consume(result)
result.isSuccess
}
}

0 comments on commit db60e09

Please sign in to comment.