From 588b6e0b3130ac5d62578da52eb721f068d7d47c Mon Sep 17 00:00:00 2001 From: Jingsong Lee Date: Tue, 10 Sep 2024 16:51:30 +0800 Subject: [PATCH] [core] Introduce projection fields to EqualiserCodeGenerator (#4154) --- .../paimon/codegen/CodeGeneratorImpl.java | 5 ++-- .../codegen/EqualiserCodeGenerator.scala | 14 +++++------ .../codegen/EqualiserCodeGeneratorTest.java | 25 +++++++++++++++++++ .../apache/paimon/codegen/CodeGenerator.java | 11 +++----- .../apache/paimon/codegen/CodeGenUtils.java | 8 ++++-- 5 files changed, 43 insertions(+), 20 deletions(-) diff --git a/paimon-codegen/src/main/java/org/apache/paimon/codegen/CodeGeneratorImpl.java b/paimon-codegen/src/main/java/org/apache/paimon/codegen/CodeGeneratorImpl.java index b8efa170deae..29096e96b206 100644 --- a/paimon-codegen/src/main/java/org/apache/paimon/codegen/CodeGeneratorImpl.java +++ b/paimon-codegen/src/main/java/org/apache/paimon/codegen/CodeGeneratorImpl.java @@ -53,8 +53,9 @@ public GeneratedClass generateRecordComparator( } @Override - public GeneratedClass generateRecordEqualiser(List fieldTypes) { - return new EqualiserCodeGenerator(RowType.builder().fields(fieldTypes).build()) + public GeneratedClass generateRecordEqualiser( + List fieldTypes, int[] fields) { + return new EqualiserCodeGenerator(fieldTypes.toArray(new DataType[0]), fields) .generateRecordEqualiser("RecordEqualiser"); } diff --git a/paimon-codegen/src/main/scala/org/apache/paimon/codegen/EqualiserCodeGenerator.scala b/paimon-codegen/src/main/scala/org/apache/paimon/codegen/EqualiserCodeGenerator.scala index 41c7427af433..3548d5b4c44f 100644 --- a/paimon-codegen/src/main/scala/org/apache/paimon/codegen/EqualiserCodeGenerator.scala +++ b/paimon-codegen/src/main/scala/org/apache/paimon/codegen/EqualiserCodeGenerator.scala @@ -20,21 +20,19 @@ package org.apache.paimon.codegen import org.apache.paimon.codegen.GenerateUtils._ import org.apache.paimon.codegen.ScalarOperatorGens.{generateEquals, generateRowEqualiser} -import org.apache.paimon.types.{BooleanType, DataType, RowType} +import org.apache.paimon.types.{BooleanType, DataType} import org.apache.paimon.types.DataTypeChecks.isCompositeType import org.apache.paimon.types.DataTypeRoot._ import org.apache.paimon.utils.TypeUtils.isPrimitive -import scala.collection.JavaConverters._ - -class EqualiserCodeGenerator(fieldTypes: Array[DataType]) { +class EqualiserCodeGenerator(fieldTypes: Array[DataType], fields: Array[Int]) { private val RECORD_EQUALISER = className[RecordEqualiser] private val LEFT_INPUT = "left" private val RIGHT_INPUT = "right" - def this(rowType: RowType) = { - this(rowType.getFieldTypes.asScala.toArray) + def this(fieldTypes: Array[DataType]) = { + this(fieldTypes, fieldTypes.indices.toArray) } def generateRecordEqualiser(name: String): GeneratedClass[RecordEqualiser] = { @@ -42,8 +40,8 @@ class EqualiserCodeGenerator(fieldTypes: Array[DataType]) { val ctx = new CodeGeneratorContext val className = newName(name) - val equalsMethodCodes = for (idx <- fieldTypes.indices) yield generateEqualsMethod(ctx, idx) - val equalsMethodCalls = for (idx <- fieldTypes.indices) yield { + val equalsMethodCodes = for (idx <- fields) yield generateEqualsMethod(ctx, idx) + val equalsMethodCalls = for (idx <- fields) yield { val methodName = getEqualsMethodName(idx) s"""result = result && $methodName($LEFT_INPUT, $RIGHT_INPUT);""" } diff --git a/paimon-codegen/src/test/java/org/apache/paimon/codegen/EqualiserCodeGeneratorTest.java b/paimon-codegen/src/test/java/org/apache/paimon/codegen/EqualiserCodeGeneratorTest.java index e4b93daa6973..f72881dfd0ff 100644 --- a/paimon-codegen/src/test/java/org/apache/paimon/codegen/EqualiserCodeGeneratorTest.java +++ b/paimon-codegen/src/test/java/org/apache/paimon/codegen/EqualiserCodeGeneratorTest.java @@ -44,6 +44,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import java.util.concurrent.ThreadLocalRandom; import java.util.function.Function; @@ -204,6 +205,30 @@ public void testSingleField(DataTypeRoot dataTypeRoot) { assertBoolean(equaliser, func, testData.left(), testData.right(), false); } + @RepeatedTest(100) + public void testProjection() { + GeneratedData field0 = TEST_DATA.get(DataTypeRoot.INTEGER); + GeneratedData field1 = TEST_DATA.get(DataTypeRoot.VARCHAR); + GeneratedData field2 = TEST_DATA.get(DataTypeRoot.BIGINT); + + RecordEqualiser equaliser = + new EqualiserCodeGenerator( + new DataType[] {field0.dataType, field1.dataType, field2.dataType}, + new int[] {1, 2}) + .generateRecordEqualiser("projectionFieldEquals") + .newInstance(Thread.currentThread().getContextClassLoader()); + + boolean result = + equaliser.equals( + GenericRow.of(field0.left(), field1.left(), field2.left()), + GenericRow.of(field0.right(), field1.right(), field2.right())); + boolean expected = + Objects.equals( + GenericRow.of(field1.left(), field2.left()), + GenericRow.of(field1.right(), field2.right())); + assertThat(result).isEqualTo(expected); + } + @RepeatedTest(100) public void testManyFields() { int size = 499; diff --git a/paimon-common/src/main/java/org/apache/paimon/codegen/CodeGenerator.java b/paimon-common/src/main/java/org/apache/paimon/codegen/CodeGenerator.java index a9b48b70b58e..e137619143a3 100644 --- a/paimon-common/src/main/java/org/apache/paimon/codegen/CodeGenerator.java +++ b/paimon-common/src/main/java/org/apache/paimon/codegen/CodeGenerator.java @@ -48,12 +48,7 @@ GeneratedClass generateNormalizedKeyComputer( GeneratedClass generateRecordComparator( List inputTypes, int[] sortFields); - /** - * Generate a {@link RecordEqualiser}. - * - * @param fieldTypes Both the input row field types and the sort key field types. Records are * - * compared by the first field, then the second field, then the third field and so on. All * - * fields are compared in ascending order. - */ - GeneratedClass generateRecordEqualiser(List fieldTypes); + /** Generate a {@link RecordEqualiser} with fields. */ + GeneratedClass generateRecordEqualiser( + List fieldTypes, int[] fields); } diff --git a/paimon-core/src/main/java/org/apache/paimon/codegen/CodeGenUtils.java b/paimon-core/src/main/java/org/apache/paimon/codegen/CodeGenUtils.java index 02eb7acb4bfe..76aeae54732b 100644 --- a/paimon-core/src/main/java/org/apache/paimon/codegen/CodeGenUtils.java +++ b/paimon-core/src/main/java/org/apache/paimon/codegen/CodeGenUtils.java @@ -83,11 +83,15 @@ public static RecordComparator newRecordComparator( } public static RecordEqualiser newRecordEqualiser(List fieldTypes) { + return newRecordEqualiser(fieldTypes, IntStream.range(0, fieldTypes.size()).toArray()); + } + + public static RecordEqualiser newRecordEqualiser(List fieldTypes, int[] fields) { return generate( RecordEqualiser.class, fieldTypes, - IntStream.range(0, fieldTypes.size()).toArray(), - () -> getCodeGenerator().generateRecordEqualiser(fieldTypes)); + fields, + () -> getCodeGenerator().generateRecordEqualiser(fieldTypes, fields)); } private static T generate(