From fec4257e47d56aa27805562cc769a323b4fedb4c Mon Sep 17 00:00:00 2001 From: cshuo Date: Tue, 21 Jan 2025 16:54:51 +0800 Subject: [PATCH 1/2] [HUDI-8004] avoid unnecessary record rewrite during merging with base file --- .../org/apache/hudi/avro/AvroSchemaUtils.java | 13 +++++++++- .../apache/hudi/avro/TestAvroSchemaUtils.java | 26 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java index e4a5fe8ecf156..f78accbf89c4c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java @@ -174,7 +174,18 @@ private static boolean isAtomicSchemasCompatible(Schema oneAtomicType, Schema an * */ public static boolean isStrictProjectionOf(Schema sourceSchema, Schema targetSchema) { - return isProjectionOfInternal(sourceSchema, targetSchema, Objects::equals); + return isProjectionOfInternal(sourceSchema, targetSchema, AvroSchemaUtils::isAtomicTypeStrictProject); + } + + private static boolean isAtomicTypeStrictProject(Schema source, Schema target) { + // ignore name/namespace for FIXED type + if (source.getType() == Schema.Type.FIXED && target.getType() == Schema.Type.FIXED) { + return source.getLogicalType().equals(target.getLogicalType()) + && source.getFixedSize() == target.getFixedSize() + && source.getObjectProps().equals(target.getObjectProps()); + } else { + return Objects.equals(source, target); + } } private static boolean isProjectionOfInternal(Schema sourceSchema, diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java index d13bfe5a69ac2..27d29f83f05b7 100644 --- a/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestAvroSchemaUtils.java @@ -47,6 +47,19 @@ public class TestAvroSchemaUtils { + " \"type\": [\"null\", \"int\"]\n" + " },\n" + " {\n" + + " \"name\" : \"f1\",\n" + + " \"type\" : [ \"null\", {\n" + + " \"type\" : \"fixed\",\n" + + " \"name\" : \"f1\",\n" + + " \"namespace\" : \"\",\n" + + " \"size\" : 5,\n" + + " \"logicalType\" : \"decimal\",\n" + + " \"precision\" : 10,\n" + + " \"scale\" : 2\n" + + " }],\n" + + " \"default\" : null\n" + + " },\n" + + " {\n" + " \"name\": \"nested_record\",\n" + " \"type\": {\n" + " \"name\": \"nested\",\n" @@ -76,6 +89,19 @@ public class TestAvroSchemaUtils { + " \"type\": [\"null\", \"int\"]\n" + " },\n" + " {\n" + + " \"name\" : \"f1\",\n" + + " \"type\" : [ \"null\", {\n" + + " \"type\" : \"fixed\",\n" + + " \"name\" : \"fixed\",\n" + + " \"namespace\" : \"example.schema.source.f1\",\n" + + " \"size\" : 5,\n" + + " \"logicalType\" : \"decimal\",\n" + + " \"precision\" : 10,\n" + + " \"scale\" : 2\n" + + " }],\n" + + " \"default\" : null\n" + + " },\n" + + " {\n" + " \"name\": \"nested_record\",\n" + " \"type\": {\n" + " \"name\": \"nested\",\n" From aca7351cdad1aeecef7c6905aa8922ae3a3a3f3d Mon Sep 17 00:00:00 2001 From: cshuo Date: Wed, 22 Jan 2025 09:53:59 +0800 Subject: [PATCH 2/2] fix comment --- .../src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java index f78accbf89c4c..28dc9743ad7b1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java @@ -174,10 +174,10 @@ private static boolean isAtomicSchemasCompatible(Schema oneAtomicType, Schema an * */ public static boolean isStrictProjectionOf(Schema sourceSchema, Schema targetSchema) { - return isProjectionOfInternal(sourceSchema, targetSchema, AvroSchemaUtils::isAtomicTypeStrictProject); + return isProjectionOfInternal(sourceSchema, targetSchema, AvroSchemaUtils::isAtomicTypeEquals); } - private static boolean isAtomicTypeStrictProject(Schema source, Schema target) { + private static boolean isAtomicTypeEquals(Schema source, Schema target) { // ignore name/namespace for FIXED type if (source.getType() == Schema.Type.FIXED && target.getType() == Schema.Type.FIXED) { return source.getLogicalType().equals(target.getLogicalType())