-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Kernel] Extended schema JSON serde to support collations #3628
Merged
vkorukanti
merged 51 commits into
delta-io:master
from
ilicmarkodb:extend_SerDe_to_support_collations
Sep 27, 2024
Merged
Changes from all commits
Commits
Show all changes
51 commits
Select commit
Hold shift + click to select a range
fb588c1
extended StringType to have CollationIdentifier
ilicmarkodb db4e7b2
reordered attributes
ilicmarkodb 49059ff
changed PROVIDER_KERNEL to PROVIDER_SPARK
ilicmarkodb d9279ef
extended serialization and deserialization to support collation
ilicmarkodb 916049e
style fix
ilicmarkodb 54b59f4
style fix
ilicmarkodb 9715cf5
style fix
ilicmarkodb 55c5191
added CollationIdentifier equals
ilicmarkodb 51162f0
style fix
ilicmarkodb 712e081
style fix
ilicmarkodb 36571ab
fix
ilicmarkodb 86602c6
tests added for CollationIdentifier
ilicmarkodb 76cdbd5
style fix
ilicmarkodb d8fc611
style fix
ilicmarkodb 9c9684a
changed toString and fromString
ilicmarkodb c6bd336
changed CollationIdentifier
ilicmarkodb 5e0e43e
changed CollationIdentifier
ilicmarkodb 2d9465d
merged with extend_string_type_to_have_collation
ilicmarkodb 20a1081
suggestions applied
ilicmarkodb 6469ba1
suggestions applied
ilicmarkodb daa2f66
merged with extend_string_type_to_have_collation
ilicmarkodb 37c3617
javadoc updated
ilicmarkodb 9b2835f
merged with extend_string_type_to_have_collation
ilicmarkodb 8e0fb82
temp
ilicmarkodb 164edcc
temp
ilicmarkodb 16113cd
parser and tests fixed
ilicmarkodb 65ad43e
parser and tests fixed
ilicmarkodb dc3db16
temp commit
ilicmarkodb e107247
suggestions applied
ilicmarkodb 2339d46
stringtype equals tests added
ilicmarkodb 14b7327
stringtype equals updated
ilicmarkodb e914e6f
removed DEFAULT values
ilicmarkodb 1feea71
since tag added
ilicmarkodb 4c7d72f
merged with extend_string_type_to_have_collation
ilicmarkodb d95ebc0
changed CollationIdentifier constructor
ilicmarkodb a7e435b
java doc added
ilicmarkodb fa836a4
temp
ilicmarkodb eff0abf
suggestion applied
ilicmarkodb 99ce5ae
test fixed
ilicmarkodb bd62e3d
style fix
ilicmarkodb 908750d
merged with master
ilicmarkodb 9b2001b
temp
ilicmarkodb 52280ce
suggestions applied
ilicmarkodb 6a45b46
style fix
ilicmarkodb 555d49e
added fetchCollationMetadata method
ilicmarkodb d359ed4
moved fetchCollationMetadata to constructor
ilicmarkodb 67854c9
style fix
ilicmarkodb c6f1c97
fix
ilicmarkodb c5f41b9
fix
ilicmarkodb 7b8c844
Update StructField.java
vkorukanti ae0b189
minor change
vkorukanti File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,9 @@ | |
package io.delta.kernel.types; | ||
|
||
import io.delta.kernel.annotation.Evolving; | ||
import io.delta.kernel.internal.util.Tuple2; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Objects; | ||
|
||
/** | ||
|
@@ -47,6 +50,8 @@ public class StructField { | |
false, | ||
FieldMetadata.builder().putBoolean(IS_METADATA_COLUMN_KEY, true).build()); | ||
|
||
/** Field-metadata key under which the nested collation metadata of a field is stored. */
public static final String COLLATIONS_METADATA_KEY = "__COLLATIONS"; | ||
|
||
//////////////////////////////////////////////////////////////////////////////// | ||
// Instance Fields / Methods | ||
//////////////////////////////////////////////////////////////////////////////// | ||
|
@@ -64,7 +69,10 @@ public StructField(String name, DataType dataType, boolean nullable, FieldMetada | |
this.name = name; | ||
this.dataType = dataType; | ||
this.nullable = nullable; | ||
this.metadata = metadata; | ||
|
||
FieldMetadata collationMetadata = fetchCollationMetadata(); | ||
this.metadata = | ||
new FieldMetadata.Builder().fromMetadata(metadata).fromMetadata(collationMetadata).build(); | ||
} | ||
|
||
/** @return the name of this field */ | ||
|
@@ -125,4 +133,46 @@ public int hashCode() { | |
public StructField withNewMetadata(FieldMetadata metadata) { | ||
// Same name, data type and nullability; the constructor merges the collation metadata
// it derives from the data type into the supplied metadata.
return new StructField(name, dataType, nullable, metadata); | ||
} | ||
|
||
/**
 * Recursively collects the collated string types reachable from {@code parent} through map
 * keys, map values and array elements.
 *
 * <p>Each result pairs the path of a {@code StringType} with its collation identifier rendered
 * without version. String types whose collation identifier equals {@code SPARK.UTF8_BINARY}
 * are not reported. Paths are built by appending {@code ".key"}, {@code ".value"} or
 * {@code ".element"} to {@code path} at each level of nesting. Any other type contributes
 * nothing.
 *
 * @param parent the data type to inspect
 * @param path the path prefix that identifies {@code parent}
 * @return the collected (path, collation) pairs; empty when nothing was found
 */
private List<Tuple2<String, String>> getNestedCollatedFields(DataType parent, String path) { | ||
List<Tuple2<String, String>> nestedCollatedFields = new ArrayList<>(); | ||
if (parent instanceof StringType) { | ||
StringType stringType = (StringType) parent; | ||
// Only string types whose collation differs from SPARK.UTF8_BINARY are recorded.
if (!stringType | ||
.getCollationIdentifier() | ||
.equals(CollationIdentifier.fromString("SPARK.UTF8_BINARY"))) { | ||
nestedCollatedFields.add( | ||
new Tuple2<>( | ||
path, ((StringType) parent).getCollationIdentifier().toStringWithoutVersion())); | ||
} | ||
} else if (parent instanceof MapType) { | ||
// Key and value types are visited separately, each under its own path suffix.
nestedCollatedFields.addAll( | ||
getNestedCollatedFields(((MapType) parent).getKeyType(), path + ".key")); | ||
nestedCollatedFields.addAll( | ||
getNestedCollatedFields(((MapType) parent).getValueType(), path + ".value")); | ||
} else if (parent instanceof ArrayType) { | ||
nestedCollatedFields.addAll( | ||
getNestedCollatedFields(((ArrayType) parent).getElementType(), path + ".element")); | ||
} | ||
// We didn't check for StructType because we store the StringType's | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we still need to go through the fields within the StructType and check if any of them contains a Map/Array type. |
||
// collation information in the nearest ancestor StructField's metadata when serializing. | ||
return nestedCollatedFields; | ||
vkorukanti marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
/**
 * Fetches collation metadata from nested collated fields.
 *
 * <p>Collects the (path, collation) pairs that {@code getNestedCollatedFields} reports for this
 * field's {@code dataType}, using the field's {@code name} as the root path, and stores them as
 * string entries in a nested metadata object keyed by {@code COLLATIONS_METADATA_KEY}.
 *
 * @return metadata holding a single {@code COLLATIONS_METADATA_KEY} entry, or the result of
 *     {@code FieldMetadata.empty()} when no collated fields were found
 */ | ||
private FieldMetadata fetchCollationMetadata() { | ||
List<Tuple2<String, String>> nestedCollatedFields = getNestedCollatedFields(dataType, name); | ||
if (nestedCollatedFields.isEmpty()) { | ||
return FieldMetadata.empty(); | ||
} | ||
|
||
// Inner metadata: one string entry per collated path, mapping the path to its collation.
FieldMetadata.Builder metadataBuilder = new FieldMetadata.Builder(); | ||
for (Tuple2<String, String> nestedField : nestedCollatedFields) { | ||
metadataBuilder.putString(nestedField._1, nestedField._2); | ||
} | ||
|
||
// Wrap the inner metadata under the dedicated collations key.
return new FieldMetadata.Builder() | ||
.putFieldMetadata(COLLATIONS_METADATA_KEY, metadataBuilder.build()) | ||
.build(); | ||
} | ||
} |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update the method docs to include what the `collationMap` is.