-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Kernel] Extended schema JSON serde to support collations #3628
Changes from 41 commits
fb588c1
db4e7b2
49059ff
d9279ef
916049e
54b59f4
9715cf5
55c5191
51162f0
712e081
36571ab
86602c6
76cdbd5
d8fc611
9c9684a
c6bd336
5e0e43e
2d9465d
20a1081
6469ba1
daa2f66
37c3617
9b2835f
8e0fb82
164edcc
16113cd
65ad43e
dc3db16
e107247
2339d46
14b7327
e914e6f
1feea71
4c7d72f
d95ebc0
a7e435b
fa836a4
eff0abf
99ce5ae
bd62e3d
908750d
9b2001b
52280ce
6a45b46
555d49e
d359ed4
67854c9
c6f1c97
c5f41b9
7b8c844
ae0b189
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,9 @@ | |
package io.delta.kernel.types; | ||
|
||
import io.delta.kernel.annotation.Evolving; | ||
import io.delta.kernel.internal.util.Tuple2; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Objects; | ||
|
||
/** | ||
|
@@ -102,6 +105,47 @@ public String toString() { | |
"StructField(name=%s,type=%s,nullable=%s,metadata=%s)", name, dataType, nullable, metadata); | ||
} | ||
|
||
/**
 * Returns the {@link FieldMetadata} to use when this field is serialized.
 *
 * <p>Calls {@code getNestedCollatedFields(dataType, name)} to collect the collated string types
 * found in this field's data type. When that list is empty, this field's own {@code metadata} is
 * returned as is. Otherwise the result is built from {@code metadata} plus one additional nested
 * {@link FieldMetadata} entry, stored under {@code DataType.COLLATIONS_METADATA_KEY}, that maps
 * each collected path to its collation name.
 *
 * @return {@code metadata} itself when no collated fields are found; otherwise a newly built
 *     {@link FieldMetadata} that also carries the collations entry
 */
public FieldMetadata getSerializationMetadata() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need this and how is it different from the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I understand correctly, this is capturing the nested field collation types and returning in FieldMetadata. Why is this not already the case when this StructField is created? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @stefankandic is this how Spark does? This seems not clear. What is the difference between |
||
// Each tuple is (path, collation name); the path starts at this field's name.
List<Tuple2<String, String>> nestedCollatedFields = getNestedCollatedFields(dataType, name); | ||
// Nothing to add: hand back the existing metadata object without building a new one.
if (nestedCollatedFields.isEmpty()) { | ||
return metadata; | ||
} | ||
|
||
// Build the path -> collation name mapping as its own FieldMetadata.
FieldMetadata.Builder metadataBuilder = new FieldMetadata.Builder(); | ||
for (Tuple2<String, String> nestedField : nestedCollatedFields) { | ||
metadataBuilder.putString(nestedField._1, nestedField._2); | ||
} | ||
// Start from the existing metadata, then attach the mapping under the collations key.
// NOTE(review): presumably fromMetadata copies all existing entries and putFieldMetadata
// replaces any entry already stored under that key - confirm against FieldMetadata.Builder.
return new FieldMetadata.Builder() | ||
.fromMetadata(metadata) | ||
.putFieldMetadata(DataType.COLLATIONS_METADATA_KEY, metadataBuilder.build()) | ||
.build(); | ||
} | ||
|
||
/**
 * Recursively collects the collated string types reachable from {@code parent}.
 *
 * <p>A {@link StringType} contributes one {@code (path, collation)} tuple when its collation
 * identifier is not equal to {@code CollationIdentifier.fromString("SPARK.UTF8_BINARY")}; the
 * collation is recorded as the result of {@code toStringWithoutVersion()}. A {@link MapType} is
 * descended into through its key type and value type, appending {@code ".key"} and {@code
 * ".value"} to the path. An {@link ArrayType} is descended into through its element type,
 * appending {@code ".element"}. Every other type, including {@code StructType}, contributes
 * nothing.
 *
 * @param parent the data type to inspect
 * @param path the path accumulated so far, used as the first element of each emitted tuple
 * @return a new list of {@code (path, collation name)} tuples; empty when none are found
 */
private List<Tuple2<String, String>> getNestedCollatedFields(DataType parent, String path) { | ||
List<Tuple2<String, String>> nestedCollatedFields = new ArrayList<>(); | ||
if (parent instanceof StringType) { | ||
StringType stringType = (StringType) parent; | ||
// Only collations other than SPARK.UTF8_BINARY are recorded.
if (!stringType | ||
.getCollationIdentifier() | ||
.equals(CollationIdentifier.fromString("SPARK.UTF8_BINARY"))) { | ||
nestedCollatedFields.add( | ||
new Tuple2<>( | ||
path, ((StringType) parent).getCollationIdentifier().toStringWithoutVersion())); | ||
} | ||
} else if (parent instanceof MapType) { | ||
// Keys and values are visited separately so each gets its own path suffix.
nestedCollatedFields.addAll( | ||
getNestedCollatedFields(((MapType) parent).getKeyType(), path + ".key")); | ||
nestedCollatedFields.addAll( | ||
getNestedCollatedFields(((MapType) parent).getValueType(), path + ".value")); | ||
} else if (parent instanceof ArrayType) { | ||
nestedCollatedFields.addAll( | ||
getNestedCollatedFields(((ArrayType) parent).getElementType(), path + ".element")); | ||
} | ||
// We didn't check for StructType because we store the StringType's | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we still need to go through the fields within the StructType and check if any of them contains a Map/Array type. |
||
// collation information in the nearest ancestor StructField's metadata when serializing. | ||
return nestedCollatedFields; | ||
vkorukanti marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update the method docs to include what the `collationMap` is.