Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add collated stats reading #3723

Open
wants to merge 42 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
fb588c1
extended StringType to have CollationIdentifier
ilicmarkodb Aug 30, 2024
db4e7b2
reordered attributes
ilicmarkodb Aug 30, 2024
49059ff
changed PROVIDER_KERNEL to PROVIDER_SPARK
ilicmarkodb Aug 30, 2024
d9279ef
extended serialization and deserialization to support collation
ilicmarkodb Aug 30, 2024
916049e
style fix
ilicmarkodb Aug 30, 2024
54b59f4
style fix
ilicmarkodb Aug 30, 2024
55c5191
added CollationIdentifier equals
ilicmarkodb Aug 30, 2024
712e081
style fix
ilicmarkodb Aug 30, 2024
36571ab
fix
ilicmarkodb Aug 31, 2024
a1323d8
icu added
ilicmarkodb Aug 31, 2024
47f3237
empty commit
ilicmarkodb Aug 31, 2024
45bdfc8
temp commit
ilicmarkodb Aug 31, 2024
8f8bb90
added ICUCollationFactory and UTF8CollationFactory
ilicmarkodb Sep 1, 2024
56b111b
temp commit
ilicmarkodb Sep 1, 2024
b478114
temp commit
ilicmarkodb Sep 2, 2024
410a6ae
temp commit
ilicmarkodb Sep 2, 2024
ffb65b3
temp commit
ilicmarkodb Sep 2, 2024
6121864
temp commit
ilicmarkodb Sep 4, 2024
66a0f24
temp commit
ilicmarkodb Sep 8, 2024
f205340
temp commit
ilicmarkodb Sep 9, 2024
2cd94b1
temp commit
ilicmarkodb Sep 10, 2024
3cd598f
added collated Min and Max values in DataFileStatistics
ilicmarkodb Sep 12, 2024
e418d82
add method for fetching nested column in structColumnWriter
ilicmarkodb Sep 13, 2024
6a6510d
temp commit
ilicmarkodb Sep 13, 2024
b89bb47
temp commit
ilicmarkodb Sep 13, 2024
918da3d
temp commit
ilicmarkodb Sep 13, 2024
1e02e0b
temp commit
ilicmarkodb Sep 15, 2024
8196464
temp commit
ilicmarkodb Sep 16, 2024
79984a6
temp commit
ilicmarkodb Sep 16, 2024
5b7154d
temp commit
ilicmarkodb Sep 16, 2024
d78877a
changed data-skipping-basic-stats-collated-data to have third file wi…
ilicmarkodb Sep 16, 2024
891fbf9
changed data-skipping-basic-stats-collated-data to have third file wi…
ilicmarkodb Sep 16, 2024
702476c
java doc updated
ilicmarkodb Sep 17, 2024
8a32ec0
fixed visitScalarExpression
ilicmarkodb Sep 18, 2024
08f1071
temp
ilicmarkodb Sep 18, 2024
9c85566
temp
ilicmarkodb Sep 18, 2024
8a92c98
Revert "temp"
ilicmarkodb Sep 18, 2024
d08270b
test for checking added
ilicmarkodb Sep 18, 2024
cf36e05
added DataSkippingCollatedPredicate
ilicmarkodb Sep 19, 2024
900d855
style fix
ilicmarkodb Sep 19, 2024
39dc29b
style fix
ilicmarkodb Sep 19, 2024
baca20b
temp
ilicmarkodb Sep 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ val hadoopVersion = "3.3.4"
val scalaTestVersion = "3.2.15"
val scalaTestVersionForConnectors = "3.0.8"
val parquet4sVersion = "1.9.4"
val icu4jVersion = "75.1"

// Versions for Hive 3
val hadoopVersionForHive3 = "3.1.0"
Expand Down Expand Up @@ -656,6 +657,7 @@ lazy val kernelDefaults = (project in file("kernel/kernel-defaults"))
"org.apache.hadoop" % "hadoop-client-runtime" % hadoopVersion,
"com.fasterxml.jackson.core" % "jackson-databind" % "2.13.5",
"org.apache.parquet" % "parquet-hadoop" % "1.12.3",
"com.ibm.icu" % "icu4j" % icu4jVersion,

"org.scalatest" %% "scalatest" % scalaTestVersion % "test",
"junit" % "junit" % "4.13.2" % "test",
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"txnId":"fbcb5edc-9392-46f2-aed8-c0969b6bc9fc","tableSizeBytes":0,"numFiles":0,"numDeletedRecordsOpt":0,"numDeletionVectorsOpt":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"5afb493f-df83-499b-a05b-2b2123385787","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"__COLLATIONS\":{\"c1\":\"icu.UNICODE_CI\"}}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1726521395379},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors","collations-dev","appendOnly","invariants"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[0,0,0,0,0,0,0,0,0,0]},"allFiles":[]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"commitInfo":{"timestamp":1726521396233,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/<unknown>","txnId":"fbcb5edc-9392-46f2-aed8-c0969b6bc9fc"}}
{"metaData":{"id":"5afb493f-df83-499b-a05b-2b2123385787","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"__COLLATIONS\":{\"c1\":\"icu.UNICODE_CI\"}}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1726521395379}}
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors","collations-dev","appendOnly","invariants"]}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"txnId":"9d5eec70-f0e4-4f34-9ea7-790b6a691fe5","tableSizeBytes":499,"numFiles":1,"numDeletedRecordsOpt":0,"numDeletionVectorsOpt":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"5afb493f-df83-499b-a05b-2b2123385787","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"__COLLATIONS\":{\"c1\":\"icu.UNICODE_CI\"}}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1726521395379},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors","collations-dev","appendOnly","invariants"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[499,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[1,0,0,0,0,0,0,0,0,0]},"allFiles":[{"path":"test%25file%25prefix-part-00000-e06e387f-dbc2-42e9-b1ed-0e8688743353-c000.snappy.parquet","partitionValues":{},"size":499,"modificationTime":1726521400008,"dataChange":false,"stats":"{\"numRecords\":3,\"minValues\":{\"c1\":\"a\"},\"maxValues\":{\"c1\":\"c\"},\"nullCount\":{\"c1\":0},\"tightBounds\":true,\"statsWithCollation\":{\"icu.UNICODE_CI.153.120.0.0\":{\"minValues\":{\"c1\":\"a\"},\"maxValues\":{\"c1\":\"c\"}}}}","tags":{"INSERTION_TIME":"1726521400008000","MIN_INSERTION_TIME":"1726521400008000","MAX_INSERTION_TIME":"1726521400008000","OPTIMIZE_TARGET_SIZE":"268435456"}}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"commitInfo":{"timestamp":1726521400325,"operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"3","numOutputBytes":"499"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/<unknown>","txnId":"9d5eec70-f0e4-4f34-9ea7-790b6a691fe5"}}
{"add":{"path":"test%25file%25prefix-part-00000-e06e387f-dbc2-42e9-b1ed-0e8688743353-c000.snappy.parquet","partitionValues":{},"size":499,"modificationTime":1726521400008,"dataChange":true,"stats":"{\"numRecords\":3,\"minValues\":{\"c1\":\"a\"},\"maxValues\":{\"c1\":\"c\"},\"nullCount\":{\"c1\":0},\"tightBounds\":true,\"statsWithCollation\":{\"icu.UNICODE_CI.153.120.0.0\":{\"minValues\":{\"c1\":\"a\"},\"maxValues\":{\"c1\":\"c\"}}}}","tags":{"INSERTION_TIME":"1726521400008000","MIN_INSERTION_TIME":"1726521400008000","MAX_INSERTION_TIME":"1726521400008000","OPTIMIZE_TARGET_SIZE":"268435456"}}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"txnId":"cdbcd9ca-c506-4b46-a80c-0c27686a2bd7","tableSizeBytes":998,"numFiles":2,"numDeletedRecordsOpt":0,"numDeletionVectorsOpt":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"5afb493f-df83-499b-a05b-2b2123385787","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"__COLLATIONS\":{\"c1\":\"icu.UNICODE_CI\"}}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1726521395379},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors","collations-dev","appendOnly","invariants"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[998,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[2,0,0,0,0,0,0,0,0,0]},"allFiles":[{"path":"test%25file%25prefix-part-00000-e06e387f-dbc2-42e9-b1ed-0e8688743353-c000.snappy.parquet","partitionValues":{},"size":499,"modificationTime":1726521400008,"dataChange":false,"stats":"{\"numRecords\":3,\"minValues\":{\"c1\":\"a\"},\"maxValues\":{\"c1\":\"c\"},\"nullCount\":{\"c1\":0},\"tightBounds\":true,\"statsWithCollation\":{\"icu.UNICODE_CI.153.120.0.0\":{\"minValues\":{\"c1\":\"a\"},\"maxValues\":{\"c1\":\"c\"}}}}","tags":{"INSERTION_TIME":"1726521400008000","MIN_INSERTION_TIME":"1726521400008000","MAX_INSERTION_TIME":"1726521400008000","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"test%25file%25prefix-part-00000-8b165117-df14-4a40-8b72-b1ad50704659-c000.snappy.parquet","partitionValues":{},"size":499,"modificationTime":1726521400951,"dataChange":false,"stats":"{\"numRecords\":3,\"minValues\":{\"c1\":\"A\"},\"maxValues\":{\"c1\":\"C\"},\"nullCount\":{\"c1\":0},\"tightBounds\":true,\"statsWithCollation\":{\"icu.UNICODE_CI.153.120.0.0\":{\"minValues\":{\"c1\":\"A\"},\"maxValues\":{\"c1\":\"C\"}}}}","tags":{"INSERTION_TIME":"1726521400951000","MIN_INSERTION_TIME":"1726521400951000","MAX_INSERTION_TIME":"1726521400951000","OPTIMIZE_TARGET_SIZE":"268435456"}}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"commitInfo":{"timestamp":1726521401028,"operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"3","numOutputBytes":"499"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/<unknown>","txnId":"cdbcd9ca-c506-4b46-a80c-0c27686a2bd7"}}
{"add":{"path":"test%25file%25prefix-part-00000-8b165117-df14-4a40-8b72-b1ad50704659-c000.snappy.parquet","partitionValues":{},"size":499,"modificationTime":1726521400951,"dataChange":true,"stats":"{\"numRecords\":3,\"minValues\":{\"c1\":\"A\"},\"maxValues\":{\"c1\":\"C\"},\"nullCount\":{\"c1\":0},\"tightBounds\":true,\"statsWithCollation\":{\"icu.UNICODE_CI.153.120.0.0\":{\"minValues\":{\"c1\":\"A\"},\"maxValues\":{\"c1\":\"C\"}}}}","tags":{"INSERTION_TIME":"1726521400951000","MIN_INSERTION_TIME":"1726521400951000","MAX_INSERTION_TIME":"1726521400951000","OPTIMIZE_TARGET_SIZE":"268435456"}}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"txnId":"4d6b94f9-d743-4b08-898a-e007b05b33fa","tableSizeBytes":1473,"numFiles":3,"numDeletedRecordsOpt":0,"numDeletionVectorsOpt":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"5afb493f-df83-499b-a05b-2b2123385787","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"__COLLATIONS\":{\"c1\":\"icu.UNICODE_CI\"}}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1726521395379},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors","collations-dev","appendOnly","invariants"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[1473,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[3,0,0,0,0,0,0,0,0,0]},"allFiles":[{"path":"test%25file%25prefix-part-00000-b5cb0092-c62b-4d11-80ce-00aad14a801e-c000.snappy.parquet","partitionValues":{},"size":475,"modificationTime":1726521401605,"dataChange":false,"stats":"{\"numRecords\":3,\"minValues\":{},\"maxValues\":{},\"nullCount\":{\"c1\":3},\"tightBounds\":true,\"statsWithCollation\":{\"icu.UNICODE_CI.153.120.0.0\":{\"minValues\":{},\"maxValues\":{}}}}","tags":{"INSERTION_TIME":"1726521401605000","MIN_INSERTION_TIME":"1726521401605000","MAX_INSERTION_TIME":"1726521401605000","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"test%25file%25prefix-part-00000-e06e387f-dbc2-42e9-b1ed-0e8688743353-c000.snappy.parquet","partitionValues":{},"size":499,"modificationTime":1726521400008,"dataChange":false,"stats":"{\"numRecords\":3,\"minValues\":{\"c1\":\"a\"},\"maxValues\":{\"c1\":\"c\"},\"nullCount\":{\"c1\":0},\"tightBounds\":true,\"statsWithCollation\":{\"icu.UNICODE_CI.153.120.0.0\":{\"minValues\":{\"c1\":\"a\"},\"maxValues\":{\"c1\":\"c\"}}}}","tags":{"INSERTION_TIME":"1726521400008000","MIN_INSERTION_TIME":"1726521400008000","MAX_INSERTION_TIME":"1726521400008000","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"test%25file%25prefix-part-00000-8b165117-df14-4a40-8b72-b1ad50704659-c000.snappy.parquet","partitionValues":{},"size":499,"modificationTime":1726521400951,"dataChange":false,"stats":"{\"numRecords\":3,\"minValues\":{\"c1\":\"A\"},\"maxValues\":{\"c1\":\"C\"},\"nullCount\":{\"c1\":0},\"tightBounds\":true,\"statsWithCollation\":{\"icu.UNICODE_CI.153.120.0.0\":{\"minValues\":{\"c1\":\"A\"},\"maxValues\":{\"c1\":\"C\"}}}}","tags":{"INSERTION_TIME":"1726521400951000","MIN_INSERTION_TIME":"1726521400951000","MAX_INSERTION_TIME":"1726521400951000","OPTIMIZE_TARGET_SIZE":"268435456"}}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"commitInfo":{"timestamp":1726521401673,"operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"readVersion":2,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"3","numOutputBytes":"475"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/<unknown>","txnId":"4d6b94f9-d743-4b08-898a-e007b05b33fa"}}
{"add":{"path":"test%25file%25prefix-part-00000-b5cb0092-c62b-4d11-80ce-00aad14a801e-c000.snappy.parquet","partitionValues":{},"size":475,"modificationTime":1726521401605,"dataChange":true,"stats":"{\"numRecords\":3,\"minValues\":{},\"maxValues\":{},\"nullCount\":{\"c1\":3},\"tightBounds\":true,\"statsWithCollation\":{\"icu.UNICODE_CI.153.120.0.0\":{\"minValues\":{},\"maxValues\":{}}}}","tags":{"INSERTION_TIME":"1726521401605000","MIN_INSERTION_TIME":"1726521401605000","MAX_INSERTION_TIME":"1726521401605000","OPTIMIZE_TARGET_SIZE":"268435456"}}}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"commitInfo":{"timestamp":1725838990599,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"true","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.2 Delta-Lake/3.2.0","txnId":"0c4e0d4d-0f13-4726-ad61-f10192ab81e2"}}
{"metaData":{"id":"9a7918b4-42a5-4b47-bf27-0e8a7289d654","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1725838990522}}
{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"commitInfo":{"timestamp":1725839016423,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"440"},"engineInfo":"Apache-Spark/3.5.2 Delta-Lake/3.2.0","txnId":"94355dc1-e083-4da0-9934-716b093eaf3a"}}
{"add":{"path":"part-00000-b839639e-0620-4d9c-baf8-20206fc2b063-c000.snappy.parquet","partitionValues":{},"size":440,"modificationTime":1725839016378,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"c1\":\"a\"},\"maxValues\":{\"c1\":\"a\"},\"nullCount\":{\"c1\":0}}"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"commitInfo":{"timestamp":1725839020852,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"440"},"engineInfo":"Apache-Spark/3.5.2 Delta-Lake/3.2.0","txnId":"0b39a4b8-878c-4092-a46e-c2bccfa9aaf3"}}
{"add":{"path":"part-00000-fdd2f0a3-75ba-4b6d-85a3-03173742c909-c000.snappy.parquet","partitionValues":{},"size":440,"modificationTime":1725839020847,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"c1\":\"A\"},\"maxValues\":{\"c1\":\"A\"},\"nullCount\":{\"c1\":0}}"}}
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package io.delta.kernel.expressions;

/**
 * A {@link Predicate} that additionally carries the {@link CollationIdentifier} it was created
 * with.
 *
 * <p>NOTE(review): presumably the collation governs how the string operands are compared when
 * the predicate is evaluated; that evaluation logic is not part of this class - confirm against
 * the expression handler.
 */
public class CollatedPredicate extends Predicate {
  /** Collation attached to this predicate; stored exactly as passed to the constructor. */
  private final CollationIdentifier collationIdentifier;

  /**
   * Creates a predicate with two operands and an associated collation.
   *
   * @param name predicate name, forwarded unchanged to {@link Predicate}
   * @param left first operand, forwarded unchanged to {@link Predicate}
   * @param right second operand, forwarded unchanged to {@link Predicate}
   * @param collationIdentifier collation to associate with this predicate
   */
  public CollatedPredicate(
      String name, Expression left, Expression right, CollationIdentifier collationIdentifier) {
    super(name, left, right);
    this.collationIdentifier = collationIdentifier;
  }

  /**
   * Returns the collation associated with this predicate.
   *
   * @return the {@link CollationIdentifier} given at construction time
   */
  public CollationIdentifier getCollationIdentifier() {
    return collationIdentifier;
  }

  /**
   * Renders binary operators as {@code (left name right [collation])}; every other predicate
   * name falls back to the superclass representation (which does not include the collation
   * from this class).
   */
  @Override
  public String toString() {
    // Only names listed in BINARY_OPERATORS get the collation-aware rendering.
    if (!BINARY_OPERATORS.contains(name)) {
      return super.toString();
    }

    final Object leftOperand = children.get(0);
    final Object rightOperand = children.get(1);
    return String.format(
        "(%s %s %s [%s])", leftOperand, name, rightOperand, collationIdentifier);
  }
}
Loading