Skip to content

Commit

Permalink
Add type widening to supported table features
Browse files Browse the repository at this point in the history
  • Loading branch information
johanl-db committed Sep 25, 2024
1 parent ac667ff commit 0fc6a11
Show file tree
Hide file tree
Showing 24 changed files with 48 additions and 3 deletions.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"commitInfo":{"timestamp":1727266119620,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"1416"},"engineInfo":"Apache-Spark/3.5.2 Delta-Lake/3.3.0-SNAPSHOT","txnId":"034f1fec-b6d9-4957-93c1-09a19c323fc2"}}
{"metaData":{"id":"43c8feba-0140-4d91-8c16-52f627a79cfe","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"struct\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"map\",\"type\":{\"type\":\"map\",\"keyType\":\"integer\",\"valueType\":\"integer\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"array\",\"type\":{\"type\":\"array\",\"elementType\":\"integer\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1727266118466}}
{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
{"add":{"path":"part-00000-138244f1-b939-40db-a4bd-d57cf3d214d2-c000.snappy.parquet","partitionValues":{},"size":1416,"modificationTime":1727266119587,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"struct\":{\"a\":1}},\"maxValues\":{\"struct\":{\"a\":1}},\"nullCount\":{\"struct\":{\"a\":0},\"map\":0,\"array\":0}}"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"commitInfo":{"timestamp":1727266120320,"operation":"SET TBLPROPERTIES","operationParameters":{"properties":"{\"delta.enableTypeWidening\":\"true\"}"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.2 Delta-Lake/3.3.0-SNAPSHOT","txnId":"f4db4fbf-a05b-41b4-9049-e1f28a08ec5b"}}
{"metaData":{"id":"43c8feba-0140-4d91-8c16-52f627a79cfe","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"struct\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"map\",\"type\":{\"type\":\"map\",\"keyType\":\"integer\",\"valueType\":\"integer\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"array\",\"type\":{\"type\":\"array\",\"elementType\":\"integer\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableTypeWidening":"true"},"createdTime":1727266118466}}
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["typeWidening-preview"],"writerFeatures":["typeWidening-preview","appendOnly","invariants"]}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"commitInfo":{"timestamp":1727266121897,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"1519"},"engineInfo":"Apache-Spark/3.5.2 Delta-Lake/3.3.0-SNAPSHOT","txnId":"55211a5a-9d2b-4367-929a-ac5850f91b78"}}
{"metaData":{"id":"43c8feba-0140-4d91-8c16-52f627a79cfe","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"struct\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"long\",\"fromType\":\"integer\",\"tableVersion\":2}]}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"map\",\"type\":{\"type\":\"map\",\"keyType\":\"long\",\"valueType\":\"long\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"long\",\"fromType\":\"integer\",\"tableVersion\":2,\"fieldPath\":\"key\"},{\"toType\":\"long\",\"fromType\":\"integer\",\"tableVersion\":2,\"fieldPath\":\"value\"}]}},{\"name\":\"array\",\"type\":{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true},\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"long\",\"fromType\":\"integer\",\"tableVersion\":2,\"fieldPath\":\"element\"}]}}]}","partitionColumns":[],"configuration":{"delta.enableTypeWidening":"true"},"createdTime":1727266118466}}
{"add":{"path":"part-00000-1f777f86-350c-4181-b7ef-73df70847eac-c000.snappy.parquet","partitionValues":{},"size":1519,"modificationTime":1727266121853,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"struct\":{\"a\":9223372036854775807}},\"maxValues\":{\"struct\":{\"a\":9223372036854775807}},\"nullCount\":{\"struct\":{\"a\":0},\"map\":0,\"array\":0}}","defaultRowCommitVersion":2}}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"commitInfo":{"timestamp":1727266110116,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"3694"},"engineInfo":"Apache-Spark/3.5.2 Delta-Lake/3.3.0-SNAPSHOT","txnId":"80c33fca-d936-40cf-81fb-7ef52b67e25b"}}
{"metaData":{"id":"db0018ee-037b-41f7-8266-85058ceafb06","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"byte_long\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"int_long\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"float_double\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"byte_double\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"short_double\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"int_double\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"decimal_decimal_same_scale\",\"type\":\"decimal(10,2)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"decimal_decimal_greater_scale\",\"type\":\"decimal(10,2)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"byte_decimal\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"short_decimal\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"int_decimal\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"long_decimal\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date_timestamp_ntz\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1727266102938}}
{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
{"add":{"path":"part-00000-1045efe0-45bb-4b99-9f83-5ffa04a63ab2-c000.snappy.parquet","partitionValues":{},"size":3694,"modificationTime":1727266109760,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"byte_long\":1,\"int_long\":2,\"float_double\":3.4,\"byte_double\":5,\"short_double\":6,\"int_double\":7,\"decimal_decimal_same_scale\":123.45,\"decimal_decimal_greater_scale\":67.89,\"byte_decimal\":1,\"short_decimal\":2,\"int_decimal\":3,\"long_decimal\":4,\"date_timestamp_ntz\":\"2024-09-09\"},\"maxValues\":{\"byte_long\":1,\"int_long\":2,\"float_double\":3.4,\"byte_double\":5,\"short_double\":6,\"int_double\":7,\"decimal_decimal_same_scale\":123.45,\"decimal_decimal_greater_scale\":67.89,\"byte_decimal\":1,\"short_decimal\":2,\"int_decimal\":3,\"long_decimal\":4,\"date_timestamp_ntz\":\"2024-09-09\"},\"nullCount\":{\"byte_long\":0,\"int_long\":0,\"float_double\":0,\"byte_double\":0,\"short_double\":0,\"int_double\":0,\"decimal_decimal_same_scale\":0,\"decimal_decimal_greater_scale\":0,\"byte_decimal\":0,\"short_decimal\":0,\"int_decimal\":0,\"long_decimal\":0,\"date_timestamp_ntz\":0}}"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"commitInfo":{"timestamp":1727266114275,"operation":"SET TBLPROPERTIES","operationParameters":{"properties":"{\"delta.enableTypeWidening\":\"true\",\"delta.feature.timestampntz\":\"supported\"}"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.2 Delta-Lake/3.3.0-SNAPSHOT","txnId":"7b869171-851a-4a8d-96e6-baee5496b98f"}}
{"metaData":{"id":"db0018ee-037b-41f7-8266-85058ceafb06","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"byte_long\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"int_long\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"float_double\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"byte_double\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"short_double\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"int_double\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"decimal_decimal_same_scale\",\"type\":\"decimal(10,2)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"decimal_decimal_greater_scale\",\"type\":\"decimal(10,2)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"byte_decimal\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"short_decimal\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"int_decimal\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"long_decimal\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date_timestamp_ntz\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableTypeWidening":"true"},"createdTime":1727266102938}}
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["timestampNtz","typeWidening-preview"],"writerFeatures":["timestampNtz","typeWidening-preview","appendOnly","invariants"]}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"commitInfo":{"timestamp":1727266116833,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"4059"},"engineInfo":"Apache-Spark/3.5.2 Delta-Lake/3.3.0-SNAPSHOT","txnId":"2e63edee-6d96-4d12-90af-85b90f4fa9e5"}}
{"metaData":{"id":"db0018ee-037b-41f7-8266-85058ceafb06","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"byte_long\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"long\",\"fromType\":\"byte\",\"tableVersion\":2}]}},{\"name\":\"int_long\",\"type\":\"long\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"long\",\"fromType\":\"integer\",\"tableVersion\":2}]}},{\"name\":\"float_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"double\",\"fromType\":\"float\",\"tableVersion\":2}]}},{\"name\":\"byte_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"double\",\"fromType\":\"byte\",\"tableVersion\":2}]}},{\"name\":\"short_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"double\",\"fromType\":\"short\",\"tableVersion\":2}]}},{\"name\":\"int_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"double\",\"fromType\":\"integer\",\"tableVersion\":2}]}},{\"name\":\"decimal_decimal_same_scale\",\"type\":\"decimal(20,2)\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"decimal(20,2)\",\"fromType\":\"decimal(10,2)\",\"tableVersion\":2}]}},{\"name\":\"decimal_decimal_greater_scale\",\"type\":\"decimal(20,5)\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"decimal(20,5)\",\"fromType\":\"decimal(10,2)\",\"tableVersion\":2}]}},{\"name\":\"byte_decimal\",\"type\":\"decimal(11,1)\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"decimal(11,1)\",\"fromType\":\"byte\",\"tableVersion\":2}]}},{\"name\":\"short_decimal\",\"type\":\"decimal(11,1)\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"decimal(11,1)\",\"fromType\":\"short\",\"tableVersion\":2}]}},{\"name\":\"int_decimal\",\"type\":\"decimal(11,1)\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"decimal(11,1)\",\"fromType\":\"integer\",\"tableVersion\":2}]}},{\"name\":\"long_decimal\",\"type\":\"decimal(21,1)\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"decimal(21,1)\",\"fromType\":\"long\",\"tableVersion\":2}]}},{\"name\":\"date_timestamp_ntz\",\"type\":\"timestamp_ntz\",\"nullable\":true,\"metadata\":{\"delta.typeChanges\":[{\"toType\":\"timestamp_ntz\",\"fromType\":\"date\",\"tableVersion\":2}]}}]}","partitionColumns":[],"configuration":{"delta.enableTypeWidening":"true"},"createdTime":1727266102938}}
{"add":{"path":"part-00000-cd317895-4ae0-4292-b918-62d4ca832bd7-c000.snappy.parquet","partitionValues":{},"size":4059,"modificationTime":1727266116789,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"byte_long\":9223372036854775807,\"int_long\":9223372036854775807,\"float_double\":1.234567890123,\"byte_double\":1.234567890123,\"short_double\":1.234567890123,\"int_double\":1.234567890123,\"decimal_decimal_same_scale\":12345678901234.56,\"decimal_decimal_greater_scale\":12345678901.23456,\"byte_decimal\":123.4,\"short_decimal\":12345.6,\"int_decimal\":1234567890.1,\"long_decimal\":123456789012345678.9,\"date_timestamp_ntz\":\"2024-09-09T12:34:56.123\"},\"maxValues\":{\"byte_long\":9223372036854775807,\"int_long\":9223372036854775807,\"float_double\":1.234567890123,\"byte_double\":1.234567890123,\"short_double\":1.234567890123,\"int_double\":1.234567890123,\"decimal_decimal_same_scale\":12345678901234.56,\"decimal_decimal_greater_scale\":12345678901.23456,\"byte_decimal\":123.4,\"short_decimal\":12345.6,\"int_decimal\":1234567890.1,\"long_decimal\":123456789012345678.9,\"date_timestamp_ntz\":\"2024-09-09T12:34:56.123\"},\"nullCount\":{\"byte_long\":0,\"int_long\":0,\"float_double\":0,\"byte_double\":0,\"short_double\":0,\"int_double\":0,\"decimal_decimal_same_scale\":0,\"decimal_decimal_greater_scale\":0,\"byte_decimal\":0,\"short_decimal\":0,\"int_decimal\":0,\"long_decimal\":0,\"date_timestamp_ntz\":0}}","defaultRowCommitVersion":2}}
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,9 @@ private static String getValue(Row row, int columnOrdinal) {
return LocalDate.ofEpochDay(daysSinceEpochUTC).toString();
} else if (dataType instanceof LongType) {
return Long.toString(row.getLong(columnOrdinal));
} else if (dataType instanceof TimestampType) {
// TimestampType data is stored internally as the number of microseconds since epoch
} else if (dataType instanceof TimestampType || dataType instanceof TimestampNTZType) {
// Timestamps are stored internally as the number of microseconds since epoch.
// TODO: TimestampType should use the session timezone to display values.
long microSecsSinceEpochUTC = row.getLong(columnOrdinal);
LocalDateTime dateTime = LocalDateTime.ofEpochSecond(
microSecsSinceEpochUTC / 1_000_000 /* epochSecond */,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,22 @@ public void runTests() throws Exception {
asList(new Column("as_int"), Literal.ofInt(1))
)),
0 /* expected row count */);

// Type widening: table with various type changes.
runAndVerifyRowCount(
"type_widening",
"type-widening",
Optional.empty(), /* read schema - read all columns */
Optional.empty(), /* predicate */
2 /* expected row count */);

// Type widening: table with type changes inside nested struct/array/map.
runAndVerifyRowCount(
"type_widening_nested",
"type-widening-nested",
Optional.empty(), /* read schema - read all columns */
Optional.empty(), /* predicate */
2 /* expected row count */);
}

private void runAndVerifyRowCount(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ public class TableFeatures {
add("appendOnly");
add("inCommitTimestamp");
add("columnMapping");
add("typeWidening-preview");
add("typeWidening");
}
});

Expand All @@ -66,6 +68,8 @@ public static void validateReadSupportedTable(
case "timestampNtz": // fall through
case "vacuumProtocolCheck": // fall through
case "variantType-preview": // fall through
case "typeWidening-preview": // fall through
case "typeWidening": // fall through
case "v2Checkpoint":
break;
default:
Expand Down Expand Up @@ -126,6 +130,10 @@ public static void validateWriteSupportedTable(
break;
case "columnMapping":
break;
case "typeWidening-preview":
break;
case "typeWidening":
break;
default:
throw unsupportedWriterFeature(tablePath, writerFeature);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class TableFeaturesSuite extends AnyFunSuite {
checkSupported(createTestProtocol(minWriterVersion = 7))
}

Seq("appendOnly", "inCommitTimestamp", "columnMapping")
Seq("appendOnly", "inCommitTimestamp", "columnMapping", "typeWidening-preview", "typeWidening")
.foreach { supportedWriterFeature =>
test(s"validateWriteSupported: protocol 7 with $supportedWriterFeature") {
checkSupported(createTestProtocol(minWriterVersion = 7, supportedWriterFeature))
Expand Down

0 comments on commit 0fc6a11

Please sign in to comment.