HIVE-28732: Sorted dynamic partition optimization does not apply hive.default.nulls.last #5627

Status: Open. Wants to merge 1 commit into base: master.
@@ -124,6 +124,7 @@
 import org.apache.hadoop.hive.ql.session.SessionStateUtil;
 import org.apache.hadoop.hive.ql.stats.Partish;
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
+import org.apache.hadoop.hive.ql.util.NullOrdering;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.DefaultFetchFormatter;
 import org.apache.hadoop.hive.serde2.Deserializer;
@@ -857,7 +858,12 @@ public DynamicPartitionCtx createDPContext(
       if (sortField.sourceId() == field.fieldId()) {
         customSortPositions.add(pos);
         customSortOrder.add(sortField.direction() == SortDirection.ASC ? 1 : 0);
-        customSortNullOrder.add(sortField.nullOrder() == NullOrder.NULLS_FIRST ? 0 : 1);
+
+        NullOrdering nullOrdering = NullOrdering.NULLS_LAST;
+        if (sortField.nullOrder() == NullOrder.NULLS_FIRST) {
+          nullOrdering = NullOrdering.NULLS_FIRST;
+        }
+        customSortNullOrder.add(nullOrdering.getCode());
Contributor:
This conclusion seems OK, but I am leaving a note that came to mind.

We added a feature to configure the sort order of Iceberg tables. I wondered whether we can keep the semantics of WRITE LOCALLY ORDERED BY a.
https://github.com/apache/hive/pull/5541/files#r1894857758

Null ordering is a required part of a sort order spec, so our DDL always specifies either NULLS_FIRST or NULLS_LAST, and createDPContext simply follows that definition. In other words, sortField.nullOrder() is never null, so this change never takes effect in the real world.
https://github.com/apache/iceberg/blob/8839c9bf1f1d8c9b718f9766302ff8a2018e515f/core/src/main/java/org/apache/iceberg/SortOrderParser.java#L159-L175

Contributor Author:
There were two failing tests without this change:

TestHiveIcebergInserts.testSortedInsert
TestHiveIcebergInserts.testSortedAndTransformedInsertIntoPartitionedTable

While analyzing why these tests fail, I found this bug:

sortField.nullOrder() == NullOrder.NULLS_FIRST ? 0 : 1

This maps NULLS_FIRST to 0, which in Hive actually means NULLS_LAST:

NULLS_FIRST(1, HiveParser.TOK_NULLS_FIRST, NullValueOption.MINVALUE, 'a', RelFieldCollation.NullDirection.FIRST),
NULLS_LAST(0, HiveParser.TOK_NULLS_LAST, NullValueOption.MAXVALUE, 'z', RelFieldCollation.NullDirection.LAST);
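To make the inversion concrete, here is a self-contained sketch (not the Hive source; NullOrderingSketch is a stand-in that mirrors only the two codes quoted above). The buggy ternary and the enum-based mapping disagree exactly when Iceberg reports NULLS_FIRST:

```java
public class NullOrderCodeDemo {
    // Stand-in for Hive's NullOrdering codes quoted above (not the real class):
    // NULLS_FIRST has code 1, NULLS_LAST has code 0.
    enum NullOrderingSketch {
        NULLS_FIRST(1), NULLS_LAST(0);

        private final int code;
        NullOrderingSketch(int code) { this.code = code; }
        int getCode() { return code; }
    }

    public static void main(String[] args) {
        // Stands in for sortField.nullOrder() == NullOrder.NULLS_FIRST
        boolean icebergNullsFirst = true;

        // Buggy expression: yields 0, which Hive reads as NULLS_LAST.
        int buggy = icebergNullsFirst ? 0 : 1;

        // Fixed mapping through the enum: yields 1, Hive's NULLS_FIRST code.
        NullOrderingSketch ordering =
            icebergNullsFirst ? NullOrderingSketch.NULLS_FIRST : NullOrderingSketch.NULLS_LAST;

        System.out.println(buggy + " vs " + ordering.getCode()); // prints "0 vs 1"
    }
}
```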

We added a feature to configure the sort order of Iceberg tables. I wondered if we can keep the semantics of WRITE LOCALLY ORDERED BY a

I found that the null sort order is set here when it was not specified explicitly at CREATE TABLE:

sortFields = JSON_OBJECT_MAPPER.reader().readValue(sortOderJSONString, SortFields.class);

and the defaults are defined at the parser level
-> {$orderSpec.tree == null && $nullSpec.tree == null}?
^(TOK_TABSORTCOLNAMEASC ^(TOK_NULLS_FIRST identifier))

This is not affected by the setting hive.default.nulls.last.

sortField.nullOrder() is non-null and this change never takes effect in the real world.

I don't understand this part; could you please elaborate?

Contributor:
I reviewed this PR assuming we were not changing the semantics of customSortNullOrder.add(sortField.nullOrder() == NullOrder.NULLS_FIRST ? 0 : 1). Now I understand that 0 and 1 will be correctly inverted. Let me check...
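This inversion is also why the q.out golden files below flip from "aa" to "zz": the EXPLAIN plan prints one sign character per sort key, and per the NullOrdering constants quoted earlier the sign is 'a' for NULLS_FIRST and 'z' for NULLS_LAST. A minimal sketch (planString is a hypothetical helper, not Hive code):

```java
public class NullSortOrderPlanString {
    // Hypothetical helper: builds the "null sort order" string shown in EXPLAIN
    // plans, one sign character per sort key ('a' = NULLS_FIRST, 'z' = NULLS_LAST,
    // matching the sign characters in Hive's NullOrdering constants).
    static String planString(char sign, int numSortKeys) {
        return String.valueOf(sign).repeat(numSortKeys);
    }

    public static void main(String[] args) {
        System.out.println(planString('a', 2)); // old (buggy) plans: "aa"
        System.out.println(planString('z', 2)); // fixed plans: "zz"
    }
}
```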

break;
}
pos++;
@@ -458,7 +458,7 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 1455 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string)
Statistics: Num rows: 7 Data size: 3395 Basic stats: COMPLETE Column stats: COMPLETE
@@ -694,7 +694,7 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 1940 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string)
Statistics: Num rows: 7 Data size: 3395 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1065,7 +1065,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string)
Statistics: Num rows: 2 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1137,7 +1137,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string)
Statistics: Num rows: 2 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1517,7 +1517,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string)
Statistics: Num rows: 2 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1589,7 +1589,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string)
Statistics: Num rows: 2 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
@@ -423,7 +423,6 @@ POSTHOOK: query: select * from default.tbl_target_mixed.files
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl_target_mixed
POSTHOOK: Output: hdfs://### HDFS PATH ###
- 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":null,"c_bucket":null} 2 417 {1:7,2:5,3:5} {1:2,2:2,3:2} {1:0,2:2,3:2} {} {1: } {1:n} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":110},"c":{"column_size":5,"value_count":2,"null_value_count":2,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"ccy":{"column_size":5,"value_count":2,"null_value_count":2,"nan_value_count":null,"lower_bound":null,"upper_bound":null}}
0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"CZK","c_bucket":1} 1 449 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:d,2:CZK,3: } {1:d,2:CZK,3: } NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":100,"upper_bound":100},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":12},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"CZK","upper_bound":"CZK"}}
0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"CZK","c_bucket":2} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:
,2:CZK,3:} {1:
@@ -443,6 +442,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":2} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:,2:USD,3:
} {1:,2:USD,3:
} NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":6,"upper_bound":6},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":10,"upper_bound":10},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"USD","upper_bound":"USD"}}
+ 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":null,"c_bucket":null} 2 417 {1:7,2:5,3:5} {1:2,2:2,3:2} {1:0,2:2,3:2} {} {1: } {1:n} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":110},"c":{"column_size":5,"value_count":2,"null_value_count":2,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"ccy":{"column_size":5,"value_count":2,"null_value_count":2,"nan_value_count":null,"lower_bound":null,"upper_bound":null}}
PREHOOK: query: explain insert into table tbl_target_mixed select * from tbl_src where b = 'EUR'
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl_src
@@ -605,7 +605,6 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"EUR","c_bucket":2} 3 448 {1:8,2:17,3:5} {1:3,2:3,3:3} {1:0,2:0,3:0} {} {1:,2:EUR,3:
} {1:(,2:EUR,3:
} NULL [3] NULL 0 {"a":{"column_size":8,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":40},"c":{"column_size":5,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":10,"upper_bound":10},"ccy":{"column_size":17,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":"EUR","upper_bound":"EUR"}}
- 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":null,"c_bucket":null} 2 417 {1:7,2:5,3:5} {1:2,2:2,3:2} {1:0,2:2,3:2} {} {1: } {1:n} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":110},"c":{"column_size":5,"value_count":2,"null_value_count":2,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"ccy":{"column_size":5,"value_count":2,"null_value_count":2,"nan_value_count":null,"lower_bound":null,"upper_bound":null}}
0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"CZK","c_bucket":1} 1 449 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:d,2:CZK,3: } {1:d,2:CZK,3: } NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":100,"upper_bound":100},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":12},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"CZK","upper_bound":"CZK"}}
0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"CZK","c_bucket":2} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:
,2:CZK,3:} {1:
@@ -625,6 +624,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":2} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:,2:USD,3:
} {1:,2:USD,3:
} NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":6,"upper_bound":6},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":10,"upper_bound":10},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"USD","upper_bound":"USD"}}
+ 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":null,"c_bucket":null} 2 417 {1:7,2:5,3:5} {1:2,2:2,3:2} {1:0,2:2,3:2} {} {1: } {1:n} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":110},"c":{"column_size":5,"value_count":2,"null_value_count":2,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"ccy":{"column_size":5,"value_count":2,"null_value_count":2,"nan_value_count":null,"lower_bound":null,"upper_bound":null}}
PREHOOK: query: create external table tbl_bucket_date (id string, date_time_date date, year_partition int)
partitioned by spec (year_partition, bucket(1, date_time_date))
stored by iceberg stored as parquet
@@ -923,7 +923,7 @@ STAGE PLANS:
Statistics: Num rows: 6 Data size: 1626 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col3 (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
Statistics: Num rows: 6 Data size: 1626 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1042,7 +1042,7 @@ STAGE PLANS:
Statistics: Num rows: 12 Data size: 3252 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col3 (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
Statistics: Num rows: 12 Data size: 3252 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1161,7 +1161,7 @@ STAGE PLANS:
Statistics: Num rows: 24 Data size: 6504 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col3 (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
Statistics: Num rows: 24 Data size: 6504 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1280,7 +1280,7 @@ STAGE PLANS:
Statistics: Num rows: 48 Data size: 13008 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col3 (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
Statistics: Num rows: 48 Data size: 13008 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1401,7 +1401,7 @@ STAGE PLANS:
Statistics: Num rows: 96 Data size: 26208 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col3 (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
Statistics: Num rows: 96 Data size: 26208 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1522,7 +1522,7 @@ STAGE PLANS:
Statistics: Num rows: 192 Data size: 52416 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col3 (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
Statistics: Num rows: 192 Data size: 52416 Basic stats: COMPLETE Column stats: COMPLETE
@@ -2910,7 +2910,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: iceberg_bucket(_col0, 16) (type: int)
- null sort order: a
+ null sort order: z
sort order: +
Map-reduce partition columns: iceberg_bucket(_col0, 16) (type: int)
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
@@ -495,7 +495,7 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col3 (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE
@@ -614,7 +614,7 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col3 (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE
@@ -733,7 +733,7 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col3 (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE
@@ -852,7 +852,7 @@ STAGE PLANS:
Statistics: Num rows: 6 Data size: 1626 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col3 (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
Statistics: Num rows: 6 Data size: 1626 Basic stats: COMPLETE Column stats: COMPLETE
@@ -973,7 +973,7 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 2730 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col3 (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
Statistics: Num rows: 10 Data size: 2730 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1094,7 +1094,7 @@ STAGE PLANS:
Statistics: Num rows: 20 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col3 (type: string)
- null sort order: aa
+ null sort order: zz
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col3 (type: string)
Statistics: Num rows: 20 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE