HIVE-28703: Integral Data Type Overflow Not Enforced for JsonSerDe and UDFs #5608

Status: Open. Wants to merge 2 commits into base: master.
2 changes: 2 additions & 0 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1840,6 +1840,8 @@ public static enum ConfVars {
"OFFSET is mostly meaningless when a result set doesn't have a total order."),
HIVE_STRICT_TIMESTAMP_CONVERSION("hive.strict.timestamp.conversion", true,
"Restricts unsafe numeric to timestamp conversions"),
HIVE_STRICT_INTEGRAL_LIMIT("hive.strict.integral.limit", true,
"Enabling strict limits for integral values (TINYINT, SMALLINT, INT, BIGINT)."),
HIVE_LOAD_DATA_OWNER("hive.load.data.owner", "",
"Set the owner of files loaded using load data in managed tables."),

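A minimal way to exercise the new property from a Hive session (a sketch, assuming the default of true shown above; the error text comes from the UDF changes further down in this PR):

-- With strict checking enabled (the proposed default), an out-of-range cast fails
-- instead of silently wrapping around.
SET hive.strict.integral.limit=true;
SELECT CAST(128 AS TINYINT);   -- expected to fail with "Value out of range for Byte: 128"

-- Disabling the property restores the old, lenient behaviour (two's-complement wrap-around).
SET hive.strict.integral.limit=false;
SELECT CAST(128 AS TINYINT);   -- returns -128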
1 change: 1 addition & 0 deletions data/files/sampleJson.json
@@ -0,0 +1 @@
{"tiny_value": 128, "small_value" : 32768, "int_value" : 2147483648, "big_value" : 9223372036854775808}
10 changes: 9 additions & 1 deletion ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java
@@ -18,8 +18,10 @@

package org.apache.hadoop.hive.ql.udf;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFMethodResolver;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToLong;
@@ -124,10 +126,16 @@ public ByteWritable evaluate(ShortWritable i) {
* The integer value to convert
* @return Byte
*/
public ByteWritable evaluate(IntWritable i) {
public ByteWritable evaluate(IntWritable i) throws UDFArgumentException {
if (i == null) {
return null;
} else {
HiveConf hiveConf = new HiveConf();
boolean strictIntegralCheck = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_STRICT_INTEGRAL_LIMIT);
int value = i.get();
if (strictIntegralCheck && (value < Byte.MIN_VALUE || value > Byte.MAX_VALUE)) {
throw new UDFArgumentException("Value out of range for Byte: " + value);
}
Review comment (Contributor):
I know this obeys the ANSI standard, but it is a breaking change for Hive. Hive's cast is generally known to be very generous, so I think we can't change the behavior without introducing a flag.

Reply (Contributor Author):
Hey @okumin, thank you so much for the review! Your suggestion makes sense. I have set the default value to true.

byteWritable.set((byte) i.get());
return byteWritable;
}
10 changes: 9 additions & 1 deletion ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java
@@ -18,8 +18,10 @@

package org.apache.hadoop.hive.ql.udf;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFMethodResolver;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToLong;
@@ -142,10 +144,16 @@ public IntWritable evaluate(ShortWritable i) {
* The long value to convert
* @return IntWritable
*/
public IntWritable evaluate(LongWritable i) {
public IntWritable evaluate(LongWritable i) throws UDFArgumentException {
if (i == null) {
return null;
} else {
HiveConf hiveConf = new HiveConf();
boolean strictIntegralCheck = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_STRICT_INTEGRAL_LIMIT);
long value = i.get();
if (strictIntegralCheck && (value < Integer.MIN_VALUE || value > Integer.MAX_VALUE)) {
throw new UDFArgumentException("Value out of range for Integer: " + value);
}
intWritable.set((int) i.get());
return intWritable;
}
10 changes: 9 additions & 1 deletion ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java
@@ -18,8 +18,10 @@

package org.apache.hadoop.hive.ql.udf;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFMethodResolver;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToLong;
@@ -124,10 +126,16 @@ public ShortWritable evaluate(ByteWritable i) {
* The integer value to convert
* @return ShortWritable
*/
public ShortWritable evaluate(IntWritable i) {
public ShortWritable evaluate(IntWritable i) throws UDFArgumentException {
if (i == null) {
return null;
} else {
HiveConf hiveConf = new HiveConf();
boolean strictIntegralCheck = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_STRICT_INTEGRAL_LIMIT);
int value = i.get();
if (strictIntegralCheck && (value < Short.MIN_VALUE || value > Short.MAX_VALUE)) {
throw new UDFArgumentException("Value out of range for Short: " + value);
}
shortWritable.set((short) i.get());
return shortWritable;
}
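The same guard pattern is applied in all three UDFs above (byte, short, and int targets), so with the flag enabled each of the following hypothetical casts should raise a UDFArgumentException instead of overflowing silently (a sketch; the values mirror the negative tests added below):

SELECT CAST(128 AS TINYINT);                     -- exceeds Byte.MAX_VALUE (127)
SELECT CAST(32768 AS SMALLINT);                  -- exceeds Short.MAX_VALUE (32767)
SELECT CAST(CAST(2147483648 AS BIGINT) AS INT);  -- exceeds Integer.MAX_VALUE (2147483647)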
10 changes: 10 additions & 0 deletions ql/src/test/queries/clientnegative/json_serde_neg1.q
@@ -0,0 +1,10 @@
drop table if exists json_serde1_1;

create table json_serde1_1 (tiny_value TINYINT, small_value SMALLINT, int_value INT, big_value BIGINT)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';

insert into table json_serde1_1 values (128, 32768, 2147483648, 9223372036854775808);

select * from json_serde1_1;

drop table json_serde1_1;
10 changes: 10 additions & 0 deletions ql/src/test/queries/clientnegative/json_serde_neg2.q
@@ -0,0 +1,10 @@
drop table if exists json_serde2_1;

create table json_serde2_1 (tiny_value TINYINT, small_value SMALLINT, int_value INT, big_value BIGINT)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';

LOAD DATA LOCAL INPATH '../../data/files/sampleJson.json' INTO TABLE json_serde2_1;

select * from json_serde2_1;

drop table json_serde2_1;
8 changes: 8 additions & 0 deletions ql/src/test/queries/clientnegative/json_serde_neg3.q
@@ -0,0 +1,8 @@
drop table if exists json_serde3_1;

create table json_serde3_1 (tiny_value TINYINT, small_value SMALLINT, int_value INT, big_value BIGINT)
row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';

insert into table json_serde3_1 values (127, 32768, 2147483648, 9223372036854775808);

select * from json_serde3_1;
8 changes: 8 additions & 0 deletions ql/src/test/queries/clientnegative/json_serde_neg4.q
@@ -0,0 +1,8 @@
drop table if exists json_serde4_1;

create table json_serde4_1 (tiny_value TINYINT, small_value SMALLINT, int_value INT, big_value BIGINT)
row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';

LOAD DATA LOCAL INPATH '../../data/files/sampleJson.json' INTO TABLE json_serde4_1;

select * from json_serde4_1;
4 changes: 2 additions & 2 deletions ql/src/test/queries/clientpositive/prepare_plan.q
@@ -197,8 +197,8 @@ prepare pbetween from
select count(*) from alltypes where (c BETWEEN ? AND ?) AND (v BETWEEN ? AND ?) AND (d BETWEEN ? AND ?) AND (dt BETWEEN ? AND ?) OR (ctinyint BETWEEN ? AND ?) AND (csmallint BETWEEN ? AND ?) AND (cint BETWEEN ? AND ?)
AND (cfloat BETWEEN ? AND ?) AND (cdouble BETWEEN ? AND ?) OR (cstring1 BETWEEN ? AND ?) AND (ctimestamp1 BETWEEN ? AND ?) OR (cbigint BETWEEN ? AND ?);

explain execute pbetween using 'ch1' ,'ch2' ,'var1' ,'var2',1000.34, 2000.0, '1947-12-12', '1968-12-31', 11, 1000, 15601, 1, 788564623, 23,1.0, 18.00, 0, 15601.0, 'xTlDv24JYv4s', 'str1', '1969-12-31 16:00:02.351','2020-12-31 16:00:01', 0, 133;
execute pbetween using 'ch1' ,'ch2' ,'var1' ,'var2',1000.34, 2000.0, '1947-12-12', '1968-12-31', 11, 1000, 15601, 1, 788564623, 23,1.0, 18.00, 0, 15601.0, 'xTlDv24JYv4s', 'str1', '1969-12-31 16:00:02.351','2020-12-31 16:00:01', 0, 133;
explain execute pbetween using 'ch1' ,'ch2' ,'var1' ,'var2',1000.34, 2000.0, '1947-12-12', '1968-12-31', 11, 127, 15601, 1, 788564623, 23,1.0, 18.00, 0, 15601.0, 'xTlDv24JYv4s', 'str1', '1969-12-31 16:00:02.351','2020-12-31 16:00:01', 0, 133;
execute pbetween using 'ch1' ,'ch2' ,'var1' ,'var2',1000.34, 2000.0, '1947-12-12', '1968-12-31', 11, 127, 15601, 1, 788564623, 23,1.0, 18.00, 0, 15601.0, 'xTlDv24JYv4s', 'str1', '1969-12-31 16:00:02.351','2020-12-31 16:00:01', 0, 133;

DROP TABLE testParam;
DROP TABLE alltypes;
2 changes: 1 addition & 1 deletion ql/src/test/queries/clientpositive/udf_to_byte.q
@@ -7,7 +7,7 @@ SELECT CAST(NULL AS TINYINT) FROM src tablesample (1 rows);
SELECT CAST(TRUE AS TINYINT) FROM src tablesample (1 rows);

SELECT CAST(CAST(-18 AS SMALLINT) AS TINYINT) FROM src tablesample (1 rows);
SELECT CAST(-129 AS TINYINT) FROM src tablesample (1 rows);
SELECT CAST(-127 AS TINYINT) FROM src tablesample (1 rows);
SELECT CAST(CAST(-1025 AS BIGINT) AS TINYINT) FROM src tablesample (1 rows);

SELECT CAST(CAST(-3.14 AS DOUBLE) AS TINYINT) FROM src tablesample (1 rows);
73 changes: 73 additions & 0 deletions ql/src/test/results/clientnegative/json_serde_neg1.q.out
@@ -0,0 +1,73 @@
PREHOOK: query: drop table if exists json_serde1_1
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: drop table if exists json_serde1_1
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
PREHOOK: query: create table json_serde1_1 (tiny_value TINYINT, small_value SMALLINT, int_value INT, big_value BIGINT)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@json_serde1_1
POSTHOOK: query: create table json_serde1_1 (tiny_value TINYINT, small_value SMALLINT, int_value INT, big_value BIGINT)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@json_serde1_1
PREHOOK: query: insert into table json_serde1_1 values (128, 32768, 2147483648, 9223372036854775808)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@json_serde1_1
Status: Failed
Vertex failed, vertexName=Map 1, vertexId=vertex_#ID#, diagnostics=[Task failed, taskId=task_#ID#, diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( failure ) : attempt_#ID#:java.lang.RuntimeException: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable
#### A masked pattern was here ####
Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to execute method public org.apache.hadoop.hive.serde2.io.ByteWritable org.apache.hadoop.hive.ql.udf.UDFToByte.evaluate(org.apache.hadoop.io.IntWritable) throws org.apache.hadoop.hive.ql.exec.UDFArgumentException:Value out of range for Byte: 128
#### A masked pattern was here ####
Caused by: java.lang.reflect.InvocationTargetException
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.exec.UDFArgumentException: Value out of range for Byte: 128
#### A masked pattern was here ####
], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_#ID#:java.lang.RuntimeException: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable
#### A masked pattern was here ####
Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to execute method public org.apache.hadoop.hive.serde2.io.ByteWritable org.apache.hadoop.hive.ql.udf.UDFToByte.evaluate(org.apache.hadoop.io.IntWritable) throws org.apache.hadoop.hive.ql.exec.UDFArgumentException:Value out of range for Byte: 128
#### A masked pattern was here ####
Caused by: java.lang.reflect.InvocationTargetException
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.exec.UDFArgumentException: Value out of range for Byte: 128
#### A masked pattern was here ####
]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:0, Vertex vertex_#ID# [Map 1] killed/failed due to:OWN_TASK_FAILURE]
[Masked Vertex killed due to OTHER_VERTEX_FAILURE]
DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:1
FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.tez.TezTask. Vertex failed, vertexName=Map 1, vertexId=vertex_#ID#, diagnostics=[Task failed, taskId=task_#ID#, diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( failure ) : attempt_#ID#:java.lang.RuntimeException: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable
#### A masked pattern was here ####
Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to execute method public org.apache.hadoop.hive.serde2.io.ByteWritable org.apache.hadoop.hive.ql.udf.UDFToByte.evaluate(org.apache.hadoop.io.IntWritable) throws org.apache.hadoop.hive.ql.exec.UDFArgumentException:Value out of range for Byte: 128
#### A masked pattern was here ####
Caused by: java.lang.reflect.InvocationTargetException
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.exec.UDFArgumentException: Value out of range for Byte: 128
#### A masked pattern was here ####
], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_#ID#:java.lang.RuntimeException: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable
#### A masked pattern was here ####
Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing writable
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to execute method public org.apache.hadoop.hive.serde2.io.ByteWritable org.apache.hadoop.hive.ql.udf.UDFToByte.evaluate(org.apache.hadoop.io.IntWritable) throws org.apache.hadoop.hive.ql.exec.UDFArgumentException:Value out of range for Byte: 128
#### A masked pattern was here ####
Caused by: java.lang.reflect.InvocationTargetException
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.exec.UDFArgumentException: Value out of range for Byte: 128
#### A masked pattern was here ####
]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:0, Vertex vertex_#ID# [Map 1] killed/failed due to:OWN_TASK_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE]DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:1
33 changes: 33 additions & 0 deletions ql/src/test/results/clientnegative/json_serde_neg2.q.out
@@ -0,0 +1,33 @@
PREHOOK: query: drop table if exists json_serde2_1
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: drop table if exists json_serde2_1
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
PREHOOK: query: create table json_serde2_1 (tiny_value TINYINT, small_value SMALLINT, int_value INT, big_value BIGINT)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@json_serde2_1
POSTHOOK: query: create table json_serde2_1 (tiny_value TINYINT, small_value SMALLINT, int_value INT, big_value BIGINT)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@json_serde2_1
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/sampleJson.json' INTO TABLE json_serde2_1
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@json_serde2_1
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/sampleJson.json' INTO TABLE json_serde2_1
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@json_serde2_1
PREHOOK: query: select * from json_serde2_1
PREHOOK: type: QUERY
PREHOOK: Input: default@json_serde2_1
#### A masked pattern was here ####
POSTHOOK: query: select * from json_serde2_1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@json_serde2_1
#### A masked pattern was here ####
Failed with exception java.io.IOException:org.apache.hadoop.hive.serde2.SerDeException: java.lang.IllegalArgumentException: Failed to parse integral value for category BYTE: 128