diff --git a/hive-serdes/pom.xml b/hive-serdes/pom.xml index c2358db..c644be0 100644 --- a/hive-serdes/pom.xml +++ b/hive-serdes/pom.xml @@ -21,7 +21,7 @@ com.cloudera.serde hive-serdes - 1.0-SNAPSHOT + 1.0.1-SNAPSHOT jar hive-serdes @@ -91,6 +91,11 @@ jackson-core-asl 1.9.8 + + org.codehaus.jackson + jackson-mapper-asl + 1.9.8 + diff --git a/hive-serdes/src/main/java/com/cloudera/hive/serde/JSONSerDe.java b/hive-serdes/src/main/java/com/cloudera/hive/serde/JSONSerDe.java index f649042..3034dd0 100644 --- a/hive-serdes/src/main/java/com/cloudera/hive/serde/JSONSerDe.java +++ b/hive-serdes/src/main/java/com/cloudera/hive/serde/JSONSerDe.java @@ -1,13 +1,13 @@ /** * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file + * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file + * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -114,6 +114,7 @@ public void initialize(Configuration conf, Properties tbl) * table, and looking up those fields in the parsed JSON object. If the value * of the field is not a primitive, the object is parsed further. */ + @SuppressWarnings("rawtypes") @Override public Object deserialize(Writable blob) throws SerDeException { Map root = null; @@ -129,9 +130,9 @@ public Object deserialize(Writable blob) throws SerDeException { } // Lowercase the keys as expected by hive - Map lowerRoot = new HashMap(); + Map lowerRoot = new HashMap(); for(Map.Entry entry: root.entrySet()) { - lowerRoot.put(((String)entry.getKey()).toLowerCase(), entry.getValue()); + lowerRoot.put(patchKey((String) entry.getKey()), entry.getValue()); } root = lowerRoot; @@ -148,6 +149,25 @@ public Object deserialize(Writable blob) throws SerDeException { return row; } + /* + * replace dot/minus and do lowerCase at once + */ + static final String patchKey(String key) { + StringBuilder buffer = new StringBuilder(key.length()); + for (char character : key.toCharArray()) { + switch (character) { + case '.': + case '-': + buffer.append('_'); + break; + default: + buffer.append(Character.toLowerCase(character)); + break; + } + } + return buffer.toString(); + } + /** * Parses a JSON object according to the Hive column's type. * @@ -186,6 +206,7 @@ private Object parseField(Object field, TypeInfo fieldTypeInfo) { * @return - A map representing the object and its fields */ private Object parseStruct(Object field, StructTypeInfo fieldTypeInfo) { + @SuppressWarnings("unchecked") Map map = (Map)field; ArrayList structTypes = fieldTypeInfo.getAllStructFieldTypeInfos(); ArrayList structNames = fieldTypeInfo.getAllStructFieldNames(); @@ -206,6 +227,7 @@ private Object parseStruct(Object field, StructTypeInfo fieldTypeInfo) { * @return - A list of the parsed elements */ private Object parseList(Object field, ListTypeInfo fieldTypeInfo) { + @SuppressWarnings("unchecked") ArrayList list = (ArrayList) field; TypeInfo elemTypeInfo = fieldTypeInfo.getListElementTypeInfo(); @@ -226,6 +248,7 @@ private Object parseList(Object field, ListTypeInfo fieldTypeInfo) { * @return */ private Object parseMap(Object field, MapTypeInfo fieldTypeInfo) { + @SuppressWarnings("unchecked") Map map = (Map) field; TypeInfo valueTypeInfo = fieldTypeInfo.getMapValueTypeInfo(); diff --git a/hive-serdes/src/test/java/com/cloudera/hive/serde/TestDeserializeJSON.java b/hive-serdes/src/test/java/com/cloudera/hive/serde/TestDeserializeJSON.java new file mode 100644 index 0000000..148f4ce --- /dev/null +++ b/hive-serdes/src/test/java/com/cloudera/hive/serde/TestDeserializeJSON.java @@ -0,0 +1,83 @@ +package com.cloudera.hive.serde; +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.assertNotNull; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.List; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.io.Writable; +import org.junit.Test; + + +public class TestDeserializeJSON { + + private static final Writable EXAMPLE = new Writable() { + @Override + public void write(DataOutput out) throws IOException { + } + + @Override + public void readFields(DataInput in) throws IOException { + } + + public String toString() { + return "{\"position.x\":1890,\"position.y\":11430,\"uid\":89775688,\"generator\":\"java:1.3:srv016108:4046 java:1.3:srv016108:4046\",\"pid\":674,\"time\":1399940194255,\"event\":\"playtime.start\"}"; + }; + }; + + @Test(expected = NullPointerException.class) + public void testPatchKeyNull() { + JSONSerDe.patchKey(null); + } + + @Test + public void testPatchKeyEmpty() { + assertEquals("", JSONSerDe.patchKey("")); + } + + @Test + public void testPatchKeyToLowerCase() { + assertEquals("xxx", JSONSerDe.patchKey("XxX")); + } + + @Test + public void testPatchKeyReplaceMinus() { + assertEquals("x_x", JSONSerDe.patchKey("x-x")); + } + + @Test + public void testPatchKeyReplaceDot() { + assertEquals("x_x", JSONSerDe.patchKey("x.x")); + } + + @Test + public void testPatchKeyuntouched() { + assertEquals("xox", JSONSerDe.patchKey("xox")); + } + + @Test + public void testDeserializationByExample() throws SerDeException { + JSONSerDe jsonSerDe = new JSONSerDe(); + + // init + Configuration testConfig = new Configuration(); + Properties properties = new Properties(); + properties.setProperty(serdeConstants.LIST_COLUMNS, "position_x,position_y,uid,pid,time,generator,event"); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,int,int,tinyint,bigint,string,string"); + jsonSerDe.initialize(testConfig, properties); + + List result = (List)jsonSerDe.deserialize(EXAMPLE); + + assertNotNull(result); + assertEquals(7, result.size()); + for (Object element : result) { + assertNotNull(element); + } + } +}