Skip to content

Commit

Permalink
Added more types support
Browse files Browse the repository at this point in the history
testMoreTypes1 exercises these DFDL types (all work except where noted):

- int
- long
- short
- byte
- boolean
- double
- float (does not work. Bug DAFFODIL-2367)
- hexBinary
- string
  • Loading branch information
mbeckerle committed Dec 22, 2023
1 parent 19f4053 commit 9b01eb0
Show file tree
Hide file tree
Showing 8 changed files with 171 additions and 81 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Objects;

import org.apache.daffodil.japi.DataProcessor;
import org.apache.drill.common.AutoCloseables;
Expand Down Expand Up @@ -65,7 +66,7 @@ public DaffodilBatchReader (DaffodilReaderConfig readerConfig, EasySubScan scan,
String schemaURIString = dafConfig.getSchemaURI(); // "schema/complexArray1.dfdl.xsd";
String rootName = dafConfig.getRootName();
String rootNamespace = dafConfig.getRootNamespace();
Boolean validationMode = dafConfig.getValidationMode();
boolean validationMode = dafConfig.getValidationMode();

URI dfdlSchemaURI;
try {
Expand Down Expand Up @@ -128,7 +129,7 @@ public DaffodilBatchReader (DaffodilReaderConfig readerConfig, EasySubScan scan,

/**
* This is the core of actual processing - data movement from Daffodil to Drill.
*
* <p>
* If there is space in the batch, and there is data available to parse
* then this calls the daffodil parser, which parses data, delivering it to the rowWriter
* by way of the infoset outputter.
Expand Down Expand Up @@ -156,6 +157,7 @@ public boolean next() {
try {
dafParser.parse();
if (dafParser.isProcessingError()) {
assert(Objects.nonNull(dafParser.getDiagnostics()));
throw UserException.dataReadError().message(dafParser.getDiagnosticsAsString())
.addContext(errorContext).build(logger);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,13 @@
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.physical.resultSet.RowSetLoader;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.store.daffodil.schema.DrillDaffodilSchemaUtils;
import org.apache.drill.exec.store.daffodil.schema.DrillDaffodilSchemaVisitor;
import org.apache.drill.exec.vector.accessor.ArrayWriter;
import org.apache.drill.exec.vector.accessor.ColumnWriter;
import org.apache.drill.exec.vector.accessor.ObjectType;
import org.apache.drill.exec.vector.accessor.TupleWriter;
import org.apache.drill.exec.vector.complex.writer.BaseWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -137,8 +139,7 @@ public void startSimple(InfosetSimpleElement ise) {
assert cm.isNullable();
cw.setNull();
} else {
Object value = convertDaffodilValueToDrillValue(ise, cm);
cw.setObject(value);
convertDaffodilValueToDrillValue(ise, cm, cw);
}
}

Expand Down Expand Up @@ -215,58 +216,69 @@ public void endArray(InfosetArray ia) {
arrayWriterStack.pop();
}

private Object convertDaffodilValueToDrillValue(InfosetSimpleElement ise, ColumnMetadata cm) {
Object value = null;
assert (cm.type() == TypeProtos.MinorType.INT);
assert (ise.metadata().primitiveType() == PrimitiveType.Int());
private void convertDaffodilValueToDrillValue(InfosetSimpleElement ise, ColumnMetadata cm, ColumnWriter cw) {
PrimitiveType dafType = ise.metadata().primitiveType();
TypeProtos.MinorType drillType = cm.type();
TypeProtos.MinorType drillType = DrillDaffodilSchemaUtils.getDrillDataType(dafType);
assert(drillType == cm.type());
switch (drillType) {
case INT: {
assert (dafType == PrimitiveType.Int());
value = ise.getAnyRef();
//
// FIXME: Javadoc for setObject says "primarily for testing"
// So how are we supposed to assign the column value then?
// Is there a way to get from a ColumnWriter to a typed scalar writer (downcast perhaps?)
cw.setObject(ise.getInt());
break;
}
case BIGINT: {
cw.setObject(ise.getLong());
break;
}
case SMALLINT: {
cw.setObject(ise.getShort());
break;
}
case TINYINT: {
cw.setObject(ise.getByte());
break;
}
// .put("UNSIGNEDLONG", TypeProtos.MinorType.UINT8)
// .put("UNSIGNEDINT", TypeProtos.MinorType.UINT4)
// .put("UNSIGNEDSHORT", TypeProtos.MinorType.UINT2)
// .put("UNSIGNEDBYTE", TypeProtos.MinorType.UINT1)
// .put("INTEGER", TypeProtos.MinorType.BIGINT)
// .put("NONNEGATIVEINTEGER", TypeProtos.MinorType.BIGINT)
case BIT: {
cw.setObject(ise.getBoolean());
break;
}
// .put("DATE", TypeProtos.MinorType.DATE) // requires conversion
// .put("DATETIME", TypeProtos.MinorType.TIMESTAMP) // requires conversion
// .put("DECIMAL", TypeProtos.MinorType.VARDECIMAL) // requires conversion (maybe)
case FLOAT8: {
cw.setObject(ise.getDouble());
break;
}
case FLOAT4: {
cw.setObject(ise.getFloat());
break;
}
case VARBINARY: {
cw.setObject(ise.getHexBinary());
break;
}
// TINYINT = 3; // single byte signed integer
// SMALLINT = 4; // two byte signed integer
// INT = 5; // four byte signed integer
// BIGINT = 6; // eight byte signed integer
// DECIMAL9 = 7; // a decimal supporting precision between 1 and 9
// DECIMAL18 = 8; // a decimal supporting precision between 10 and 18
// DECIMAL28SPARSE = 9; // a decimal supporting precision between 19 and 28
// DECIMAL38SPARSE = 10; // a decimal supporting precision between 29 and 38
// MONEY = 11; // signed decimal with two digit scale
// DATE = 12; // days since 4713bc
// TIME = 13; // time in micros before or after 2000/1/1
// TIMETZ = 14; // time in micros before or after 2000/1/1 with timezone
// TIMESTAMPTZ = 15; // unix epoch time in millis
// TIMESTAMP = 16; // TBD
// INTERVAL = 17; // TBD
// FLOAT4 = 18; // 4 byte ieee 754
// FLOAT8 = 19; // 8 byte ieee 754
// BIT = 20; // single bit value (boolean)
// FIXEDCHAR = 21; // utf8 fixed length string, padded with spaces
// FIXED16CHAR = 22;
// FIXEDBINARY = 23; // fixed length binary, padded with 0 bytes
// VARCHAR = 24; // utf8 variable length string
// VAR16CHAR = 25; // utf16 variable length string
// VARBINARY = 26; // variable length binary
// UINT1 = 29; // unsigned 1 byte integer
// UINT2 = 30; // unsigned 2 byte integer
// UINT4 = 31; // unsigned 4 byte integer
// UINT8 = 32; // unsigned 8 byte integer
// DECIMAL28DENSE = 33; // dense decimal representation, supporting precision between 19 and 28
// DECIMAL38DENSE = 34; // dense decimal representation, supporting precision between 28 and 38
// NULL = 37; // a value of unknown type (e.g. a missing reference).
// INTERVALYEAR = 38; // Interval type specifying YEAR to MONTH
// INTERVALDAY = 39; // Interval type specifying DAY to SECONDS
// LIST = 40;
// GENERIC_OBJECT = 41;
// UNION = 42;
// VARDECIMAL = 43; // variable width decimal (arbitrary precision)
// DICT = 44;
case VARCHAR: {
//
// FIXME: VARCHAR is defined in drill as utf8 string.
// Is Drill expecting something other than a Java string in this setObject call?
// Should we be mapping Daffodil strings to Drill VAR16CHAR type?
//
String s = ise.getString();
cw.setObject(s);
break;
}
// .put("TIME", TypeProtos.MinorType.TIME) // requires conversion

}
return value;
}

private void DFDLParseError(String s) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,11 @@
import org.apache.daffodil.japi.ParseResult;
import org.apache.daffodil.japi.infoset.InfosetOutputter;
import org.apache.daffodil.japi.io.InputSourceDataInputStream;
import org.jdom2.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.channels.Channels;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

/**
Expand Down Expand Up @@ -133,7 +127,7 @@ public void parse() {
ParseResult res = dp.parse(dis, outputter);
isProcessingError = res.isProcessingError();
isValidationError = res.isValidationError();
List<Diagnostic> diags = res.getDiagnostics();
diagnostics = res.getDiagnostics();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
package org.apache.drill.exec.store.daffodil.schema;


import org.apache.daffodil.japi.Compiler;
import org.apache.daffodil.japi.Compiler;
import org.apache.daffodil.japi.Daffodil;
import org.apache.daffodil.japi.DataProcessor;
Expand All @@ -31,8 +30,8 @@
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.channels.Channels;
import java.util.List;
import java.util.Objects;
Expand Down Expand Up @@ -105,7 +104,7 @@ public DataProcessor getDataProcessor(
}

private void loadSchema(URI schemaFileURI)
throws URISyntaxException, IOException, InvalidParserException {
throws IOException, InvalidParserException {
Compiler c = Daffodil.compiler();
dp = c.reload(Channels.newChannel(schemaFileURI.toURL().openStream()));
}
Expand All @@ -117,6 +116,9 @@ private List<Diagnostic> compileSchema(URI schemaFileURI, String rootName, Strin
ProcessorFactory pf = c.compileSource(schemaFileURI, rootName, rootNS);
List<Diagnostic> pfDiags = pf.getDiagnostics();
if (pf.isError()) {
pfDiags.forEach(diag ->
logger.error(diag.getSomeMessage())
);
throw new CompileFailure(pfDiags);
}
dp = pf.onPath("/");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,8 @@
import org.apache.drill.exec.record.metadata.MapBuilder;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.collection.Seq;

import java.util.Stack;

Expand All @@ -54,7 +52,7 @@ public class DrillDaffodilSchemaVisitor
*/
private final SchemaBuilder builder = new SchemaBuilder();

private final Stack<MapBuilder> mapBuilderStack = new Stack<MapBuilder>();
private final Stack<MapBuilder> mapBuilderStack = new Stack<>();

private MapBuilder mapBuilder() {
return mapBuilderStack.peek();
Expand Down Expand Up @@ -131,11 +129,11 @@ public void startComplexElementMetadata(ComplexElementMetadata md) {
/**
* The original root given to Drill needs to be a schema element corresponding
* to one row of data.
*
* <p>
* Drill will call daffodil parse() to parse one such element. The
* children elements of this element will become the column contents of the
* row.
*
* <p>
* So the metadata for this row, to drill, is ONLY the columns of this
* top level element type.
* @param md
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,11 @@
package org.apache.drill.exec.store.daffodil;

import org.apache.drill.categories.RowSetTest;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.rowSet.RowSet;
import org.apache.drill.exec.physical.rowSet.RowSetReader;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.vector.accessor.ArrayReader;
import org.apache.drill.exec.vector.accessor.TupleReader;
import org.apache.drill.test.ClusterFixture;
import org.apache.drill.test.ClusterTest;
import org.apache.drill.test.QueryBuilder;
Expand All @@ -36,14 +33,7 @@
import org.junit.experimental.categories.Category;

import java.nio.file.Paths;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalTime;

import static org.apache.drill.test.QueryTestUtil.generateCompressedFile;
import static org.apache.drill.test.rowSet.RowSetUtilities.mapArray;
import static org.apache.drill.test.rowSet.RowSetUtilities.objArray;
import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;

Expand Down Expand Up @@ -199,9 +189,7 @@ public void testComplexArrayQuery1() throws Exception {
}

/**
* Tests data which is rows of two ints and an array containing a
* map containing two ints.
* Each row can be visualized like this: "{257, 258, [{259, 260},...]}"
* Tests data which is an array of ints in one column of the row set
* @throws Exception
*/
@Test
Expand All @@ -223,8 +211,8 @@ public void testSimpleArrayQuery1() throws Exception {

/**
* Tests data which is rows of two ints and an array containing a
* map containing two ints.
* Each row can be visualized like this: "{257, 258, [{259, 260},...]}"
* map containing an int and a vector of ints.
* Each row can be visualized like this: "{257, 258, [{259, [260, 261, 262]},...]}"
* @throws Exception
*/
@Test
Expand All @@ -243,4 +231,26 @@ public void testComplexArrayQuery2() throws Exception {
assertFalse(rdr.next());
results.clear();
}

@Test
public void testMoreTypes1() throws Exception {

QueryBuilder qb = client.queryBuilder();
QueryBuilder query = qb.sql(selectRow("moreTypes1", "moreTypes1.txt.dat"));
RowSet results = query.rowSet();
results.print();
assertEquals(2, results.rowCount());

RowSetReader rdr = results.reader();
rdr.next();
String map = rdr.getAsString();
assertEquals("{2147483647, 9223372036854775807, 32767, 127, true, " +
"1.7976931348623157E308, 3.4028235E38, [31, 32, 33, 34, 35, 36, 37, 38], \"daffodil\"}", map);
rdr.next();
map = rdr.getAsString();
assertEquals("{-2147483648, -9223372036854775808, -32768, -128, false, " +
"-1.7976931348623157E308, -3.4028235E38, [38, 37, 36, 35, 34, 33, 32, 31], \"drill\"}", map);
assertFalse(rdr.next());
results.clear();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
2147483647 9223372036854775807 32767 127 T 1.7976931348623157E308 3.4028235E38 12345678 'daffodil'
-2147483648 -9223372036854775808 -32768 -128 F -1.7976931348623157E308 -3.4028235E38 87654321 'drill'
Loading

0 comments on commit 9b01eb0

Please sign in to comment.