diff --git a/release-notes/VERSION b/release-notes/VERSION
index 24e1c91..ad32a02 100644
--- a/release-notes/VERSION
+++ b/release-notes/VERSION
@@ -1,6 +1,8 @@
Project: jackson-dataformat-csv
Version: 2.4.0 (xx-xxx-2014)
+#26: Inconsistent quoting of headers, values; add
+ `CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING` to allow more optimal checks.
#32: Allow disabling of quoteChar
(suggested by Jason D)
#40: Allow (re)ordering of columns of Schema, using `CsvSchema.sortedBy(...)`
diff --git a/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvFactory.java b/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvFactory.java
index 5859bec..22beb3e 100644
--- a/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvFactory.java
+++ b/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvFactory.java
@@ -418,8 +418,7 @@ protected Writer _createWriter(OutputStream out, JsonEncoding enc, IOContext ctx
protected CsvGenerator _createGenerator(IOContext ctxt, Writer out) throws IOException
{
- int feats = _csvGeneratorFeatures;
- CsvGenerator gen = new CsvGenerator(ctxt, _generatorFeatures, feats,
+ CsvGenerator gen = new CsvGenerator(ctxt, _generatorFeatures, _csvGeneratorFeatures,
_objectCodec, out,
_cfgColumnSeparator, _cfgQuoteCharacter, _cfgLineSeparator);
// any other initializations? No?
@@ -434,7 +433,7 @@ protected Reader _createReader(InputStream in, JsonEncoding enc, IOContext ctxt)
if (enc == null || enc == JsonEncoding.UTF8) {
// 28-May-2012, tatu: Custom UTF8 reader should be faster, esp for small input:
// return new InputStreamReader(in, UTF8);
- boolean autoClose = ctxt.isResourceManaged() || this.isEnabled(JsonParser.Feature.AUTO_CLOSE_SOURCE);
+ boolean autoClose = ctxt.isResourceManaged() || isEnabled(JsonParser.Feature.AUTO_CLOSE_SOURCE);
return new UTF8Reader(ctxt, in, autoClose);
}
return new InputStreamReader(in, enc.getJavaName());
diff --git a/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvGenerator.java b/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvGenerator.java
index 28de61e..0be4c1c 100644
--- a/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvGenerator.java
+++ b/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvGenerator.java
@@ -19,9 +19,26 @@ public class CsvGenerator extends GeneratorBase
*/
public enum Feature {
/**
- * Placeholder until we have actual features
+ * Feature that determines how much work is done before determining that
+ * a column value requires quoting: when set as true
, full
+ * check is made to only use quoting when it is strictly necessary;
+ * but when false
, a faster but more conservative check
+ * is made, and possibly quoting is used for values that might not need it.
+ * Trade-offs is basically between optimal/minimal quoting (true), and
+ * faster handling (false).
+ * Faster check involves only checking first N characters of value, as well
+ * as possible looser checks.
+ *
+ * Note, however, that regardless setting, all values that need to be quoted
+ * will be: it is just that when set to false
, other values may
+ * also be quoted (to avoid having to do more expensive checks).
+ *
+ * Default value is false
for "loose" (approximate, conservative)
+ * checking.
+ *
+ * @since 2.4
*/
- BOGUS(false) // placeholder
+ STRICT_CHECK_FOR_QUOTING(false),
;
protected final boolean _defaultState;
@@ -41,15 +58,16 @@ public static int collectDefaults()
}
return flags;
}
-
+
private Feature(boolean defaultState) {
_defaultState = defaultState;
_mask = (1 << ordinal());
}
+ public boolean enabledIn(int flags) { return (flags & getMask()) != 0; }
public boolean enabledByDefault() { return _defaultState; }
public int getMask() { return _mask; }
- };
+ }
protected final static long MIN_INT_AS_LONG = Integer.MIN_VALUE;
protected final static long MAX_INT_AS_LONG = Integer.MAX_VALUE;
@@ -112,7 +130,7 @@ public CsvGenerator(IOContext ctxt, int jsonFeatures, int csvFeatures,
char columnSeparator, char quoteChar, char[] linefeed)
{
this(ctxt, jsonFeatures, csvFeatures, codec,
- new CsvWriter(ctxt, out, columnSeparator, quoteChar, linefeed));
+ new CsvWriter(ctxt, csvFeatures, out, columnSeparator, quoteChar, linefeed));
}
public CsvGenerator(IOContext ctxt, int jsonFeatures, int csvFeatures,
@@ -259,6 +277,7 @@ private final void _writeFieldName(String name)
public CsvGenerator enable(Feature f) {
_csvFeatures |= f.getMask();
+ _writer.setFeatures(_csvFeatures);
return this;
}
@@ -273,11 +292,9 @@ public final boolean isEnabled(Feature f) {
public CsvGenerator configure(Feature f, boolean state) {
if (state) {
- enable(f);
- } else {
- disable(f);
+ return enable(f);
}
- return this;
+ return disable(f);
}
/*
diff --git a/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvMapper.java b/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvMapper.java
index 7139714..ab119a2 100644
--- a/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvMapper.java
+++ b/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvMapper.java
@@ -106,8 +106,7 @@ public final CsvFactory getFactory() {
* Definition will not be strictly typed (that is, all columns are
* just defined to be exposed as String tokens).
*/
- public CsvSchema schemaFor(JavaType pojoType)
- {
+ public CsvSchema schemaFor(JavaType pojoType) {
return _schemaFor(pojoType, _untypedSchemas, false);
}
@@ -126,8 +125,7 @@ public final CsvSchema schemaFor(TypeReference> pojoTypeRef) {
* determine type limitations which may make parsing more efficient
* (especially for numeric types like java.lang.Integer).
*/
- public CsvSchema typedSchemaFor(JavaType pojoType)
- {
+ public CsvSchema typedSchemaFor(JavaType pojoType) {
return _schemaFor(pojoType, _typedSchemas, true);
}
diff --git a/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvWriter.java b/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvWriter.java
index a73fc04..e6f0ee0 100644
--- a/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvWriter.java
+++ b/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvWriter.java
@@ -2,6 +2,7 @@
import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.io.IOContext;
+import com.fasterxml.jackson.dataformat.csv.CsvGenerator;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
import java.io.IOException;
@@ -22,7 +23,7 @@ public class CsvWriter
/* Also: only do check for optional quotes for short
* values; longer ones will always be quoted.
*/
- final protected static int MAX_QUOTE_CHECK = 20;
+ final protected static int MAX_QUOTE_CHECK = 24;
final protected BufferedValue[] NO_BUFFERED = new BufferedValue[0];
@@ -36,7 +37,7 @@ public class CsvWriter
*/
final protected IOContext _ioContext;
-
+
/**
* Underlying {@link Writer} used for output.
*/
@@ -57,6 +58,14 @@ public class CsvWriter
* quotes around value
*/
final protected int _cfgMinSafeChar;
+
+ protected int _csvFeatures;
+
+ /**
+ * Marker flag used to determine if to do optimal (aka "strict") quoting
+ * checks or not (looser conservative check)
+ */
+ protected boolean _cfgOptimalQuoting;
/*
/**********************************************************
@@ -123,10 +132,22 @@ public class CsvWriter
/**********************************************************
*/
+ @Deprecated // since 2.4, remove in 2.5
public CsvWriter(IOContext ctxt, Writer out,
char columnSeparator, char quoteChar, char[] linefeed)
+ {
+ this(ctxt, CsvGenerator.Feature.collectDefaults(),
+ out, columnSeparator, quoteChar, linefeed);
+
+ }
+
+ public CsvWriter(IOContext ctxt, int csvFeatures, Writer out,
+ char columnSeparator, char quoteChar, char[] linefeed)
{
_ioContext = ctxt;
+ _csvFeatures = csvFeatures;
+ _cfgOptimalQuoting = CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING.enabledIn(csvFeatures);
+
_outputBuffer = ctxt.allocConcatBuffer();
_bufferRecyclable = true;
_outputEnd = _outputBuffer.length;
@@ -145,6 +166,9 @@ public CsvWriter(IOContext ctxt, Writer out,
public CsvWriter(CsvWriter base, CsvSchema newSchema)
{
_ioContext = base._ioContext;
+ _csvFeatures = base._csvFeatures;
+ _cfgOptimalQuoting = base._cfgOptimalQuoting;
+
_outputBuffer = base._outputBuffer;
_bufferRecyclable = base._bufferRecyclable;
_outputEnd = base._outputEnd;
@@ -172,6 +196,14 @@ public CsvWriter withSchema(CsvSchema schema) {
return new CsvWriter(this, schema);
}
+ public CsvWriter setFeatures(int feat) {
+ if (feat != _csvFeatures) {
+ _csvFeatures = feat;
+ _cfgOptimalQuoting = CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING.enabledIn(feat);
+ }
+ return this;
+ }
+
/*
/**********************************************************
/* Read-access to output state
@@ -583,17 +615,49 @@ protected boolean _mayNeedQuotes(String value, int length)
if (_cfgQuoteCharacter < 0) {
return false;
}
- // let's not bother checking long Strings, just quote already:
+ // may skip checks unless we want exact checking
+ if (_cfgOptimalQuoting) {
+ return _needsQuotingStrict(value);
+ }
if (length > _cfgMaxQuoteCheckChars) {
return true;
}
- for (int i = 0; i < length; ++i) {
+ return _needsQuotingLoose(value);
+ }
+
+ /**
+ *
+ * NOTE: final since checking is not expected to be changed here; override
+ * calling method (_mayNeedQuotes
) instead, if necessary.
+ *
+ * @since 2.4
+ */
+ protected final boolean _needsQuotingLoose(String value)
+ {
+ for (int i = 0, len = value.length(); i < len; ++i) {
if (value.charAt(i) < _cfgMinSafeChar) {
return true;
}
}
return false;
}
+
+ /**
+ * @since 2.4
+ */
+ protected boolean _needsQuotingStrict(String value)
+ {
+ for (int i = 0, len = value.length(); i < len; ++i) {
+ char c = value.charAt(i);
+ if (c < _cfgMinSafeChar) {
+ if (c == _cfgColumnSeparator || c == _cfgQuoteCharacter
+ || c == '\r' || c == '\n') {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
protected void _buffer(int index, BufferedValue v)
{
diff --git a/src/test/java/com/fasterxml/jackson/dataformat/csv/TestParser.java b/src/test/java/com/fasterxml/jackson/dataformat/csv/TestParser.java
index 74066f9..e96aa3a 100644
--- a/src/test/java/com/fasterxml/jackson/dataformat/csv/TestParser.java
+++ b/src/test/java/com/fasterxml/jackson/dataformat/csv/TestParser.java
@@ -32,8 +32,21 @@ public void testSimpleExplicit() throws Exception
.addColumn("userImage")
.addColumn("verified")
.build();
+ ObjectReader r = mapper.reader(schema);
+ _testSimpleExplicit(r, false);
+ _testSimpleExplicit(r, true);
+ }
- FiveMinuteUser user = mapper.reader(schema).withType(FiveMinuteUser.class).readValue("Bob,Robertson,MALE,AQIDBAU=,false\n");
+ private void _testSimpleExplicit(ObjectReader r, boolean useBytes) throws Exception
+ {
+ r = r.withType(FiveMinuteUser.class);
+ FiveMinuteUser user;
+ final String INPUT = "Bob,Robertson,MALE,AQIDBAU=,false\n";
+ if (useBytes) {
+ user = r.readValue(INPUT);
+ } else {
+ user = r.readValue(INPUT.getBytes("UTF-8"));
+ }
assertEquals("Bob", user.firstName);
assertEquals("Robertson", user.lastName);
assertEquals(FiveMinuteUser.Gender.MALE, user.getGender());
@@ -79,7 +92,12 @@ public void testSimpleAsMaps() throws Exception
}
// Test for [Issue#10]
- public void testMapsWithLinefeeds() throws Exception
+ public void testMapsWithLinefeeds() throws Exception {
+ _testMapsWithLinefeeds(false);
+ _testMapsWithLinefeeds(true);
+ }
+
+ private void _testMapsWithLinefeeds(boolean useBytes) throws Exception
{
CsvMapper mapper = mapperForCsv();
String CSV = "A,B,C\n"
@@ -91,7 +109,13 @@ public void testMapsWithLinefeeds() throws Exception
CsvSchema cs = CsvSchema.emptySchema().withHeader();
ObjectReader or = mapper.reader(HashMap.class).with(cs);
- MappingIterator