Skip to content
This repository was archived by the owner on Jan 22, 2019. It is now read-only.

Commit 579de90

Browse files
committed
Implement #26
1 parent 8479018 commit 579de90

File tree

8 files changed

+175
-24
lines changed

8 files changed

+175
-24
lines changed

release-notes/VERSION

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
Project: jackson-dataformat-csv
22
Version: 2.4.0 (xx-xxx-2014)
33

4+
#26: Inconsistent quoting of headers, values; add
5+
`CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING` to allow more optimal checks.
46
#32: Allow disabling of quoteChar
57
(suggested by Jason D)
68
#40: Allow (re)ordering of columns of Schema, using `CsvSchema.sortedBy(...)`

src/main/java/com/fasterxml/jackson/dataformat/csv/CsvFactory.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -418,8 +418,7 @@ protected Writer _createWriter(OutputStream out, JsonEncoding enc, IOContext ctx
418418

419419
protected CsvGenerator _createGenerator(IOContext ctxt, Writer out) throws IOException
420420
{
421-
int feats = _csvGeneratorFeatures;
422-
CsvGenerator gen = new CsvGenerator(ctxt, _generatorFeatures, feats,
421+
CsvGenerator gen = new CsvGenerator(ctxt, _generatorFeatures, _csvGeneratorFeatures,
423422
_objectCodec, out,
424423
_cfgColumnSeparator, _cfgQuoteCharacter, _cfgLineSeparator);
425424
// any other initializations? No?
@@ -434,7 +433,7 @@ protected Reader _createReader(InputStream in, JsonEncoding enc, IOContext ctxt)
434433
if (enc == null || enc == JsonEncoding.UTF8) {
435434
// 28-May-2012, tatu: Custom UTF8 reader should be faster, esp for small input:
436435
// return new InputStreamReader(in, UTF8);
437-
boolean autoClose = ctxt.isResourceManaged() || this.isEnabled(JsonParser.Feature.AUTO_CLOSE_SOURCE);
436+
boolean autoClose = ctxt.isResourceManaged() || isEnabled(JsonParser.Feature.AUTO_CLOSE_SOURCE);
438437
return new UTF8Reader(ctxt, in, autoClose);
439438
}
440439
return new InputStreamReader(in, enc.getJavaName());

src/main/java/com/fasterxml/jackson/dataformat/csv/CsvGenerator.java

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,26 @@ public class CsvGenerator extends GeneratorBase
1919
*/
2020
public enum Feature {
2121
/**
22-
* Placeholder until we have actual features
22+
* Feature that determines how much work is done before determining that
23+
* a column value requires quoting: when set as <code>true</code>, full
24+
* check is made to only use quoting when it is strictly necessary;
25+
* but when <code>false</code>, a faster but more conservative check
26+
* is made, and possibly quoting is used for values that might not need it.
27+
* Trade-offs is basically between optimal/minimal quoting (true), and
28+
* faster handling (false).
29+
* Faster check involves only checking first N characters of value, as well
30+
* as possible looser checks.
31+
*<p>
32+
* Note, however, that regardless setting, all values that need to be quoted
33+
* will be: it is just that when set to <code>false</code>, other values may
34+
* also be quoted (to avoid having to do more expensive checks).
35+
*<p>
36+
* Default value is <code>false</code> for "loose" (approximate, conservative)
37+
* checking.
38+
*
39+
* @since 2.4
2340
*/
24-
BOGUS(false) // placeholder
41+
STRICT_CHECK_FOR_QUOTING(false),
2542
;
2643

2744
protected final boolean _defaultState;
@@ -41,15 +58,16 @@ public static int collectDefaults()
4158
}
4259
return flags;
4360
}
44-
61+
4562
private Feature(boolean defaultState) {
4663
_defaultState = defaultState;
4764
_mask = (1 << ordinal());
4865
}
4966

67+
public boolean enabledIn(int flags) { return (flags & getMask()) != 0; }
5068
public boolean enabledByDefault() { return _defaultState; }
5169
public int getMask() { return _mask; }
52-
};
70+
}
5371

5472
protected final static long MIN_INT_AS_LONG = Integer.MIN_VALUE;
5573
protected final static long MAX_INT_AS_LONG = Integer.MAX_VALUE;
@@ -112,7 +130,7 @@ public CsvGenerator(IOContext ctxt, int jsonFeatures, int csvFeatures,
112130
char columnSeparator, char quoteChar, char[] linefeed)
113131
{
114132
this(ctxt, jsonFeatures, csvFeatures, codec,
115-
new CsvWriter(ctxt, out, columnSeparator, quoteChar, linefeed));
133+
new CsvWriter(ctxt, csvFeatures, out, columnSeparator, quoteChar, linefeed));
116134
}
117135

118136
public CsvGenerator(IOContext ctxt, int jsonFeatures, int csvFeatures,
@@ -259,6 +277,7 @@ private final void _writeFieldName(String name)
259277

260278
public CsvGenerator enable(Feature f) {
261279
_csvFeatures |= f.getMask();
280+
_writer.setFeatures(_csvFeatures);
262281
return this;
263282
}
264283

@@ -273,11 +292,9 @@ public final boolean isEnabled(Feature f) {
273292

274293
public CsvGenerator configure(Feature f, boolean state) {
275294
if (state) {
276-
enable(f);
277-
} else {
278-
disable(f);
295+
return enable(f);
279296
}
280-
return this;
297+
return disable(f);
281298
}
282299

283300
/*

src/main/java/com/fasterxml/jackson/dataformat/csv/CsvMapper.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,7 @@ public final CsvFactory getFactory() {
106106
* Definition will not be strictly typed (that is, all columns are
107107
* just defined to be exposed as String tokens).
108108
*/
109-
public CsvSchema schemaFor(JavaType pojoType)
110-
{
109+
public CsvSchema schemaFor(JavaType pojoType) {
111110
return _schemaFor(pojoType, _untypedSchemas, false);
112111
}
113112

@@ -126,8 +125,7 @@ public final CsvSchema schemaFor(TypeReference<?> pojoTypeRef) {
126125
* determine type limitations which may make parsing more efficient
127126
* (especially for numeric types like java.lang.Integer).
128127
*/
129-
public CsvSchema typedSchemaFor(JavaType pojoType)
130-
{
128+
public CsvSchema typedSchemaFor(JavaType pojoType) {
131129
return _schemaFor(pojoType, _typedSchemas, true);
132130
}
133131

src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvWriter.java

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.fasterxml.jackson.core.JsonGenerationException;
44
import com.fasterxml.jackson.core.io.IOContext;
5+
import com.fasterxml.jackson.dataformat.csv.CsvGenerator;
56
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
67

78
import java.io.IOException;
@@ -22,7 +23,7 @@ public class CsvWriter
2223
/* Also: only do check for optional quotes for short
2324
* values; longer ones will always be quoted.
2425
*/
25-
final protected static int MAX_QUOTE_CHECK = 20;
26+
final protected static int MAX_QUOTE_CHECK = 24;
2627

2728
final protected BufferedValue[] NO_BUFFERED = new BufferedValue[0];
2829

@@ -36,7 +37,7 @@ public class CsvWriter
3637
*/
3738

3839
final protected IOContext _ioContext;
39-
40+
4041
/**
4142
* Underlying {@link Writer} used for output.
4243
*/
@@ -57,6 +58,14 @@ public class CsvWriter
5758
* quotes around value
5859
*/
5960
final protected int _cfgMinSafeChar;
61+
62+
protected int _csvFeatures;
63+
64+
/**
65+
* Marker flag used to determine if to do optimal (aka "strict") quoting
66+
* checks or not (looser conservative check)
67+
*/
68+
protected boolean _cfgOptimalQuoting;
6069

6170
/*
6271
/**********************************************************
@@ -123,10 +132,22 @@ public class CsvWriter
123132
/**********************************************************
124133
*/
125134

135+
@Deprecated // since 2.4, remove in 2.5
126136
public CsvWriter(IOContext ctxt, Writer out,
127137
char columnSeparator, char quoteChar, char[] linefeed)
138+
{
139+
this(ctxt, CsvGenerator.Feature.collectDefaults(),
140+
out, columnSeparator, quoteChar, linefeed);
141+
142+
}
143+
144+
public CsvWriter(IOContext ctxt, int csvFeatures, Writer out,
145+
char columnSeparator, char quoteChar, char[] linefeed)
128146
{
129147
_ioContext = ctxt;
148+
_csvFeatures = csvFeatures;
149+
_cfgOptimalQuoting = CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING.enabledIn(csvFeatures);
150+
130151
_outputBuffer = ctxt.allocConcatBuffer();
131152
_bufferRecyclable = true;
132153
_outputEnd = _outputBuffer.length;
@@ -145,6 +166,9 @@ public CsvWriter(IOContext ctxt, Writer out,
145166
public CsvWriter(CsvWriter base, CsvSchema newSchema)
146167
{
147168
_ioContext = base._ioContext;
169+
_csvFeatures = base._csvFeatures;
170+
_cfgOptimalQuoting = base._cfgOptimalQuoting;
171+
148172
_outputBuffer = base._outputBuffer;
149173
_bufferRecyclable = base._bufferRecyclable;
150174
_outputEnd = base._outputEnd;
@@ -172,6 +196,14 @@ public CsvWriter withSchema(CsvSchema schema) {
172196
return new CsvWriter(this, schema);
173197
}
174198

199+
public CsvWriter setFeatures(int feat) {
200+
if (feat != _csvFeatures) {
201+
_csvFeatures = feat;
202+
_cfgOptimalQuoting = CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING.enabledIn(feat);
203+
}
204+
return this;
205+
}
206+
175207
/*
176208
/**********************************************************
177209
/* Read-access to output state
@@ -583,17 +615,49 @@ protected boolean _mayNeedQuotes(String value, int length)
583615
if (_cfgQuoteCharacter < 0) {
584616
return false;
585617
}
586-
// let's not bother checking long Strings, just quote already:
618+
// may skip checks unless we want exact checking
619+
if (_cfgOptimalQuoting) {
620+
return _needsQuotingStrict(value);
621+
}
587622
if (length > _cfgMaxQuoteCheckChars) {
588623
return true;
589624
}
590-
for (int i = 0; i < length; ++i) {
625+
return _needsQuotingLoose(value);
626+
}
627+
628+
/**
629+
*<p>
630+
* NOTE: final since checking is not expected to be changed here; override
631+
* calling method (<code>_mayNeedQuotes</code>) instead, if necessary.
632+
*
633+
* @since 2.4
634+
*/
635+
protected final boolean _needsQuotingLoose(String value)
636+
{
637+
for (int i = 0, len = value.length(); i < len; ++i) {
591638
if (value.charAt(i) < _cfgMinSafeChar) {
592639
return true;
593640
}
594641
}
595642
return false;
596643
}
644+
645+
/**
646+
* @since 2.4
647+
*/
648+
protected boolean _needsQuotingStrict(String value)
649+
{
650+
for (int i = 0, len = value.length(); i < len; ++i) {
651+
char c = value.charAt(i);
652+
if (c < _cfgMinSafeChar) {
653+
if (c == _cfgColumnSeparator || c == _cfgQuoteCharacter
654+
|| c == '\r' || c == '\n') {
655+
return true;
656+
}
657+
}
658+
}
659+
return false;
660+
}
597661

598662
protected void _buffer(int index, BufferedValue v)
599663
{

src/test/java/com/fasterxml/jackson/dataformat/csv/TestParser.java

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,21 @@ public void testSimpleExplicit() throws Exception
3232
.addColumn("userImage")
3333
.addColumn("verified")
3434
.build();
35+
ObjectReader r = mapper.reader(schema);
36+
_testSimpleExplicit(r, false);
37+
_testSimpleExplicit(r, true);
38+
}
3539

36-
FiveMinuteUser user = mapper.reader(schema).withType(FiveMinuteUser.class).readValue("Bob,Robertson,MALE,AQIDBAU=,false\n");
40+
private void _testSimpleExplicit(ObjectReader r, boolean useBytes) throws Exception
41+
{
42+
r = r.withType(FiveMinuteUser.class);
43+
FiveMinuteUser user;
44+
final String INPUT = "Bob,Robertson,MALE,AQIDBAU=,false\n";
45+
if (useBytes) {
46+
user = r.readValue(INPUT);
47+
} else {
48+
user = r.readValue(INPUT.getBytes("UTF-8"));
49+
}
3750
assertEquals("Bob", user.firstName);
3851
assertEquals("Robertson", user.lastName);
3952
assertEquals(FiveMinuteUser.Gender.MALE, user.getGender());
@@ -79,7 +92,12 @@ public void testSimpleAsMaps() throws Exception
7992
}
8093

8194
// Test for [Issue#10]
82-
public void testMapsWithLinefeeds() throws Exception
95+
public void testMapsWithLinefeeds() throws Exception {
96+
_testMapsWithLinefeeds(false);
97+
_testMapsWithLinefeeds(true);
98+
}
99+
100+
private void _testMapsWithLinefeeds(boolean useBytes) throws Exception
83101
{
84102
CsvMapper mapper = mapperForCsv();
85103
String CSV = "A,B,C\n"
@@ -91,7 +109,13 @@ public void testMapsWithLinefeeds() throws Exception
91109
CsvSchema cs = CsvSchema.emptySchema().withHeader();
92110
ObjectReader or = mapper.reader(HashMap.class).with(cs);
93111

94-
MappingIterator<Map<String,String>> mi = or.readValues(CSV);
112+
MappingIterator<Map<String,String>> mi;
113+
114+
if (useBytes) {
115+
mi = or.readValues(CSV.getBytes("UTF-8"));
116+
} else {
117+
mi = or.readValues(CSV);
118+
}
95119

96120
assertTrue(mi.hasNext());
97121
Map<String,String> map = mi.nextValue();

src/test/java/com/fasterxml/jackson/dataformat/csv/TestParserQuotes.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
public class TestParserQuotes extends ModuleTestBase
88
{
9-
@JsonPropertyOrder({"age, name"})
9+
@JsonPropertyOrder({"age", "name"})
1010
protected static class AgeName {
1111
public int age;
1212
public String name;
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package com.fasterxml.jackson.dataformat.csv;
2+
3+
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
4+
5+
// Tests for [Issue#26]
6+
public class TestParserStrictQuoting extends ModuleTestBase
7+
{
8+
@JsonPropertyOrder({"a", "b"})
9+
protected static class AB {
10+
public String a, b;
11+
12+
public AB() { }
13+
public AB(String a, String b) {
14+
this.a = a;
15+
this.b = b;
16+
}
17+
}
18+
19+
/*
20+
/**********************************************************************
21+
/* Test methods
22+
/**********************************************************************
23+
*/
24+
25+
public void testStrictQuoting() throws Exception
26+
{
27+
final String NUMS = "12345 6789";
28+
final String LONG = NUMS + NUMS + NUMS + NUMS; // 40 chars should do it
29+
30+
CsvMapper mapper = mapperForCsv();
31+
32+
assertFalse(mapper.getFactory().isEnabled(CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING));
33+
CsvSchema schema = mapper.schemaFor(AB.class).withoutHeader();
34+
35+
final AB input = new AB("x", LONG);
36+
37+
// with non-strict, should quote
38+
String csv = mapper.writer(schema).writeValueAsString(input);
39+
assertEquals(aposToQuotes("x,'"+LONG+"'"), csv.trim());
40+
41+
// should be possible to hot-swap
42+
// and with strict/optimal, no quoting
43+
mapper.configure(CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING, true);
44+
csv = mapper.writer(schema).writeValueAsString(input);
45+
assertEquals(aposToQuotes("x,"+LONG), csv.trim());
46+
}
47+
}

0 commit comments

Comments
 (0)