Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support escaping delimiter with a definable character #25

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions Src/KBCsv.UnitTests/CsvReaderFixture.cs
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,53 @@ public void value_delimiter_cannot_be_set_if_disposed()
Assert.Throws<ObjectDisposedException>(() => reader.ValueDelimiter = '|');
}

[Fact]
public void escape_character_defaults_to_null()
{
    // a reader that has not been configured is expected to report no escape character
    using (var csvReader = CsvReader.FromCsvString(string.Empty))
    {
        char? escapeCharacter = csvReader.EscapeCharacter;

        Assert.Null(escapeCharacter);
    }
}

[Fact]
public void escape_character_cannot_be_gotten_if_disposed()
{
    var disposedReader = CsvReader.FromCsvString(string.Empty);
    disposedReader.Dispose();

    // the assignment exists only so that the property getter is evaluated inside the lambda
    var sink = ' ';
    Assert.Throws<ObjectDisposedException>(() => sink = disposedReader.EscapeCharacter.Value);
}

[Fact]
public void escape_character_can_be_set()
{
    // round-trip: the value written to the property is the value read back
    var backslash = '\\';

    using (var csvReader = CsvReader.FromCsvString(string.Empty))
    {
        csvReader.EscapeCharacter = backslash;

        Assert.Equal(backslash, csvReader.EscapeCharacter);
    }
}

[Fact]
public void escape_character_can_be_set_to_null()
{
    // explicitly assigning null must be accepted and must read back as null
    using (var csvReader = CsvReader.FromCsvString(string.Empty))
    {
        csvReader.EscapeCharacter = null;

        char? escapeCharacter = csvReader.EscapeCharacter;
        Assert.Null(escapeCharacter);
    }
}

[Fact]
public void escape_character_cannot_be_set_if_disposed()
{
    var disposedReader = CsvReader.FromCsvString(string.Empty);
    disposedReader.Dispose();

    // writing the property on a disposed reader is expected to throw
    Assert.Throws<ObjectDisposedException>(() => disposedReader.EscapeCharacter = '\\');
}


[Fact]
public void header_record_defaults_to_null()
{
Expand Down Expand Up @@ -1315,6 +1362,44 @@ public async Task read_data_records_async_returns_read_only_records()
}
}

[Fact]
public async Task read_data_records_async_with_escape_character()
{
    // the middle value is delimited and contains backslash-escaped delimiters
    var csv = @"first,""\""second\"""",third";

    using (var csvReader = CsvReader.FromCsvString(csv))
    {
        csvReader.EscapeCharacter = '\\';

        var records = new DataRecord[1];
        var readCount = await csvReader.ReadDataRecordsAsync(records, 0, records.Length);
        Assert.Equal(1, readCount);

        // the escape characters are dropped and the escaped delimiters are kept as part of the value
        var record = records[0];
        Assert.Equal(3, record.Count);
        Assert.Equal("first", record[0]);
        Assert.Equal("\"second\"", record[1]);
        Assert.Equal("third", record[2]);
    }
}

[Fact]
public async Task read_data_records_async_with_no_escape_character()
{
    // same shape of input as the escape-character test, but no escape character is configured on the reader
    var csv = @"first,\""second\"",third";

    using (var csvReader = CsvReader.FromCsvString(csv))
    {
        var records = new DataRecord[1];
        var readCount = await csvReader.ReadDataRecordsAsync(records, 0, records.Length);
        Assert.Equal(1, readCount);

        // the backslashes are expected to survive as ordinary characters
        var record = records[0];
        Assert.Equal(3, record.Count);
        Assert.Equal("first", record[0]);
        Assert.Equal("\\second\\", record[1]);
        Assert.Equal("third", record[2]);
    }
}

[Fact]
public void dispose_disposes_of_underlying_text_reader()
{
Expand Down
24 changes: 24 additions & 0 deletions Src/KBCsv/CsvReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,30 @@ public char? ValueDelimiter
}
}

/// <summary>
/// Gets or sets the character used to escape other characters within values (normally the value delimiter).
/// </summary>
/// <remarks>
/// <para>
/// This property specifies which character, if any, is used to escape characters within the CSV. It is normally only required when the CSV does not
/// follow RFC 4180, e.g. a delimiter inside a value is written as <c>\"</c> rather than <c>""</c>.
/// </para>
/// <para>
/// The value is stored on, and read back from, the underlying parser. A value of <see langword="null"/> may be assigned.
/// </para>
/// </remarks>
/// <exception cref="ObjectDisposedException">
/// The property was read or written after this reader was disposed.
/// </exception>
public char? EscapeCharacter
{
get
{
// the disposal check must run before the parser is touched
this.EnsureNotDisposed();
return this.parser.EscapeCharacter;
}

set
{
// the disposal check must run before the parser is touched
this.EnsureNotDisposed();
this.parser.EscapeCharacter = value;
}
}

/// <summary>
/// Gets or sets the header record.
/// </summary>
Expand Down
54 changes: 46 additions & 8 deletions Src/KBCsv/Internal/CsvParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ internal sealed partial class CsvParser
private bool preserveTrailingWhiteSpace;
private char valueSeparator;
private char? valueDelimiter;
private char? escapeCharacter;

public CsvParser(TextReader reader)
{
Expand All @@ -33,6 +34,7 @@ public CsvParser(TextReader reader)
this.valueBuilder = new ValueBuilder(this);
this.valueSeparator = Constants.DefaultValueSeparator;
this.valueDelimiter = Constants.DefaultValueDelimiter;
this.escapeCharacter = null;

this.UpdateSpecialCharacterMask();
}
Expand Down Expand Up @@ -78,6 +80,18 @@ public char? ValueDelimiter
}
}

/// <summary>
/// Gets or sets the optional escape character held by this parser.
/// </summary>
public char? EscapeCharacter
{
    get
    {
        return this.escapeCharacter;
    }

    set
    {
        // validation is performed first so that the field is left untouched if the helper throws
        var matchesValueSeparator = value == this.valueSeparator;
        exceptionHelper.ResolveAndThrowIf(matchesValueSeparator, "valueSeparatorAndEscapeCannotMatch");

        this.escapeCharacter = value;

        // refresh the special character mask now that the escape character has changed
        this.UpdateSpecialCharacterMask();
    }
}

public bool HasMoreRecords
{
get
Expand Down Expand Up @@ -106,6 +120,7 @@ public int SkipRecords(int skip)

var skipped = 0;
var delimited = false;
var escaped = false;

while (skipped < skip)
{
Expand All @@ -117,13 +132,24 @@ public int SkipRecords(int skip)
{
var ch = this.buffer[this.bufferIndex++];

if (!this.IsPossiblySpecialCharacter(ch))
if (escaped)
{
// we had an escape character previous, just insert this as whatever character it is
this.valueBuilder.NotifyPreviousCharIncluded(true);
escaped = false;
}
else if (ch == this.escapeCharacter)
{
// User specified escape character, do not insert into value, next read character is not special
this.valueBuilder.NotifyPreviousCharExcluded();
escaped = true;
}
else if (!this.IsPossiblySpecialCharacter(ch))
{
// if it's definitely not a special character, then we can just continue on with the loop
continue;
}

if (!delimited)
else if (!delimited)
{
if (ch == this.valueDelimiter)
{
Expand Down Expand Up @@ -200,6 +226,7 @@ public int ParseRecords(HeaderRecord headerRecord, DataRecord[] buffer, int offs
var ch = char.MinValue;
var recordsParsed = 0;
var delimited = false;
var escaped = false;

for (var i = offset; i < offset + count; ++i)
{
Expand All @@ -209,14 +236,25 @@ public int ParseRecords(HeaderRecord headerRecord, DataRecord[] buffer, int offs
{
ch = this.buffer[this.bufferIndex++];

if (!this.IsPossiblySpecialCharacter(ch))
if (escaped)
{
// we had an escape character previous, just insert this as whatever character it is
this.valueBuilder.NotifyPreviousCharIncluded(true);
escaped = false;
}
else if (ch == this.escapeCharacter)
{
// User specified escape character, do not insert into value, next read character is not special
this.valueBuilder.NotifyPreviousCharExcluded();
escaped = true;
}
else if (!this.IsPossiblySpecialCharacter(ch))
{
// if it's definitely not a special character, then we can just append it and continue on with the loop
this.valueBuilder.NotifyPreviousCharIncluded(delimited);
continue;
}

if (!delimited)
}
else if (!delimited)
{
if (ch == this.valueSeparator)
{
Expand All @@ -227,7 +265,7 @@ public int ParseRecords(HeaderRecord headerRecord, DataRecord[] buffer, int offs
this.valueBuilder.NotifyPreviousCharExcluded();
delimited = true;

// since we're in a delimited area, the only special character is the value delimiter
// since we're in a delimited area, the only special character is the value delimiter, or escape character
this.activeSpecialCharacterMask = this.valueDelimiter.Value;
}
else if (ch == Constants.CR)
Expand Down
36 changes: 30 additions & 6 deletions Src/KBCsv/Internal/CsvParser_Async.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public async Task<int> SkipRecordsAsync(int skip)

var skipped = 0;
var delimited = false;
var escaped = false;

while (skipped < skip)
{
Expand All @@ -26,13 +27,24 @@ public async Task<int> SkipRecordsAsync(int skip)
{
var ch = this.buffer[this.bufferIndex++];

if (!this.IsPossiblySpecialCharacter(ch))
if (escaped)
{
// we had an escape character previous, just insert this as whatever character it is
this.valueBuilder.NotifyPreviousCharIncluded(true);
escaped = false;
}
else if (ch == this.escapeCharacter)
{
// User specified escape character, do not insert into value, next read character is not special
this.valueBuilder.NotifyPreviousCharExcluded();
escaped = true;
}
else if (!this.IsPossiblySpecialCharacter(ch))
{
// if it's definitely not a special character, then we can just continue on with the loop
continue;
}

if (!delimited)
else if (!delimited)
{
if (ch == this.valueDelimiter)
{
Expand Down Expand Up @@ -109,6 +121,7 @@ public async Task<int> ParseRecordsAsync(HeaderRecord headerRecord, DataRecord[]
var ch = char.MinValue;
var recordsParsed = 0;
var delimited = false;
var escaped = false;

for (var i = offset; i < offset + count; ++i)
{
Expand All @@ -118,14 +131,25 @@ public async Task<int> ParseRecordsAsync(HeaderRecord headerRecord, DataRecord[]
{
ch = this.buffer[this.bufferIndex++];

if (!this.IsPossiblySpecialCharacter(ch))
if (escaped)
{
// we had an escape character previous, just insert this as whatever character it is
this.valueBuilder.NotifyPreviousCharIncluded(true);
escaped = false;
}
else if (ch == this.escapeCharacter)
{
// User specified escape character, do not insert into value, next read character is not special
this.valueBuilder.NotifyPreviousCharExcluded();
escaped = true;
}
else if (!this.IsPossiblySpecialCharacter(ch))
{
// if it's definitely not a special character, then we can just append it and continue on with the loop
this.valueBuilder.NotifyPreviousCharIncluded(delimited);
continue;
}

if (!delimited)
else if (!delimited)
{
if (ch == this.valueSeparator)
{
Expand Down