Skip to content

Commit

Permalink
refactor: split CsvReader into multiple files (#15)
Browse files Browse the repository at this point in the history
* refactor: EncodingDetector and StreamExtensions
* refactor: move logic specific to DataTable to CsvDataTable, and move parsing rules to RecordParser and FieldParser

---------

Co-authored-by: codefactor-io <[email protected]>
  • Loading branch information
Seddryck and code-factor authored Nov 8, 2024
1 parent 889ea31 commit 41addbf
Show file tree
Hide file tree
Showing 16 changed files with 1,430 additions and 1,346 deletions.
21 changes: 11 additions & 10 deletions PocketCsvReader.Benchmark/CustomerRecord.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
using System.Threading.Tasks;

namespace PocketCsvReader.Benchmark;
/// <summary>
/// Customer data holder exposing seven mutable auto-properties.
/// NOTE(review): presumably the row type materialized by the benchmarks in
/// this namespace — confirm against the benchmark code.
/// </summary>
internal class CustomerRecord
{
/// <summary>Customer first name.</summary>
public string Firstname { get; set; }
/// <summary>Customer last name.</summary>
public string Lastname { get; set; }
/// <summary>Customer gender, kept as free text.</summary>
public string Gender { get; set; }
/// <summary>Customer date of birth.</summary>
public DateTime DateOfBirth { get; set; }
/// <summary>Year value. NOTE(review): looks like the order period year — confirm.</summary>
public int Year { get; set; }
/// <summary>Month value, kept as text. NOTE(review): format is not visible here — confirm.</summary>
public string Month { get; set; }
/// <summary>Order total as a decimal amount.</summary>
public decimal TotalOrder { get; set; }
}
/// <summary>
/// Positional record describing one customer; each constructor parameter
/// becomes an init-only property of the same name.
/// </summary>
/// <param name="Firstname">Customer first name.</param>
/// <param name="Lastname">Customer last name.</param>
/// <param name="Gender">Customer gender, kept as free text.</param>
/// <param name="DateOfBirth">Customer date of birth.</param>
/// <param name="Year">Year value.</param>
/// <param name="Month">Month value, kept as text.</param>
/// <param name="TotalOrder">Order total as a decimal amount.</param>
internal record CustomerRecord(
    string Firstname,
    string Lastname,
    string Gender,
    DateTime DateOfBirth,
    int Year,
    string Month,
    decimal TotalOrder);
259 changes: 259 additions & 0 deletions PocketCsvReader.Testing/CsvDataTableTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
using PocketCsvReader;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Diagnostics.CodeAnalysis;
using System.Reflection;

namespace PocketCsvReader.Testing
{
[TestFixture]
public class CsvDataTableTest
{
/// <summary>
/// Loads the embedded resource "{assembly name}.Resources.{filename}.csv",
/// converts it with <c>ToDataTable</c> using a tab-separated, double-quote,
/// CRLF profile, and checks that the table has 14 columns and 21 rows.
/// </summary>
/// <param name="filename">Resource file name without the ".csv" extension.</param>
[Test]
[TestCase("Ansi")]
[TestCase("Utf16-BE")]
[TestCase("Utf16-LE")]
[TestCase("Utf8-BOM")]
[TestCase("Utf8")]
public void ToDataTable_Financial_CorrectRowsColumns(string filename)
{
    var profile = new CsvProfile('\t', '\"', "\r\n", true);
    var reader = new CsvReader(profile);

    var assembly = Assembly.GetExecutingAssembly();
    var resourceName = $"{assembly.GetName().Name}.Resources.{filename}.csv";

    // A missing resource yields null; surface it as a FileNotFoundException.
    using var stream = assembly.GetManifestResourceStream(resourceName)
        ?? throw new FileNotFoundException();

    var table = reader.ToDataTable(stream);

    Assert.That(table.Columns.Count, Is.EqualTo(14));
    Assert.That(table.Rows.Count, Is.EqualTo(21));
}

/// <summary>
/// Checks that records with fewer fields than the first record are padded
/// with the <paramref name="missingCell"/> placeholder.
/// </summary>
/// <param name="text">Raw CSV content written to the in-memory stream.</param>
/// <param name="fieldSeparator">Field separator passed to the profile.</param>
/// <param name="recordSeparator">Record separator passed to the profile.</param>
/// <param name="missingCell">
/// Placeholder passed as the last <c>CsvProfile</c> argument and expected in
/// every cell that is absent from a record.
/// </param>
[Test]
[TestCase("a+b+c#a+b#a#a+b", '+', "#", "?")]
public void Read_CsvWithCsvProfileMissingCell_CorrectResults(string text, char fieldSeparator, string recordSeparator, string missingCell)
{
    // Using declarations dispose the writer and stream even when an assertion throws.
    using var stream = new MemoryStream();
    using var writer = new StreamWriter(stream);
    writer.Write(text);
    writer.Flush();
    stream.Position = 0;

    var profile = new CsvProfile(fieldSeparator, '`', '`', recordSeparator, false, true, 4096, "_", missingCell);
    var reader = new CsvReader(profile);
    var dataTable = reader.ToDataTable(stream);

    // Expected cells are expressed with the parameter, not a hard-coded "?",
    // so the assertions stay valid if another placeholder is supplied.
    var expectedRows = new[]
    {
        new[] { "a", "b", "c" },
        new[] { "a", "b", missingCell },
        new[] { "a", missingCell, missingCell },
        new[] { "a", "b", missingCell },
    };

    for (var row = 0; row < expectedRows.Length; row++)
    {
        for (var column = 0; column < expectedRows[row].Length; column++)
        {
            Assert.That(
                dataTable.Rows[row].ItemArray[column],
                Is.EqualTo(expectedRows[row][column]),
                $"row {row}, column {column}");
        }
    }
}

/// <summary>
/// Checks that empty fields in the second record are replaced by the
/// <paramref name="emptyCell"/> placeholder while the first record is read
/// unchanged as "a", "b", "c".
/// </summary>
/// <param name="text">Raw CSV content written to the in-memory stream.</param>
/// <param name="fieldSeparator">Field separator passed to the profile.</param>
/// <param name="recordSeparator">Record separator passed to the profile.</param>
/// <param name="emptyCell">Placeholder passed to the profile for empty cells.</param>
/// <param name="expected">Expected cell values of the second record, in column order.</param>
[Test]
[TestCase("a+b+c#a++c", '+', "#", "?", "a", "?", "c")]
[TestCase("a+b+c#+b+c", '+', "#", "?", "?", "b", "c")]
[TestCase("a+b+c#+b+", '+', "#", "?", "?", "b", "?")]
public void Read_CsvWithCsvProfileEmptyCell_CorrectResults(string text, char fieldSeparator, string recordSeparator, string emptyCell, params string[] expected)
{
    // Using declarations dispose the writer and stream even when an assertion throws.
    using var stream = new MemoryStream();
    using var writer = new StreamWriter(stream);
    writer.Write(text);
    writer.Flush();
    stream.Position = 0;

    var profile = new CsvProfile(fieldSeparator, '`', '`', recordSeparator, false, true, 4096, emptyCell, "_");
    var reader = new CsvReader(profile);
    var dataTable = reader.ToDataTable(stream);

    Assert.That(dataTable.Rows[0].ItemArray[0], Is.EqualTo("a"));
    Assert.That(dataTable.Rows[0].ItemArray[1], Is.EqualTo("b"));
    Assert.That(dataTable.Rows[0].ItemArray[2], Is.EqualTo("c"));

    // Iterate over every supplied expectation instead of a hard-coded count of 3.
    for (var i = 0; i < expected.Length; i++)
    {
        Assert.That(dataTable.Rows[1].ItemArray[i], Is.EqualTo(expected[i]), $"column {i}");
    }
}

/// <summary>
/// Reads four-record CSV content with several reader buffer sizes and checks
/// the row count, the column count, and that every cell is either three
/// characters long or one of the "(empty)" / "(null)" placeholders.
/// </summary>
/// <param name="text">Raw CSV content written to the in-memory stream.</param>
/// <param name="bufferSize">Value passed as the second <c>CsvReader</c> constructor argument.</param>
/// <param name="fieldCount">Expected number of columns in the resulting table.</param>
[Test]
[TestCase("abc\r\ndef\r\nghl\r\nijk", 1, 1)]
[TestCase("abc\r\ndef\r\nghl\r\nijk", 17, 1)]
[TestCase("abc\r\ndef\r\nghl\r\nijk", 18, 1)]
[TestCase("abc\r\ndef\r\nghl\r\nijk", 19, 1)]
[TestCase("abc\r\ndef\r\nghl\r\nijk", 512, 1)]
[TestCase("abc;xyz\r\ndef;xyz\r\nghl\r\n;ijk", 1, 2)]
[TestCase("abc;xyz\r\ndef;xyz\r\nghl\r\n;ijk", 512, 2)]
[TestCase("\"abc\";\"xyz\"\r\n\"def\";\"xyz\"\r\n\"ghl\"\r\n;\"ijk\"", 512, 2)]
[TestCase("abc;\"xyz\"\r\n\"def\";xyz\r\n\"ghl\"\r\n;\"ijk\"", 512, 2)]
public void Read_Csv_CorrectResult(string text, int bufferSize, int fieldCount)
{
    // Using declarations dispose the writer and stream even when an assertion throws.
    using var stream = new MemoryStream();
    using var writer = new StreamWriter(stream);
    writer.Write(text);
    writer.Flush();
    stream.Position = 0;

    var reader = new CsvReader(new CsvProfile(';', '\"', "\r\n", false, false, 4096, "(empty)", "(null)"), bufferSize);
    var dataTable = reader.ToDataTable(stream);

    Assert.That(dataTable.Rows, Has.Count.EqualTo(4));
    Assert.That(dataTable.Columns, Has.Count.EqualTo(fieldCount));

    foreach (DataRow row in dataTable.Rows)
    {
        foreach (var cell in row.ItemArray)
        {
            Assert.That(cell!.ToString(), Has.Length.EqualTo(3).Or.EqualTo("(empty)").Or.EqualTo("(null)"));
        }
    }

    Assert.That(dataTable.Rows[0][0], Is.EqualTo("abc"));
    if (dataTable.Columns.Count == 2)
    {
        Assert.That(dataTable.Rows[0][1], Is.EqualTo("xyz"));
    }
}

/// <summary>
/// Reads a single record whose fields are wrapped in single quotes and checks
/// that the middle field, whether written as '' or left blank, is returned as
/// the "foo" placeholder configured in the profile.
/// </summary>
/// <param name="text">Raw CSV content written to the in-memory stream.</param>
/// <param name="columnCount">Expected number of columns in the resulting table.</param>
[Test]
[TestCase("'azerty';'';'alpha'", 3)]
[TestCase("'azerty';;'alpha'", 3)]
public void Read_CsvWithTextQualifier_CorrectResult(string text, int columnCount)
{
    // Using declarations dispose the writer and stream even when an assertion throws.
    using var stream = new MemoryStream();
    using var writer = new StreamWriter(stream);
    writer.Write(text);
    writer.Flush();
    stream.Position = 0;

    var reader = new CsvReader(new CsvProfile(';', '\'', "\r\n", false, false, 4096, "foo", "(null)"));
    var dataTable = reader.ToDataTable(stream);

    Assert.That(dataTable.Columns, Has.Count.EqualTo(columnCount));
    Assert.That(dataTable.Rows[0][0], Is.EqualTo("azerty"));
    Assert.That(dataTable.Rows[0][1], Is.EqualTo("foo"));
    Assert.That(dataTable.Rows[0][2], Is.EqualTo("alpha"));
}

/// <summary>
/// Feeds content in which one record has more fields than the first record
/// and checks that <c>ToDataTable</c> throws an
/// <see cref="InvalidDataException"/> whose message contains
/// "record {rowNumber + 1} " and "{moreField} more".
/// </summary>
/// <param name="text">Raw CSV content written to the in-memory stream.</param>
/// <param name="rowNumber">Zero-based index of the offending record; the message is expected to report it plus one.</param>
/// <param name="moreField">Number of surplus fields expected in the message.</param>
[Test]
[TestCase("a;b;c\r\nd;e;f;g\r\n", 1, 1)]
[TestCase("a;b;c\r\nd;e;f\r\ng;h;i;j\r\n", 2, 1)]
[TestCase("a;b;c\r\nd;e;f\r\ng;h;i;j;k\r\n", 2, 2)]
public void Read_MoreFieldThanExpected_ExceptionMessage(string text, int rowNumber, int moreField)
{
    using var stream = new MemoryStream();
    using var writer = new StreamWriter(stream);
    writer.Write(text);
    writer.Flush();
    stream.Position = 0;

    var reader = new CsvReader(CsvProfile.SemiColumnDoubleQuote, 1024);
    var expectedRecordFragment = string.Format("record {0} ", rowNumber + 1);
    var expectedSurplusFragment = string.Format("{0} more", moreField);

    var exception = Assert.Throws<InvalidDataException>(() => reader.ToDataTable(stream));
    var message = exception!.Message;

    Assert.That(message, Does.Contain(expectedRecordFragment));
    Assert.That(message, Does.Contain(expectedSurplusFragment));
}

/// <summary>
/// Reads "a;;c" with the <c>SemiColumnDoubleQuote</c> profile and checks that
/// the middle cell of the first row equals <see cref="string.Empty"/>.
/// </summary>
[Test]
public void Read_EmptyValue_MatchWithEmpty()
{
    using var stream = new MemoryStream();
    using var writer = new StreamWriter(stream);
    writer.Write("a;;c");
    writer.Flush();
    stream.Position = 0;

    var reader = new CsvReader(CsvProfile.SemiColumnDoubleQuote);
    var table = reader.ToDataTable(stream);

    var middleCell = table.Rows[0][1];
    Assert.That(middleCell, Is.EqualTo(string.Empty));
}

/// <summary>
/// Reads three records where the second one has only two fields and checks
/// that the absent third cell is reported as the "(null)" placeholder
/// configured in the profile.
/// </summary>
[Test]
public void Read_MissingValue_MatchWithNullValue()
{
    const string content = "a;b;c\r\na;b\r\na;b;c";

    using var stream = new MemoryStream();
    using var writer = new StreamWriter(stream);
    writer.Write(content);
    writer.Flush();
    stream.Position = 0;

    var profile = new CsvProfile(';', '"', "\r\n", false, true, 512, string.Empty, "(null)");
    var table = new CsvReader(profile).ToDataTable(stream);

    var absentCell = table.Rows[1][2];
    Assert.That(absentCell, Is.EqualTo("(null)"));
}

/// <summary>
/// Reads content that mixes data records with lines starting with '#', using
/// a dialect whose <c>CommentChar</c> is '#', and checks that the resulting
/// table has exactly 2 rows and 3 columns.
/// </summary>
/// <param name="content">Raw CSV content written to the in-memory stream.</param>
[Test]
[TestCase("a;b;c\r\n1;2;3")]
[TestCase("a;b;c\r\n1;2;3\r\n")]
[TestCase("a;b;c\r\n#\r\n1;2;3")]
[TestCase("a;b;c\r\n#x;y;z\r\n1;2;3")]
[TestCase("a;b;c\r\n1;2;3\r\n#x;y;z")]
[TestCase("#x;y;z\r\na;b;c\r\n1;2;3")]
[TestCase("#x;y;z\r\n#x;y;z\r\na;b;c\r\n1;2;3")]
[TestCase("#x;y;z\r\n#x;y;z\r\na;b;c\r\n1;2;3\r\n#1;2;3")]
public void Read_Comment_CommentedLinesSkipped(string content)
{
    using var stream = new MemoryStream();
    using var writer = new StreamWriter(stream);
    writer.Write(content);
    writer.Flush();
    stream.Position = 0;

    var dialect = new CsvDialectDescriptor
    {
        Header = false,
        Delimiter = ';',
        CommentChar = '#',
        DoubleQuote = false,
    };
    var reader = new CsvReader(new CsvProfile(dialect));
    var table = reader.ToDataTable(stream);

    Assert.That(table.Rows.Count, Is.EqualTo(2));
    Assert.That(table.Columns.Count, Is.EqualTo(3));
}
}
}
24 changes: 1 addition & 23 deletions PocketCsvReader.Testing/CsvReaderTest.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using PocketCsvReader;
using PocketCsvReader;
using NUnit.Framework;
using System;
using System.Collections.Generic;
Expand All @@ -14,27 +14,5 @@ namespace PocketCsvReader.Testing
[TestFixture]
public class CsvReaderTest
{
/// <summary>
/// Opens the embedded resource "{assembly name}.Resources.{filename}.csv",
/// reads it into a data table with a tab-separated, double-quote, CRLF
/// profile, and verifies the table has 14 columns and 21 rows.
/// </summary>
/// <param name="filename">Resource file name without the ".csv" extension.</param>
[Test]
[TestCase("Ansi")]
[TestCase("Utf16-BE")]
[TestCase("Utf16-LE")]
[TestCase("Utf8-BOM")]
[TestCase("Utf8")]
public void ToDataTable_Financial_CorrectRowsColumns(string filename)
{
    var reader = new CsvReader(new CsvProfile('\t', '\"', "\r\n", true));

    var testAssembly = Assembly.GetExecutingAssembly();
    var resourcePath = $"{testAssembly.GetName().Name}.Resources.{filename}.csv";

    // GetManifestResourceStream returns null for an unknown resource name.
    using var stream = testAssembly.GetManifestResourceStream(resourcePath)
        ?? throw new FileNotFoundException();

    var result = reader.ToDataTable(stream);

    Assert.That(result.Columns.Count, Is.EqualTo(14));
    Assert.That(result.Rows.Count, Is.EqualTo(21));
}
}
}
77 changes: 77 additions & 0 deletions PocketCsvReader.Testing/EncodingDetectorTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
using PocketCsvReader;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Text;
using System.Reflection;
using Moq;

namespace PocketCsvReader.Testing
{
[TestFixture]
public class EncodingDetectorTest
{
/// <summary>
/// Runs <c>EncodingDetector.GetStreamEncoding</c> on the embedded resource
/// "{assembly name}.Resources.{filename}.csv" and checks that the detected
/// encoding's <c>BodyName</c> matches the file name (compared through
/// <c>EncodingComparer</c>) and that the reported BOM byte count matches
/// <paramref name="BomLength"/>.
/// </summary>
/// <param name="filename">Resource file name without the ".csv" extension.</param>
/// <param name="BomLength">Expected value of <c>BomBytesCount</c> in the result.</param>
[Test]
[TestCase("Utf16-BE", 2)]
[TestCase("Utf16-LE", 2)]
[TestCase("Utf8-BOM", 3)]
[TestCase("Utf8", 0)]
public void GetStreamEncoding_Financial_CorrectEncodingInfo(string filename, int BomLength)
{
    var assembly = Assembly.GetExecutingAssembly();
    var resourceName = $"{assembly.GetName().Name}.Resources.{filename}.csv";

    // GetManifestResourceStream returns null for an unknown resource name.
    using var stream = assembly.GetManifestResourceStream(resourceName)
        ?? throw new FileNotFoundException();

    var detector = new EncodingDetector();
    var detection = detector.GetStreamEncoding(stream);

    Assert.That(detection.Encoding.BodyName, Is.EqualTo(filename).Using(new EncodingComparer()));
    Assert.That(detection.BomBytesCount, Is.EqualTo(BomLength));
}

/// <summary>
/// Compares encoding labels loosely: the "-LE" and "-BOM" suffixes are
/// dropped, the text is lower-cased with the invariant culture and remaining
/// hyphens are removed, so that "Utf16-LE" matches "utf-16" and "Utf8-BOM"
/// matches "utf-8".
/// </summary>
public class EncodingComparer : IEqualityComparer<string>
{
    /// <summary>
    /// Returns true when both labels normalize to the same text, or when both
    /// are null; returns false when exactly one of them is null.
    /// </summary>
    public bool Equals(string? x, string? y)
    {
        if (x is null || y is null)
            return x is null && y is null;

        return Normalize(x) == Normalize(y);
    }

    /// <summary>
    /// Hashes the normalized label so that labels considered equal by
    /// <see cref="Equals(string?, string?)"/> always share a hash code.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="obj"/> is null.</exception>
    public int GetHashCode(string obj)
    {
        if (obj is null)
            throw new ArgumentNullException(nameof(obj));

        return Normalize(obj).GetHashCode();
    }

    // Single normalization shared by Equals and GetHashCode so both agree.
    // The suffix removal runs before lower-casing and is therefore case-sensitive.
    private static string Normalize(string s)
        => s.Replace("-LE", "").Replace("-BOM", "").ToLowerInvariant().Replace("-", "");
}

/// <summary>
/// Encodings used as the test-case source for the encoding round-trip test:
/// UTF-16 (both byte orders), UTF-8 and UTF-32 (both byte orders).
/// </summary>
private static readonly Encoding[] Encodings = new Encoding[]
{
    Encoding.Unicode,
    Encoding.BigEndianUnicode,
    Encoding.UTF8,
    Encoding.UTF32,
    // No static property exposes big-endian UTF-32, so it is built explicitly.
    new UTF32Encoding(bigEndian: true, byteOrderMark: true),
};

/// <summary>
/// Writes a small three-line CSV to an in-memory stream with the given
/// encoding, then checks that <c>EncodingDetector.GetStreamEncoding</c>
/// reports that same encoding.
/// NOTE(review): the method name mentions ToDataReader and row/column counts,
/// but the body only exercises encoding detection — consider renaming.
/// </summary>
/// <param name="encoding">Encoding used by the writer and expected from the detector.</param>
[TestCaseSource(nameof(Encodings))]
public void ToDataReader_Financial_CorrectRowsColumns(Encoding encoding)
{
    const string content = "A,B,C\r\n1,2,3\r\n4,5,6\r\n";

    using var stream = new MemoryStream();
    using var writer = new StreamWriter(stream, encoding);
    writer.Write(content);
    writer.Flush();
    stream.Position = 0;

    var detection = new EncodingDetector().GetStreamEncoding(stream);

    Assert.That(detection.Encoding, Is.EqualTo(encoding));
}
}
}
Loading

0 comments on commit 41addbf

Please sign in to comment.