diff --git a/.gitignore b/.gitignore index 3e759b7..5d20358 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,9 @@ bld/ [Oo]bj/ [Ll]og/ +# Performance checks +[Nn]u[Gg]etPackages/ + # Visual Studio 2015/2017 cache/options directory .vs/ # Uncomment if you have tasks that create the project's static files in wwwroot diff --git a/Directory.Build.props b/Directory.Build.props index 6b74bd4..25d7ada 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -1,8 +1,10 @@ - + true false + false + false portable diff --git a/PocketCsvReader.Benchmark/PocketCsvReader.Benchmark.csproj b/PocketCsvReader.Benchmark/PocketCsvReader.Benchmark.csproj index bc02360..2a58bcb 100644 --- a/PocketCsvReader.Benchmark/PocketCsvReader.Benchmark.csproj +++ b/PocketCsvReader.Benchmark/PocketCsvReader.Benchmark.csproj @@ -19,5 +19,7 @@ runtime; build; native; contentfiles; analyzers - + + true + diff --git a/PocketCsvReader.Benchmark/Program.cs b/PocketCsvReader.Benchmark/Program.cs index 81bde37..61e99f5 100644 --- a/PocketCsvReader.Benchmark/Program.cs +++ b/PocketCsvReader.Benchmark/Program.cs @@ -10,6 +10,6 @@ public class Program { public static void Main() { - var summary = BenchmarkRunner.Run(); + var summary = BenchmarkRunner.Run(); } } diff --git a/PocketCsvReader.Benchmark/ToDataReader.cs b/PocketCsvReader.Benchmark/ToDataReader.cs new file mode 100644 index 0000000..4feb04d --- /dev/null +++ b/PocketCsvReader.Benchmark/ToDataReader.cs @@ -0,0 +1,156 @@ +using System; +using System.Diagnostics; +using System.Formats.Asn1; +using System.Globalization; +using System.IO; +using System.Reflection; +using System.Runtime.Loader; +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Configs; +using BenchmarkDotNet.Environments; +using BenchmarkDotNet.Jobs; +using Bogus; + +namespace PocketCsvReader.Benchmark; + +[MemoryDiagnoser] +[Config(typeof(CustomConfig))] +public class ToDataReader +{ + private readonly string _filePath = "data/1MBFile.csv"; + + [ParamsSource(nameof(Versions))] + public string VersionPath { get; set; } = string.Empty; + + public static IEnumerable Versions => Directory.GetDirectories(@"C:\Users\cedri\Projects\PocketCsvReader\NuGetPackages\"); + + private Assembly? _csvAssembly; + + //[Params(16_300, 163_000, 1_630_000)] + //[Params(16_300, 163_000)] + //[Params(16_300)] + [Params(1_630_000)] + public int recordCount; + + [GlobalSetup] + public void Setup() + { + if (!Directory.Exists("data")) + Directory.CreateDirectory("data"); + + var faker = new Faker() + .CustomInstantiator(static f => new CustomerRecord( + f.Name.FirstName(), + f.Name.LastName(), + f.PickRandom(new[] { "Male", "Female" }), + f.Date.Past(50, DateTime.Now.AddYears(-18)), + f.Date.Recent(365).Year, + f.Date.Month().ToString(CultureInfo.CurrentCulture), + f.Finance.Amount(50, 500) + )) + .RuleFor(p => p.Year, (f, p) => p.DateOfBirth.Year) + .RuleFor(p => p.Month, (f, p) => p.DateOfBirth.ToString("MMMM", CultureInfo.CurrentCulture)); + + + // Generate the list of records + var records = faker.Generate(recordCount); + + // Write the data to CSV + using (var writer = new StreamWriter(_filePath)) + { + foreach (var record in records) + writer.WriteLine($"{record.Firstname},{record.Lastname},{record.Gender},{record.DateOfBirth},{record.Year},{record.Month},{record.TotalOrder}"); + } + + Console.WriteLine($"CSV file generated at: {_filePath}"); + Console.WriteLine($"CSV file generated with size: {new FileInfo(_filePath).Length}"); + } + + [Benchmark] + public void ReadCsvFile() + { + _csvAssembly = LoadPocketCsvReader(VersionPath); + ReadFile(_filePath, _csvAssembly!); + } + + + private void LogResults(string version, long memoryUsed, long workingSetUsed) + { + string logFile = "BenchmarkResults.txt"; + File.AppendAllText(logFile, $"Version: {version}, Memory Used (GC): {memoryUsed}, Memory Used (Working Set): {workingSetUsed}{Environment.NewLine}"); + } + + private void ReadFile(string filePath, Assembly csvAssembly) + { + var csvReaderType = csvAssembly.GetType("PocketCsvReader.CsvReader")!; + var csvProfileType = csvAssembly.GetType("PocketCsvReader.CsvProfile")!; + + var csvProfile = Activator.CreateInstance(csvProfileType, ',', '\"', "\r\n", false); + dynamic csvReader = Activator.CreateInstance(csvReaderType, csvProfile)!; + + using (var stream = new FileStream(filePath, FileMode.Open)) + { + using var reader = csvReader.ToDataReader(stream); + while (reader.Read()) + { + // Do nothing + } + } + } + + private void MeasureMemory(Action action, string version) + { + GC.Collect(); + GC.WaitForPendingFinalizers(); + GC.Collect(); + + long memoryBefore = GC.GetTotalMemory(true); + var process = Process.GetCurrentProcess(); + long workingSetBefore = process.WorkingSet64; + + action(); // Run the CSV read action + + process.Refresh(); + long workingSetAfter = process.WorkingSet64; + long memoryAfter = GC.GetTotalMemory(false); + + long gcMemoryUsed = memoryAfter - memoryBefore; + long workingSetMemoryUsed = workingSetAfter - workingSetBefore; + + Console.WriteLine($"Memory Used (GC): {gcMemoryUsed} bytes"); + Console.WriteLine($"Memory Used (Working Set): {workingSetMemoryUsed} bytes"); + + LogResults(version, gcMemoryUsed, workingSetMemoryUsed); + } + + private Assembly LoadPocketCsvReader(string versionPath) + { + string dllPath = Path.Combine(versionPath, "PocketCsvReader.dll"); + if (!File.Exists(dllPath)) + { + throw new FileNotFoundException($"DLL not found: {dllPath}"); + } + + // Use a custom AssemblyLoadContext to load the assembly + var context = new AssemblyLoadContext("PocketCsvReaderContext", isCollectible: true); + return context.LoadFromAssemblyPath(dllPath); + } + + private class CustomConfig : ManualConfig + { + public CustomConfig() + { + foreach (var versionPath in ToDataReader.Versions) + { + var versionName = Path.GetFileName(versionPath); + + // Create a specific job for each version + AddJob(Job.Default + .WithRuntime(CoreRuntime.Core80) + .WithWarmupCount(1) // 1 warm-up iteration + .WithIterationCount(5) // 5 actual iterations + ); // Identify the job by version + } + } + } +} diff --git a/PocketCsvReader.Benchmark/ToDataTable.cs b/PocketCsvReader.Benchmark/ToDataTable.cs deleted file mode 100644 index 7d565bc..0000000 --- a/PocketCsvReader.Benchmark/ToDataTable.cs +++ /dev/null @@ -1,94 +0,0 @@ -using System.Diagnostics; -using System.Formats.Asn1; -using System.Globalization; -using BenchmarkDotNet.Attributes; -using BenchmarkDotNet.Jobs; -using Bogus; - -namespace PocketCsvReader.Benchmark; - -[SimpleJob(RuntimeMoniker.Net80)] -[MemoryDiagnoser] -public class ToDataTable -{ - private readonly string _filePath1MB = "data/1MBFile.csv"; - - //[Params(16_300, 163_000, 1_630_000)] - [Params(16_300, 163_000)] - //[Params(16_300)] - public int recordCount; - - [GlobalSetup] - public void Setup() - { - if (!Directory.Exists("data")) - Directory.CreateDirectory("data"); - - var faker = new Faker() - .CustomInstantiator(static f => new CustomerRecord( - f.Name.FirstName(), - f.Name.LastName(), - f.PickRandom(new[] { "Male", "Female" }), - f.Date.Past(50, DateTime.Now.AddYears(-18)), - f.Date.Recent(365).Year, - f.Date.Month().ToString(CultureInfo.CurrentCulture), - f.Finance.Amount(50, 500) - )) - .RuleFor(p => p.Year, (f, p) => p.DateOfBirth.Year) - .RuleFor(p => p.Month, (f, p) => p.DateOfBirth.ToString("MMMM", CultureInfo.CurrentCulture)); - - - // Generate the list of records - var records = faker.Generate(recordCount); - - // Write the data to CSV - using (var writer = new StreamWriter(_filePath1MB)) - { - foreach (var record in records) - writer.WriteLine($"{record.Firstname},{record.Lastname},{record.Gender},{record.DateOfBirth},{record.Year},{record.Month},{record.TotalOrder}"); - } - - Console.WriteLine($"CSV file generated at: {_filePath1MB}"); - Console.WriteLine($"CSV file generated with size: {new FileInfo(_filePath1MB).Length}"); - } - - [Benchmark] - public void Read1MBFile() - { - MeasureMemory(() => ReadFile(_filePath1MB)); - } - - private void ReadFile(string filePath) - { - using (var stream = new FileStream(filePath, FileMode.Open)) - { - // Assume PocketCsvReader takes a StreamReader or stream as input - var csvReader = new CsvReader(new CsvProfile(',', '\"', "\r\n", false)); - var reader = csvReader.ToDataReader(stream); - while (reader.Read()) - { - // Do nothing - } - } - } - - private void MeasureMemory(Action action) - { - GC.Collect(); - GC.WaitForPendingFinalizers(); - GC.Collect(); - - long memoryBefore = GC.GetTotalMemory(true); - var process = Process.GetCurrentProcess(); - long workingSetBefore = process.WorkingSet64; - - action(); // Run the CSV read action - - process.Refresh(); - long workingSetAfter = process.WorkingSet64; - long memoryAfter = GC.GetTotalMemory(false); - - Console.WriteLine($"Memory Used (GC): {memoryAfter - memoryBefore} bytes"); - Console.WriteLine($"Memory Used (Working Set): {workingSetAfter - workingSetBefore} bytes"); - } -} diff --git a/PocketCsvReader.Profiler/PocketCsvReader.Profiler.csproj b/PocketCsvReader.Profiler/PocketCsvReader.Profiler.csproj new file mode 100644 index 0000000..344fb38 --- /dev/null +++ b/PocketCsvReader.Profiler/PocketCsvReader.Profiler.csproj @@ -0,0 +1,15 @@ + + + + Exe + net8.0 + + + + + + + + true + + diff --git a/PocketCsvReader.Profiler/Program.cs b/PocketCsvReader.Profiler/Program.cs new file mode 100644 index 0000000..7fa1881 --- /dev/null +++ b/PocketCsvReader.Profiler/Program.cs @@ -0,0 +1,5 @@ +// See https://aka.ms/new-console-template for more information +using PocketCsvReader.Testing; + +var test = new CsvDataReaderTest(); +test.Read_TestData_Successful(40_000, false); diff --git a/PocketCsvReader.Testing/FieldParserTest.cs b/PocketCsvReader.Testing/FieldParserTest.cs index 39537f0..5212d66 100644 --- a/PocketCsvReader.Testing/FieldParserTest.cs +++ b/PocketCsvReader.Testing/FieldParserTest.cs @@ -35,7 +35,7 @@ public void ReadField_Empty_CorrectString(string item, string result) } [Test] - [TestCase("(null)", null)] //Parse (null) to a real null value + [TestCase("(null)", null)] //InternalParse (null) to a real null value public void ReadField_Null_CorrectString(string item, string result) { Span buffer = stackalloc char[64]; diff --git a/PocketCsvReader.Testing/PocketCsvReader.Testing.csproj b/PocketCsvReader.Testing/PocketCsvReader.Testing.csproj index 5b65deb..6486239 100644 --- a/PocketCsvReader.Testing/PocketCsvReader.Testing.csproj +++ b/PocketCsvReader.Testing/PocketCsvReader.Testing.csproj @@ -1,5 +1,8 @@ + + true + diff --git a/PocketCsvReader.Testing/RecordParserTest.cs b/PocketCsvReader.Testing/RecordParserTest.cs index 3f1604c..0866194 100644 --- a/PocketCsvReader.Testing/RecordParserTest.cs +++ b/PocketCsvReader.Testing/RecordParserTest.cs @@ -20,7 +20,7 @@ public void ReadNextRecord_SingleField_CorrectParsing(string record) var profile = new CsvProfile(';', '\'', '\'', "\r\n", false, true, 4096, string.Empty, string.Empty); using var reader = new RecordParser(new StreamReader(buffer), profile, ArrayPool.Create(256, 5)); - var (values, eof) = reader.ReadNextRecord(); + var eof = reader.ReadNextRecord(out var values); Assert.That(values, Has.Length.EqualTo(1)); Assert.That(values.First(), Is.EqualTo("foo")); } @@ -34,7 +34,7 @@ public void ReadNextRecord_RecordWithLineTerminator_CorrectParsing(string record var profile = new CsvProfile(';', '\'', '\'', "\r\n", false, false, 4096, "(empty)", "(null)"); using var reader = new RecordParser(new StreamReader(buffer), profile, ArrayPool.Create(256, 5)); - (var values, var _) = reader.ReadNextRecord(); + reader.ReadNextRecord(out var values); Assert.That(values, Has.Length.EqualTo(tokens.Length)); for (int i = 0; i < tokens.Length; i++) Assert.That(values[i], Is.EqualTo(tokens[i])); @@ -49,7 +49,7 @@ public void ReadNextRecord_RecordWithoutLineTerminator_CorrectParsing(string rec var profile = new CsvProfile(';', '\'', '\'', "\r\n", false, false, 4096, "(empty)", "(null)"); using var reader = new RecordParser(new StreamReader(buffer), profile, ArrayPool.Create(256, 5)); - (var values, var _) = reader.ReadNextRecord(); + reader.ReadNextRecord(out var values); Assert.That(values, Has.Length.EqualTo(tokens.Length)); for (int i = 0; i < tokens.Length; i++) Assert.That(values[i], Is.EqualTo(tokens[i])); @@ -66,7 +66,7 @@ public void ReadNextRecord_RecordWithUnescapedTextQualifier_ThrowException(strin using var reader = new RecordParser(new StreamReader(buffer), profile, ArrayPool.Create(256, 5)); Assert.Throws(() => { - reader.ReadNextRecord(); + reader.ReadNextRecord(out var values); }); } @@ -91,7 +91,7 @@ public void ReadNextRecord_RecordWithTwoFields_CorrectParsing(string record, str var profile = new CsvProfile( new CsvDialectDescriptor() { Delimiter=';', QuoteChar='\'', DoubleQuote=true }); using var reader = new RecordParser(new StreamReader(buffer), profile, ArrayPool.Create(256, 5)); - (var values, var _) = reader.ReadNextRecord(); + reader.ReadNextRecord(out var values); Assert.That(values[0], Is.EqualTo(firstToken)); Assert.That(values[1], Is.EqualTo("xyz")); } @@ -106,7 +106,7 @@ public void ReadNextRecord_SingleFieldWithTextQualifier_CorrectParsing(string re var profile = new CsvProfile(';', '\'', '\'', "\r\n", false, true, 4096, string.Empty, string.Empty); using var reader = new RecordParser(new StreamReader(buffer), profile, ArrayPool.Create(256, 5)); - var (values, _) = reader.ReadNextRecord(); + reader.ReadNextRecord(out var values); Assert.That(values, Has.Length.EqualTo(1)); Assert.That(values.First(), Is.EqualTo(expected)); } @@ -121,7 +121,7 @@ public void ReadNextRecord_SingleFieldWithTextEscaper_CorrectParsing(string reco var profile = new CsvProfile(';', '\'', escapeTextQualifier, "\r\n", false, true, 4096, string.Empty, string.Empty); using var reader = new RecordParser(new StreamReader(buffer), profile, ArrayPool.Create(256, 5)); - var (values, _) = reader.ReadNextRecord(); + reader.ReadNextRecord(out var values); Assert.That(values, Has.Length.EqualTo(1)); Assert.That(values.First(), Is.EqualTo("fo'o")); } @@ -136,7 +136,7 @@ public void ReadNextRecord_SingleFieldWithDoubleQuote_CorrectParsing(string reco new CsvDialectDescriptor() { Delimiter = ';', QuoteChar = '\'', DoubleQuote=true } ); using var reader = new RecordParser(new StreamReader(buffer), profile, ArrayPool.Create(256, 5)); - var (values, _) = reader.ReadNextRecord(); + reader.ReadNextRecord(out var values); Assert.That(values, Has.Length.EqualTo(1)); Assert.That(values.First(), Is.EqualTo("fo'o")); } @@ -157,7 +157,7 @@ public void ReadNextRecord_RecordWithThreeFields_CorrectParsing(string record, s var profile = new CsvProfile(';', '\'', '\'', "\r\n", false, true, 4096, string.Empty, string.Empty); using var reader = new RecordParser(new StreamReader(buffer), profile, ArrayPool.Create(256, 5)); - var (values, _) = reader.ReadNextRecord(); + reader.ReadNextRecord(out var values); Assert.That(values, Has.Length.EqualTo(3)); Assert.That(values[2], Is.EqualTo(thirdToken)); } @@ -169,7 +169,7 @@ public void ReadNextRecord_NullField_NullValue() var profile = new CsvProfile(new CsvDialectDescriptor() {Delimiter=';', NullSequence="(null)" }); using var reader = new RecordParser(new StreamReader(buffer), profile, ArrayPool.Create(256, 5)); - var (values, eof) = reader.ReadNextRecord(); + var eof = reader.ReadNextRecord(out var values); Assert.That(eof, Is.True); Assert.That(values, Has.Length.EqualTo(2)); Assert.That(values[1], Is.Null); @@ -204,7 +204,7 @@ public void ReadNextRecord_Csv_CorrectResults(string text, string recordSeparato using var reader = new RecordParser(new StreamReader(stream), profile, ArrayPool.Create(256, 5)); using (var streamReader = new StreamReader(stream, Encoding.UTF8, true)) { - var (values, _) = reader.ReadNextRecord(); + reader.ReadNextRecord(out var values); Assert.That(values, Has.Length.GreaterThan(0)); foreach (var value in values) Assert.That(value, Is.EqualTo("abc")); @@ -227,7 +227,7 @@ public void ReadNextRecord_SkipInitialWhitespace_CorrectResults(string record) profile.Descriptor.SkipInitialSpace = true; using var reader = new RecordParser(new StreamReader(stream), profile, ArrayPool.Create(256, 5)); using var streamReader = new StreamReader(stream); - var (values, _) = reader.ReadNextRecord(); + reader.ReadNextRecord(out var values); Assert.That(values, Has.Length.EqualTo(2)); Assert.That(values[0], Is.EqualTo("foo")); Assert.That(values[1], Is.EqualTo("bar")); @@ -300,14 +300,14 @@ public void CountRecords_Rewind_CorrectCount(string text, string recordSeparator for (int i = 0; i < result; i++) { if (i (FieldStart, FieldLength) = (Position, 0); + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal void SetFieldStart() => (FieldStart, FieldLength) = (Position, 1); + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal void SetFieldEnd(int i) => (FieldLength) = (Position - FieldStart + 1 + i); + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal void SetFieldEnd() => SetFieldEnd(0); + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal void ResetFieldState() => IsQuotedField = IsEscapedField = false; + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal void SetQuotedField() => IsQuotedField = true; + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal void SetEscapedField() => IsEscapedField = true; - - internal void Switch(IInternalCharParser parser) - => Internal = parser; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void Switch(InternalParse parse) + => Internal = parse; } public enum ParserState diff --git a/PocketCsvReader/CharParsing/AfterQuoteCharParser.cs b/PocketCsvReader/CharParsing/AfterQuoteCharParser.cs index 79d907f..ceb78b1 100644 --- a/PocketCsvReader/CharParsing/AfterQuoteCharParser.cs +++ b/PocketCsvReader/CharParsing/AfterQuoteCharParser.cs @@ -12,9 +12,6 @@ internal class AfterQuoteCharParser : IInternalCharParser public AfterQuoteCharParser(CharParser parser) => Parser = parser; - public void Initialize() - { } - public virtual ParserState Parse(char c) { if (c == Parser.Profile.Descriptor.Delimiter) diff --git a/PocketCsvReader/CharParsing/CharOfFieldLookupParser.cs b/PocketCsvReader/CharParsing/CharOfFieldLookupParser.cs new file mode 100644 index 0000000..adbdd99 --- /dev/null +++ b/PocketCsvReader/CharParsing/CharOfFieldLookupParser.cs @@ -0,0 +1,58 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace PocketCsvReader.CharParsing; +internal class CharOfFieldLookupParser : IInternalCharParser +{ + protected CharParser Parser { get; set; } + protected readonly bool[] InterestingChars; + private char FirstCharOfLineTerminator { get; set; } + private char Delimiter { get; set; } + private char EscapeChar { get; set; } + + public CharOfFieldLookupParser(CharParser parser) + { + (Parser, FirstCharOfLineTerminator, Delimiter, EscapeChar) + = (parser, parser.Profile.Descriptor.LineTerminator[0], parser.Profile.Descriptor.Delimiter + , parser.Profile.Descriptor.EscapeChar); + + InterestingChars = new bool[char.MaxValue + 1]; + InterestingChars[Delimiter] = true; + InterestingChars[FirstCharOfLineTerminator] = true; + InterestingChars[EscapeChar] = true; + } + + public virtual ParserState Parse(char c) + { + if (!InterestingChars[c]) + return ParserState.Continue; + + if (c == Delimiter) + { + Parser.SetFieldEnd(-1); + Parser.Switch(Parser.FirstCharOfField); + return ParserState.Field; + } + + if (c == FirstCharOfLineTerminator) + { + Parser.SetFieldEnd(-1); + Parser.Switch(Parser.LineTerminator); + return Parser.Profile.Descriptor.LineTerminator.Length == 1 + ? ParserState.Record + : ParserState.Continue; + } + + if (c == EscapeChar) + { + Parser.Switch(Parser.AfterEscapeChar); + return ParserState.Continue; + } + + throw new InvalidOperationException("Unexpected character"); + } +} + diff --git a/PocketCsvReader/CharParsing/CharOfFieldParser.cs b/PocketCsvReader/CharParsing/CharOfFieldParser.cs index 5bdf2b1..dab41e8 100644 --- a/PocketCsvReader/CharParsing/CharOfFieldParser.cs +++ b/PocketCsvReader/CharParsing/CharOfFieldParser.cs @@ -8,22 +8,25 @@ namespace PocketCsvReader.CharParsing; internal class CharOfFieldParser : IInternalCharParser { protected CharParser Parser { get; set; } + private char FirstCharOfLineTerminator { get; set; } + private char Delimiter { get; set; } + private char EscapeChar { get; set; } public CharOfFieldParser(CharParser parser) - => Parser = parser; - public void Initialize() - { } + => (Parser, FirstCharOfLineTerminator, Delimiter, EscapeChar) + = (parser, parser.Profile.Descriptor.LineTerminator[0], parser.Profile.Descriptor.Delimiter + , parser.Profile.Descriptor.EscapeChar); public virtual ParserState Parse(char c) { - if (c == Parser.Profile.Descriptor.Delimiter) + if (c == Delimiter) { Parser.SetFieldEnd(-1); Parser.Switch(Parser.FirstCharOfField); return ParserState.Field; } - if (c == Parser.Profile.Descriptor.LineTerminator[0]) + if (c == FirstCharOfLineTerminator) { Parser.SetFieldEnd(-1); Parser.Switch(Parser.LineTerminator); @@ -32,7 +35,7 @@ public virtual ParserState Parse(char c) : ParserState.Continue; } - if (c == Parser.Profile.Descriptor.EscapeChar) + if (c == EscapeChar) { Parser.Switch(Parser.AfterEscapeChar); return ParserState.Continue; diff --git a/PocketCsvReader/CharParsing/CharOfQuotedFieldParser.cs b/PocketCsvReader/CharParsing/CharOfQuotedFieldParser.cs index b88ab2a..2dd98fb 100644 --- a/PocketCsvReader/CharParsing/CharOfQuotedFieldParser.cs +++ b/PocketCsvReader/CharParsing/CharOfQuotedFieldParser.cs @@ -11,8 +11,6 @@ internal class CharOfQuotedFieldParser : IInternalCharParser public CharOfQuotedFieldParser(CharParser parser) => Parser = parser; - public void Initialize() - { } public virtual ParserState Parse(char c) { diff --git a/PocketCsvReader/CharParsing/FirstCharOfFieldLookupParser.cs b/PocketCsvReader/CharParsing/FirstCharOfFieldLookupParser.cs new file mode 100644 index 0000000..5c60772 --- /dev/null +++ b/PocketCsvReader/CharParsing/FirstCharOfFieldLookupParser.cs @@ -0,0 +1,78 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace PocketCsvReader.CharParsing; + +internal class FirstCharOfFieldLookupParser : IInternalCharParser +{ + protected CharParser Parser { get; set; } + protected readonly bool[] InterestingChars; + private char FirstCharOfLineTerminator { get; set; } + private char QuoteChar { get; set; } + private char Delimiter { get; set; } + private bool IsSkipInitialSpace { get; set; } + private char EscapeChar { get; set; } + + public FirstCharOfFieldLookupParser(CharParser parser) + { + (Parser, FirstCharOfLineTerminator, QuoteChar, Delimiter, IsSkipInitialSpace, EscapeChar) + = (parser, parser.Profile.Descriptor.LineTerminator[0], parser.Profile.Descriptor.QuoteChar + , parser.Profile.Descriptor.Delimiter, parser.Profile.Descriptor.SkipInitialSpace + , parser.Profile.Descriptor.EscapeChar); + + InterestingChars = new bool[char.MaxValue + 1]; + InterestingChars[Delimiter] = true; + InterestingChars[FirstCharOfLineTerminator] = true; + InterestingChars[EscapeChar] = true; + InterestingChars[QuoteChar] = true; + InterestingChars[' '] = IsSkipInitialSpace; + } + + public virtual ParserState Parse(char c) + { + Parser.ResetFieldState(); + + if (!InterestingChars[c]) + { + Parser.SetFieldStart(); + Parser.Switch(Parser.CharOfField); + return ParserState.Continue; + } + + if (c == QuoteChar) + { + Parser.SetQuotedField(); + Parser.Switch(Parser.FirstCharOfQuotedField); + return ParserState.Continue; + } + + if (c == ' ' && IsSkipInitialSpace) + return ParserState.Continue; + + if (c == Delimiter) + { + Parser.ZeroField(); + Parser.Switch(Parser.FirstCharOfField); + return ParserState.Field; + } + + if (c == FirstCharOfLineTerminator) + { + Parser.ZeroField(); + Parser.Switch(Parser.LineTerminator); + return ParserState.Continue; + } + + + if (c == EscapeChar) + { + Parser.Switch(Parser.AfterEscapeChar); + return ParserState.Continue; + } + + throw new InvalidOperationException("Unexpected character"); + } +} diff --git a/PocketCsvReader/CharParsing/FirstCharOfFieldParser.cs b/PocketCsvReader/CharParsing/FirstCharOfFieldParser.cs index df43bb9..0d8d66d 100644 --- a/PocketCsvReader/CharParsing/FirstCharOfFieldParser.cs +++ b/PocketCsvReader/CharParsing/FirstCharOfFieldParser.cs @@ -10,39 +10,47 @@ internal class FirstCharOfFieldParser : IInternalCharParser { protected CharParser Parser { get; set; } - public FirstCharOfFieldParser(CharParser parser) - => Parser = parser; + private char FirstCharOfLineTerminator { get; set; } + private char QuoteChar { get; set; } + private char Delimiter { get; set; } + private bool IsSkipInitialSpace { get; set; } + private char EscapeChar { get; set; } - public void Initialize() - => Parser.ResetFieldState(); + public FirstCharOfFieldParser(CharParser parser) + => (Parser, FirstCharOfLineTerminator, QuoteChar, Delimiter, IsSkipInitialSpace, EscapeChar) + = (parser, parser.Profile.Descriptor.LineTerminator[0], parser.Profile.Descriptor.QuoteChar + , parser.Profile.Descriptor.Delimiter, parser.Profile.Descriptor.SkipInitialSpace + , parser.Profile.Descriptor.EscapeChar); public virtual ParserState Parse(char c) { - if (c == Parser.Profile.Descriptor.QuoteChar) + Parser.ResetFieldState(); + + if (c == QuoteChar) { Parser.SetQuotedField(); Parser.Switch(Parser.FirstCharOfQuotedField); return ParserState.Continue; } - if (c == ' ' && Parser.Profile.Descriptor.SkipInitialSpace) + if (c == ' ' && IsSkipInitialSpace) return ParserState.Continue; - if (c == Parser.Profile.Descriptor.Delimiter) + if (c == Delimiter) { Parser.ZeroField(); Parser.Switch(Parser.FirstCharOfField); return ParserState.Field; } - if (c == Parser.Profile.Descriptor.LineTerminator[0]) + if (c == FirstCharOfLineTerminator) { Parser.ZeroField(); Parser.Switch(Parser.LineTerminator); return ParserState.Continue; } - if (c == Parser.Profile.Descriptor.EscapeChar) + if (c == EscapeChar) { Parser.Switch(Parser.AfterEscapeChar); return ParserState.Continue; diff --git a/PocketCsvReader/CharParsing/FirstCharOfQuotedFieldParser.cs b/PocketCsvReader/CharParsing/FirstCharOfQuotedFieldParser.cs index 3d58d04..6ebdd75 100644 --- a/PocketCsvReader/CharParsing/FirstCharOfQuotedFieldParser.cs +++ b/PocketCsvReader/CharParsing/FirstCharOfQuotedFieldParser.cs @@ -9,24 +9,24 @@ namespace PocketCsvReader.CharParsing; internal class FirstCharOfQuotedFieldParser : IInternalCharParser { protected CharParser Parser { get; set; } + private char QuoteChar { get; set; } + private char EscapeChar { get; set; } public FirstCharOfQuotedFieldParser(CharParser parser) - => Parser = parser; - - public void Initialize() - { } + => (Parser, QuoteChar, EscapeChar) + = (parser, parser.Profile.Descriptor.QuoteChar, parser.Profile.Descriptor.EscapeChar); public virtual ParserState Parse(char c) { Parser.SetFieldStart(); - if (c == Parser.Profile.Descriptor.QuoteChar) + if (c == QuoteChar) { Parser.Switch(Parser.AfterQuoteChar); return ParserState.Continue; } - if (c == Parser.Profile.Descriptor.EscapeChar) + if (c == EscapeChar) { Parser.Switch(Parser.AfterEscapeCharQuotedField); return ParserState.Continue; diff --git a/PocketCsvReader/CharParsing/FirstCharOfRecordParser.cs b/PocketCsvReader/CharParsing/FirstCharOfRecordParser.cs index 369cdd4..a5c8ea9 100644 --- a/PocketCsvReader/CharParsing/FirstCharOfRecordParser.cs +++ b/PocketCsvReader/CharParsing/FirstCharOfRecordParser.cs @@ -7,12 +7,14 @@ namespace PocketCsvReader.CharParsing; internal class FirstCharOfRecordParser : FirstCharOfFieldParser { + private char CommentChar { get; set; } + public FirstCharOfRecordParser(CharParser parser) - : base(parser) { } + : base(parser) { CommentChar = Parser.Profile.Descriptor.CommentChar; } public override ParserState Parse(char c) { - if (c == Parser.Profile.Descriptor.CommentChar) + if (c == CommentChar) { Parser.ZeroField(); Parser.Switch(Parser.Comment); diff --git a/PocketCsvReader/CharParsing/LineTerminatorParser.cs b/PocketCsvReader/CharParsing/LineTerminatorParser.cs index a408ce8..cae9ef9 100644 --- a/PocketCsvReader/CharParsing/LineTerminatorParser.cs +++ b/PocketCsvReader/CharParsing/LineTerminatorParser.cs @@ -22,9 +22,6 @@ protected void Reset() private bool IsLast() => Index == _length; - public void Initialize() - { } - public ParserState Parse(char c) { if (c == Parser.Profile.Descriptor.LineTerminator[Index]) diff --git a/PocketCsvReader/CsvArrayString.cs b/PocketCsvReader/CsvArrayString.cs index 03fcb4d..aacc1ca 100644 --- a/PocketCsvReader/CsvArrayString.cs +++ b/PocketCsvReader/CsvArrayString.cs @@ -66,10 +66,9 @@ public void Initialize() if (IsEof) return null; - string?[]? values; - (values, IsEof) = RecordParser!.ReadNextRecord(); + IsEof = RecordParser!.ReadNextRecord(out var values); - if (IsEof && values!.Length == 0) + if (IsEof && values.Length == 0) { values = null; Extra = null; diff --git a/PocketCsvReader/CsvDataReader.cs b/PocketCsvReader/CsvDataReader.cs index 9de9a66..6956d15 100644 --- a/PocketCsvReader/CsvDataReader.cs +++ b/PocketCsvReader/CsvDataReader.cs @@ -3,6 +3,7 @@ using System.Data; using System.Diagnostics.CodeAnalysis; using System.IO; +using System.Reflection.PortableExecutable; using System.Text; using System.Threading.Tasks; @@ -55,7 +56,8 @@ public bool Read() if (IsEof) return false; - (Values, IsEof) = RecordParser!.ReadNextRecord(); + IsEof = RecordParser!.ReadNextRecord(out var values); + Values = values; if (IsEof && Values!.Length == 0) { Values = null; @@ -67,7 +69,8 @@ public bool Read() if (RowCount == 0 && RecordParser.Profile.Descriptor.Header) { - (Values, IsEof) = RecordParser.ReadNextRecord(); + IsEof = RecordParser.ReadNextRecord(out values); + Values = values; if (IsEof && Values!.Length == 0) { Values = null; diff --git a/PocketCsvReader/FieldParser.cs b/PocketCsvReader/FieldParser.cs index 5a43bf0..3f410a5 100644 --- a/PocketCsvReader/FieldParser.cs +++ b/PocketCsvReader/FieldParser.cs @@ -15,6 +15,8 @@ public class FieldParser protected ArrayPool? Pool { get; } protected PoolString FetchString { get; } + protected bool HandlesSpecialValues { get; } + protected bool UnescapesChars { get; } private static readonly PoolString defaultPoolString = (ReadOnlySpan span) => span.ToString(); @@ -22,7 +24,9 @@ public FieldParser(CsvProfile profile) : this(profile, ArrayPool.Shared) { } public FieldParser(CsvProfile profile, ArrayPool? pool, PoolString? fetchString = null) - => (Profile, Pool, FetchString) = (profile, pool, profile.ParserOptimizations.PoolString ?? defaultPoolString); + => (Profile, Pool, FetchString, HandlesSpecialValues, UnescapesChars) + = (profile, pool, profile.ParserOptimizations.PoolString ?? defaultPoolString + , profile.ParserOptimizations.HandleSpecialValues, profile.ParserOptimizations.UnescapeChars); public string? ReadField(ReadOnlySpan buffer, int start, int length, bool isEscapedField, bool wasQuotedField) => ReadField(Span.Empty, buffer, start, length, isEscapedField, wasQuotedField); @@ -31,41 +35,36 @@ public FieldParser(CsvProfile profile, ArrayPool? pool, PoolString? fetchS { ReadOnlySpan fieldSpan; if (longSpan.Length > 0 && length>=0) - { - var newSize = longSpan.Length + length; - var newArray = Pool?.Rent(newSize) ?? new char[newSize]; - longSpan.CopyTo(newArray); - buffer.Slice(start, length).ToArray().CopyTo(newArray, longSpan.Length); - fieldSpan = newArray; - fieldSpan = fieldSpan.Slice(0, newSize); - Pool?.Return(newArray); - } + fieldSpan = longSpan.Concat(buffer.Slice(start, length)); else if (longSpan.Length > 0 && length < 0) fieldSpan = longSpan.Slice(0, longSpan.Length + length); else fieldSpan = buffer.Slice(start, length); - return ReadField(fieldSpan, isEscapedField, wasQuotedField); + return ExtractField(fieldSpan, isEscapedField, wasQuotedField); } - public string? ReadField(ReadOnlySpan buffer, bool isEscapedField, bool wasQuotedField) + public string? ExtractField(ReadOnlySpan buffer, bool isEscapedField, bool wasQuotedField) { - if (Profile.ParserOptimizations.HandleSpecialValues && buffer.Length == 0) - return Profile.EmptyCell; - else if (Profile.ParserOptimizations.HandleSpecialValues && !isEscapedField && !wasQuotedField) - { - var strField = FetchString(buffer); - if (Profile.Sequences.TryGetValue(strField, out var value)) - return value; - return strField; + if (HandlesSpecialValues) + { + if (buffer.Length == 0) + return Profile.EmptyCell; + else if (!isEscapedField && !wasQuotedField) + { + var strField = FetchString(buffer); + if (Profile.Sequences.TryGetValue(strField, out var value)) + return value; + return strField; + } } - if (Profile.ParserOptimizations.UnescapeChars && isEscapedField) + if (UnescapesChars && isEscapedField) { var span = UnescapeField(buffer); return FetchString(span); } - else - return FetchString(buffer); + + return FetchString(buffer); } private ReadOnlySpan UnescapeField(ReadOnlySpan value) diff --git a/PocketCsvReader/IInternalCharParser.cs b/PocketCsvReader/IInternalCharParser.cs index d07a818..935c8eb 100644 --- a/PocketCsvReader/IInternalCharParser.cs +++ b/PocketCsvReader/IInternalCharParser.cs @@ -7,6 +7,5 @@ namespace PocketCsvReader; public interface IInternalCharParser { - void Initialize(); ParserState Parse(char c); } diff --git a/PocketCsvReader/ParserOptimizationOptions.cs b/PocketCsvReader/ParserOptimizationOptions.cs index b015f3c..771a61f 100644 --- a/PocketCsvReader/ParserOptimizationOptions.cs +++ b/PocketCsvReader/ParserOptimizationOptions.cs @@ -14,5 +14,6 @@ public record ParserOptimizationOptions bool ExtendIncompleteRecords = true, bool ReadAhead = true, int BufferSize = 4096, - PoolString? PoolString = null + PoolString? PoolString = null, + bool LookupTableChar = true ) { } diff --git a/PocketCsvReader/RecordParser.cs b/PocketCsvReader/RecordParser.cs index a624725..d7727c6 100644 --- a/PocketCsvReader/RecordParser.cs +++ b/PocketCsvReader/RecordParser.cs @@ -16,6 +16,8 @@ public class RecordParser : IDisposable protected ReadOnlyMemory Buffer { get; private set; } protected ArrayPool? Pool { get; } + private int? FieldsCount { get; set; } + public RecordParser(StreamReader reader, CsvProfile profile) : this(reader, profile, ArrayPool.Shared) { } @@ -31,71 +33,50 @@ public RecordParser(StreamReader reader, CsvProfile profile, ArrayPool? po protected RecordParser(CsvProfile profile, IBufferReader buffer, ArrayPool? pool) => (Profile, Reader, FieldParser, CharParser) = (profile, buffer, new FieldParser(profile, pool ?? ArrayPool.Shared), new(profile)); - public virtual (string?[] fields, bool eof) ReadNextRecord() + public virtual bool ReadNextRecord(out string?[] fields) { - var bufferSize = 0; var index = 0; var eof = false; - var fields = new List(); - Span longSpan = stackalloc char[0]; + var listFields = new List(FieldsCount ?? 20); + var longSpan = Span.Empty; if (Buffer.Length == 0) { - if (Reader.IsEof) - bufferSize = 0; - else - { + if (!Reader.IsEof) Buffer = Reader.Read(); - bufferSize = Buffer.Length; - } - - eof = bufferSize == 0; + eof = Buffer.Length == 0; } - else - bufferSize = Buffer.Length; var span = Buffer.Span; - span = span.Slice(0, bufferSize); + var bufferSize = span.Length; while (!eof && index < bufferSize) { char c = span[index]; - if (c == '\0') - { - eof = true; - break; - } var state = CharParser.Parse(c); - if (state == ParserState.Field || state == ParserState.Record) { - fields.Add(FieldParser.ReadField(longSpan, span, CharParser.FieldStart, CharParser.FieldLength, CharParser.IsEscapedField, CharParser.IsQuotedField)); + // InternalParse field and reset longSpan + listFields.Add(FieldParser.ReadField(longSpan, span, CharParser.FieldStart, CharParser.FieldLength, CharParser.IsEscapedField, CharParser.IsQuotedField)); longSpan = Span.Empty; - } - if (state == ParserState.Record) - { - CharParser.Reset(); - Buffer = Buffer.Slice(index + 1); - return (fields.ToArray(), false); + if (state == ParserState.Record) + { + CharParser.Reset(); + Buffer = Buffer.Slice(index + 1); + FieldsCount ??= listFields.Count; + fields = [.. listFields]; + return false; + } } - - if (state == ParserState.Error) + else if (state == ParserState.Error) throw new InvalidDataException($"Invalid character '{c}' at position {index}."); + // Handle continuation for fields spanning multiple buffers if (++index == bufferSize) { if (state == ParserState.Continue) - { - var newLength = longSpan.Length + bufferSize - CharParser.FieldStart; - var newArray = Pool?.Rent(newLength) ?? new char[newLength]; - var newSpan = newArray.AsSpan().Slice(0, newLength); - longSpan.CopyTo(newSpan); - var remaining = span.Slice(CharParser.FieldStart, bufferSize - CharParser.FieldStart); - remaining.CopyTo(newSpan.Slice(longSpan.Length)); - longSpan = newSpan.Slice(0, newLength); - Pool?.Return(newArray); - } + longSpan = longSpan.Concat(span.Slice(CharParser.FieldStart, bufferSize - CharParser.FieldStart), Pool); if (!Reader.IsEof) { @@ -119,10 +100,12 @@ public virtual (string?[] fields, bool eof) ReadNextRecord() switch (CharParser.ParseEof()) { case ParserState.Record: - fields.Add(FieldParser.ReadField(longSpan, 0, longSpan.Length + CharParser.FieldLength, CharParser.IsEscapedField, CharParser.IsQuotedField)); - return (fields.ToArray(), true); + listFields.Add(FieldParser.ReadField(longSpan, 0, longSpan.Length + CharParser.FieldLength, CharParser.IsEscapedField, CharParser.IsQuotedField)); + fields = [.. listFields]; + return true; case ParserState.Eof: - return ([], true); + fields = []; + return true; case ParserState.Error: throw new InvalidDataException($"Invalid character End-of-File."); default: @@ -251,8 +234,8 @@ public string GetFirstRecord(StreamReader reader, string recordSeparator, int bu public virtual string[] ReadHeaders() { var unnamedFieldIndex = -1; - return ReadNextRecord().fields - .Select(value => + ReadNextRecord(out var fields); + return fields.Select(value => { unnamedFieldIndex++; return string.IsNullOrWhiteSpace(value) || !Profile.Descriptor.Header diff --git a/PocketCsvReader/SpanExtensions.cs b/PocketCsvReader/SpanExtensions.cs new file mode 100644 index 0000000..bda500a --- /dev/null +++ b/PocketCsvReader/SpanExtensions.cs @@ -0,0 +1,34 @@ +using System; +using System.Buffers; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace PocketCsvReader; +public static class SpanExtensions +{ + public static Span Concat(this Span prefix, ReadOnlySpan suffix, ArrayPool? pool = null) + { + var newLength = prefix.Length + suffix.Length; + var newArray = pool?.Rent(newLength) ?? new T[newLength]; + var newSpan = newArray.AsSpan().Slice(0, newLength); + prefix.CopyTo(newSpan); + suffix.CopyTo(newSpan.Slice(prefix.Length)); + newSpan = newSpan.Slice(0, newLength); + pool?.Return(newArray); + return newSpan; + } + + public static Span Concat(this ReadOnlySpan prefix, ReadOnlySpan suffix, ArrayPool? pool = null) + { + var newLength = prefix.Length + suffix.Length; + var newArray = pool?.Rent(newLength) ?? new T[newLength]; + var newSpan = newArray.AsSpan().Slice(0, newLength); + prefix.CopyTo(newSpan); + suffix.CopyTo(newSpan.Slice(prefix.Length)); + newSpan = newSpan.Slice(0, newLength); + pool?.Return(newArray); + return newSpan; + } +} diff --git a/nuget-download-package-version.ps1 b/nuget-download-package-version.ps1 new file mode 100644 index 0000000..468fb81 --- /dev/null +++ b/nuget-download-package-version.ps1 @@ -0,0 +1,58 @@ +param ( + [string]$PackageName, # The name of the NuGet package + [string[]]$Versions, # List of versions to download + [string]$OutputFolder = "./NuGetPackages" # Output folder for organizing dlls +) + +# Ensure the output folder exists +if (!(Test-Path -Path $OutputFolder)) { + New-Item -ItemType Directory -Path $OutputFolder +} + +foreach ($version in $Versions) { + Write-Host "Processing version $version of $PackageName..." + + # Create folder for the specific version + $versionFolder = Join-Path -Path $OutputFolder -ChildPath $version + if (!(Test-Path -Path $versionFolder)) { + New-Item -ItemType Directory -Path $versionFolder + } + + # Download the package using `nuget.exe` + $nugetPath = "nuget.exe" + if (!(Get-Command $nugetPath -ErrorAction SilentlyContinue)) { + Write-Error "nuget.exe not found. Ensure it's installed and in your PATH." + break + } + + # Download the package into a temp folder + $tempFolder = Join-Path -Path $OutputFolder -ChildPath "temp_$version" + if (!(Test-Path -Path $tempFolder)) { + New-Item -ItemType Directory -Path $tempFolder + } + + Write-Host "Downloading $PackageName version $version..." + & $nugetPath install $PackageName -Version $version -OutputDirectory $tempFolder -Source "https://api.nuget.org/v3/index.json" + + # Locate the .dll file + $packageFolder = Get-ChildItem -Path $tempFolder -Directory | Where-Object { $_.Name -like "$PackageName.$version*" } + if ($packageFolder -eq $null) { + Write-Error "Failed to find the downloaded package for version $version." + continue + } + + $dllPath = Get-ChildItem -Path $packageFolder.FullName -Recurse -Filter "*.dll" | Select-Object -First 1 + if ($dllPath -eq $null) { + Write-Error "No DLL found for $PackageName version $version." + continue + } + + # Move the .dll file to the version folder + Copy-Item -Path $dllPath.FullName -Destination $versionFolder -Force + Write-Host "DLL for version $version placed in $versionFolder." + + # Clean up temp folder + Remove-Item -Recurse -Force $tempFolder +} + +Write-Host "Completed processing all versions."