Skip to content

Commit

Permalink
refactor: char parser (#38)
Browse files Browse the repository at this point in the history
* refactor: proper classes for char by char parsing
* perf: review structure of if to improve performances
* perf: usage of delegates and lookupTable
* perf: add also lookup for FirstCharOfField
* refactor: change signature of ReadNextRecord
  • Loading branch information
Seddryck authored Nov 24, 2024
1 parent b12d6a0 commit ebb692d
Show file tree
Hide file tree
Showing 30 changed files with 580 additions and 248 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ bld/
[Oo]bj/
[Ll]og/

# Performance checks
[Nn]u[Gg]etPackages/

# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
Expand Down
4 changes: 3 additions & 1 deletion Directory.Build.props
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
<Project>
<PropertyGroup>
<!-- By default every projects are packable except Testing projects-->
<!-- By default every project is packable, except the Testing, Benchmark and Profiler projects -->
<IsPackable>true</IsPackable>
<IsPackable Condition="$(MSBuildProjectName.EndsWith('Testing'))">false</IsPackable>
<IsPackable Condition="$(MSBuildProjectName.EndsWith('Benchmark'))">false</IsPackable>
<IsPackable Condition="$(MSBuildProjectName.EndsWith('Profiler'))">false</IsPackable>
<DebugType>portable</DebugType>
</PropertyGroup>

Expand Down
4 changes: 3 additions & 1 deletion PocketCsvReader.Benchmark/PocketCsvReader.Benchmark.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,7 @@
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
</PackageReference>
</ItemGroup>

<PropertyGroup>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
</Project>
2 changes: 1 addition & 1 deletion PocketCsvReader.Benchmark/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ public class Program
{
public static void Main()
{
var summary = BenchmarkRunner.Run<ToDataTable>();
var summary = BenchmarkRunner.Run<ToDataReader>();
}
}
156 changes: 156 additions & 0 deletions PocketCsvReader.Benchmark/ToDataReader.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
using System;
using System.Diagnostics;
using System.Formats.Asn1;
using System.Globalization;
using System.IO;
using System.Reflection;
using System.Runtime.Loader;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Jobs;
using Bogus;

namespace PocketCsvReader.Benchmark;

/// <summary>
/// Benchmark that reads a generated CSV file through <c>CsvReader.ToDataReader</c> for each
/// PocketCsvReader build found in the packages directory (one sub-directory per version).
/// </summary>
/// <remarks>
/// Every version is loaded by reflection from <c>PocketCsvReader.dll</c> located in its version
/// directory. The assembly is loaded once during <see cref="Setup"/> so that the measured method
/// only contains the reading work.
/// </remarks>
[MemoryDiagnoser]
[Config(typeof(CustomConfig))]
public class ToDataReader
{
    /// <summary>Environment variable that overrides the directory holding one sub-directory per version.</summary>
    private const string PackagesDirectoryVariable = "POCKETCSVREADER_PACKAGES_DIR";

    /// <summary>Directory used when <see cref="PackagesDirectoryVariable"/> is not set (historical location).</summary>
    private const string DefaultPackagesDirectory = @"C:\Users\cedri\Projects\PocketCsvReader\NuGetPackages\";

    /// <summary>Record terminator written to the file; it matches the terminator given to the CSV profile.</summary>
    private const string RecordTerminator = "\r\n";

    private readonly string _filePath = "data/1MBFile.csv";

    /// <summary>Directory of the PocketCsvReader build exercised by the current benchmark case.</summary>
    [ParamsSource(nameof(Versions))]
    public string VersionPath { get; set; } = string.Empty;

    /// <summary>
    /// Sub-directories of the packages directory; each one is expected to contain a
    /// <c>PocketCsvReader.dll</c>.
    /// </summary>
    /// <exception cref="DirectoryNotFoundException">The packages directory does not exist.</exception>
    public static IEnumerable<string> Versions => Directory.GetDirectories(ResolvePackagesDirectory());

    private Assembly? _csvAssembly;

    //[Params(16_300, 163_000, 1_630_000)]
    //[Params(16_300, 163_000)]
    //[Params(16_300)]
    /// <summary>Number of records written to the generated CSV file.</summary>
    [Params(1_630_000)]
    public int recordCount;

    /// <summary>
    /// Generates the CSV file used by the benchmark and loads the PocketCsvReader assembly
    /// designated by <see cref="VersionPath"/>.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        // CreateDirectory is a no-op when the directory already exists.
        Directory.CreateDirectory("data");

        var faker = new Faker<CustomerRecord>()
            .CustomInstantiator(static f => new CustomerRecord(
                f.Name.FirstName(),
                f.Name.LastName(),
                f.PickRandom(new[] { "Male", "Female" }),
                f.Date.Past(50, DateTime.Now.AddYears(-18)),
                f.Date.Recent(365).Year,
                f.Date.Month().ToString(CultureInfo.CurrentCulture),
                f.Finance.Amount(50, 500)
            ))
            .RuleFor(p => p.Year, (f, p) => p.DateOfBirth.Year)
            .RuleFor(p => p.Month, (f, p) => p.DateOfBirth.ToString("MMMM", CultureInfo.CurrentCulture));

        // Generate the list of records
        var records = faker.Generate(recordCount);

        // Write the data to CSV. The writer is closed before the file size is read below.
        using (var writer = new StreamWriter(_filePath) { NewLine = RecordTerminator })
        {
            foreach (var record in records)
            {
                // Invariant formatting: a culture with ',' as decimal separator would otherwise
                // add an extra delimiter to the record and change the shape of the file.
                writer.WriteLine(FormattableString.Invariant(
                    $"{record.Firstname},{record.Lastname},{record.Gender},{record.DateOfBirth},{record.Year},{record.Month},{record.TotalOrder}"));
            }
        }

        Console.WriteLine($"CSV file generated at: {_filePath}");
        Console.WriteLine($"CSV file generated with size: {new FileInfo(_filePath).Length}");

        // Load the assembly here rather than in the benchmark method, so that assembly loading
        // is not part of the measured time and allocations.
        _csvAssembly = LoadPocketCsvReader(VersionPath);
    }

    /// <summary>
    /// Requests the unload of the collectible load context created for the benchmarked assembly.
    /// </summary>
    [GlobalCleanup]
    public void Cleanup()
    {
        if (_csvAssembly is null)
        {
            return;
        }

        AssemblyLoadContext.GetLoadContext(_csvAssembly)?.Unload();
        _csvAssembly = null;
    }

    /// <summary>Reads the whole generated CSV file with the PocketCsvReader build under test.</summary>
    [Benchmark]
    public void ReadCsvFile()
    {
        // Normally already loaded by Setup; the lazy fallback keeps the method usable on its own
        // and guarantees the assembly is loaded at most once per instance.
        _csvAssembly ??= LoadPocketCsvReader(VersionPath);
        ReadFile(_filePath, _csvAssembly);
    }

    /// <summary>Appends one line with the memory figures of a run to <c>BenchmarkResults.txt</c>.</summary>
    /// <param name="version">Label of the measured version.</param>
    /// <param name="memoryUsed">Difference of <see cref="GC.GetTotalMemory(bool)"/> in bytes.</param>
    /// <param name="workingSetUsed">Difference of the process working set in bytes.</param>
    private void LogResults(string version, long memoryUsed, long workingSetUsed)
    {
        string logFile = "BenchmarkResults.txt";
        File.AppendAllText(logFile, $"Version: {version}, Memory Used (GC): {memoryUsed}, Memory Used (Working Set): {workingSetUsed}{Environment.NewLine}");
    }

    /// <summary>
    /// Creates a <c>PocketCsvReader.CsvReader</c> from the given assembly by reflection and
    /// iterates over every record of the file.
    /// </summary>
    /// <param name="filePath">Path of the CSV file to read.</param>
    /// <param name="csvAssembly">Assembly that contains the reader and profile types.</param>
    /// <exception cref="InvalidOperationException">A required type is missing from the assembly.</exception>
    private void ReadFile(string filePath, Assembly csvAssembly)
    {
        var csvReaderType = csvAssembly.GetType("PocketCsvReader.CsvReader")
            ?? throw new InvalidOperationException($"Type 'PocketCsvReader.CsvReader' not found in {csvAssembly.FullName}");
        var csvProfileType = csvAssembly.GetType("PocketCsvReader.CsvProfile")
            ?? throw new InvalidOperationException($"Type 'PocketCsvReader.CsvProfile' not found in {csvAssembly.FullName}");

        var csvProfile = Activator.CreateInstance(csvProfileType, ',', '"', RecordTerminator, false);
        dynamic csvReader = Activator.CreateInstance(csvReaderType, csvProfile)!;

        // Read-only access is enough; declared first so the stream is disposed after the reader.
        using var stream = File.OpenRead(filePath);
        using var reader = csvReader.ToDataReader(stream);
        while (reader.Read())
        {
            // Do nothing
        }
    }

    /// <summary>
    /// Runs <paramref name="action"/> and reports the variation of GC memory and working set,
    /// on the console and through <see cref="LogResults"/>.
    /// </summary>
    /// <param name="action">Work to measure.</param>
    /// <param name="version">Label written to the log for this measurement.</param>
    private void MeasureMemory(Action action, string version)
    {
        // Start from a collected heap so the "before" figure is as stable as possible.
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();

        long memoryBefore = GC.GetTotalMemory(true);
        using var process = Process.GetCurrentProcess();
        long workingSetBefore = process.WorkingSet64;

        action(); // Run the CSV read action

        // Refresh discards the cached process information before reading the working set again.
        process.Refresh();
        long workingSetAfter = process.WorkingSet64;
        long memoryAfter = GC.GetTotalMemory(false);

        long gcMemoryUsed = memoryAfter - memoryBefore;
        long workingSetMemoryUsed = workingSetAfter - workingSetBefore;

        Console.WriteLine($"Memory Used (GC): {gcMemoryUsed} bytes");
        Console.WriteLine($"Memory Used (Working Set): {workingSetMemoryUsed} bytes");

        LogResults(version, gcMemoryUsed, workingSetMemoryUsed);
    }

    /// <summary>Loads <c>PocketCsvReader.dll</c> from a version directory into its own collectible context.</summary>
    /// <param name="versionPath">Directory that contains the assembly.</param>
    /// <returns>The loaded assembly.</returns>
    /// <exception cref="FileNotFoundException">The directory does not contain <c>PocketCsvReader.dll</c>.</exception>
    private Assembly LoadPocketCsvReader(string versionPath)
    {
        string dllPath = Path.Combine(versionPath, "PocketCsvReader.dll");
        if (!File.Exists(dllPath))
        {
            throw new FileNotFoundException($"DLL not found: {dllPath}");
        }

        // Use a custom AssemblyLoadContext to load the assembly
        var context = new AssemblyLoadContext("PocketCsvReaderContext", isCollectible: true);
        return context.LoadFromAssemblyPath(dllPath);
    }

    /// <summary>
    /// Returns the directory that holds the versions to benchmark: the value of the
    /// <c>POCKETCSVREADER_PACKAGES_DIR</c> environment variable when set, otherwise the default location.
    /// </summary>
    private static string ResolvePackagesDirectory()
    {
        var configured = Environment.GetEnvironmentVariable(PackagesDirectoryVariable);
        return string.IsNullOrWhiteSpace(configured) ? DefaultPackagesDirectory : configured;
    }

    /// <summary>
    /// Benchmark configuration: a single .NET 8 job with a short warm-up. The versions are
    /// covered by the <see cref="VersionPath"/> parameter, so one job is sufficient.
    /// </summary>
    private class CustomConfig : ManualConfig
    {
        public CustomConfig()
        {
            AddJob(Job.Default
                .WithRuntime(CoreRuntime.Core80)
                .WithWarmupCount(1) // 1 warm-up iteration
                .WithIterationCount(5) // 5 actual iterations
            );
        }
    }
}
94 changes: 0 additions & 94 deletions PocketCsvReader.Benchmark/ToDataTable.cs

This file was deleted.

15 changes: 15 additions & 0 deletions PocketCsvReader.Profiler/PocketCsvReader.Profiler.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<!-- Console executable used to run PocketCsvReader code under a profiler. -->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
</PropertyGroup>

<ItemGroup>
<!-- The Testing project is referenced because Program.cs calls one of its test methods directly. -->
<ProjectReference Include="..\PocketCsvReader.Testing\PocketCsvReader.Testing.csproj" />
<ProjectReference Include="..\PocketCsvReader\PocketCsvReader.csproj" />
</ItemGroup>
<!-- NOTE(review): presumably required because referenced code uses unsafe blocks; confirm it is needed in this project. -->
<PropertyGroup>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
</Project>
5 changes: 5 additions & 0 deletions PocketCsvReader.Profiler/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// See https://aka.ms/new-console-template for more information
using PocketCsvReader.Testing;

// Entry point of the Profiler executable: runs a single CsvDataReader test method
// directly, outside of the test runner.
new CsvDataReaderTest().Read_TestData_Successful(40_000, false);
2 changes: 1 addition & 1 deletion PocketCsvReader.Testing/FieldParserTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public void ReadField_Empty_CorrectString(string item, string result)
}

[Test]
[TestCase("(null)", null)] //Parse (null) to a real null value
[TestCase("(null)", null)] //Parse (null) to a real null value
public void ReadField_Null_CorrectString(string item, string result)
{
Span<char> buffer = stackalloc char[64];
Expand Down
3 changes: 3 additions & 0 deletions PocketCsvReader.Testing/PocketCsvReader.Testing.csproj
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
<None Remove="Resources\Ansi.csv" />
<None Remove="Resources\Utf16-BE.csv" />
Expand Down
Loading

0 comments on commit ebb692d

Please sign in to comment.