From f2fcf59e8d6f2cd6ddcdbade51608b15b4f42126 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20L=2E=20Charlier?= Date: Sun, 9 Feb 2025 11:57:56 +0100 Subject: [PATCH] feat: add a descriptor for the runtime type parser (#113) * feat: add a descriptor for the runtime type parser * docs: review for all fluent APIs --------- Co-authored-by: codefactor-io --- .../Configuration/CsvReaderBuilderTest.cs | 13 +++ PocketCsvReader.Testing/CsvDataReaderTest.cs | 6 +- PocketCsvReader.Testing/CsvDataRecordTests.cs | 92 ++++++++++++++++++- .../Configuration/CsvReaderBuilder.cs | 16 +++- .../CustomFieldDescriptorBuilder.cs | 2 + .../Configuration/FieldDescriptor.cs | 1 + .../Configuration/FieldDescriptorBuilder.cs | 8 +- .../IntegerFieldDescriptionBuilder.cs | 2 + .../NumberFieldDescriptorBuilder.cs | 4 +- .../Configuration/ParseFunction.cs | 9 ++ .../Configuration/RuntimeParsersDescriptor.cs | 31 +++++++ .../RuntimeParsersDescriptorBuilder.cs | 37 ++++++++ .../TemporalFieldDescriptionBuilder.cs | 3 + PocketCsvReader/CsvDataRecord.cs | 70 +++++++------- PocketCsvReader/CsvProfile.cs | 5 +- PocketCsvReader/FieldParsing/TypeIndexer.cs | 29 +++--- docs/_data/navigation_docs.yml | 2 + docs/_docs/fluent-api-resource.md | 65 +++++++++++++ docs/_docs/fluent-api-runtime-parsers.md | 74 +++++++++++++++ docs/_docs/fluent-api-schema.md | 43 ++++++++- docs/_docs/sequences.md | 2 +- 21 files changed, 446 insertions(+), 68 deletions(-) create mode 100644 PocketCsvReader/Configuration/ParseFunction.cs create mode 100644 PocketCsvReader/Configuration/RuntimeParsersDescriptor.cs create mode 100644 PocketCsvReader/Configuration/RuntimeParsersDescriptorBuilder.cs create mode 100644 docs/_docs/fluent-api-resource.md create mode 100644 docs/_docs/fluent-api-runtime-parsers.md diff --git a/PocketCsvReader.Testing/Configuration/CsvReaderBuilderTest.cs b/PocketCsvReader.Testing/Configuration/CsvReaderBuilderTest.cs index 0e87634..1628625 100644 --- 
a/PocketCsvReader.Testing/Configuration/CsvReaderBuilderTest.cs +++ b/PocketCsvReader.Testing/Configuration/CsvReaderBuilderTest.cs @@ -66,4 +66,17 @@ public void WithSchema_ShouldSetSchema() Assert.That(reader.Profile.Schema, Is.Not.Null); Assert.That(reader.Profile.Schema!.Fields, Is.Not.Null.And.Not.Empty); } + + [Test] + public void WithParsers_ShouldSetParsers() + { + var builder = new CsvReaderBuilder().WithParsers + ( + new RuntimeParsersDescriptorBuilder() + .WithParser((string s) => s.Equals(s, StringComparison.InvariantCultureIgnoreCase)) + ); + var reader = builder.Build(); + Assert.That(reader.Profile.Parsers, Is.Not.Null); + Assert.That(reader.Profile.Parsers!.Count, Is.EqualTo(1)); + } } diff --git a/PocketCsvReader.Testing/CsvDataReaderTest.cs b/PocketCsvReader.Testing/CsvDataReaderTest.cs index 123e7a0..b7f9a28 100644 --- a/PocketCsvReader.Testing/CsvDataReaderTest.cs +++ b/PocketCsvReader.Testing/CsvDataReaderTest.cs @@ -1,5 +1,4 @@ -using PocketCsvReader; -using NUnit.Framework; +using NUnit.Framework; using System; using System.Collections.Generic; using System.Data; @@ -7,10 +6,7 @@ using System.Linq; using System.Text; using System.Reflection; -using System.Buffers; -using Microsoft.VisualStudio.TestPlatform.PlatformAbstractions.Interfaces; using PocketCsvReader.Configuration; -using System.Globalization; namespace PocketCsvReader.Testing; diff --git a/PocketCsvReader.Testing/CsvDataRecordTests.cs b/PocketCsvReader.Testing/CsvDataRecordTests.cs index 21db159..b277ae8 100644 --- a/PocketCsvReader.Testing/CsvDataRecordTests.cs +++ b/PocketCsvReader.Testing/CsvDataRecordTests.cs @@ -64,24 +64,108 @@ YearMonth parse(string input) [TestCase("yyyy.MM", "2025.01")] [TestCase("MM.yyyy", "01.2025")] public void GetValue_RegisteredGlobally_NotParsable_Correct(string format, string input) + { + YearMonth parse(string input) + { + (int year, int month) = new YearMonthParser().Parse(input, format, CultureInfo.InvariantCulture); + return new YearMonth(year, 
month); + } + + var profile = new CsvProfile( + new DialectDescriptorBuilder().Build() + , new SchemaDescriptorBuilder() + .Indexed() + .WithField() + .Build() + , null + , new RuntimeParsersDescriptorBuilder() + .WithParser(parse) + .Build() + ); + + var record = new CsvDataRecord(new RecordMemory(input, [new FieldSpan(0, input.Length)]), profile); + var value = record.GetValue(0); + Assert.That(value, Is.EqualTo(new YearMonth(2025, 1))); + } + + + [Test] + [TestCase("yyyy-MM", "2025-01")] + [TestCase("yyyy-M", "2025-1")] + [TestCase("yyyy.MM", "2025.01")] + [TestCase("MM.yyyy", "01.2025")] + public void GetValue_AutoDiscoveryParsable_Correct(string format, string input) { var culture = new CultureInfo("en-US"); culture.DateTimeFormat.YearMonthPattern = format; + var profile = new CsvProfile( + new DialectDescriptorBuilder().Build() + , new SchemaDescriptorBuilder().Indexed().WithTemporalField( + tf => tf.WithFormat(format) + ).Build() + ); + + var record = new CsvDataRecord(new RecordMemory(input, [new FieldSpan(0, input.Length)]), profile); + var value = record.GetValue(0); + Assert.That(value, Is.EqualTo(new YearMonth(2025, 1))); + } + + [Test] + [TestCase("J25")] + public void GetValue_RegisteredWithBuilderNotParsable_Correct(string input) + { YearMonth parse(string input) { - (int year, int month) = new YearMonthParser().Parse(input, format, culture); - return new YearMonth(year, month); + if (input[0] == 'J' && input.EndsWith("25")) + return new YearMonth(2025, 1); + throw new ArgumentException(); } var profile = new CsvProfile( new DialectDescriptorBuilder().Build() - , new SchemaDescriptorBuilder().Indexed().WithField().Build() + , new SchemaDescriptorBuilder().Indexed().WithTemporalField( + tf => tf.WithParser((str) => parse(str)) + ).Build() ); var record = new CsvDataRecord(new RecordMemory(input, [new FieldSpan(0, input.Length)]), profile); - record.Register(parse); var value = record.GetValue(0); Assert.That(value, Is.EqualTo(new YearMonth(2025, 1))); 
} + + + [Test] + [TestCase("J25;F25")] + public void GetValue_RegisteredWithManyBuilderNotParsable_Correct(string input) + { + YearMonth parseAlpha(string input) + { + if (input[0] == 'J' && input.EndsWith("25")) + return new YearMonth(2025, 1); + throw new ArgumentException(); + } + + YearMonth parseBeta(string input) + { + if (input[0] == 'F' && input.EndsWith("25")) + return new YearMonth(2025, 2); + throw new ArgumentException(); + } + + var profile = new CsvProfile( + new DialectDescriptorBuilder().WithDelimiter(';').Build() + , new SchemaDescriptorBuilder().Indexed() + .WithTemporalField( + tf => tf.WithParser((str) => parseAlpha(str)) + ) + .WithTemporalField( + tf => tf.WithParser((str) => parseBeta(str)) + ).Build() + ); + + var record = new CsvDataRecord(new RecordMemory(input, [new FieldSpan(0, 3), new FieldSpan(4,3)]), profile); + Assert.That(record.GetValue(0), Is.EqualTo(new YearMonth(2025, 1))); + Assert.That(record.GetValue(1), Is.EqualTo(new YearMonth(2025, 2))); + } } diff --git a/PocketCsvReader/Configuration/CsvReaderBuilder.cs b/PocketCsvReader/Configuration/CsvReaderBuilder.cs index 2d5d824..406cca4 100644 --- a/PocketCsvReader/Configuration/CsvReaderBuilder.cs +++ b/PocketCsvReader/Configuration/CsvReaderBuilder.cs @@ -10,6 +10,7 @@ public class CsvReaderBuilder private DialectDescriptorBuilder _dialectBuilder = new(); private ISchemaDescriptorBuilder? _schemaBuilder; private ResourceDescriptorBuilder? _resourceBuilder; + private RuntimeParsersDescriptorBuilder? 
_parserBuilder; public CsvReaderBuilder WithDialect(Func func) { @@ -34,7 +35,6 @@ public CsvReaderBuilder WithSchema(ISchemaDescriptorBuilder schemaBuilder) return this; } - public CsvReaderBuilder WithResource(Func func) { _resourceBuilder = func(new()); @@ -47,6 +47,18 @@ public CsvReaderBuilder WithResource(ResourceDescriptorBuilder resourceBuilder) return this; } + public CsvReaderBuilder WithParsers(Func func) + { + _parserBuilder = func(new()); + return this; + } + + public CsvReaderBuilder WithParsers(RuntimeParsersDescriptorBuilder parserBuilder) + { + _parserBuilder = parserBuilder; + return this; + } + public CsvReader Build() - => new (new CsvProfile(_dialectBuilder.Build(), _schemaBuilder?.Build(), _resourceBuilder?.Build())); + => new (new CsvProfile(_dialectBuilder.Build(), _schemaBuilder?.Build(), _resourceBuilder?.Build(), _parserBuilder?.Build())); } diff --git a/PocketCsvReader/Configuration/CustomFieldDescriptorBuilder.cs b/PocketCsvReader/Configuration/CustomFieldDescriptorBuilder.cs index 3a178b3..3dd777d 100644 --- a/PocketCsvReader/Configuration/CustomFieldDescriptorBuilder.cs +++ b/PocketCsvReader/Configuration/CustomFieldDescriptorBuilder.cs @@ -25,4 +25,6 @@ public CustomFieldDescriptorBuilder WithFormat(string pattern, IFormatProvider? public new CustomFieldDescriptorBuilder WithDataSourceTypeName(string typeName) => (CustomFieldDescriptorBuilder)base.WithDataSourceTypeName(typeName); + public new CustomFieldDescriptorBuilder WithParser(ParseFunction parse) + => (CustomFieldDescriptorBuilder)base.WithParser(parse); } diff --git a/PocketCsvReader/Configuration/FieldDescriptor.cs b/PocketCsvReader/Configuration/FieldDescriptor.cs index bfcea31..11e2494 100644 --- a/PocketCsvReader/Configuration/FieldDescriptor.cs +++ b/PocketCsvReader/Configuration/FieldDescriptor.cs @@ -11,6 +11,7 @@ public record FieldDescriptor Type RuntimeType , string? Name = null , IFormatDescriptor? Format = null + , ParseFunction? 
Parse = null , ImmutableSequenceCollection? Sequences = null , string DataSourceTypeName = "" ) diff --git a/PocketCsvReader/Configuration/FieldDescriptorBuilder.cs b/PocketCsvReader/Configuration/FieldDescriptorBuilder.cs index c9eb105..417cae9 100644 --- a/PocketCsvReader/Configuration/FieldDescriptorBuilder.cs +++ b/PocketCsvReader/Configuration/FieldDescriptorBuilder.cs @@ -13,6 +13,7 @@ public class FieldDescriptorBuilder private Dictionary DefaultFormatBuilders = new(); protected Type _runtimeType; protected FormatDescriptorBuilder? _format; + protected ParseFunction? _parse; protected string? _name; protected SequenceCollection? _sequences; protected string? _dataSourceTypeName; @@ -57,6 +58,11 @@ public FieldDescriptorBuilder WithDataSourceTypeName(string typeName) return this; } + public FieldDescriptorBuilder WithParser(ParseFunction parse) + { + _parse = parse; + return this; + } private FormatDescriptorBuilder GetDefaultFormat() { @@ -67,6 +73,6 @@ private FormatDescriptorBuilder GetDefaultFormat() public virtual FieldDescriptor Build() { - return new FieldDescriptor(_runtimeType, _name, (_format ?? GetDefaultFormat()).Build(), _sequences?.ToImmutable(), _dataSourceTypeName ?? string.Empty); + return new FieldDescriptor(_runtimeType, _name, (_format ?? GetDefaultFormat()).Build(), _parse, _sequences?.ToImmutable(), _dataSourceTypeName ?? 
string.Empty); } } diff --git a/PocketCsvReader/Configuration/IntegerFieldDescriptionBuilder.cs b/PocketCsvReader/Configuration/IntegerFieldDescriptionBuilder.cs index 27a218f..fdb55ab 100644 --- a/PocketCsvReader/Configuration/IntegerFieldDescriptionBuilder.cs +++ b/PocketCsvReader/Configuration/IntegerFieldDescriptionBuilder.cs @@ -24,4 +24,6 @@ public IntegerFieldDescriptorBuilder WithFormat(Func (IntegerFieldDescriptorBuilder)base.WithDataSourceTypeName(typeName); + public new IntegerFieldDescriptorBuilder WithParser(ParseFunction parse) + => (IntegerFieldDescriptorBuilder)base.WithParser(parse); } diff --git a/PocketCsvReader/Configuration/NumberFieldDescriptorBuilder.cs b/PocketCsvReader/Configuration/NumberFieldDescriptorBuilder.cs index 6a517b4..3e40315 100644 --- a/PocketCsvReader/Configuration/NumberFieldDescriptorBuilder.cs +++ b/PocketCsvReader/Configuration/NumberFieldDescriptorBuilder.cs @@ -25,7 +25,9 @@ public NumberFieldDescriptorBuilder WithFormat(Func (NumberFieldDescriptorBuilder)base.WithDataSourceTypeName(typeName); + public new NumberFieldDescriptorBuilder WithParser(ParseFunction parse) + => (NumberFieldDescriptorBuilder)base.WithParser(parse); public override FieldDescriptor Build() - => new FieldDescriptor(_runtimeType, _name, _format?.Build(), _sequences?.ToImmutable(), _dataSourceTypeName ?? string.Empty); + => new FieldDescriptor(_runtimeType, _name, _format?.Build(), _parse, _sequences?.ToImmutable(), _dataSourceTypeName ?? 
string.Empty); } diff --git a/PocketCsvReader/Configuration/ParseFunction.cs b/PocketCsvReader/Configuration/ParseFunction.cs new file mode 100644 index 0000000..d1b306f --- /dev/null +++ b/PocketCsvReader/Configuration/ParseFunction.cs @@ -0,0 +1,9 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace PocketCsvReader.Configuration; +public delegate object ParseFunction(string input); +public delegate T ParseFunction(string input); diff --git a/PocketCsvReader/Configuration/RuntimeParsersDescriptor.cs b/PocketCsvReader/Configuration/RuntimeParsersDescriptor.cs new file mode 100644 index 0000000..7667941 --- /dev/null +++ b/PocketCsvReader/Configuration/RuntimeParsersDescriptor.cs @@ -0,0 +1,31 @@ +using System; +using System.Collections; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace PocketCsvReader.Configuration; +public class RuntimeParsersDescriptor : IEnumerable> +{ + private Dictionary Parsers { get; init; } = []; + + public void AddParser(ParseFunction parse) + => AddParser(typeof(T), (string str) => parse.Invoke(str)!); + + public void AddParser(Type type, ParseFunction parse) + { + var returnType = parse.Method.ReturnType; + if (!type.IsAssignableTo(returnType)) + throw new ArgumentException($"The provided parser returns {returnType}, which is not assignable from {type}."); + + Parsers.Add(type, parse); + } + + public int Count => Parsers.Count; + + public IEnumerator> GetEnumerator() + => Parsers.GetEnumerator(); + IEnumerator IEnumerable.GetEnumerator() + => GetEnumerator(); +} diff --git a/PocketCsvReader/Configuration/RuntimeParsersDescriptorBuilder.cs b/PocketCsvReader/Configuration/RuntimeParsersDescriptorBuilder.cs new file mode 100644 index 0000000..a69f205 --- /dev/null +++ b/PocketCsvReader/Configuration/RuntimeParsersDescriptorBuilder.cs @@ -0,0 +1,37 @@ +using System; +using 
System.Collections.Generic; +using System.ComponentModel.Design; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using System.Xml.Linq; + +namespace PocketCsvReader.Configuration; + +public class RuntimeParsersDescriptorBuilder +{ + private Dictionary _types = new(); + private RuntimeParsersDescriptor? Descriptor { get; set; } + + public RuntimeParsersDescriptorBuilder WithParser(ParseFunction parse) + => WithParser(typeof(T), (string str) => parse.Invoke(str)!); + + public RuntimeParsersDescriptorBuilder WithParser(Type type, ParseFunction parse) + { + var returnType = parse.Method.ReturnType; + if (!type.IsAssignableTo(returnType)) + throw new ArgumentException($"The provided parser returns {returnType}, which is not assignable from {type}."); + + if (!_types.TryAdd(type, parse)) + _types[type] = parse; + return this; + } + + public RuntimeParsersDescriptor? Build() + { + Descriptor = new RuntimeParsersDescriptor(); + foreach (var type in _types) + Descriptor.AddParser(type.Key, type.Value); + return Descriptor.Count > 0 ? 
Descriptor : null; + } +} diff --git a/PocketCsvReader/Configuration/TemporalFieldDescriptionBuilder.cs b/PocketCsvReader/Configuration/TemporalFieldDescriptionBuilder.cs index c0164bc..398d78f 100644 --- a/PocketCsvReader/Configuration/TemporalFieldDescriptionBuilder.cs +++ b/PocketCsvReader/Configuration/TemporalFieldDescriptionBuilder.cs @@ -26,4 +26,7 @@ public TemporalFieldDescriptorBuilder WithFormat(string pattern, Func (TemporalFieldDescriptorBuilder)base.WithDataSourceTypeName(typeName); + + public new TemporalFieldDescriptorBuilder WithParser(ParseFunction parse) + => (TemporalFieldDescriptorBuilder)base.WithParser(parse); } diff --git a/PocketCsvReader/CsvDataRecord.cs b/PocketCsvReader/CsvDataRecord.cs index 4143751..dbdcf22 100644 --- a/PocketCsvReader/CsvDataRecord.cs +++ b/PocketCsvReader/CsvDataRecord.cs @@ -1,48 +1,44 @@ using System; using System.Collections.Generic; using System.Data; -using System.Diagnostics.CodeAnalysis; -using System.IO; -using System.Reflection.PortableExecutable; using System.Text; using System.Threading.Tasks; using System.Globalization; using PocketCsvReader.Configuration; using System.Reflection; -using System.Xml.Linq; using PocketCsvReader.FieldParsing; -using System.Linq.Expressions; -using static System.Net.WebRequestMethods; namespace PocketCsvReader; public class CsvDataRecord : CsvRawRecord, IDataRecord { - protected TypeIndexer TypeFunctions = new(); + private TypeIndexer TypeParsers = new(); + protected Dictionary FieldParsers = new(); public CsvDataRecord(CsvProfile profile) : base(profile) { - TypeFunctions.Register(GetByte); - TypeFunctions.Register(GetChar); - TypeFunctions.Register(GetString); - TypeFunctions.Register(GetBoolean); - TypeFunctions.Register(GetInt16); - TypeFunctions.Register(GetInt32); - TypeFunctions.Register(GetInt64); - TypeFunctions.Register(GetFloat); - TypeFunctions.Register(GetDouble); - TypeFunctions.Register(GetDecimal); - TypeFunctions.Register(GetGuid); - 
TypeFunctions.Register(GetDate); - TypeFunctions.Register(GetTime); - TypeFunctions.Register(GetDateTime); - TypeFunctions.Register(GetDateTimeOffset); - } - - public void Register(Func parse) - { - ArgumentNullException.ThrowIfNull(parse); - TypeFunctions.Register((i) => parse(GetValueOrThrow(i).Value.ToString())); + TypeParsers.Register(GetByte); + TypeParsers.Register(GetChar); + TypeParsers.Register(GetString); + TypeParsers.Register(GetBoolean); + TypeParsers.Register(GetInt16); + TypeParsers.Register(GetInt32); + TypeParsers.Register(GetInt64); + TypeParsers.Register(GetFloat); + TypeParsers.Register(GetDouble); + TypeParsers.Register(GetDecimal); + TypeParsers.Register(GetGuid); + TypeParsers.Register(GetDate); + TypeParsers.Register(GetTime); + TypeParsers.Register(GetDateTime); + TypeParsers.Register(GetDateTimeOffset); + + foreach (var parser in profile.Parsers ?? []) + { + string getValue(int i) => GetValueOrThrow(i).Value.ToString(); + object parse(int i) => parser.Value(getValue(i)); + TypeParsers.Register(parser.Key, parse); + } } internal CsvDataRecord(RecordMemory record, CsvProfile? profile = null) @@ -54,7 +50,6 @@ internal CsvDataRecord(RecordMemory record, CsvProfile? profile = null) Fields = record.FieldSpans.Select(_ => $"field_{i++}").ToArray(); } - public object this[int i] { get => GetValue(i); @@ -158,7 +153,11 @@ public object GetValue(int i) if (!TryGetFieldDescriptor(i, out var field)) return GetString(i); - Func? parse = TypeFunctions.TryGetFunction(field.RuntimeType, out var dlg) + Func? parse = field.Parse is not null + ? FieldParsers.TryGetValue(i, out var fparse) + ? (int i) => fparse.Invoke(GetValueOrThrow(i).Value.ToString()) /* invoke the cached field parser; returning fparse itself would hand the delegate back as the field value */ + : RegisterParser(i, field.Parse) + : TypeParsers.TryGetParser(field.RuntimeType, out var dlg) ? (int i) => dlg.DynamicInvoke(i)! : RegisterFunction(field); try @@ -172,6 +171,13 @@ public object GetValue(int i) } } + + private Func? 
RegisterParser(int i, ParseFunction parse) + { + FieldParsers.Add(i, parse); + return (int i) => parse.Invoke(GetValueOrThrow(i).Value.ToString()); + } + private Func? RegisterFunction(FieldDescriptor field) { var type = typeof(TypeParserLocator<>).MakeGenericType(field.RuntimeType); @@ -202,7 +208,7 @@ IEnumerable GetParameters(object? format) string getValue(int i) => GetValueOrThrow(i).Value.ToString(); var parse = (int i) => func.Invoke(getValue(i))!; - TypeFunctions.Register(field!.RuntimeType, parse); + TypeParsers.Register(field!.RuntimeType, parse); return parse; } @@ -215,7 +221,7 @@ public T GetFieldValue(int i) if (IsDBNull(i)) return default!; - if (TypeFunctions.TryGetFunction(out var func)) + if (TypeParsers.TryGetParser(out var func)) return func.Invoke(i); throw new NotImplementedException($"No function registered for type {typeof(T).Name}"); diff --git a/PocketCsvReader/CsvProfile.cs b/PocketCsvReader/CsvProfile.cs index eb138c8..b3035a1 100644 --- a/PocketCsvReader/CsvProfile.cs +++ b/PocketCsvReader/CsvProfile.cs @@ -10,6 +10,8 @@ public class CsvProfile public DialectDescriptor Dialect { get; private set; } public SchemaDescriptor? Schema { get; private set; } public ResourceDescriptor? Resource { get; private set; } + public RuntimeParsersDescriptor? Parsers { get; private set; } + public ParserOptimizationOptions ParserOptimizations { get; set; } public virtual string EmptyCell { get; private set; } @@ -67,7 +69,7 @@ public CsvProfile(char fieldSeparator, char textQualifier, char escapeTextQualif MissingCell = missingCell; } - public CsvProfile(DialectDescriptor dialect, SchemaDescriptor? schema = null, ResourceDescriptor? resource = null) + public CsvProfile(DialectDescriptor dialect, SchemaDescriptor? schema = null, ResourceDescriptor? resource = null, RuntimeParsersDescriptor? 
parsers = null) { if (dialect.NullSequence is not null) resource = (resource ??= new ResourceDescriptor()) with @@ -85,6 +87,7 @@ public CsvProfile(DialectDescriptor dialect, SchemaDescriptor? schema = null, Re Schema = schema; Resource = resource; + Parsers = parsers; } private static CsvProfile? _commaDoubleQuote; diff --git a/PocketCsvReader/FieldParsing/TypeIndexer.cs b/PocketCsvReader/FieldParsing/TypeIndexer.cs index 10ac6a2..030a178 100644 --- a/PocketCsvReader/FieldParsing/TypeIndexer.cs +++ b/PocketCsvReader/FieldParsing/TypeIndexer.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Linq; @@ -6,15 +6,12 @@ using System.Threading.Tasks; namespace PocketCsvReader.FieldParsing; -public class TypeIndexer +internal class TypeIndexer { - private readonly Dictionary _typeToFunctionMap = new(); + private readonly Dictionary> _typeToFunctionMap = new(); public void Register(Func func) - { - ArgumentNullException.ThrowIfNull(func); - _typeToFunctionMap[typeof(T)] = func; - } + => Register(typeof(T), (int i) => func.Invoke(i)!); public void Register(Type type, Func func) { @@ -22,27 +19,24 @@ public void Register(Type type, Func func) _typeToFunctionMap[type] = func; } - public bool TryGetFunction([NotNullWhen(true)] out Func? func) + public bool TryGetParser([NotNullWhen(true)] out Func? func) { if (_typeToFunctionMap.TryGetValue(typeof(T), out var value)) { - func = (Func)value; + func = (int i) => (T)value(i); return true; } func = null; return false; } - public bool TryGetFunction(Type type, [NotNullWhen(true)] out Delegate? dlg) + public bool TryGetParser(Type type, [NotNullWhen(true)] out Func? 
func) { ArgumentNullException.ThrowIfNull(type); - if (_typeToFunctionMap.TryGetValue(type, out var func)) - { - dlg = (Delegate)func; + if (_typeToFunctionMap.TryGetValue(type, out func)) return true; - } - dlg = null; + func = null; return false; } @@ -52,7 +46,7 @@ public Delegate GetFunction(Type type) ArgumentNullException.ThrowIfNull(type); if (_typeToFunctionMap.TryGetValue(type, out var func)) - return (Delegate)func; + return func; throw new InvalidOperationException($"No function registered for type {type.Name}"); } @@ -60,7 +54,8 @@ public Delegate GetFunction(Type type) public Func GetFunction() { if (_typeToFunctionMap.TryGetValue(typeof(T), out var func)) - return (Func)func; + /* Stored delegates are object-returning wrappers, so adapt the result instead of casting the delegate. */ + return (int i) => (T)func(i); throw new InvalidOperationException($"No function registered for type {typeof(T).Name}"); } diff --git a/docs/_data/navigation_docs.yml b/docs/_data/navigation_docs.yml index 2efb3ee..4628b1c 100644 --- a/docs/_data/navigation_docs.yml +++ b/docs/_data/navigation_docs.yml @@ -14,6 +14,8 @@ - installation - basic-usage - fluent-api-profile-configuration + - fluent-api-runtime-parsers + - fluent-api-resource - fluent-api-schema - mapper-object-builder diff --git a/docs/_docs/fluent-api-resource.md b/docs/_docs/fluent-api-resource.md new file mode 100644 index 0000000..5c15d78 --- /dev/null +++ b/docs/_docs/fluent-api-resource.md @@ -0,0 +1,65 @@ +--- +title: Fluent API for Resource Configuration +tags: [configuration] +--- + +## Overview + +The Fluent API for resource configuration in PocketCsvReader provides an intuitive and flexible way to define resource-level settings, such as file encoding and value substitution sequences. 
+ +## Accessing the Resource Descriptor + +You can configure resource settings using the `CsvReaderBuilder` class and its `WithResource` method, which accepts a `ResourceDescriptorBuilder`: + +```csharp +var builder = new CsvReaderBuilder().WithResource +( + new ResourceDescriptorBuilder() + .WithEncoding("utf-8") +); +var reader = builder.Build(); +``` + +This method allows you to define **resource-level behaviors**, including encoding settings and sequences. + +## Configuring File Encoding + +PocketCsvReader can automatically detect file encoding based on the Byte Order Mark (BOM). The BOM is a sequence of bytes at the beginning of a file that indicates the encoding format. + +However, some CSV files do not include a BOM, and certain encodings lack a defined BOM. In such cases, you can manually specify the encoding using the `WithEncoding` method. + +```csharp +var builder = new CsvReaderBuilder().WithResource +( + new ResourceDescriptorBuilder() + .WithEncoding("ISO-8859-2") +); +var reader = builder.Build(); +``` + +### Supported Encodings + +The `WithEncoding` method expects a MIME-type encoding name. Case sensitivity is not enforced when validating MIME types. + +Examples: + +- `WithEncoding("ISO-8859-2")` +- `WithEncoding("utf-8")` + +### Registering Sequences + +A sequence substitution allows you to replace specific values in the CSV data before parsing. The concept of sequences is explained in detail [here](/docs/sequences). + +You can define sequences using the `WithSequence` method of the `ResourceDescriptorBuilder` class. This method allows you to specify: + +1. The pattern to match in the CSV data. +2. The replacement value to substitute in place of the matched pattern. + +```csharp +var builder = new CsvReaderBuilder() + .WithResource( + (r) => r.WithSequence("0", "-1") + ); +``` + +In this example, all occurrences of "0" in the CSV data will be replaced with "-1" before parsing. 
diff --git a/docs/_docs/fluent-api-runtime-parsers.md b/docs/_docs/fluent-api-runtime-parsers.md new file mode 100644 index 0000000..873f36a --- /dev/null +++ b/docs/_docs/fluent-api-runtime-parsers.md @@ -0,0 +1,74 @@ +--- +title: Fluent API for Runtime Type Parsers +tags: [configuration] +--- + +## Overview + +The Fluent API for runtime type parsers in PocketCsvReader provides an intuitive and flexible way to define parsers for various runtime types. This is particularly useful when working with custom or non-standard types serialized in a CSV file that need to be correctly deserialized. + +By defining a set of parsers, you ensure that these types are always deserialized properly across different CSV files. However, if the deserialization logic varies depending on the field, you should register the parser at the field level instead. See: [Providing a Custom Parser](/docs/fluent-api-schema#providing-a-custom-parser). + +## Defining a set of parsers + +You can configure runtime type parsers in two ways: + +1. Using a CSV Profile + +The `CsvProfile` class allows you to define a set of parsers as part of the CSV processing configuration: + +```csharp +var profile = new CsvProfile( + new DialectDescriptorBuilder().Build() + , new SchemaDescriptorBuilder() + .Indexed() + .WithField() + .Build() + , null + , new RuntimeParsersDescriptorBuilder() + .WithParser(...) + .Build() +); +``` + +2. Using a CSV Reader Builder + +You can also configure parsers using the `CsvReaderBuilder` class: + +```csharp +var builder = new CsvReaderBuilder().WithParsers +( + new RuntimeParsersDescriptorBuilder() + .WithParser(...) +); +var reader = builder.Build(); +``` + +### Registering a Custom Parser + +To define custom parsers, instantiate a `RuntimeParsersDescriptorBuilder` and use its `WithParser` method. 
+ +**Example:** Custom Parser for `Point` + +```csharp +Point parse(string input) +{ + var parts = input.Split(';'); + return new Point(parts[0], parts[1]); +} + +var parsers = new RuntimeParsersDescriptorBuilder() + .WithParser(parse) + .Build(); +``` + +In this example: + +- The custom parser splits the input string using `;` as a delimiter. +- It passes the two resulting parts to the `Point` constructor. +- It returns the newly created `Point` instance. + +### Handling Multiple Parsers + +- You can define multiple parsers for different types. +- If two parsers are registered for the same type, the most recently registered parser overrides the previous one. diff --git a/docs/_docs/fluent-api-schema.md b/docs/_docs/fluent-api-schema.md index aea7109..03cb04b 100644 --- a/docs/_docs/fluent-api-schema.md +++ b/docs/_docs/fluent-api-schema.md @@ -5,7 +5,7 @@ tags: [configuration] ## Overview -The Fluent API for schema definition in PocketCsvReader provides an intuitive and expressive way to define the structure of CSV data. This is particularly useful when working with `IDataReader`, where the `GetValue` method returns a boxed `object`. This powerful feature enables dynamic retrieval of any column's value without prior type knowledge, making it highly flexible for handling various data types. It seamlessly integrates with schema definitions to ensure proper casting and minimize conversion overhead.. +The Fluent API for schema definition in PocketCsvReader provides an intuitive and expressive way to define the structure of CSV data. This is particularly useful when working with `IDataReader`, where the `GetValue` method returns a boxed `object`. This powerful feature enables dynamic retrieval of any column's value without prior type knowledge, making it highly flexible for handling various data types. It seamlessly integrates with schema definitions to ensure proper casting and minimize conversion overhead. 
Defining a schema ensures that values are correctly interpreted and cast to their expected types, avoiding unnecessary type conversions at runtime. @@ -88,7 +88,11 @@ var schema = new SchemaDescriptorBuilder() This defines an "Amount" field as a double, using `,` as the decimal separator and disabling digit grouping. -### Custom Field Formatting +### Custom Field + +#### Defining a Custom Field with Formatting + +You can define a custom field in your schema and specify how its format should be interpreted during deserialization. ```csharp var schema = new SchemaDescriptorBuilder() @@ -97,9 +101,40 @@ var schema = new SchemaDescriptorBuilder() .Build(); ``` -This ensures that the "Location" field is interpreted as a `Point` and formatted accordingly. +In this example, the "Location" field is registered as a `Point` and formatted using the `x;y` pattern. + +By default, when assigning a custom field, the system looks for a `Parse` method on the specified type that: + +- Accepts a string (the input to parse). +- Has an optional string parameter for a custom format. +- Takes an `IFormatProvider` as the last argument. + +If such a method exists, it is used automatically. + +#### Providing a Custom Parser + +If the default method resolution does not find a suitable `Parse` method, or if the required method is inaccessible, you can manually register a parser using `WithParser`: + +```csharp +Point parse(string input) +{ + var parts = input.Split(';'); + return new Point(parts[0], parts[1]); +} + +var schema = new SchemaDescriptorBuilder() + .Named() + .WithCustomField("Location", x => x.WithParser(parse)) + .Build(); +``` + +Here, the custom parser method: + +- Splits the input string using `;` as a delimiter. +- Passes the resulting parts to the `Point` constructor. +- Returns a new `Point` instance. -When assigning a custom field, the parser is automatically searched for a method named `Parse` that accepts a string (the span to read) and an `IFormatProvider` as the last argument. 
Optionally, a second argument of type string can be provided to accept a format. +This approach ensures that custom parsing logic is used when the default method resolution does not apply. ## Benefits of Using a Schema diff --git a/docs/_docs/sequences.md b/docs/_docs/sequences.md index e7f3f73..f125846 100644 --- a/docs/_docs/sequences.md +++ b/docs/_docs/sequences.md @@ -86,4 +86,4 @@ profile.Sequences.Add("AIS", "Available in Store"); profile.Sequences.Add("-", 0); ``` -This configuration will apply these sequences during parsing to replace or interpret the specified strings. \ No newline at end of file +This configuration will apply these sequences during parsing to replace or interpret the specified strings.