Skip to content

Commit

Permalink
Sanitize (header) value before logging (#58)
Browse files Browse the repository at this point in the history
* Merge main branch

* Improve sanitization

* Introduce benchmark of log sanitization (default configuration)

* Improve sanitization before logging value

* Use `string.Create()` to build sanitized correlation ID value

* Reorganize benchmarks; Benchmark logger value sanitizer

* Use `SearchValues<>` in `net8.0`'s `CorrelationIdValueSanitizer`

* Use `SearchValues` fully in .NET 8.0

* Make `net6.0` log value sanitizer on par with `net8.0` code (add tests; adjust benchmarks)

* Make safe chars shorter; Adjust code & benchmarks

* Update dependencies

* Fix build warnings

* Make `Exception` type parameter nullable for logging
  • Loading branch information
wdolek authored Nov 27, 2023
1 parent 7e86856 commit 60a2b6b
Show file tree
Hide file tree
Showing 33 changed files with 664 additions and 158 deletions.
2 changes: 1 addition & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ dotnet_naming_style.s_camelcase.capitalization = camel_case

# .NET diagnostics

dotnet_diagnostic.IDE0057.severity = none # IDE0057: Use range operator
dotnet_diagnostic.SA1101.severity = none # SA1101: Prefix local calls with this
dotnet_diagnostic.SA1129.severity = none # SA1129: Do not use default value type constructor
dotnet_diagnostic.SA1309.severity = none # SA1309: Field '_...' should not begin with an underscore
Expand All @@ -393,4 +394,3 @@ dotnet_diagnostic.SA1600.severity = none # SA1600: Elements should be documented
dotnet_diagnostic.SA1601.severity = none # SA1601: Partial elements should be documented
dotnet_diagnostic.SA1633.severity = none # SA1633: The file header is missing or not located at the top of the file
dotnet_diagnostic.VSTHRD200.severity = none # VSTHRD200: Use "Async" suffix in names of methods that return an awaitable type

7 changes: 4 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- name: Checkout
uses: actions/checkout@v4

- name: Setup .NET 8.0
- name: Setup .NET
uses: actions/setup-dotnet@v3
with:
dotnet-version: 8
Expand All @@ -33,6 +33,7 @@ jobs:
run: dotnet build --nologo --no-restore --configuration Release

- name: Test
env:
DOTNET_ROLL_FORWARD: Major
run: dotnet test --nologo --no-build --configuration Release --verbosity normal -f net8.0

- name: Test (net6.0)
run: dotnet test --nologo --no-build --configuration Release --verbosity normal -f net6.0 ./test/W4k.AspNetCore.Correlator.UnitTests/
Original file line number Diff line number Diff line change
@@ -0,0 +1,342 @@
using System;
using System.Buffers;
using System.Collections.Generic;
using BenchmarkDotNet.Attributes;
using Bogus;

namespace W4k.AspNetCore.Correlator.Benchmarks.AlgorithmBenchmarks;

/// <summary>
/// Benchmarks several correlation ID sanitizer implementations against generated
/// correlation IDs of varying length and content.
/// </summary>
[MemoryDiagnoser]
public class LogValueSanitizerBenchmarks
{
    private const string SafeChars = "!#$&+-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    private const string UnsafeChars = "<>/\r\n\t\b@'\"{}[]?\u00a2\u00a3\u20ac\u00a5¶Æ \u00ae\u00a9‹›«»バトル・ロワイアル";

    /// <summary>Number of correlation IDs sanitized per benchmark invocation.</summary>
    private const int CorrelationIdCount = 1024;

    /// <summary>Fixed seed so that every run works on the same generated values.</summary>
    private const int RandomSeed = 74656;

    private string[] _correlationIds = Array.Empty<string>();

    [ParamsAllValues]
    public CorrelationIdLength Length { get; set; }

    [ParamsAllValues]
    public CorrelationIdContent Content { get; set; }

    /// <summary>
    /// Generates the input data for the current parameter combination.
    /// </summary>
    /// <remarks>
    /// This must run in a global setup rather than in the constructor: parameter properties
    /// are assigned after the instance is constructed, so a constructor would only ever see
    /// the default values of <see cref="Length"/> and <see cref="Content"/>.
    /// </remarks>
    [GlobalSetup]
    public void Setup()
    {
        Randomizer.Seed = new Random(RandomSeed);
        var faker = new Faker();

        var correlationIds = new string[CorrelationIdCount];
        for (int i = 0; i < correlationIds.Length; i++)
        {
            correlationIds[i] = GenerateCorrelationId(faker, Length, Content);
        }

        _correlationIds = correlationIds;
    }

    /// <summary>Sanitizes every generated value by checking characters one by one.</summary>
    /// <returns>The last sanitized value, returned so the work cannot be optimized away.</returns>
    [Benchmark(Description = "Sanitize: Iterate one by one")]
    public string SanitizeIterating()
    {
        string last = null;
        foreach (var correlationId in _correlationIds)
        {
            last = CorrelationIdValueSanitizer_Iterate.Sanitize(correlationId);
        }

        return last;
    }

    /// <summary>Sanitizes every generated value using the hash set based character lookup.</summary>
    /// <returns>The last sanitized value, returned so the work cannot be optimized away.</returns>
    [Benchmark(Description = "Sanitize: Iterate one by one, use hash set")]
    public string SanitizeIteratingWithHashSet()
    {
        string last = null;
        foreach (var correlationId in _correlationIds)
        {
            // Must target the hash set variant; calling the iterating variant here would
            // measure the same code as SanitizeIterating.
            last = CorrelationIdValueSanitizer_HashSet.Sanitize(correlationId);
        }

        return last;
    }

    /// <summary>
    /// Sanitizes every generated value by locating the first unsafe character with
    /// <c>SearchValues</c> and then checking the rest of the value character by character.
    /// </summary>
    /// <returns>The last sanitized value, returned so the work cannot be optimized away.</returns>
    [Benchmark(Description = "Sanitize: Find next, sanitize one by one")]
    public string SanitizeSearchValuesAndIterating()
    {
        string last = null;
        foreach (var correlationId in _correlationIds)
        {
            last = CorrelationIdValueSanitizer_SearchValues_IterateRestOfStr.Sanitize(correlationId);
        }

        return last;
    }

    /// <summary>
    /// Sanitizes every generated value by repeatedly jumping to the next unsafe character
    /// with <c>SearchValues</c> and copying the safe characters in between.
    /// </summary>
    /// <returns>The last sanitized value, returned so the work cannot be optimized away.</returns>
    [Benchmark(Description = "Sanitize: Find next, sanitize, repeat")]
    public string SanitizeSearchValuesAndJumpingToNextUnsafe()
    {
        string last = null;
        foreach (var correlationId in _correlationIds)
        {
            last = CorrelationIdValueSanitizer_SearchValues_JumpToNextUnsafe.Sanitize(correlationId);
        }

        return last;
    }

    /// <summary>
    /// Generates a single correlation ID of the requested length class and content kind.
    /// </summary>
    /// <param name="faker">Random data generator.</param>
    /// <param name="correlationIdLength">Selects the length range: 8-32 for short, 100-256 otherwise.</param>
    /// <param name="correlationIdContent">Selects which character sets are used and in which order.</param>
    /// <returns>The generated correlation ID.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="correlationIdContent"/> is not a known content kind.
    /// </exception>
    private static string GenerateCorrelationId(
        Faker faker,
        CorrelationIdLength correlationIdLength,
        CorrelationIdContent correlationIdContent)
    {
        (int minLength, int maxLength) = correlationIdLength == CorrelationIdLength.Short
            ? (8, 32)
            : (100, 256);

        return correlationIdContent switch
        {
            CorrelationIdContent.AllSafe =>
                faker.Random.String2(minLength, maxLength, SafeChars),

            CorrelationIdContent.AllUnsafe =>
                faker.Random.String2(minLength, maxLength, UnsafeChars),

            CorrelationIdContent.Combined =>
                faker.Random.String2(minLength, maxLength, SafeChars + UnsafeChars),

            // The two-part kinds build each half from half of the length range.
            CorrelationIdContent.FirstPartUnsafe =>
                faker.Random.String2(minLength / 2, maxLength / 2, UnsafeChars)
                + faker.Random.String2(minLength / 2, maxLength / 2, SafeChars),

            CorrelationIdContent.SecondPartUnsafe =>
                faker.Random.String2(minLength / 2, maxLength / 2, SafeChars)
                + faker.Random.String2(minLength / 2, maxLength / 2, UnsafeChars),

            _ => throw new ArgumentOutOfRangeException(
                nameof(correlationIdContent),
                correlationIdContent,
                "Unknown correlation ID content kind.")
        };
    }

    /// <summary>Length class of generated correlation IDs.</summary>
    public enum CorrelationIdLength
    {
        Short,
        Long,
    }

    /// <summary>Character composition of generated correlation IDs.</summary>
    public enum CorrelationIdContent
    {
        AllSafe,
        AllUnsafe,
        Combined,
        FirstPartUnsafe,
        SecondPartUnsafe,
    }
}

/// <summary>
/// Sanitizer variant that classifies each character with a hand-written predicate.
/// Values are truncated to <c>MaxValueLength</c> characters and every unsafe character
/// is replaced with <c>SanitizedChar</c>.
/// </summary>
file static class CorrelationIdValueSanitizer_Iterate
{
    private const int MaxValueLength = 64;
    private const char SanitizedChar = '_';

    /// <summary>Returns the sanitized (and possibly truncated) form of <paramref name="value"/>.</summary>
    /// <param name="value">Value to sanitize.</param>
    /// <returns>
    /// The same instance when the value is short enough and contains no unsafe character;
    /// otherwise a new string.
    /// </returns>
    public static string Sanitize(string value)
    {
        var limit = value.Length < MaxValueLength ? value.Length : MaxValueLength;

        // Advance over the leading run of safe characters.
        var position = 0;
        while (position < limit && !IsUnsafeChar(value[position]))
        {
            position++;
        }

        if (position < limit)
        {
            // Stopped on an unsafe character: build a new string of the truncated length.
            return string.Create(limit, (position, value), CreateValue);
        }

        if (value.Length <= MaxValueLength)
        {
            return value;
        }

        return value.Substring(0, limit);
    }

    /// <summary>
    /// Fills <paramref name="buffer"/> from the source value: the prefix before the first
    /// unsafe character is copied as is, the remainder is checked character by character.
    /// </summary>
    private static void CreateValue(Span<char> buffer, (int FirstUnsafeCharPos, string SourceValue) state)
    {
        var start = state.FirstUnsafeCharPos;
        var text = state.SourceValue.AsSpan(0, buffer.Length);

        text.Slice(0, start).CopyTo(buffer);

        for (var index = start; index < text.Length; index++)
        {
            var current = text[index];
            if (IsUnsafeChar(current))
            {
                buffer[index] = SanitizedChar;
            }
            else
            {
                buffer[index] = current;
            }
        }
    }

    /// <summary>
    /// Tells whether <paramref name="c"/> must be replaced. Anything up to and including
    /// space, and anything from <c>~</c> upwards, is unsafe; letters and digits in between
    /// are safe; the remaining punctuation is unsafe only when explicitly listed.
    /// </summary>
    private static bool IsUnsafeChar(char c)
    {
        if (c is <= ' ' or >= '~')
        {
            return true;
        }

        if (char.IsLetterOrDigit(c))
        {
            return false;
        }

        return c switch
        {
            '"' or '%' or ',' or '?' or '@' or '<' or '>' or '{' or '}' => true,
            >= '\'' and <= '*' => true,
            _ => false,
        };
    }
}

/// <summary>
/// Sanitizer variant that looks each character up in a hash set of valid characters.
/// Values are truncated to <c>MaxValueLength</c> characters and every character that is
/// not in the set is replaced with <c>SanitizedChar</c>.
/// </summary>
file static class CorrelationIdValueSanitizer_HashSet
{
    private const int MaxValueLength = 64;
    private const char SanitizedChar = '_';

    private static readonly HashSet<char> ValidCorrelationIdChars = new HashSet<char>(
        "!#$&+-./0123456789:=ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~");

    /// <summary>Returns the sanitized (and possibly truncated) form of <paramref name="value"/>.</summary>
    /// <param name="value">Value to sanitize.</param>
    /// <returns>
    /// The same instance when the value is short enough and contains only valid characters;
    /// otherwise a new string.
    /// </returns>
    public static string Sanitize(string value)
    {
        var inspectedLength = Math.Min(value.Length, MaxValueLength);
        var firstUnsafe = FindFirstUnsafe(value, inspectedLength);

        if (firstUnsafe >= 0)
        {
            return string.Create(inspectedLength, (firstUnsafe, value), CreateValue);
        }

        var needsTruncation = value.Length > MaxValueLength;
        return needsTruncation
            ? value.Substring(0, inspectedLength)
            : value;
    }

    /// <summary>
    /// Finds the index of the first invalid character within the first
    /// <paramref name="length"/> characters, or -1 when there is none.
    /// </summary>
    private static int FindFirstUnsafe(string value, int length)
    {
        for (var index = 0; index < length; index++)
        {
            if (!ValidCorrelationIdChars.Contains(value[index]))
            {
                return index;
            }
        }

        return -1;
    }

    /// <summary>
    /// Fills <paramref name="buffer"/> from the source value: the prefix before the first
    /// invalid character is copied as is, the remainder is checked character by character.
    /// </summary>
    private static void CreateValue(Span<char> buffer, (int FirstUnsafeCharPos, string SourceValue) state)
    {
        var original = state.SourceValue;
        var cursor = state.FirstUnsafeCharPos;

        original.AsSpan(0, cursor).CopyTo(buffer);

        while (cursor < buffer.Length)
        {
            var candidate = original[cursor];
            buffer[cursor] = ValidCorrelationIdChars.Contains(candidate)
                ? candidate
                : SanitizedChar;
            cursor++;
        }
    }
}

/// <summary>
/// Sanitizer variant that finds the first invalid character with <c>SearchValues</c> and
/// then checks the rest of the value character by character. Values are truncated to
/// <c>MaxValueLength</c> characters; invalid characters are replaced with <c>SanitizedChar</c>.
/// </summary>
file static class CorrelationIdValueSanitizer_SearchValues_IterateRestOfStr
{
    private const int MaxValueLength = 64;
    private const char SanitizedChar = '_';

    private static readonly SearchValues<char> ValidCorrelationIdChars =
        SearchValues.Create("!#$&+-./0123456789:=ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~");

    /// <summary>Returns the sanitized (and possibly truncated) form of <paramref name="value"/>.</summary>
    /// <param name="value">Value to sanitize.</param>
    /// <returns>
    /// The same instance when the value is short enough and contains only valid characters;
    /// otherwise a new string.
    /// </returns>
    public static string Sanitize(string value)
    {
        var truncated = value.AsSpan(0, Math.Min(value.Length, MaxValueLength));
        var unsafeAt = truncated.IndexOfAnyExcept(ValidCorrelationIdChars);

        if (unsafeAt < 0)
        {
            // The lengths are equal exactly when no truncation took place.
            return truncated.Length == value.Length
                ? value
                : truncated.ToString();
        }

        return string.Create(truncated.Length, (unsafeAt, value), CreateValue);
    }

    /// <summary>
    /// Fills <paramref name="buffer"/> from the source value: the prefix before the first
    /// invalid character is copied as is, the remainder is checked character by character.
    /// </summary>
    private static void CreateValue(Span<char> buffer, (int FirstUnsafeCharPos, string SourceValue) state)
    {
        var input = state.SourceValue.AsSpan(0, buffer.Length);
        var offset = state.FirstUnsafeCharPos;

        input.Slice(0, offset).CopyTo(buffer);

        for (; offset < input.Length; offset++)
        {
            var symbol = input[offset];
            if (ValidCorrelationIdChars.Contains(symbol))
            {
                buffer[offset] = symbol;
            }
            else
            {
                buffer[offset] = SanitizedChar;
            }
        }
    }
}

/// <summary>
/// Sanitizer variant that uses <c>SearchValues</c> to jump from one invalid character to
/// the next, copying the valid characters in between as whole spans. Values are truncated
/// to <c>MaxValueLength</c> characters; invalid characters are replaced with
/// <c>SanitizedChar</c>.
/// </summary>
file static class CorrelationIdValueSanitizer_SearchValues_JumpToNextUnsafe
{
    private static readonly SearchValues<char> ValidCorrelationIdChars =
        SearchValues.Create("!#$&+-./0123456789:=ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~");

    private const int MaxValueLength = 64;
    private const char SanitizedChar = '_';

    /// <summary>Returns the sanitized (and possibly truncated) form of <paramref name="value"/>.</summary>
    /// <param name="value">Value to sanitize.</param>
    /// <returns>
    /// The same instance when the value is short enough and contains only valid characters;
    /// otherwise a new string of at most <c>MaxValueLength</c> characters.
    /// </returns>
    public static string Sanitize(string value)
    {
        var valueLength = Math.Min(value.Length, MaxValueLength);
        var valueSpan = value.AsSpan(0, valueLength);

        var firstInvalidCharPos = valueSpan.IndexOfAnyExcept(ValidCorrelationIdChars);
        if (firstInvalidCharPos >= 0)
        {
            return SanitizeToNewString(value, valueLength, firstInvalidCharPos);
        }

        return value.Length > MaxValueLength
            ? valueSpan.ToString()
            : value;
    }

    /// <summary>Creates a new string of <paramref name="length"/> characters with invalid characters replaced.</summary>
    private static string SanitizeToNewString(string source, int length, int firstUnsafeCharPosition) =>
        string.Create(length, (firstUnsafeCharPosition, source), CreateValue);

    /// <summary>
    /// Fills <paramref name="buffer"/> with the sanitized prefix of the source value.
    /// </summary>
    /// <param name="buffer">Destination; its length is the length of the resulting string.</param>
    /// <param name="state">Position of the first invalid character and the source value.</param>
    private static void CreateValue(Span<char> buffer, (int FirstUnsafeCharPos, string SourceValue) state)
    {
        (int sourceIndex, string source) = state;

        // Only the first buffer.Length characters belong to the result. The source may be
        // longer (the value gets truncated), so bound the span up front: searching or
        // copying past the buffer would overrun the destination.
        var sourceSpan = source.AsSpan(0, buffer.Length);

        // copy all safe chars before first unsafe char
        sourceSpan.Slice(0, sourceIndex).CopyTo(buffer);

        buffer[sourceIndex] = SanitizedChar;
        ++sourceIndex;

        // jump to next unsafe char, copy all safe chars between
        while (sourceIndex < sourceSpan.Length)
        {
            var remainingSpan = sourceSpan.Slice(sourceIndex);
            var nextUnsafeCharPos = remainingSpan.IndexOfAnyExcept(ValidCorrelationIdChars);

            // no more unsafe characters, copy remaining chars and break
            if (nextUnsafeCharPos < 0)
            {
                remainingSpan.CopyTo(buffer.Slice(sourceIndex));
                break;
            }

            remainingSpan
                .Slice(0, nextUnsafeCharPos)
                .CopyTo(buffer.Slice(sourceIndex));

            buffer[sourceIndex + nextUnsafeCharPos] = SanitizedChar;
            sourceIndex += nextUnsafeCharPos + 1;
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.TestHost;
using Microsoft.Extensions.DependencyInjection;
using W4k.AspNetCore.Correlator.Benchmarks.Helpers;
using W4k.AspNetCore.Correlator.Benchmarks.Middleware;
using W4k.AspNetCore.Correlator.Options;

namespace W4k.AspNetCore.Correlator.Benchmarks.ComparingBenchmarks;
Expand Down Expand Up @@ -99,7 +97,7 @@ public void ConfigureServices(IServiceCollection services)
options.ReadFrom.Clear();
options.ReadFrom.Add("X-Correlation-Id");
options.Factory = (_) =>
options.Factory = _ =>
CorrelationId.FromString(
Guid.NewGuid().ToString("D", CultureInfo.InvariantCulture));
Expand Down
Loading

0 comments on commit 60a2b6b

Please sign in to comment.