-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Sanitize (header) value before logging (#58)
* Merge main branch
* Improve sanitization
* Introduce benchmark of log sanitization (default configuration)
* Improve sanitization before logging value
* Use `string.Create()` to build sanitized correlation ID value
* Reorganize benchmarks; Benchmark logger value sanitizer
* Use `SearchValues<>` in `net8.0`'s `CorrelationIdValueSanitizer`
* Use `SearchValues` fully in .NET 8.0
* Make `net6.0` log value sanitizer on par with `net8.0` code (add tests; adjust benchmarks)
* Make safe chars shorter; Adjust code & benchmarks
* Update dependencies
* Fix build warnings
* Make `Exception` type parameter nullable for logging
- Loading branch information
Showing
33 changed files
with
664 additions
and
158 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
342 changes: 342 additions & 0 deletions
342
...s/W4k.AspNetCore.Correlator.Benchmarks/AlgorithmBenchmarks/LogValueSanitizerBenchmarks.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,342 @@ | ||
using System; | ||
using System.Buffers; | ||
using System.Collections.Generic; | ||
using BenchmarkDotNet.Attributes; | ||
using Bogus; | ||
|
||
namespace W4k.AspNetCore.Correlator.Benchmarks.AlgorithmBenchmarks; | ||
|
||
/// <summary>
/// Compares several strategies for sanitizing a correlation ID before it is written to a log.
/// Each benchmark sanitizes the same pre-generated set of correlation IDs.
/// </summary>
[MemoryDiagnoser]
public class LogValueSanitizerBenchmarks
{
    private const string SafeChars = "!#$&+-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

    // NOTE(review): '/' is listed here although the allow-lists of the sanitizers in this file accept it,
    // so "unsafe" inputs may still contain an occasional character that is not replaced.
    private const string UnsafeChars = "<>/\r\n\t\b@'\"{}[]?\u00a2\u00a3\u20ac\u00a5¶Æ \u00ae\u00a9‹›«»バトル・ロワイアル";

    private readonly string[] _correlationIds;

    /// <summary>Length class of the generated correlation IDs.</summary>
    [ParamsAllValues]
    public CorrelationIdLength Length { get; set; }

    /// <summary>Character composition of the generated correlation IDs.</summary>
    [ParamsAllValues]
    public CorrelationIdContent Content { get; set; }

    public LogValueSanitizerBenchmarks()
    {
        // Only allocate here: BenchmarkDotNet assigns the [ParamsAllValues] properties after
        // the instance is constructed, so Length/Content still hold their defaults at this point.
        _correlationIds = new string[1024];
    }

    /// <summary>
    /// Fills the input set for the current <see cref="Length"/>/<see cref="Content"/> combination.
    /// Runs after the parameter properties have been assigned; the fixed seed keeps inputs reproducible.
    /// </summary>
    [GlobalSetup]
    public void GenerateCorrelationIds()
    {
        Randomizer.Seed = new Random(74656);
        var faker = new Faker();

        for (int i = 0; i < _correlationIds.Length; i++)
        {
            _correlationIds[i] = GenerateCorrelationId(faker, Length, Content);
        }
    }

    [Benchmark(Description = "Sanitize: Iterate one by one")]
    public string SanitizeIterating()
    {
        string last = null;
        foreach (var correlationId in _correlationIds)
        {
            last = CorrelationIdValueSanitizer_Iterate.Sanitize(correlationId);
        }

        // returning the last result keeps the work from being optimized away
        return last;
    }

    [Benchmark(Description = "Sanitize: Iterate one by one, use hash set")]
    public string SanitizeIteratingWithHashSet()
    {
        string last = null;
        foreach (var correlationId in _correlationIds)
        {
            // must exercise the hash set based variant, not the plain iterating one
            last = CorrelationIdValueSanitizer_HashSet.Sanitize(correlationId);
        }

        return last;
    }

    [Benchmark(Description = "Sanitize: Find next, sanitize one by one")]
    public string SanitizeSearchValuesAndIterating()
    {
        string last = null;
        foreach (var correlationId in _correlationIds)
        {
            last = CorrelationIdValueSanitizer_SearchValues_IterateRestOfStr.Sanitize(correlationId);
        }

        return last;
    }

    [Benchmark(Description = "Sanitize: Find next, sanitize, repeat")]
    public string SanitizeSearchValuesAndJumpingToNextUnsafe()
    {
        string last = null;
        foreach (var correlationId in _correlationIds)
        {
            last = CorrelationIdValueSanitizer_SearchValues_JumpToNextUnsafe.Sanitize(correlationId);
        }

        return last;
    }

    /// <summary>
    /// Generates a single random correlation ID of the requested length class and composition.
    /// </summary>
    /// <param name="faker">Source of random strings.</param>
    /// <param name="correlationIdLength">Short yields 8-32 characters, anything else 100-256.</param>
    /// <param name="correlationIdContent">Which character pools to draw from and in what order.</param>
    /// <returns>The generated correlation ID.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="correlationIdContent"/> is not a known <see cref="CorrelationIdContent"/> value.
    /// </exception>
    private static string GenerateCorrelationId(
        Faker faker,
        CorrelationIdLength correlationIdLength,
        CorrelationIdContent correlationIdContent)
    {
        (int minLength, int maxLength) = correlationIdLength == CorrelationIdLength.Short
            ? (8, 32)
            : (100, 256);

        return correlationIdContent switch
        {
            CorrelationIdContent.AllSafe =>
                faker.Random.String2(minLength, maxLength, SafeChars),

            CorrelationIdContent.AllUnsafe =>
                faker.Random.String2(minLength, maxLength, UnsafeChars),

            CorrelationIdContent.Combined =>
                faker.Random.String2(minLength, maxLength, SafeChars + UnsafeChars),

            // two halves, each drawn from a single pool
            CorrelationIdContent.FirstPartUnsafe =>
                faker.Random.String2(minLength / 2, maxLength / 2, UnsafeChars)
                + faker.Random.String2(minLength / 2, maxLength / 2, SafeChars),

            CorrelationIdContent.SecondPartUnsafe =>
                faker.Random.String2(minLength / 2, maxLength / 2, SafeChars)
                + faker.Random.String2(minLength / 2, maxLength / 2, UnsafeChars),

            _ => throw new ArgumentOutOfRangeException(
                nameof(correlationIdContent),
                correlationIdContent,
                "Unknown correlation ID content kind.")
        };
    }

    public enum CorrelationIdLength
    {
        Short,
        Long,
    }

    public enum CorrelationIdContent
    {
        AllSafe,
        AllUnsafe,
        Combined,
        FirstPartUnsafe,
        SecondPartUnsafe,
    }
}
|
||
/// <summary>
/// Sanitizer variant which checks characters one by one using range and equality comparisons only.
/// Output is truncated to <c>MaxValueLength</c> characters and every unsafe character is replaced
/// with <c>SanitizedChar</c>.
/// </summary>
file static class CorrelationIdValueSanitizer_Iterate
{
    private const int MaxValueLength = 64;
    private const char SanitizedChar = '_';

    /// <summary>
    /// Returns <paramref name="value"/> limited to <c>MaxValueLength</c> characters with unsafe characters replaced.
    /// The original instance is returned when nothing has to change.
    /// </summary>
    /// <param name="value">Value to sanitize; must not be <c>null</c>.</param>
    /// <returns>Sanitized value.</returns>
    public static string Sanitize(string value)
    {
        var valueLength = Math.Min(value.Length, MaxValueLength);
        for (int i = 0; i < valueLength; i++)
        {
            if (IsUnsafeChar(value[i]))
            {
                // first unsafe char found: a new string is needed from here on
                return SanitizeToNewString(value, valueLength, i);
            }
        }

        return value.Length > MaxValueLength
            ? value.Substring(0, valueLength)
            : value;
    }

    private static string SanitizeToNewString(string source, int length, int firstUnsafeCharPosition) =>
        string.Create(length, (firstUnsafeCharPosition, source), CreateValue);

    /// <summary>
    /// Fills <paramref name="buffer"/>: the prefix before the first unsafe character is copied as-is,
    /// the rest is checked and replaced character by character.
    /// </summary>
    private static void CreateValue(Span<char> buffer, (int FirstUnsafeCharPos, string SourceValue) state)
    {
        (int firstUnsafeCharPosition, string source) = state;

        source.AsSpan(0, firstUnsafeCharPosition).CopyTo(buffer);
        for (int i = firstUnsafeCharPosition; i < buffer.Length; i++)
        {
            var c = source[i];
            buffer[i] = IsUnsafeChar(c)
                ? SanitizedChar
                : c;
        }
    }

    /// <summary>
    /// Tells whether <paramref name="c"/> must be replaced. The accepted set matches the allow-list used by
    /// the other sanitizer variants in this file:
    /// <c>!#$&amp;+-./0123456789:=A-Z^_`a-z|~</c>.
    /// </summary>
    private static bool IsUnsafeChar(char c)
    {
        // anything outside of printable ASCII range '!'..'~' (inclusive) is unsafe
        if (c <= ' ' || c > '~')
        {
            return true;
        }

        // within the range above only ASCII letters and digits can match
        if (char.IsLetterOrDigit(c))
        {
            return false;
        }

        // remaining punctuation which is NOT on the allow-list
        return c == '"'
            || c == '%'
            || c is >= '\'' and <= '*'
            || c == ','
            || c == ';'
            || c == '<'
            || c == '>'
            || c == '?'
            || c == '@'
            || c is >= '[' and <= ']'
            || c == '{'
            || c == '}';
    }
}
|
||
/// <summary>
/// Sanitizer variant which looks every character up in a <see cref="HashSet{T}"/> of allowed characters.
/// Output never exceeds <c>MaxValueLength</c> characters; characters missing from the set become <c>SanitizedChar</c>.
/// </summary>
file static class CorrelationIdValueSanitizer_HashSet
{
    private const int MaxValueLength = 64;
    private const char SanitizedChar = '_';

    private static readonly HashSet<char> ValidCorrelationIdChars = new HashSet<char>(
        "!#$&+-./0123456789:=ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~");

    /// <summary>
    /// Produces the sanitized form of <paramref name="value"/>; the same instance is handed back
    /// when it is short enough and contains allowed characters only.
    /// </summary>
    /// <param name="value">Value to sanitize; must not be <c>null</c>.</param>
    /// <returns>Sanitized value.</returns>
    public static string Sanitize(string value)
    {
        bool tooLong = value.Length > MaxValueLength;
        int limit = tooLong ? MaxValueLength : value.Length;

        // skip over the leading run of allowed characters
        int position = 0;
        while (position < limit && ValidCorrelationIdChars.Contains(value[position]))
        {
            position++;
        }

        if (position == limit)
        {
            // nothing to replace, at most a truncation is needed
            return tooLong ? value.Substring(0, limit) : value;
        }

        return string.Create(limit, (position, value), static (target, input) =>
        {
            var (start, text) = input;

            // take the whole output window first, then overwrite whatever is not allowed
            text.AsSpan(0, target.Length).CopyTo(target);
            for (int at = start; at < target.Length; at++)
            {
                if (!ValidCorrelationIdChars.Contains(target[at]))
                {
                    target[at] = SanitizedChar;
                }
            }
        });
    }
}
|
||
/// <summary>
/// Sanitizer variant which locates the first disallowed character using <see cref="SearchValues{T}"/>
/// and then checks the rest of the value character by character.
/// Output never exceeds <c>MaxValueLength</c> characters; disallowed characters become <c>SanitizedChar</c>.
/// </summary>
file static class CorrelationIdValueSanitizer_SearchValues_IterateRestOfStr
{
    private const int MaxValueLength = 64;
    private const char SanitizedChar = '_';

    private static readonly SearchValues<char> ValidCorrelationIdChars =
        SearchValues.Create("!#$&+-./0123456789:=ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~");

    /// <summary>
    /// Produces the sanitized form of <paramref name="value"/>; the same instance is handed back
    /// when it is short enough and contains allowed characters only.
    /// </summary>
    /// <param name="value">Value to sanitize; must not be <c>null</c>.</param>
    /// <returns>Sanitized value.</returns>
    public static string Sanitize(string value)
    {
        // window of the input which can make it to the output
        ReadOnlySpan<char> window = value.AsSpan(0, Math.Min(value.Length, MaxValueLength));

        int firstBad = window.IndexOfAnyExcept(ValidCorrelationIdChars);
        if (firstBad < 0)
        {
            // all allowed: only truncation (if any) is left to do
            return value.Length > MaxValueLength
                ? window.ToString()
                : value;
        }

        return string.Create(window.Length, (firstBad, value), static (target, input) =>
        {
            var (start, text) = input;

            // take the whole output window first, then overwrite whatever is not allowed
            text.AsSpan(0, target.Length).CopyTo(target);
            for (int at = start; at < target.Length; at++)
            {
                if (!ValidCorrelationIdChars.Contains(target[at]))
                {
                    target[at] = SanitizedChar;
                }
            }
        });
    }
}
|
||
/// <summary>
/// Sanitizer variant which uses <see cref="SearchValues{T}"/> both to find the first disallowed character
/// and to jump from one disallowed character to the next, bulk-copying the allowed runs in between.
/// Output never exceeds <c>MaxValueLength</c> characters; disallowed characters become <c>SanitizedChar</c>.
/// </summary>
file static class CorrelationIdValueSanitizer_SearchValues_JumpToNextUnsafe
{
    private static readonly SearchValues<char> ValidCorrelationIdChars =
        SearchValues.Create("!#$&+-./0123456789:=ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~");

    private const int MaxValueLength = 64;
    private const char SanitizedChar = '_';

    /// <summary>
    /// Returns <paramref name="value"/> limited to <c>MaxValueLength</c> characters with disallowed characters replaced.
    /// The original instance is returned when nothing has to change.
    /// </summary>
    /// <param name="value">Value to sanitize; must not be <c>null</c>.</param>
    /// <returns>Sanitized value.</returns>
    public static string Sanitize(string value)
    {
        var valueLength = Math.Min(value.Length, MaxValueLength);
        var valueSpan = value.AsSpan(0, valueLength);

        var firstInvalidCharPos = valueSpan.IndexOfAnyExcept(ValidCorrelationIdChars);
        if (firstInvalidCharPos >= 0)
        {
            return SanitizeToNewString(value, valueLength, firstInvalidCharPos);
        }

        return value.Length > MaxValueLength
            ? valueSpan.ToString()
            : value;
    }

    private static string SanitizeToNewString(string source, int length, int firstUnsafeCharPosition) =>
        string.Create(length, (firstUnsafeCharPosition, source), CreateValue);

    /// <summary>
    /// Fills <paramref name="buffer"/> with sanitized content of the source value.
    /// <paramref name="buffer"/> may be shorter than the source (truncation), so the source is
    /// restricted to the buffer length before anything is searched or copied.
    /// </summary>
    private static void CreateValue(Span<char> buffer, (int FirstUnsafeCharPos, string SourceValue) state)
    {
        (int sourceIndex, string source) = state;

        // only the part of the source which fits the output is relevant; without this bound
        // a source longer than the buffer would overflow the destination on copy
        var sourceSpan = source.AsSpan(0, buffer.Length);

        // copy all safe chars before first unsafe char
        sourceSpan.Slice(0, sourceIndex).CopyTo(buffer);

        buffer[sourceIndex] = SanitizedChar;
        ++sourceIndex;

        // jump to next unsafe char, copy all safe chars between
        while (sourceIndex < buffer.Length)
        {
            var remainingSpan = sourceSpan.Slice(sourceIndex);
            var nextUnsafeCharPos = remainingSpan.IndexOfAnyExcept(ValidCorrelationIdChars);

            // no more unsafe characters, copy remaining chars and break
            if (nextUnsafeCharPos == -1)
            {
                remainingSpan.CopyTo(buffer.Slice(sourceIndex));
                break;
            }

            remainingSpan
                .Slice(0, nextUnsafeCharPos)
                .CopyTo(buffer.Slice(sourceIndex));

            buffer[sourceIndex + nextUnsafeCharPos] = SanitizedChar;
            sourceIndex += nextUnsafeCharPos + 1;
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.