Skip to content

Commit 1c341de

Browse files
[release/7.0] Ensure ReadBufferState resets any BOM offsets every time the buffer is advanced. (#78235)
* Ensure the async reader state resets the BOM offset in every AdvanceBuffer() call.
* Add BOM insertion to async serialization stress testing.
* Update src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs

Co-authored-by: Eirik Tsarpalis <[email protected]>
1 parent 6ab036f commit 1c341de

File tree

3 files changed

+134
-15
lines changed

3 files changed

+134
-15
lines changed

src/libraries/System.Text.Json/src/System/Text/Json/Serialization/ReadBufferState.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ public void AdvanceBuffer(int bytesConsumed)
122122
// Copy the unprocessed data to the new buffer while shifting the processed bytes.
123123
Buffer.BlockCopy(oldBuffer, _offset + bytesConsumed, newBuffer, 0, _count);
124124
_buffer = newBuffer;
125-
_offset = 0;
126125
_maxCount = _count;
127126

128127
// Clear and return the old buffer
@@ -133,9 +132,10 @@ public void AdvanceBuffer(int bytesConsumed)
133132
{
134133
// Shift the processed bytes to the beginning of buffer to make more room.
135134
Buffer.BlockCopy(_buffer, _offset + bytesConsumed, _buffer, 0, _count);
136-
_offset = 0;
137135
}
138136
}
137+
138+
_offset = 0;
139139
}
140140

141141
private void ProcessReadBytes()

src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/ContinuationTests.cs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,31 @@ public static void InvalidJsonShouldFailAtAnyPosition_Sequence(
235235
Assert.Equal(expectedFailure.Column, ex.BytePositionInLine);
236236
}
237237

238+
// Regression test for BOM handling: deserializes, via the async stream API, a payload
// that starts with the UTF-8 preamble and checks that the property value round-trips.
[Fact]
public static async Task BomHandlingRegressionTest()
{
    // Arrange: the UTF-8 preamble immediately followed by the JSON document.
    byte[] payload = """{ "Value" : "Hello" }"""u8.ToArray();
    byte[] preamble = Encoding.UTF8.GetPreamble();

    using var input = new MemoryStream();
    input.Write(preamble, 0, preamble.Length);
    input.Write(payload, 0, payload.Length);
    input.Position = 0; // rewind so deserialization starts at the preamble

    var smallBufferOptions = new JsonSerializerOptions { DefaultBufferSize = 32 };

    // Act
    Test deserialized = await JsonSerializer.DeserializeAsync<Test>(input, smallBufferOptions);

    // Assert
    Assert.Equal("Hello", deserialized.Value);
}
257+
258+
/// <summary>
/// Deserialization target used by <see cref="BomHandlingRegressionTest"/>;
/// carries a single settable string property.
/// </summary>
private class Test
{
    /// <summary>Bound to the "Value" member of the JSON payload.</summary>
    public string Value
    {
        get;
        set;
    }
}
262+
238263
private class Chunk : ReadOnlySequenceSegment<byte>
239264
{
240265
public Chunk(string json, int firstSegmentLength)

src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs

Lines changed: 107 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
using System.Diagnostics;
45
using System.IO;
56
using System.Runtime.CompilerServices;
67
using System.Text.Json.Nodes;
@@ -15,9 +16,9 @@ public abstract partial class JsonSerializerWrapper
1516
public static JsonSerializerWrapper SpanSerializer { get; } = new SpanSerializerWrapper();
1617
public static JsonSerializerWrapper StringSerializer { get; } = new StringSerializerWrapper();
1718
public static StreamingJsonSerializerWrapper AsyncStreamSerializer { get; } = new AsyncStreamSerializerWrapper();
18-
public static StreamingJsonSerializerWrapper AsyncStreamSerializerWithSmallBuffer { get; } = new AsyncStreamSerializerWrapper(forceSmallBufferInOptions: true);
19+
public static StreamingJsonSerializerWrapper AsyncStreamSerializerWithSmallBuffer { get; } = new AsyncStreamSerializerWrapper(forceSmallBufferInOptions: true, forceBomInsertions: true);
1920
public static StreamingJsonSerializerWrapper SyncStreamSerializer { get; } = new SyncStreamSerializerWrapper();
20-
public static StreamingJsonSerializerWrapper SyncStreamSerializerWithSmallBuffer { get; } = new SyncStreamSerializerWrapper(forceSmallBufferInOptions: true);
21+
public static StreamingJsonSerializerWrapper SyncStreamSerializerWithSmallBuffer { get; } = new SyncStreamSerializerWrapper(forceSmallBufferInOptions: true, forceBomInsertions: true);
2122
public static JsonSerializerWrapper ReaderWriterSerializer { get; } = new ReaderWriterSerializerWrapper();
2223
public static JsonSerializerWrapper DocumentSerializer { get; } = new DocumentSerializerWrapper();
2324
public static JsonSerializerWrapper ElementSerializer { get; } = new ElementSerializerWrapper();
@@ -120,17 +121,22 @@ public override Task<object> DeserializeWrapper(string json, Type type, JsonSeri
120121
private class AsyncStreamSerializerWrapper : StreamingJsonSerializerWrapper
121122
{
122123
private readonly bool _forceSmallBufferInOptions;
124+
private readonly bool _forceBomInsertions;
123125

124126
public override bool IsAsyncSerializer => true;
125127

126-
public AsyncStreamSerializerWrapper(bool forceSmallBufferInOptions = false)
128+
public AsyncStreamSerializerWrapper(bool forceSmallBufferInOptions = false, bool forceBomInsertions = false)
127129
{
128130
_forceSmallBufferInOptions = forceSmallBufferInOptions;
131+
_forceBomInsertions = forceBomInsertions;
129132
}
130133

131134
/// <summary>
/// Returns <paramref name="options"/> unchanged unless this wrapper was created with
/// forceSmallBufferInOptions, in which case the instance is routed through
/// JsonSerializerOptionsSmallBufferMapper.ResolveOptionsInstanceWithSmallBuffer.
/// </summary>
private JsonSerializerOptions? ResolveOptionsInstance(JsonSerializerOptions? options)
{
    if (!_forceSmallBufferInOptions)
    {
        return options;
    }

    return JsonSerializerOptionsSmallBufferMapper.ResolveOptionsInstanceWithSmallBuffer(options);
}
133136

137+
/// <summary>
/// Wraps a non-null <paramref name="stream"/> in a Utf8BomInsertingStream when BOM
/// insertion is forced; otherwise (including for null) returns the argument as-is.
/// </summary>
private Stream ResolveReadStream(Stream stream)
{
    if (stream is null || !_forceBomInsertions)
    {
        return stream;
    }

    return new Utf8BomInsertingStream(stream);
}
139+
134140
public override Task SerializeWrapper<T>(Stream utf8Json, T value, JsonSerializerOptions options = null)
135141
{
136142
return JsonSerializer.SerializeAsync<T>(utf8Json, value, ResolveOptionsInstance(options));
@@ -153,38 +159,43 @@ public override Task SerializeWrapper(Stream stream, object value, Type inputTyp
153159

154160
public override async Task<T> DeserializeWrapper<T>(Stream utf8Json, JsonSerializerOptions options = null)
155161
{
156-
return await JsonSerializer.DeserializeAsync<T>(utf8Json, ResolveOptionsInstance(options));
162+
return await JsonSerializer.DeserializeAsync<T>(ResolveReadStream(utf8Json), ResolveOptionsInstance(options));
157163
}
158164

159165
public override async Task<object> DeserializeWrapper(Stream utf8Json, Type returnType, JsonSerializerOptions options = null)
160166
{
161-
return await JsonSerializer.DeserializeAsync(utf8Json, returnType, ResolveOptionsInstance(options));
167+
return await JsonSerializer.DeserializeAsync(ResolveReadStream(utf8Json), returnType, ResolveOptionsInstance(options));
162168
}
163169

164170
public override async Task<T> DeserializeWrapper<T>(Stream utf8Json, JsonTypeInfo<T> jsonTypeInfo)
165171
{
166-
return await JsonSerializer.DeserializeAsync<T>(utf8Json, jsonTypeInfo);
172+
return await JsonSerializer.DeserializeAsync<T>(ResolveReadStream(utf8Json), jsonTypeInfo);
167173
}
168174

169175
public override async Task<object> DeserializeWrapper(Stream utf8Json, Type returnType, JsonSerializerContext context)
170176
{
171-
return await JsonSerializer.DeserializeAsync(utf8Json, returnType, context);
177+
return await JsonSerializer.DeserializeAsync(ResolveReadStream(utf8Json), returnType, context);
172178
}
173179
}
174180

175181
private class SyncStreamSerializerWrapper : StreamingJsonSerializerWrapper
176182
{
177183
private readonly bool _forceSmallBufferInOptions;
184+
private readonly bool _forceBomInsertions;
185+
186+
public override bool IsAsyncSerializer => false;
178187

179-
public SyncStreamSerializerWrapper(bool forceSmallBufferInOptions = false)
188+
public SyncStreamSerializerWrapper(bool forceSmallBufferInOptions = false, bool forceBomInsertions = false)
180189
{
181190
_forceSmallBufferInOptions = forceSmallBufferInOptions;
191+
_forceBomInsertions = forceBomInsertions;
182192
}
183193

184194
/// <summary>
/// Returns <paramref name="options"/> unchanged unless this wrapper was created with
/// forceSmallBufferInOptions, in which case the instance is routed through
/// JsonSerializerOptionsSmallBufferMapper.ResolveOptionsInstanceWithSmallBuffer.
/// </summary>
private JsonSerializerOptions? ResolveOptionsInstance(JsonSerializerOptions? options)
{
    if (!_forceSmallBufferInOptions)
    {
        return options;
    }

    return JsonSerializerOptionsSmallBufferMapper.ResolveOptionsInstanceWithSmallBuffer(options);
}
186196

187-
public override bool IsAsyncSerializer => false;
197+
/// <summary>
/// Wraps a non-null <paramref name="stream"/> in a Utf8BomInsertingStream when BOM
/// insertion is forced; otherwise (including for null) returns the argument as-is.
/// </summary>
private Stream ResolveReadStream(Stream stream)
{
    if (stream is null || !_forceBomInsertions)
    {
        return stream;
    }

    return new Utf8BomInsertingStream(stream);
}
188199

189200
public override Task SerializeWrapper<T>(Stream utf8Json, T value, JsonSerializerOptions options = null)
190201
{
@@ -212,25 +223,25 @@ public override Task SerializeWrapper(Stream stream, object value, Type inputTyp
212223

213224
public override Task<T> DeserializeWrapper<T>(Stream utf8Json, JsonSerializerOptions options = null)
214225
{
215-
T result = JsonSerializer.Deserialize<T>(utf8Json, ResolveOptionsInstance(options));
226+
T result = JsonSerializer.Deserialize<T>(ResolveReadStream(utf8Json), ResolveOptionsInstance(options));
216227
return Task.FromResult(result);
217228
}
218229

219230
public override Task<object> DeserializeWrapper(Stream utf8Json, Type returnType, JsonSerializerOptions options = null)
220231
{
221-
object result = JsonSerializer.Deserialize(utf8Json, returnType, ResolveOptionsInstance(options));
232+
object result = JsonSerializer.Deserialize(ResolveReadStream(utf8Json), returnType, ResolveOptionsInstance(options));
222233
return Task.FromResult(result);
223234
}
224235

225236
public override Task<T> DeserializeWrapper<T>(Stream utf8Json, JsonTypeInfo<T> jsonTypeInfo)
226237
{
227-
T result = JsonSerializer.Deserialize<T>(utf8Json, jsonTypeInfo);
238+
T result = JsonSerializer.Deserialize<T>(ResolveReadStream(utf8Json), jsonTypeInfo);
228239
return Task.FromResult(result);
229240
}
230241

231242
public override Task<object> DeserializeWrapper(Stream utf8Json, Type returnType, JsonSerializerContext context)
232243
{
233-
object result = JsonSerializer.Deserialize(utf8Json, returnType, context);
244+
object result = JsonSerializer.Deserialize(ResolveReadStream(utf8Json), returnType, context);
234245
return Task.FromResult(result);
235246
}
236247
}
@@ -653,5 +664,88 @@ public static JsonSerializerOptions ResolveOptionsInstanceWithSmallBuffer(JsonSe
653664
return smallBufferCopy;
654665
}
655666
}
667+
668+
/// <summary>
/// Read-only, non-seekable stream wrapper that makes the data it yields start with a
/// UTF-8 byte order mark: the BOM is prepended unless the wrapped stream already begins
/// with one. The wrapped stream is not disposed by this type.
/// </summary>
private sealed class Utf8BomInsertingStream : Stream
{
    private const int Utf8BomLength = 3;
    private static readonly byte[] s_utf8Bom = Encoding.UTF8.GetPreamble();

    private readonly Stream _source;

    // Staging buffer laid out as [inserted BOM | first bytes pulled from the source].
    // Remains null until the first Read call, which is when the source is probed.
    private byte[]? _prefixBytes;
    private int _prefixBytesOffset;
    private int _prefixBytesCount;

    /// <summary>Wraps <paramref name="source"/>, which is expected to be readable.</summary>
    public Utf8BomInsertingStream(Stream source)
    {
        Debug.Assert(source.CanRead);
        _source = source;
    }

    public override bool CanRead => _source.CanRead;
    public override bool CanSeek => false;
    public override bool CanWrite => false;

    /// <summary>
    /// Copies pending prefix bytes (BOM plus the probed source bytes) into
    /// <paramref name="buffer"/> first, then fills any remaining room from the source.
    /// </summary>
    /// <returns>The total number of bytes written into <paramref name="buffer"/>.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (_prefixBytes is null)
        {
            // This is the first read operation; read the first 3 bytes
            // from the source to determine if it already includes a BOM.
            // Only insert a BOM if it's missing from the source stream.

            _prefixBytes = new byte[2 * Utf8BomLength];
            int bytesRead = ReadExactlyFromSource(_prefixBytes, Utf8BomLength, Utf8BomLength);

            if (_prefixBytes.AsSpan(Utf8BomLength).SequenceEqual(s_utf8Bom))
            {
                // Source already starts with a BOM: replay only the bytes that were read.
                _prefixBytesOffset = Utf8BomLength;
                _prefixBytesCount = Utf8BomLength;
            }
            else
            {
                // No BOM in the source: emit our own BOM followed by whatever was read
                // (possibly fewer than 3 bytes if the source is shorter).
                s_utf8Bom.CopyTo(_prefixBytes, 0);
                _prefixBytesOffset = 0;
                _prefixBytesCount = Utf8BomLength + bytesRead;
            }
        }

        int prefixBytesToWrite = Math.Min(_prefixBytesCount, count);
        if (prefixBytesToWrite > 0)
        {
            _prefixBytes.AsSpan(_prefixBytesOffset, prefixBytesToWrite).CopyTo(buffer.AsSpan(offset, count));
            _prefixBytesOffset += prefixBytesToWrite;
            _prefixBytesCount -= prefixBytesToWrite;
            offset += prefixBytesToWrite;
            count -= prefixBytesToWrite;

            if (count == 0)
            {
                // The caller's buffer is already full. Skip the source entirely rather
                // than issuing a zero-length read, which some stream implementations
                // treat as "wait until data is available".
                return prefixBytesToWrite;
            }
        }

        return prefixBytesToWrite + _source.Read(buffer, offset, count);
    }

    /// <summary>
    /// Reads from the source until <paramref name="count"/> bytes have been obtained or
    /// the source reports end of stream; returns the number of bytes actually read.
    /// </summary>
    private int ReadExactlyFromSource(byte[] buffer, int offset, int count)
    {
        int totalRead = 0;

        while (totalRead < count)
        {
            int read = _source.Read(buffer, offset + totalRead, count - totalRead);
            if (read == 0)
            {
                // End of source reached before the requested count was satisfied.
                break;
            }

            totalRead += read;
        }

        return totalRead;
    }

    public override long Length => throw new NotSupportedException();
    public override long Position { get => throw new NotSupportedException(); set => throw new NotSupportedException(); }

    // Flushing a read-only stream is valid and has nothing to do, so this is a no-op
    // instead of throwing; that keeps the wrapper usable wherever a Stream is flushed.
    public override void Flush() { }

    public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();
    public override void SetLength(long value) => throw new NotSupportedException();
    public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();
}
656750
}
657751
}

0 commit comments

Comments
 (0)