From 36d1093f5f6fbd9bef20872a3ceccdf7abdc8828 Mon Sep 17 00:00:00 2001 From: Jason Nelson Date: Sun, 14 Apr 2024 19:50:52 -0700 Subject: [PATCH] Update int buffers to exact unsigned max length and eliminate additional byte allocation --- src/UglyToad.PdfPig.Core/ReadHelper.cs | 35 +++++++++++++++----------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/UglyToad.PdfPig.Core/ReadHelper.cs b/src/UglyToad.PdfPig.Core/ReadHelper.cs index b2971ffd6..1ede57fbe 100644 --- a/src/UglyToad.PdfPig.Core/ReadHelper.cs +++ b/src/UglyToad.PdfPig.Core/ReadHelper.cs @@ -199,17 +199,21 @@ public static long ReadLong(IInputBytes bytes) { SkipSpaces(bytes); - ReadOnlySpan<byte> longBuffer = ReadNumberAsUtf8Bytes(bytes); + Span<byte> buffer = stackalloc byte[19]; // digits in long.MaxValue (sign not included) - if (Utf8Parser.TryParse(longBuffer, out long result, out _)) + ReadNumberAsUtf8Bytes(bytes, buffer, out int bytesRead); + + ReadOnlySpan<byte> longBytes = buffer.Slice(0, bytesRead); + + if (Utf8Parser.TryParse(longBytes, out long result, out _)) { return result; } else { - bytes.Seek(bytes.CurrentOffset - longBuffer.Length); + bytes.Seek(bytes.CurrentOffset - bytesRead); - throw new InvalidOperationException($"Error: Expected a long type at offset {bytes.CurrentOffset}, instead got \'{OtherEncodings.BytesAsLatin1String(longBuffer)}\'"); + throw new InvalidOperationException($"Error: Expected a long type at offset {bytes.CurrentOffset}, instead got \'{OtherEncodings.BytesAsLatin1String(longBytes)}\'"); } } @@ -233,17 +237,21 @@ public static int ReadInt(IInputBytes bytes) SkipSpaces(bytes); - ReadOnlySpan<byte> intBuffer = ReadNumberAsUtf8Bytes(bytes); + Span<byte> buffer = stackalloc byte[10]; // max uint32 length + + ReadNumberAsUtf8Bytes(bytes, buffer, out int bytesRead); - if (Utf8Parser.TryParse(intBuffer, out int result, out _)) + var intBytes = buffer.Slice(0, bytesRead); + + if (Utf8Parser.TryParse(intBytes, out int result, out _)) { return result; } else { - bytes.Seek(bytes.CurrentOffset - 
intBuffer.Length); + bytes.Seek(bytes.CurrentOffset - bytesRead); - throw new PdfDocumentFormatException($"Error: Expected an integer type at offset {bytes.CurrentOffset}, instead got \'{OtherEncodings.BytesAsLatin1String(intBuffer)}\'"); + throw new PdfDocumentFormatException($"Error: Expected an integer type at offset {bytes.CurrentOffset}, instead got \'{OtherEncodings.BytesAsLatin1String(intBytes)}\'"); } } @@ -296,9 +304,8 @@ public static bool IsValidUtf8(byte[] input) #endif } - private static byte[] ReadNumberAsUtf8Bytes(IInputBytes reader) + private static void ReadNumberAsUtf8Bytes(IInputBytes reader, scoped Span<byte> buffer, out int bytesRead) { - Span<byte> buffer = stackalloc byte[MaximumNumberStringLength]; // 20 bytes int position = 0; byte lastByte; @@ -311,12 +318,12 @@ private static byte[] ReadNumberAsUtf8Bytes(IInputBytes reader) lastByte != '(' && // PDFBOX-2579 lastByte != 0) { - buffer[position++] = lastByte; - - if (position > MaximumNumberStringLength) + if (position >= buffer.Length) { throw new InvalidOperationException($"Number \'{OtherEncodings.BytesAsLatin1String(buffer.Slice(0, position))}\' is getting too long, stop reading at offset {reader.CurrentOffset}"); } + + buffer[position++] = lastByte; } if (!reader.IsAtEnd()) @@ -324,7 +331,7 @@ private static byte[] ReadNumberAsUtf8Bytes(IInputBytes reader) reader.Seek(reader.CurrentOffset - 1); } - return buffer.Slice(0, position).ToArray(); + bytesRead = position; } } }