Skip to content

Commit

Permalink
Improve Code Quality (#818)
Browse files Browse the repository at this point in the history
* Make AdobeFontMetricsLigature a struct

* Make AdobeFontMetricsCharacterSize a struct

* Eliminate allocation in CompactFontFormatData

* Pass TransformationMatrix by reference

* Seal Encoding classes

* Make SubTableHeaderEntry a readonly struct

* Introduce StringSplitter and eliminate various allocations in GlyphListFactory

* Eliminate a few substring allocations

* Use char overload on StringBuilder

* Eliminate virtual calls on stringIndex

* Optimize ReadHelper ReadLong and ReadInt methods

* Add additional readonly annotations to PdfRectangle

* Optimize NameTokenizer

* Eliminate allocation in TrueTypeGlyphTableSubsetter

* Use empty arrays

* Eliminate allocations in OperationWriteHelper.WriteHex

* Use simplified DecryptCbc method on .NET 6+

* Fix windows-1252 encoding not working on net6.0 and 8.0

* Update int buffers to exact unsigned max length and eliminate additional byte allocation

* Fix typo

* Remove unused constant
  • Loading branch information
iamcarbon authored Apr 18, 2024
1 parent 0f7077b commit 1ef2e12
Show file tree
Hide file tree
Showing 42 changed files with 325 additions and 166 deletions.
8 changes: 4 additions & 4 deletions src/UglyToad.PdfPig.Core/PdfRectangle.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,22 @@ public struct PdfRectangle
/// <summary>
/// Top left point of the rectangle.
/// </summary>
public PdfPoint TopLeft { get; }
public readonly PdfPoint TopLeft { get; }

/// <summary>
/// Top right point of the rectangle.
/// </summary>
public PdfPoint TopRight { get; }
public readonly PdfPoint TopRight { get; }

/// <summary>
/// Bottom right point of the rectangle.
/// </summary>
public PdfPoint BottomRight { get; }
public readonly PdfPoint BottomRight { get; }

/// <summary>
/// Bottom left point of the rectangle.
/// </summary>
public PdfPoint BottomLeft { get; }
public readonly PdfPoint BottomLeft { get; }

/// <summary>
/// Centroid point of the rectangle.
Expand Down
69 changes: 34 additions & 35 deletions src/UglyToad.PdfPig.Core/ReadHelper.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
namespace UglyToad.PdfPig.Core
{
using System;
using System.Buffers.Text;
using System.Collections.Generic;
using System.Globalization;
using System.Text;

#if NET8_0_OR_GREATER
Expand Down Expand Up @@ -41,8 +41,6 @@ public static class ReadHelper
'\f'
];

private static readonly int MaximumNumberStringLength = long.MaxValue.ToString("D").Length;

/// <summary>
/// Read a string from the input until a newline.
/// </summary>
Expand Down Expand Up @@ -134,7 +132,7 @@ public static bool IsEndOfName(int ch)
/// </remarks>
public static bool IsWhitespace(byte c)
{
// Byte values treated as whitespace: 0 (NUL), 32 (space), line feed, carriage return, 9 (horizontal tab) and 12 (form feed).
return c == 0 || c == 32 || c == AsciiLineFeed || c == AsciiCarriageReturn || c == 9 || c == 12;
return c is 0 or 32 or AsciiLineFeed or AsciiCarriageReturn or 9 or 12;
}

/// <summary>
Expand Down Expand Up @@ -198,25 +196,24 @@ public static bool IsString(IInputBytes bytes, string s)
public static long ReadLong(IInputBytes bytes)
{
SkipSpaces(bytes);
long retval;

StringBuilder longBuffer = ReadStringNumber(bytes);
Span<byte> buffer = stackalloc byte[19]; // 19 = digit count of long.MaxValue (int64; ulong.MaxValue needs 20). A 20-character token such as long.MinValue with its sign does not fit and makes the read helper throw.

try
ReadNumberAsUtf8Bytes(bytes, buffer, out int bytesRead);

ReadOnlySpan<byte> longBytes = buffer.Slice(0, bytesRead);

// NOTE(review): the bytesConsumed out value is discarded, so a numeric prefix followed by other bytes
// presumably still counts as success here — confirm against the Utf8Parser documentation.
if (Utf8Parser.TryParse(longBytes, out long result, out _))
{
retval = long.Parse(longBuffer.ToString(), CultureInfo.InvariantCulture);
return result;
}
catch (FormatException e)
else
{
var bytesToReverse = OtherEncodings.StringAsLatin1Bytes(longBuffer.ToString());
bytes.Seek(bytes.CurrentOffset - bytesToReverse.Length);
// Rewind by the number of bytes copied into the buffer so the reported offset is where the token started.
bytes.Seek(bytes.CurrentOffset - bytesRead);

throw new InvalidOperationException($"Error: Expected a long type at offset {bytes.CurrentOffset}, instead got \'{longBuffer}\'", e);
throw new InvalidOperationException($"Error: Expected a long type at offset {bytes.CurrentOffset}, instead got \'{OtherEncodings.BytesAsLatin1String(longBytes)}\'");
}

return retval;
}


/// <summary>
/// Whether the given value is a digit or not.
Expand All @@ -231,28 +228,29 @@ public static bool IsDigit(int c)
/// </summary>
public static int ReadInt(IInputBytes bytes)
{
if (bytes == null)
if (bytes is null)
{
throw new ArgumentNullException(nameof(bytes));
}

SkipSpaces(bytes);
int result;

var intBuffer = ReadStringNumber(bytes);
// 10 = digit count of uint.MaxValue (and of int.MaxValue).
// NOTE(review): an 11-character token such as "-2147483648" (int.MinValue) exceeds this buffer, so the read helper
// throws InvalidOperationException instead of the PdfDocumentFormatException raised below — confirm this limit is intended.
Span<byte> buffer = stackalloc byte[10]; // max formatted uint32 length

try
ReadNumberAsUtf8Bytes(bytes, buffer, out int bytesRead);

var intBytes = buffer.Slice(0, bytesRead);

// NOTE(review): the bytesConsumed out value is discarded, so a numeric prefix followed by other bytes
// presumably still counts as success here — confirm against the Utf8Parser documentation.
if (Utf8Parser.TryParse(intBytes, out int result, out _))
{
result = int.Parse(intBuffer.ToString(), CultureInfo.InvariantCulture);
return result;
}
catch (Exception e)
else
{
bytes.Seek(bytes.CurrentOffset - OtherEncodings.StringAsLatin1Bytes(intBuffer.ToString()).Length);

throw new PdfDocumentFormatException($"Error: Expected an integer type at offset {bytes.CurrentOffset}", e);
// Rewind by the number of bytes copied into the buffer so the reported offset is where the token started.
bytes.Seek(bytes.CurrentOffset - bytesRead);
throw new PdfDocumentFormatException($"Error: Expected an integer type at offset {bytes.CurrentOffset}, instead got \'{OtherEncodings.BytesAsLatin1String(intBytes)}\'");
}

return result;
}

/// <summary>
Expand Down Expand Up @@ -304,33 +302,34 @@ public static bool IsValidUtf8(byte[] input)
#endif
}

// Copies the bytes of the next token from the reader into the caller-supplied buffer and reports how many were written.
// Reading stops at the end of input or at the first delimiter byte: space, line feed, carriage return,
// 60 ('<'), '[', '(' or 0. Throws InvalidOperationException when the token has more bytes than the buffer can hold.
private static StringBuilder ReadStringNumber(IInputBytes reader)
private static void ReadNumberAsUtf8Bytes(IInputBytes reader, scoped Span<byte> buffer, out int bytesRead)
{
byte lastByte;
StringBuilder buffer = new StringBuilder();
int position = 0;

byte lastByte;

while (reader.MoveNext() && (lastByte = reader.CurrentByte) != ' ' &&
lastByte != AsciiLineFeed &&
lastByte != AsciiCarriageReturn &&
lastByte != 60 && //see sourceforge bug 1714707
lastByte != 60 && // see sourceforge bug 1714707
lastByte != '[' && // PDFBOX-1845
lastByte != '(' && // PDFBOX-2579
lastByte != 0)
{
buffer.Append((char)lastByte);

if (buffer.Length > MaximumNumberStringLength)
// The capacity check runs before the write, so exactly buffer.Length bytes are accepted and the next one throws.
if (position >= buffer.Length)
{
throw new InvalidOperationException($"Number \'{buffer}\' is getting too long, stop reading at offset {reader.CurrentOffset}");
throw new InvalidOperationException($"Number \'{OtherEncodings.BytesAsLatin1String(buffer.Slice(0, position))}\' is getting too long, stop reading at offset {reader.CurrentOffset}");
}

buffer[position++] = lastByte;
}

// Step back one byte unless the reader reports end of input; presumably this leaves the delimiter for the caller to read again.
if (!reader.IsAtEnd())
{
reader.Seek(reader.CurrentOffset - 1);
}

return buffer;
bytesRead = position;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
/// The x and y components of the width vector of the font's characters.
/// Presence implies that IsFixedPitch is true.
/// </summary>
public class AdobeFontMetricsCharacterSize
public readonly struct AdobeFontMetricsCharacterSize
{
/// <summary>
/// The horizontal width.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
/// <summary>
/// A ligature in an Adobe Font Metrics individual character.
/// </summary>
public class AdobeFontMetricsLigature
public readonly struct AdobeFontMetricsLigature
{
/// <summary>
/// The character to join with to form a ligature.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
using Core;

/// <summary>
/// Provides access to the raw bytes of this Compact Font Format file with utility methods for reading data types from it.
Expand Down Expand Up @@ -37,14 +35,7 @@ public CompactFontFormatData(ReadOnlyMemory<byte> dataBytes)
/// </summary>
public string ReadString(int length, Encoding encoding)
{
var bytes = new byte[length];

for (var i = 0; i < bytes.Length; i++)
{
bytes[i] = ReadByte();
}

return encoding.GetString(bytes);
// Decodes the span returned by ReadSpan(length) directly with the supplied encoding.
return encoding.GetString(ReadSpan(length));
}

/// <summary>
Expand Down Expand Up @@ -86,6 +77,20 @@ public int ReadOffset(int offsetSize)
return value;
}

/// <summary>
/// Returns a view over the <paramref name="count"/> bytes that follow the current <c>Position</c>
/// and then advances <c>Position</c> by <paramref name="count"/>.
/// </summary>
/// <param name="count">The number of bytes to expose.</param>
/// <returns>A slice of the underlying data starting at <c>Position + 1</c>.</returns>
/// <exception cref="IndexOutOfRangeException">
/// Thrown when <c>Position + count</c> is not below the length of the underlying data.
/// </exception>
internal ReadOnlySpan<byte> ReadSpan(int count)
{
    var endPosition = Position + count;

    // Guard first: Position is left untouched when the request runs past the data.
    if (endPosition >= dataBytes.Length)
    {
        throw new IndexOutOfRangeException($"Cannot read past end of data. Attempted to read to {Position + count} when the underlying data is {dataBytes.Length} bytes long.");
    }

    // Take the slice before moving Position, because the slice start is derived from the old value.
    ReadOnlySpan<byte> slice = dataBytes.Span.Slice(Position + 1, count);
    Position = endPosition;

    return slice;
}

/// <summary>
/// Read byte.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

internal static class CompactFontFormatEncodingReader
{
public static Encoding ReadEncoding(CompactFontFormatData data, ICompactFontFormatCharset charset, IReadOnlyList<string> stringIndex)
public static Encoding ReadEncoding(CompactFontFormatData data, ICompactFontFormatCharset charset, ReadOnlySpan<string> stringIndex)
{
if (data == null)
{
Expand All @@ -32,7 +32,7 @@ public static Encoding ReadEncoding(CompactFontFormatData data, ICompactFontForm
}
}

private static CompactFontFormatFormat0Encoding ReadFormat0Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, IReadOnlyList<string> stringIndex, byte format)
private static CompactFontFormatFormat0Encoding ReadFormat0Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, ReadOnlySpan<string> stringIndex, byte format)
{
var numberOfCodes = data.ReadCard8();

Expand All @@ -45,7 +45,7 @@ private static CompactFontFormatFormat0Encoding ReadFormat0Encoding(CompactFontF
values.Add((code, sid, str));
}

IReadOnlyList<CompactFontFormatBuiltInEncoding.Supplement> supplements = new List<CompactFontFormatBuiltInEncoding.Supplement>();
IReadOnlyList<CompactFontFormatBuiltInEncoding.Supplement> supplements = [];
if (HasSupplement(format))
{
supplements = ReadSupplement(data, stringIndex);
Expand All @@ -54,7 +54,7 @@ private static CompactFontFormatFormat0Encoding ReadFormat0Encoding(CompactFontF
return new CompactFontFormatFormat0Encoding(values, supplements);
}

private static CompactFontFormatFormat1Encoding ReadFormat1Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, IReadOnlyList<string> stringIndex, byte format)
private static CompactFontFormatFormat1Encoding ReadFormat1Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, ReadOnlySpan<string> stringIndex, byte format)
{
var numberOfRanges = data.ReadCard8();

Expand Down Expand Up @@ -85,7 +85,7 @@ private static CompactFontFormatFormat1Encoding ReadFormat1Encoding(CompactFontF
}

private static IReadOnlyList<CompactFontFormatBuiltInEncoding.Supplement> ReadSupplement(CompactFontFormatData dataInput,
IReadOnlyList<string> stringIndex)
ReadOnlySpan<string> stringIndex)
{
var numberOfSupplements = dataInput.ReadCard8();
var supplements = new CompactFontFormatBuiltInEncoding.Supplement[numberOfSupplements];
Expand All @@ -101,13 +101,13 @@ private static CompactFontFormatFormat1Encoding ReadFormat1Encoding(CompactFontF
return supplements;
}

private static string ReadString(int index, IReadOnlyList<string> stringIndex)
private static string ReadString(int index, ReadOnlySpan<string> stringIndex)
{
if (index >= 0 && index <= 390)
{
return CompactFontFormatStandardStrings.GetName(index);
}
if (index - 391 < stringIndex.Count)
if (index - 391 < stringIndex.Length)
{
return stringIndex[index - 391];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
{
using System;
using System.Collections.Generic;
using System.Linq;
using Charsets;
using CharStrings;
using Core;
Expand All @@ -23,7 +22,7 @@ public CompactFontFormatIndividualFontParser(CompactFontFormatTopLevelDictionary
this.privateDictionaryReader = privateDictionaryReader;
}

public CompactFontFormatFont Parse(CompactFontFormatData data, string name, ReadOnlySpan<byte> topDictionaryIndex, IReadOnlyList<string> stringIndex,
public CompactFontFormatFont Parse(CompactFontFormatData data, string name, ReadOnlySpan<byte> topDictionaryIndex, ReadOnlySpan<string> stringIndex,
CompactFontFormatIndex globalSubroutineIndex)
{
var individualData = new CompactFontFormatData(topDictionaryIndex.ToArray());
Expand Down Expand Up @@ -127,8 +126,10 @@ public CompactFontFormatFont Parse(CompactFontFormatData data, string name, Read
return new CompactFontFormatFont(topDictionary, privateDictionary, charset, Union<Type1CharStrings, Type2CharStrings>.Two(charStrings), fontEncoding);
}

private static ICompactFontFormatCharset ReadCharset(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topDictionary,
CompactFontFormatIndex charStringIndex, IReadOnlyList<string> stringIndex)
private static ICompactFontFormatCharset ReadCharset(CompactFontFormatData data,
CompactFontFormatTopLevelDictionary topDictionary,
CompactFontFormatIndex charStringIndex,
ReadOnlySpan<string> stringIndex)
{
data.Seek(topDictionary.CharSetOffset);

Expand Down Expand Up @@ -180,13 +181,13 @@ private static ICompactFontFormatCharset ReadCharset(CompactFontFormatData data,
}
}

private static string ReadString(int index, IReadOnlyList<string> stringIndex)
private static string ReadString(int index, ReadOnlySpan<string> stringIndex)
{
if (index >= 0 && index <= 390)
{
return CompactFontFormatStandardStrings.GetName(index);
}
if (index - 391 < stringIndex.Count)
if (index - 391 < stringIndex.Length)
{
return stringIndex[index - 391];
}
Expand All @@ -213,9 +214,10 @@ private static Type2CharStrings ReadCharStrings(CompactFontFormatData data, Comp
}
}

private CompactFontFormatCidFont ReadCidFont(CompactFontFormatData data, CompactFontFormatTopLevelDictionary topLevelDictionary,
private CompactFontFormatCidFont ReadCidFont(CompactFontFormatData data,
CompactFontFormatTopLevelDictionary topLevelDictionary,
int numberOfGlyphs,
IReadOnlyList<string> stringIndex,
ReadOnlySpan<string> stringIndex,
CompactFontFormatPrivateDictionary privateDictionary,
ICompactFontFormatCharset charset,
CompactFontFormatIndex globalSubroutines,
Expand Down
Loading

0 comments on commit 1ef2e12

Please sign in to comment.