Skip to content

Commit

Permalink
Reduce Allocations (#821)
Browse files (browse the repository at this point in the history)
* Introduce ValueStringBuilder

* Make NumericTokenizer and PlainTextTokenizer thread-safe

* Replace ListPool with ArrayPoolBufferWriter

* Seal ITokenizer classes

* Eliminate array allocation in Type1ArrayTokenizer

* Eliminate array allocation in AcroFormFactory

* Eliminate StringBuilder allocation in Page.GetText

* Optimize PdfSubpath.ToLines

* Eliminate various allocations when parsing CompactFontFormat

* Remove unused FromOctalInt helper

* Ensure Pdf.Content is not null

* Write ASCII values directly to stream (avoiding allocations)

* Avoid encoding additional ASCII values

* Eliminate allocations in TokenWriter.WriteName

* Eliminate allocation in TokenWriter.WriteNumber

* Add System.Memory reference to Fonts
  • Loading branch information
iamcarbon authored Apr 28, 2024
1 parent 1ef2e12 commit 7f42a8d
Show file tree
Hide file tree
Showing 32 changed files with 515 additions and 470 deletions.
8 changes: 0 additions & 8 deletions src/UglyToad.PdfPig.Core/OctalHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,6 @@ public static int FromOctalDigits(ReadOnlySpan<short> octal)
return sum;
}

/// <summary>
/// Interpret an int as octal.
/// </summary>
public static int FromOctalInt(int input)
{
return Convert.ToInt32($"{input}", 8);
}

private static int QuickPower(int x, int pow)
{
int ret = 1;
Expand Down
8 changes: 4 additions & 4 deletions src/UglyToad.PdfPig.Core/PdfSubpath.cs
Original file line number Diff line number Diff line change
Expand Up @@ -634,15 +634,15 @@ public override IReadOnlyList<Line> ToLines(int n)
throw new ArgumentException("BezierCurve.ToLines(): n must be greater than 0.");
}

List<Line> lines = new List<Line>();
var lines = new Line[n];
var previousPoint = StartPoint;

for (int p = 1; p <= n; p++)
{
double t = p / (double)n;
var currentPoint = new PdfPoint(ValueWithT(StartPoint.X, ControlPoint.X, EndPoint.X, t),
ValueWithT(StartPoint.Y, ControlPoint.Y, EndPoint.Y, t));
lines.Add(new Line(previousPoint, currentPoint));
lines[p - 1] = new Line(previousPoint, currentPoint);
previousPoint = currentPoint;
}

Expand Down Expand Up @@ -793,15 +793,15 @@ public override IReadOnlyList<Line> ToLines(int n)
throw new ArgumentException("BezierCurve.ToLines(): n must be greater than 0.");
}

List<Line> lines = new List<Line>();
var lines = new Line[n];
var previousPoint = StartPoint;

for (int p = 1; p <= n; p++)
{
double t = p / (double)n;
var currentPoint = new PdfPoint(ValueWithT(StartPoint.X, FirstControlPoint.X, SecondControlPoint.X, EndPoint.X, t),
ValueWithT(StartPoint.Y, FirstControlPoint.Y, SecondControlPoint.Y, EndPoint.Y, t));
lines.Add(new Line(previousPoint, currentPoint));
lines[p - 1] = new Line(previousPoint, currentPoint);
previousPoint = currentPoint;
}
return lines;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,8 @@ internal abstract class CompactFontFormatCharset : ICompactFontFormatCharset

public bool IsCidCharset { get; } = false;

protected CompactFontFormatCharset(IReadOnlyList<(int glyphId, int stringId, string name)> data)
protected CompactFontFormatCharset(ReadOnlySpan<(int glyphId, int stringId, string name)> data)
{
if (data == null)
{
throw new ArgumentNullException(nameof(data));
}

var dictionary = new Dictionary<int, (int stringId, string name)>
{
{0, (0, ".notdef")}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Charsets
{
using System.Collections.Generic;
using System;

/// <summary>
/// A Charset from a Compact Font Format font file best for fonts with relatively unordered string ids.
/// </summary>
internal class CompactFontFormatFormat0Charset : CompactFontFormatCharset
internal sealed class CompactFontFormatFormat0Charset : CompactFontFormatCharset
{
public CompactFontFormatFormat0Charset(IReadOnlyList<(int glyphId, int stringId, string name)> data)
:base(data)
public CompactFontFormatFormat0Charset(ReadOnlySpan<(int glyphId, int stringId, string name)> data)
: base(data)
{
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Charsets
{
using System.Collections.Generic;
using System;

/// <summary>
/// A Charset from a Compact Font Format font file best for fonts with well ordered string ids.
/// </summary>
internal class CompactFontFormatFormat1Charset : CompactFontFormatCharset
internal sealed class CompactFontFormatFormat1Charset : CompactFontFormatCharset
{
public CompactFontFormatFormat1Charset(IReadOnlyList<(int glyphId, int stringId, string name)> data)
public CompactFontFormatFormat1Charset(ReadOnlySpan<(int glyphId, int stringId, string name)> data)
: base(data)
{
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat.Charsets
{
using System.Collections.Generic;
using System;

/// <summary>
/// A Charset from a Compact Font Format font file best for fonts with a large number of well ordered string ids.
/// </summary>
internal class CompactFontFormatFormat2Charset : CompactFontFormatCharset
{
public CompactFontFormatFormat2Charset(IReadOnlyList<(int glyphId, int stringId, string name)> data)
public CompactFontFormatFormat2Charset(ReadOnlySpan<(int glyphId, int stringId, string name)> data)
: base(data)
{
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using Charsets;
using Encodings;
using Fonts;
using UglyToad.PdfPig.Core;

internal static class CompactFontFormatEncodingReader
{
Expand Down Expand Up @@ -36,13 +37,13 @@ private static CompactFontFormatFormat0Encoding ReadFormat0Encoding(CompactFontF
{
var numberOfCodes = data.ReadCard8();

var values = new List<(int code, int sid, string str)>();
using var values = new ArrayPoolBufferWriter<(int code, int sid, string str)>();
for (var i = 1; i <= numberOfCodes; i++)
{
var code = data.ReadCard8();
var sid = charset.GetStringIdByGlyphId(i);
var str = ReadString(sid, stringIndex);
values.Add((code, sid, str));
values.Write((code, sid, str));
}

IReadOnlyList<CompactFontFormatBuiltInEncoding.Supplement> supplements = [];
Expand All @@ -51,7 +52,7 @@ private static CompactFontFormatFormat0Encoding ReadFormat0Encoding(CompactFontF
supplements = ReadSupplement(data, stringIndex);
}

return new CompactFontFormatFormat0Encoding(values, supplements);
return new CompactFontFormatFormat0Encoding(values.WrittenSpan, supplements);
}

private static CompactFontFormatFormat1Encoding ReadFormat1Encoding(CompactFontFormatData data, ICompactFontFormatCharset charset, ReadOnlySpan<string> stringIndex, byte format)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
namespace UglyToad.PdfPig.Fonts.CompactFontFormat
{
using System;
using System.Collections.Generic;

internal sealed class CompactFontFormatFormat0Encoding : CompactFontFormatBuiltInEncoding
{
public CompactFontFormatFormat0Encoding(IReadOnlyList<(int code, int sid, string str)> values,
public CompactFontFormatFormat0Encoding(
ReadOnlySpan<(int code, int sid, string str)> values,
IReadOnlyList<Supplement> supplements) : base(supplements)
{
Add(0, 0, NotDefined);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,42 +139,42 @@ private static ICompactFontFormatCharset ReadCharset(CompactFontFormatData data,
{
case 0:
{
var glyphToNamesAndStringId = new List<(int glyphId, int stringId, string name)>();
using var glyphToNamesAndStringId = new ArrayPoolBufferWriter<(int glyphId, int stringId, string name)>();

for (var glyphId = 1; glyphId < charStringIndex.Count; glyphId++)
{
var stringId = data.ReadSid();
glyphToNamesAndStringId.Add((glyphId, stringId, ReadString(stringId, stringIndex)));
glyphToNamesAndStringId.Write((glyphId, stringId, ReadString(stringId, stringIndex)));
}

return new CompactFontFormatFormat0Charset(glyphToNamesAndStringId);
return new CompactFontFormatFormat0Charset(glyphToNamesAndStringId.WrittenSpan);
}
case 1:
case 2:
{
var glyphToNamesAndStringId = new List<(int glyphId, int stringId, string name)>();
using var glyphToNamesAndStringId = new ArrayPoolBufferWriter<(int glyphId, int stringId, string name)>();

for (var glyphId = 1; glyphId < charStringIndex.Count; glyphId++)
{
var firstSid = data.ReadSid();
var numberInRange = format == 1 ? data.ReadCard8() : data.ReadCard16();

glyphToNamesAndStringId.Add((glyphId, firstSid, ReadString(firstSid, stringIndex)));
glyphToNamesAndStringId.Write((glyphId, firstSid, ReadString(firstSid, stringIndex)));
for (var i = 0; i < numberInRange; i++)
{
glyphId++;
var sid = firstSid + i + 1;
glyphToNamesAndStringId.Add((glyphId, sid, ReadString(sid, stringIndex)));
glyphToNamesAndStringId.Write((glyphId, sid, ReadString(sid, stringIndex)));
}
}

if (format == 1)
{

return new CompactFontFormatFormat1Charset(glyphToNamesAndStringId);
return new CompactFontFormatFormat1Charset(glyphToNamesAndStringId.WrittenSpan);
}

return new CompactFontFormatFormat2Charset(glyphToNamesAndStringId);
return new CompactFontFormatFormat2Charset(glyphToNamesAndStringId.WrittenSpan);
}
default:
throw new InvalidOperationException($"Unrecognized format for the Charset table in a CFF font. Got: {format}.");
Expand Down
11 changes: 6 additions & 5 deletions src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1ArrayTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@
using Tokenization;

/// <inheritdoc />
public class Type1ArrayTokenizer : ITokenizer
public sealed class Type1ArrayTokenizer : ITokenizer
{
/// <inheritdoc />
public bool ReadsNextByte { get; } = false;

private static readonly string[] Space = [" "];

/// <inheritdoc />
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
Expand All @@ -36,7 +38,7 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
builder.Append((char) inputBytes.CurrentByte);
}

var parts = builder.ToString().Split(new[] {" "}, StringSplitOptions.RemoveEmptyEntries);
var parts = builder.ToString().Split(Space, StringSplitOptions.RemoveEmptyEntries);

var tokens = new List<IToken>();

Expand All @@ -50,7 +52,7 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
}
else
{
tokens.Add(OperatorToken.Create(part));
tokens.Add(OperatorToken.Create(part.AsSpan()));
}

continue;
Expand All @@ -68,8 +70,7 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
continue;
}

tokens.Add(OperatorToken.Create(part));

tokens.Add(OperatorToken.Create(part.AsSpan()));
}

token = new ArrayToken(tokens);
Expand Down
3 changes: 3 additions & 0 deletions src/UglyToad.PdfPig.Fonts/UglyToad.PdfPig.Fonts.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,7 @@
<ItemGroup Condition="'$(TargetFramework)'=='net462'">
<PackageReference Include="System.ValueTuple" Version="4.5.0" />
</ItemGroup>
<ItemGroup Condition="'$(TargetFramework)'=='netstandard2.0' or '$(TargetFramework)'=='net462' or '$(TargetFramework)'=='net471'">
<PackageReference Include="System.Memory" Version="4.5.5" />
</ItemGroup>
</Project>
15 changes: 0 additions & 15 deletions src/UglyToad.PdfPig.Tests/Util/OctalHelpersTests.cs

This file was deleted.

2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
using Scanner;
using Tokens;

internal class ArrayTokenizer : ITokenizer
internal sealed class ArrayTokenizer : ITokenizer
{
private readonly bool usePdfDocEncoding;

Expand Down
6 changes: 3 additions & 3 deletions src/UglyToad.PdfPig.Tokenization/CommentTokenizer.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
namespace UglyToad.PdfPig.Tokenization
{
using System.Text;
using Core;
using System.Text;
using Tokens;

internal class CommentTokenizer : ITokenizer
internal sealed class CommentTokenizer : ITokenizer
{
public bool ReadsNextByte { get; } = true;

Expand All @@ -17,7 +17,7 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
return false;
}

var builder = new StringBuilder();
using var builder = new ValueStringBuilder();

while (inputBytes.MoveNext() && !ReadHelper.IsEndOfLine(inputBytes.CurrentByte))
{
Expand Down
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Tokenization/EndOfLineTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
/// <summary>
/// Read an <see cref="EndOfLineToken"/>.
/// </summary>
public class EndOfLineTokenizer : ITokenizer
public sealed class EndOfLineTokenizer : ITokenizer
{
/// <inheritdoc />
public bool ReadsNextByte { get; } = false;
Expand Down
64 changes: 0 additions & 64 deletions src/UglyToad.PdfPig.Tokenization/ListPool.cs

This file was deleted.

2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Tokenization/NameTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
using System.Text.Unicode;
#endif

internal class NameTokenizer : ITokenizer
internal sealed class NameTokenizer : ITokenizer
{
static NameTokenizer()
{
Expand Down
Loading

0 comments on commit 7f42a8d

Please sign in to comment.