From eef3b8f13e5c00e61ed588b5c59f97f0ebad7625 Mon Sep 17 00:00:00 2001 From: Adrian-George Bostan Date: Tue, 8 Oct 2019 23:41:20 +0300 Subject: [PATCH] Improve text chunk component (#181) * Add more functionality to text chunks * Add creator text chunk test cases * Improve documentation of the text chunk Fit method --- creator/paragraph.go | 80 +++------------------- creator/styled_paragraph.go | 11 +-- creator/text_chunk.go | 113 +++++++++++++++++++++++++++++-- creator/text_chunk_test.go | 130 ++++++++++++++++++++++++++++++++++++ 4 files changed, 256 insertions(+), 78 deletions(-) create mode 100644 creator/text_chunk_test.go diff --git a/creator/paragraph.go b/creator/paragraph.go index 338f5701e..1d308d5a0 100644 --- a/creator/paragraph.go +++ b/creator/paragraph.go @@ -260,81 +260,20 @@ func (p *Paragraph) wrapText() error { return nil } - var line []rune - lineWidth := 0.0 - p.textLines = nil + chunk := NewTextChunk(p.text, TextStyle{ + Font: p.textFont, + FontSize: p.fontSize, + }) - runes := []rune(p.text) - var widths []float64 - - for _, r := range runes { - // Newline wrapping. - if r == '\u000A' { // LF - // Moves to next line. - p.textLines = append(p.textLines, string(line)) - line = nil - lineWidth = 0 - widths = nil - continue - } - - metrics, found := p.textFont.GetRuneMetrics(r) - if !found { - common.Log.Debug("ERROR: Rune char metrics not found! rune=0x%04x=%c font=%s %#q", - r, r, p.textFont.BaseFont(), p.textFont.Subtype()) - common.Log.Trace("Font: %#v", p.textFont) - common.Log.Trace("Encoder: %#v", p.textFont.Encoder()) - return errors.New("glyph char metrics missing") - } - - w := p.fontSize * metrics.Wx - if lineWidth+w > p.wrapWidth*1000.0 { - // Goes out of bounds: Wrap. - // Breaks on the character. - idx := -1 - for i := len(line) - 1; i >= 0; i-- { - if line[i] == ' ' { // TODO: What about other space glyphs like controlHT? - idx = i - break - } - } - if idx > 0 { - // Back up to last space. 
- p.textLines = append(p.textLines, string(line[0:idx+1])) - - // Remainder of line. - line = append(line[idx+1:], r) - widths = append(widths[idx+1:], w) - lineWidth = sum(widths) - - } else { - p.textLines = append(p.textLines, string(line)) - line = []rune{r} - widths = []float64{w} - lineWidth = w - } - } else { - line = append(line, r) - lineWidth += w - widths = append(widths, w) - } - } - if len(line) > 0 { - p.textLines = append(p.textLines, string(line)) + lines, err := chunk.Wrap(p.wrapWidth) + if err != nil { + return err } + p.textLines = lines return nil } -// sum returns the sums of the elements in `widths`. -func sum(widths []float64) float64 { - total := 0.0 - for _, w := range widths { - total += w - } - return total -} - // GeneratePageBlocks generates the page blocks. Multiple blocks are generated if the contents wrap // over multiple pages. Implements the Drawable interface. func (p *Paragraph) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, error) { @@ -489,6 +428,9 @@ func drawParagraphOnBlock(blk *Block, p *Paragraph, ctx DrawContext) (DrawContex var encoded []byte for _, r := range runes { + if r == '\u000A' { // LF + continue + } if r == ' ' { // TODO: What about \t and other spaces. if len(encoded) > 0 { objs = append(objs, core.MakeStringFromBytes(encoded)) diff --git a/creator/styled_paragraph.go b/creator/styled_paragraph.go index 1ba7ce4d7..02ed8195e 100644 --- a/creator/styled_paragraph.go +++ b/creator/styled_paragraph.go @@ -96,7 +96,7 @@ func (p *StyledParagraph) appendChunk(chunk *TextChunk) *TextChunk { // Append adds a new text chunk to the paragraph. 
func (p *StyledParagraph) Append(text string) *TextChunk { - chunk := newTextChunk(text, p.defaultStyle) + chunk := NewTextChunk(text, p.defaultStyle) return p.appendChunk(chunk) } @@ -107,7 +107,7 @@ func (p *StyledParagraph) Insert(index uint, text string) *TextChunk { index = l } - chunk := newTextChunk(text, p.defaultStyle) + chunk := NewTextChunk(text, p.defaultStyle) p.chunks = append(p.chunks[:index], append([]*TextChunk{chunk}, p.chunks[index:]...)...) p.wrapText() @@ -118,7 +118,7 @@ func (p *StyledParagraph) Insert(index uint, text string) *TextChunk { // The text parameter represents the text that is displayed and the url // parameter sets the destionation of the link. func (p *StyledParagraph) AddExternalLink(text, url string) *TextChunk { - chunk := newTextChunk(text, p.defaultLinkStyle) + chunk := NewTextChunk(text, p.defaultLinkStyle) chunk.annotation = newExternalLinkAnnotation(url) return p.appendChunk(chunk) } @@ -130,7 +130,7 @@ func (p *StyledParagraph) AddExternalLink(text, url string) *TextChunk { // The zoom of the destination page is controlled with the zoom // parameter. Pass in 0 to keep the current zoom value. func (p *StyledParagraph) AddInternalLink(text string, page int64, x, y, zoom float64) *TextChunk { - chunk := newTextChunk(text, p.defaultLinkStyle) + chunk := NewTextChunk(text, p.defaultLinkStyle) chunk.annotation = newInternalLinkAnnotation(page-1, x, y, zoom) return p.appendChunk(chunk) } @@ -745,6 +745,9 @@ func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext) var encStr []byte for _, rn := range chunk.Text { + if rn == '\u000A' { // LF + continue + } if rn == ' ' { if len(encStr) > 0 { cc.Add_rg(r, g, b). 
diff --git a/creator/text_chunk.go b/creator/text_chunk.go index 5a664bc8d..32e3cf2fb 100644 --- a/creator/text_chunk.go +++ b/creator/text_chunk.go @@ -6,6 +6,11 @@ package creator import ( + "errors" + "strings" + "unicode" + + "github.com/unidoc/unipdf/v3/common" "github.com/unidoc/unipdf/v3/core" "github.com/unidoc/unipdf/v3/model" ) @@ -26,17 +31,115 @@ type TextChunk struct { annotationProcessed bool } +// NewTextChunk returns a new text chunk instance. +func NewTextChunk(text string, style TextStyle) *TextChunk { + return &TextChunk{ + Text: text, + Style: style, + } +} + // SetAnnotation sets a annotation on a TextChunk. func (tc *TextChunk) SetAnnotation(annotation *model.PdfAnnotation) { tc.annotation = annotation } -// newTextChunk returns a new text chunk instance. -func newTextChunk(text string, style TextStyle) *TextChunk { - return &TextChunk{ - Text: text, - Style: style, +// Wrap wraps the text of the chunk into lines based on its style and the +// specified width. +func (tc *TextChunk) Wrap(width float64) ([]string, error) { + if int(width) <= 0 { + return []string{tc.Text}, nil } + + var lines []string + var line []rune + var lineWidth float64 + var widths []float64 + + style := tc.Style + runes := []rune(tc.Text) + + for _, r := range runes { + // Move to the next line due to newline wrapping (LF). + if r == '\u000A' { + lines = append(lines, strings.TrimRightFunc(string(line), unicode.IsSpace)+string(r)) + line = nil + lineWidth = 0 + widths = nil + continue + } + + metrics, found := style.Font.GetRuneMetrics(r) + if !found { + common.Log.Debug("ERROR: Rune char metrics not found! 
rune=0x%04x=%c font=%s %#q", + r, r, style.Font.BaseFont(), style.Font.Subtype()) + common.Log.Trace("Font: %#v", style.Font) + common.Log.Trace("Encoder: %#v", style.Font.Encoder()) + return nil, errors.New("glyph char metrics missing") + } + + w := style.FontSize * metrics.Wx + charWidth := w + style.CharSpacing*1000.0 + if lineWidth+w > width*1000.0 { + // Goes out of bounds. Break on the character. + idx := -1 + for i := len(line) - 1; i >= 0; i-- { + if line[i] == ' ' { + idx = i + break + } + } + if idx > 0 { + // Back up to last space. + lines = append(lines, strings.TrimRightFunc(string(line[0:idx+1]), unicode.IsSpace)) + + // Remainder of line. + line = append(line[idx+1:], r) + widths = append(widths[idx+1:], charWidth) + + lineWidth = 0 + for _, width := range widths { + lineWidth += width + } + } else { + lines = append(lines, strings.TrimRightFunc(string(line), unicode.IsSpace)) + line = []rune{r} + widths = []float64{charWidth} + lineWidth = charWidth + } + } else { + line = append(line, r) + lineWidth += charWidth + widths = append(widths, charWidth) + } + } + if len(line) > 0 { + lines = append(lines, string(line)) + } + + return lines, nil +} + +// Fit fits the chunk into the specified bounding box, cropping off the +// remainder in a new chunk, if it exceeds the specified dimensions. +// NOTE: The method assumes a line height of 1.0. 
In order to account for other +// line height values, the passed in height must be divided by the line height: +// height = height / lineHeight +func (tc *TextChunk) Fit(width, height float64) (*TextChunk, error) { + lines, err := tc.Wrap(width) + if err != nil { + return nil, err + } + + fit := int(height / tc.Style.FontSize) + if fit >= len(lines) { + return nil, nil + } + lf := "\u000A" + tc.Text = strings.Replace(strings.Join(lines[:fit], " "), lf+" ", lf, -1) + + remainder := strings.Replace(strings.Join(lines[fit:], " "), lf+" ", lf, -1) + return NewTextChunk(remainder, tc.Style), nil } // newExternalLinkAnnotation returns a new external link annotation. diff --git a/creator/text_chunk_test.go b/creator/text_chunk_test.go new file mode 100644 index 000000000..3a1952bef --- /dev/null +++ b/creator/text_chunk_test.go @@ -0,0 +1,130 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package creator + +import ( + "testing" + + "github.com/stretchr/testify/require" + "github.com/unidoc/unipdf/v3/model" +) + +func TestTextChunkWrap(t *testing.T) { + text := "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\nUt enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." + tc := NewTextChunk(text, TextStyle{ + Font: model.DefaultFont(), + FontSize: 10, + }) + + // Check wrap when width <= 0. + expectedLines := []string{text} + + lines, err := tc.Wrap(0) + require.NoError(t, err) + require.Equal(t, len(lines), len(expectedLines)) + require.Equal(t, lines, expectedLines) + + // Check wrap for width = 500. 
+ expectedLines = []string{ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore", + "magna aliqua.\n", + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.", + "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint", + "occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + } + + lines, err = tc.Wrap(500) + require.NoError(t, err) + require.Equal(t, len(lines), 5) + require.Equal(t, lines, expectedLines) + + // Check wrap for width = 100. + expectedLines = []string{ + "Lorem ipsum dolor sit", + "amet, consectetur", + "adipiscing elit, sed do", + "eiusmod tempor", + "incididunt ut labore et", + "dolore magna aliqua.\n", + "Ut enim ad minim", + "veniam, quis nostrud", + "exercitation ullamco", + "laboris nisi ut aliquip", + "ex ea commodo", + "consequat. Duis aute", + "irure dolor in", + "reprehenderit in", + "voluptate velit esse", + "cillum dolore eu", + "fugiat nulla pariatur.", + "Excepteur sint", + "occaecat cupidatat", + "non proident, sunt in", + "culpa qui officia", + "deserunt mollit anim", + "id est laborum.", + } + + lines, err = tc.Wrap(100) + require.NoError(t, err) + require.Equal(t, len(lines), len(expectedLines)) + require.Equal(t, lines, expectedLines) + + // Check wrap for width = 2000. + expectedLines = []string{ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\n", + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + } + + lines, err = tc.Wrap(2000) + require.NoError(t, err) + require.Equal(t, len(lines), len(expectedLines)) + require.Equal(t, lines, expectedLines) +} + +func TestTextChunkFit(t *testing.T) { + text := "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\nUt enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." + tc := NewTextChunk(text, TextStyle{ + Font: model.DefaultFont(), + FontSize: 10, + }) + + expected := [][2]string{ + [2]string{ + "Lorem ipsum dolor sit", + "amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\nUt enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + }, + [2]string{ + "amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et", + "dolore magna aliqua.\nUt enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + }, + [2]string{ + "dolore magna aliqua.\nUt enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
Duis aute irure dolor in", + "reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + }, + [2]string{ + "reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + "", + }, + } + + for i := 1; i < 10; i++ { + tc2, err := tc.Fit(float64(i*100), float64(i*10)) + require.NoError(t, err) + + remainder := "" + if tc2 != nil { + remainder = tc2.Text + } + require.Equal(t, tc.Text, expected[i-1][0]) + require.Equal(t, remainder, expected[i-1][1]) + + if tc2 == nil { + break + } + tc = tc2 + } +}