From 71d6bb189a655a660154536799dd49f41e1c4b41 Mon Sep 17 00:00:00 2001 From: rhysd Date: Mon, 7 Oct 2024 01:03:56 +0900 Subject: [PATCH] combine all contiguous text nodes because text nodes are split on specific characters. For example, "A_1_" is split into two nodes "A_" and "1_". --- reflink.go | 37 +++++++++++++++++++++++-------------- reflink_test.go | 9 +++++++-- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/reflink.go b/reflink.go index 9c49d37..2546129 100644 --- a/reflink.go +++ b/reflink.go @@ -211,11 +211,11 @@ func (l *Reflinker) linkCommitSHA(begin, end int) int { return begin + hashLen } -func (l *Reflinker) linkGitHubRefs(t *ast.Text) { - o := t.Segment.Start // start offset +func (l *Reflinker) linkGitHubRefs(start, stop int) { + o := start - for o < t.Segment.Stop-1 { // `-1` means the last character is not checked - s := l.src[o:t.Segment.Stop] + for o < stop-1 { // `-1` means the last character is not checked + s := l.src[o:stop] i := bytes.IndexAny(s, "#@1234567890abcdef") if i < 0 || len(s)-1 <= i { return @@ -223,12 +223,12 @@ func (l *Reflinker) linkGitHubRefs(t *ast.Text) { switch s[i] { case '#': - o = l.linkIssueRef(o+i, t.Segment.Stop) + o = l.linkIssueRef(o+i, stop) case '@': - o = l.linkUserRef(o+i, t.Segment.Stop) + o = l.linkUserRef(o+i, stop) default: // hex character [0-9a-f] - o = l.linkCommitSHA(o+i, t.Segment.Stop) + o = l.linkCommitSHA(o+i, stop) } } } @@ -264,10 +264,10 @@ func (l *Reflinker) linkExtRef(start, end int) int { return end // Not found } -func (l *Reflinker) linkExtRefs(t *ast.Text) { - o := t.Segment.Start - for o < t.Segment.Stop-1 { - o = l.linkExtRef(o, t.Segment.Stop) +func (l *Reflinker) linkExtRefs(start, stop int) { + o := start + for o < stop-1 { + o = l.linkExtRef(o, stop) } } @@ -398,6 +398,7 @@ func (l *Reflinker) Link(input string) string { md := goldmark.New(goldmark.WithExtensions(extension.GFM)) t := md.Parser().Parse(text.NewReader(src)) l.reset(src) + textStart := -1 ast.Walk(t, func(n ast.Node, entering bool) (ast.WalkStatus, error) { if !entering { @@ -411,9 +412,17 @@ func (l *Reflinker) Link(input string) string { l.linkURL(n) return ast.WalkSkipChildren, nil case *ast.Text: - l.linkGitHubRefs(n) - l.linkExtRefs(n) - return ast.WalkContinue, nil + // Combine all contiguous text nodes. For example, text nodes are split on '_'. + if textStart < 0 { + textStart = n.Segment.Start + } + // Link the combined text + if _, ok := n.NextSibling().(*ast.Text); !ok { + l.linkGitHubRefs(textStart, n.Segment.Stop) + l.linkExtRefs(textStart, n.Segment.Stop) + textStart = -1 + } + return ast.WalkSkipChildren, nil default: return ast.WalkContinue, nil } diff --git a/reflink_test.go b/reflink_test.go index 267a834..fc68182 100644 --- a/reflink_test.go +++ b/reflink_test.go @@ -607,14 +607,19 @@ func TestLinkCustomReferences(t *testing.T) { }, { what: "alphanumeric reference", - input: "ref BAR-abc123 is alphanumeric", - want: "ref [BAR-abc123](https://example.com/bar/abc123) is alphanumeric", + input: "ref BAR-abC123 is alphanumeric", + want: "ref [BAR-abC123](https://example.com/bar/abC123) is alphanumeric", }, { what: "alphanumeric ref followed by non-alphanumeric", input: "ref BAR-abc123あ is linked", want: "ref [BAR-abc123](https://example.com/bar/abc123)あ is linked", }, + { + what: "alphanumeric ref with underscores", + input: "ref BAR-A_1_ is linked", + want: "ref [BAR-A_1_](https://example.com/bar/A_1_) is linked", + }, } for _, tc := range tests {