Skip to content

Commit

Permalink
combine all contiguous text nodes
Browse files (browse the repository at this point in the history)
This is needed because text nodes are split on specific characters. For example, "A_1_"
is split into two nodes: "A_" and "1_".
  • Loading branch information
rhysd committed Oct 6, 2024
1 parent 9d499aa commit 71d6bb1
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 16 deletions.
37 changes: 23 additions & 14 deletions reflink.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,24 +211,24 @@ func (l *Reflinker) linkCommitSHA(begin, end int) int {
return begin + hashLen
}

func (l *Reflinker) linkGitHubRefs(t *ast.Text) {
o := t.Segment.Start // start offset
func (l *Reflinker) linkGitHubRefs(start, stop int) {
o := start

for o < t.Segment.Stop-1 { // `-1` means the last character is not checked
s := l.src[o:t.Segment.Stop]
for o < stop-1 { // `-1` means the last character is not checked
s := l.src[o:stop]
i := bytes.IndexAny(s, "#@1234567890abcdef")
if i < 0 || len(s)-1 <= i {
return
}

switch s[i] {
case '#':
o = l.linkIssueRef(o+i, t.Segment.Stop)
o = l.linkIssueRef(o+i, stop)
case '@':
o = l.linkUserRef(o+i, t.Segment.Stop)
o = l.linkUserRef(o+i, stop)
default:
// hex character [0-9a-f]
o = l.linkCommitSHA(o+i, t.Segment.Stop)
o = l.linkCommitSHA(o+i, stop)
}
}
}
Expand Down Expand Up @@ -264,10 +264,10 @@ func (l *Reflinker) linkExtRef(start, end int) int {
return end // Not found
}

func (l *Reflinker) linkExtRefs(t *ast.Text) {
o := t.Segment.Start
for o < t.Segment.Stop-1 {
o = l.linkExtRef(o, t.Segment.Stop)
func (l *Reflinker) linkExtRefs(start, stop int) {
o := start
for o < stop-1 {
o = l.linkExtRef(o, stop)
}
}

Expand Down Expand Up @@ -398,6 +398,7 @@ func (l *Reflinker) Link(input string) string {
md := goldmark.New(goldmark.WithExtensions(extension.GFM))
t := md.Parser().Parse(text.NewReader(src))
l.reset(src)
textStart := -1

ast.Walk(t, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
Expand All @@ -411,9 +412,17 @@ func (l *Reflinker) Link(input string) string {
l.linkURL(n)
return ast.WalkSkipChildren, nil
case *ast.Text:
l.linkGitHubRefs(n)
l.linkExtRefs(n)
return ast.WalkContinue, nil
// Combine all contiguous text nodes. For example, text nodes are split on '_'.
if textStart < 0 {
textStart = n.Segment.Start
}
// Link the combined text
if _, ok := n.NextSibling().(*ast.Text); !ok {
l.linkGitHubRefs(textStart, n.Segment.Stop)
l.linkExtRefs(textStart, n.Segment.Stop)
textStart = -1
}
return ast.WalkSkipChildren, nil
default:
return ast.WalkContinue, nil
}
Expand Down
9 changes: 7 additions & 2 deletions reflink_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -607,14 +607,19 @@ func TestLinkCustomReferences(t *testing.T) {
},
{
what: "alphanumeric reference",
input: "ref BAR-abc123 is alphanumeric",
want: "ref [BAR-abc123](https://example.com/bar/abc123) is alphanumeric",
input: "ref BAR-abC123 is alphanumeric",
want: "ref [BAR-abC123](https://example.com/bar/abC123) is alphanumeric",
},
{
what: "alphanumeric ref followed by non-alphanumeric",
input: "ref BAR-abc123あ is linked",
want: "ref [BAR-abc123](https://example.com/bar/abc123)あ is linked",
},
{
what: "alphanumeric ref with underscores",
input: "ref BAR-A_1_ is linked",
want: "ref [BAR-A_1_](https://example.com/bar/A_1_) is linked",
},
}

for _, tc := range tests {
Expand Down

0 comments on commit 71d6bb1

Please sign in to comment.