Skip to content

Commit

Permalink
fix auto link behavior is different from GitHub when _ follows refs o…
Browse files Browse the repository at this point in the history
…r followed by refs
  • Loading branch information
rhysd committed Oct 7, 2024
1 parent d8faf9c commit 78f3dfd
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 75 deletions.
94 changes: 38 additions & 56 deletions reflink.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,8 @@ func (l byStart) Len() int { return len(l) }
func (l byStart) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l byStart) Less(i, j int) bool { return l[i].start < l[j].start }

// Note: '_' is actually not boundary. But it's hard to check if the '_' is a part of italic/bold
// syntax.
// For example, _#123_ should be linked because '_'s are part of italic syntax. But _#123 and #123_
// should not be linked because '_'s are NOT part of italic syntax.
// Checking if the parent node is Italic/Bold or not does not help to solve this issue. For example,
// _foo_#1 should be linked. However #1 itself is not an italic text though the neighbor node is
// Italic.
// Fortunately this is very edge case. To keep our implementation simple, we compromise to treat '_'
// as a boundary. For example, _#1 and #1_ are linked incorrectly, but I believe they are OK for our
// use cases.
func isBoundary(b byte) bool {
if '0' <= b && b <= '9' || 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' {
if '0' <= b && b <= '9' || 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' || b == '_' {
return false
}
return true
Expand Down Expand Up @@ -93,38 +83,34 @@ func (l *Reflinker) isBoundaryAt(idx int) bool {
return isBoundary(l.src[idx])
}

func (l *Reflinker) lastIndexIssueRef(begin, end int) int {
if !l.isBoundaryAt(begin - 1) {
func (l *Reflinker) lastIndexIssueRef(offset, start, end int) int {
if start < offset && !l.isBoundaryAt(offset-1) {
return -1 // Issue ref must follow a boundary (e.g. 'foo#bar')
}

for i := 1; begin+i < end; i++ {
b := l.src[begin+i]
for i := 1; offset+i < end; i++ {
b := l.src[offset+i]
if '0' <= b && b <= '9' {
continue
}
if i == 1 || !isBoundary(b) {
return -1
}
return begin + i
}

if !l.isBoundaryAt(end) {
return -1
return offset + i
}

return end // The text ends with issue number
}

func (l *Reflinker) linkIssueRef(begin, end int) int {
e := l.lastIndexIssueRef(begin, end)
func (l *Reflinker) linkIssueRef(offset, start, end int) int {
e := l.lastIndexIssueRef(offset, start, end)
if e < 0 {
return begin + 1
return offset + 1
}

r := l.src[begin:e]
r := l.src[offset:e]
l.links = append(l.links, refLink{
start: begin,
start: offset,
end: e,
// Note: The link may be for PR, but GitHub can redirect this issue link to the PR
text: fmt.Sprintf("[%s](%s/issues/%s)", r, l.repo, r[1:]),
Expand All @@ -133,51 +119,46 @@ func (l *Reflinker) linkIssueRef(begin, end int) int {
return e
}

func (l *Reflinker) lastIndexUserRef(begin, end int) int {
if !l.isBoundaryAt(begin - 1) {
func (l *Reflinker) lastIndexUserRef(offset, start, end int) int {
if start < offset && !l.isBoundaryAt(offset-1) {
return -1 // e.g. foo@bar, _@foo (-@foo is ok)
}

// Note: Username may only contain alphanumeric characters or single hyphens, and cannot begin
// or end with a hyphen: @foo-, @-foo
// Note: '/' just after user name like @foo/ is not allowed

if b := l.src[begin+1]; !isUserNameChar(b) || b == '-' {
if b := l.src[offset+1]; !isUserNameChar(b) || b == '-' {
return -1
}

for i := 2; begin+i < end; i++ {
b := l.src[begin+i]
for i := 2; offset+i < end; i++ {
b := l.src[offset+i]
if isUserNameChar(b) {
continue
}
if !isBoundary(b) || b == '/' || l.src[begin+i-1] == '-' {
if !isBoundary(b) || b == '/' || l.src[offset+i-1] == '-' {
return -1
}
return begin + i
return offset + i
}

if l.src[end-1] == '-' {
return -1
}
if end < len(l.src) {
if b := l.src[end]; !isBoundary(b) || b == '/' {
return -1
}
}

return end
}

func (l *Reflinker) linkUserRef(begin, end int) int {
e := l.lastIndexUserRef(begin, end)
func (l *Reflinker) linkUserRef(offset, start, end int) int {
e := l.lastIndexUserRef(offset, start, end)
if e < 0 {
return begin + 1
return offset + 1
}

u := l.src[begin:e]
u := l.src[offset:e]
l.links = append(l.links, refLink{
start: begin,
start: offset,
end: e,
text: fmt.Sprintf("[%s](%s/%s)", u, l.home, u[1:]),
})
Expand All @@ -187,28 +168,29 @@ func (l *Reflinker) linkUserRef(begin, end int) int {

const hashLen int = 40

func (l *Reflinker) linkCommitSHA(begin, end int) int {
for i := 1; i < hashLen; i++ { // Since l.src[begin] was already checked, i starts from 1
if begin+i >= end {
return begin + i
func (l *Reflinker) linkCommitSHA(offset, start, end int) int {
for i := 1; i < hashLen; i++ { // Since l.src[offset] was already checked, i starts from 1
if offset+i >= end {
return offset + i
}
b := l.src[begin+i]
b := l.src[offset+i]
if '0' <= b && b <= '9' || 'a' <= b && b <= 'f' {
continue
}
return begin + i
return offset + i
}

if l.isBoundaryAt(begin-1) && l.isBoundaryAt(begin+hashLen) {
h := l.src[begin : begin+hashLen]
hashEnd := offset + hashLen
if (start == offset || l.isBoundaryAt(offset-1)) && (hashEnd == end || l.isBoundaryAt(hashEnd)) {
h := l.src[offset:hashEnd]
l.links = append(l.links, refLink{
start: begin,
end: begin + hashLen,
start: offset,
end: offset + hashLen,
text: fmt.Sprintf("[`%s`](%s/commit/%s)", h[:10], l.repo, h),
})
}

return begin + hashLen
return offset + hashLen
}

func (l *Reflinker) linkGitHubRefs(start, stop int) {
Expand All @@ -223,12 +205,12 @@ func (l *Reflinker) linkGitHubRefs(start, stop int) {

switch s[i] {
case '#':
o = l.linkIssueRef(o+i, stop)
o = l.linkIssueRef(o+i, start, stop)
case '@':
o = l.linkUserRef(o+i, stop)
o = l.linkUserRef(o+i, start, stop)
default:
// hex character [0-9a-f]
o = l.linkCommitSHA(o+i, stop)
o = l.linkCommitSHA(o+i, start, stop)
}
}
}
Expand Down
61 changes: 42 additions & 19 deletions reflink_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,40 +70,49 @@ func TestLinkRefs(t *testing.T) {
want: "#123a",
},
{
// Note: This behavior is different from GitHub (#123 should not be linked). But aligning
// to GitHub's behavior is hard and it is very edge case for us. So we determined not to
// align the behavior
what: "issue follows _",
input: "_#123",
want: "_[#123](https://github.com/u/r/issues/123)",
want: "_#123",
},
{
// Note: This behavior is different from GitHub (#123 should not be linked). But aligning
// to GitHub's behavior is hard and it is very edge case for us. So we determined not to
// align the behavior
what: "issue followed by _",
input: "#123_",
want: "[#123](https://github.com/u/r/issues/123)_",
want: "#123_",
},
{
what: "issue as italic text",
input: "_#123_",
want: "_[#123](https://github.com/u/r/issues/123)_", // Linked because it's an italic text
},
{
what: "issue as part of italic text",
what: "issue as italic text with asterisk",
input: "*#123*",
want: "*[#123](https://github.com/u/r/issues/123)*", // Linked because it's an italic text
},
{
what: "issue as bold text",
input: "__#123__",
want: "__[#123](https://github.com/u/r/issues/123)__", // Linked because it's an bold text
},
{
what: "issue at end of italic text",
input: "_foo #123_",
want: "_foo [#123](https://github.com/u/r/issues/123)_", // Linked because it's an italic text
},
{
what: "issue at start of italic text",
input: "_#123 foo_",
want: "_[#123](https://github.com/u/r/issues/123) foo_", // Linked because it's an italic text
},
{
what: "issue next to italic",
input: "_foo_#123",
want: "_foo_[#123](https://github.com/u/r/issues/123)",
},
{
what: "italic next to issue",
what: "non-italic underscore text follows issue",
input: "#123_foo_",
want: "[#123](https://github.com/u/r/issues/123)_foo_",
want: "#123_foo_",
},
{
what: "issue follows number",
Expand Down Expand Up @@ -136,20 +145,14 @@ func TestLinkRefs(t *testing.T) {
want: "a@foo",
},
{
// Note: This behavior is different from GitHub (@foo should not be linked). But aligning
// to GitHub's behavior is hard and it is very edge case for us. So we determined not to
// align the behavior
what: "user follows _",
input: "_@foo",
want: "_[@foo](https://github.com/foo)",
want: "_@foo",
},
{
// Note: This behavior is different from GitHub (@foo should not be linked). But aligning
// to GitHub's behavior is hard and it is very edge case for us. So we determined not to
// align the behavior
what: "user followed by _",
input: "@foo_",
want: "[@foo](https://github.com/foo)_",
want: "@foo_",
},
{
what: "user as italic text",
Expand Down Expand Up @@ -181,6 +184,16 @@ func TestLinkRefs(t *testing.T) {
input: "1@foo",
want: "1@foo",
},
{
what: "user followed by underscore",
input: "@foo_bar",
want: "@foo_bar",
},
{
what: "user follows underscore",
input: "bar_@foo",
want: "bar_@foo",
},
{
what: "user followed by other user",
input: "@a@b",
Expand Down Expand Up @@ -393,6 +406,11 @@ func TestLinkRefs(t *testing.T) {
input: "[_41608e5f4109208a6ab995c58266554e6071c5b2_](https://example.com)",
want: "[_41608e5f4109208a6ab995c58266554e6071c5b2_](https://example.com)",
},
{
what: "commit sha less than 10 characters",
input: "z41608e5f",
want: "z41608e5f",
},
{
what: "non-GitHub URL",
input: "https://example.com",
Expand Down Expand Up @@ -531,6 +549,11 @@ func TestLinkRefs(t *testing.T) {
input: "the PR review is https://github.com/u/r/pull/123#pullrequestreview-1212591132",
want: "the PR review is [#123 (review)](https://github.com/u/r/pull/123#pullrequestreview-1212591132)",
},
{
what: "underscores in repository name of PR URL",
input: "the PR is https://github.com/foo/a_b_c_d/pull/123!",
want: "the PR is [foo/a_b_c_d#123](https://github.com/foo/a_b_c_d/pull/123)!",
},
{
what: "GitHub external reference in text",
input: "This is GH-123 link",
Expand Down

0 comments on commit 78f3dfd

Please sign in to comment.