Skip to content

Commit

Permalink
opt
Browse files Browse the repository at this point in the history
  • Loading branch information
LukeShu committed Apr 24, 2024
1 parent eb23663 commit 1e4aea6
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 13 deletions.
11 changes: 5 additions & 6 deletions html/entity.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@ const longestEntityWithoutSemicolon = 6
// https://www.w3.org/TR/html4/sgml/entities.html
var entity map[string]entityVal

// entityVal is the value type of the entity map: the UTF-8 replacement
// text for one named HTML entity, stored inline in 8 bytes.
//
// NOTE(review): this text is a rendered diff without +/- markers, so two
// declarations of entityVal appear back to back. The struct form looks
// like the removed version and the [8]byte form like the added one --
// confirm against the real file. In the array form, byte 0 holds the
// length and the value bytes start at index 1 (see mkEntityVal).
type entityVal struct {
// 8 bytes total, for good alignment
Len int16
Val [6]byte
}
type entityVal [8]byte

// mkEntityVal packs str into an entityVal and returns it.
//
// It panics if str does not fit in the bytes after the length byte.
//
// NOTE(review): this text is a rendered diff without +/- markers. The
// ret.Len/ret.Val statement looks like the removed pre-change line and
// the ret[0]/copy statements like its replacement -- confirm against
// the real file.
func mkEntityVal(str string) (ret entityVal) {
ret.Len = int16(copy(ret.Val[:], str))
// Byte 0 records the length of str.
ret[0] = byte(len(str))
// copy reports how many bytes it actually copied; fewer than len(str)
// means str was truncated by the fixed-size buffer.
if copy(ret[1:], str) < len(str) {
panic("entity value does not fit in buffer: "+str)
}
return ret
}

Expand Down
8 changes: 7 additions & 1 deletion html/entity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@ func TestEntityLength(t *testing.T) {
t.Fatal("maps not loaded")
}

for k := range entity {
// We verify that the length of UTF-8 encoding of each value
// is no more than 1 + len("&"+key), which is an assumption
// made in unescapeEntity.
for k, v := range entity {
if 2+len(k) < int(v[0]) {
t.Error("escaped entity &" + k + " is more than 1 byte shorter than its UTF-8 encoding " + string(v[1:1+v[0]]))
}
if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' {
t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon)
}
Expand Down
12 changes: 6 additions & 6 deletions html/escape.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,28 +157,28 @@ func unescapeEntity[S ~[]byte | string](dst []byte, src S, dstPos, srcPos int, a
// No-op.
} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
// No-op.
} else if entityVal := entity[string(entityName)]; entityVal.Len != 0 {
if entityVal.Len > int16(i) {
} else if x := entity[string(entityName)]; x[0] != 0 {
if int(x[0]) > i {
// This assumes that it only ever has to grow by 1 byte per entity.
if dstPos == srcPos && dstIsSrc {
// make a copy + grow
dst = append(dst[:len(dst):len(dst)], 0)
} else if dstPos+int(entityVal.Len) >= len(dst) {
} else if dstPos+int(x[0]) >= len(dst) {
// grow, but don't necessarily make a copy
dst = append(dst, 0)
}
}
return dst, dstPos + copy(dst[dstPos:], entityVal.Val[:entityVal.Len]), srcPos + i
return dst, dstPos + copy(dst[dstPos:], x[1:1+x[0]]), srcPos + i
} else if !attribute {
maxLen := len(entityName) - 1
if maxLen > longestEntityWithoutSemicolon {
maxLen = longestEntityWithoutSemicolon
}
for j := maxLen; j > 1; j-- {
if entityVal := entity[string(entityName[:j])]; entityVal.Len != 0 {
if x := entity[string(entityName[:j])]; x[0] != 0 {
// This assumes that no entity without a semicolon
// has a value that is wider than its name.
return dst, dstPos + copy(dst[dstPos:], entityVal.Val[:entityVal.Len]), srcPos + j + 1
return dst, dstPos + copy(dst[dstPos:], x[1:1+x[0]]), srcPos + j + 1
}
}
}
Expand Down

0 comments on commit 1e4aea6

Please sign in to comment.