Skip to content

Commit

Permalink
tools/internal: wrap use of collators in mutexes (#2175)
Browse files Browse the repository at this point in the history
Despite being a process of reading from read-only state, somehow these
APIs turn out to be thread-unsafe, and parsing multiple PSLs concurrently
in the same process results in data races and panics.

Thankfully each use of the collator is quite short, so wrapping their use
in a global mutex, while unfortunate, is acceptable. And enabling parallel
parsing of PSLs speeds up online validations by a factor of 10x on many cored
machines.
  • Loading branch information
danderson authored Sep 25, 2024
1 parent 0105cc3 commit 3fa6630
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 4 deletions.
20 changes: 16 additions & 4 deletions tools/internal/domain/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"slices"
"strings"
"sync"

"golang.org/x/net/idna"
"golang.org/x/text/collate"
Expand Down Expand Up @@ -203,10 +204,7 @@ func (l Label) Compare(m Label) int {
// If two labels aren't equal, we are free to order them however
// we want. We choose to order them with the English Unicode
// collation.
var buf collate.Buffer
kl := labelCollator.KeyFromString(&buf, l.label)
km := labelCollator.KeyFromString(&buf, m.label)
if res := bytes.Compare(kl, km); res != 0 {
if res := compareLabel(l, m); res != 0 {
return res
}

Expand Down Expand Up @@ -297,4 +295,18 @@ var domainValidator = idna.New(
// byte compare. However, this option is buggy and silently ignored in
// some cases (https://github.com/golang/go/issues/68379), so we do
// this tie breaking ourselves in Label.Compare.
var labelCollatorMu sync.Mutex
var labelCollator = collate.New(language.English)

func compareLabel(a, b Label) int {
// Unfortunately individual collators are not safe for concurrent
// use. Wrap them in a global mutex. We could also construct a new
// collator for each use, but that ends up being more expensive
// and less performant than sharing one collator with a mutex.
labelCollatorMu.Lock()
defer labelCollatorMu.Unlock()
var buf collate.Buffer
kl := labelCollator.KeyFromString(&buf, a.label)
km := labelCollator.KeyFromString(&buf, b.label)
return bytes.Compare(kl, km)
}
10 changes: 10 additions & 0 deletions tools/internal/parser/unicode.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package parser

import (
"bytes"
"sync"

"golang.org/x/text/collate"
"golang.org/x/text/language"
Expand Down Expand Up @@ -67,6 +68,14 @@ func compareCommentText(a string, b string) int {
// corresponding "sort keys", and then bytes.Compare those. There
// are more exhaustive tests for sort key computation, so there is
// higher confidence that it works correctly.
//
// Unfortunately individual collators are also not safe for
// concurrent use. Wrap them in a global mutex. We could also
// construct a new collator for each use, but that ends up being
// more expensive and less performant than sharing one collator
// with a mutex.
commentCollatorMu.Lock()
defer commentCollatorMu.Unlock()
var buf collate.Buffer
ka := commentCollator.KeyFromString(&buf, a)
kb := commentCollator.KeyFromString(&buf, b)
Expand All @@ -77,3 +86,4 @@ func compareCommentText(a string, b string) int {
// non-suffix text. See the comment at the start of this file for more
// details.
var commentCollator = collate.New(language.MustParse("en"))
var commentCollatorMu sync.Mutex

0 comments on commit 3fa6630

Please sign in to comment.