From 3fa6630dad692836b804b99c74b423d1334da44c Mon Sep 17 00:00:00 2001 From: Dave Anderson Date: Wed, 25 Sep 2024 07:14:07 -0700 Subject: [PATCH] tools/internal: wrap use of collators in mutexes (#2175) Despite being a process of reading from read-only state, somehow these APIs turn out to be thread-unsafe, and parsing multiple PSLs concurrently in the same process results in data races and panics. Thankfully each use of the collator is quite short, so wrapping their use in a global mutex, while unfortunate, is acceptable. And enabling parallel parsing of PSLs speeds up online validations by a factor of 10x on many cored machines. --- tools/internal/domain/domain.go | 20 ++++++++++++++++---- tools/internal/parser/unicode.go | 10 ++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/tools/internal/domain/domain.go b/tools/internal/domain/domain.go index 9bc04d6c3..67780dd35 100644 --- a/tools/internal/domain/domain.go +++ b/tools/internal/domain/domain.go @@ -8,6 +8,7 @@ import ( "fmt" "slices" "strings" + "sync" "golang.org/x/net/idna" "golang.org/x/text/collate" @@ -203,10 +204,7 @@ func (l Label) Compare(m Label) int { // If two labels aren't equal, we are free to order them however // we want. We choose to order them with the English Unicode // collation. - var buf collate.Buffer - kl := labelCollator.KeyFromString(&buf, l.label) - km := labelCollator.KeyFromString(&buf, m.label) - if res := bytes.Compare(kl, km); res != 0 { + if res := compareLabel(l, m); res != 0 { return res } @@ -297,4 +295,18 @@ var domainValidator = idna.New( // byte compare. However, this option is buggy and silently ignored in // some cases (https://github.com/golang/go/issues/68379), so we do // this tie breaking ourselves in Label.Compare. +var labelCollatorMu sync.Mutex var labelCollator = collate.New(language.English) + +func compareLabel(a, b Label) int { + // Unfortunately individual collators are not safe for concurrent + // use. Wrap them in a global mutex. We could also construct a new + // collator for each use, but that ends up being more expensive + // and less performant than sharing one collator with a mutex. + labelCollatorMu.Lock() + defer labelCollatorMu.Unlock() + var buf collate.Buffer + kl := labelCollator.KeyFromString(&buf, a.label) + km := labelCollator.KeyFromString(&buf, b.label) + return bytes.Compare(kl, km) +} diff --git a/tools/internal/parser/unicode.go b/tools/internal/parser/unicode.go index 6842ce924..4bda84c7c 100644 --- a/tools/internal/parser/unicode.go +++ b/tools/internal/parser/unicode.go @@ -2,6 +2,7 @@ package parser import ( "bytes" + "sync" "golang.org/x/text/collate" "golang.org/x/text/language" @@ -67,6 +68,14 @@ func compareCommentText(a string, b string) int { // corresponding "sort keys", and then bytes.Compare those. There // are more exhaustive tests for sort key computation, so there is // higher confidence that it works correctly. + // + // Unfortunately individual collators are also not safe for + // concurrent use. Wrap them in a global mutex. We could also + // construct a new collator for each use, but that ends up being + // more expensive and less performant than sharing one collator + // with a mutex. + commentCollatorMu.Lock() + defer commentCollatorMu.Unlock() var buf collate.Buffer ka := commentCollator.KeyFromString(&buf, a) kb := commentCollator.KeyFromString(&buf, b) @@ -77,3 +86,4 @@ func compareCommentText(a string, b string) int { // non-suffix text. See the comment at the start of this file for more // details. var commentCollator = collate.New(language.MustParse("en")) +var commentCollatorMu sync.Mutex