From f47fcb34adade6ff248d56a7d0ed302f17a149bb Mon Sep 17 00:00:00 2001 From: Dave Anderson Date: Thu, 8 Aug 2024 01:23:48 -0700 Subject: [PATCH] tools/internal/parser: add more offline, diff-aware validations (#2089) * tools/internal/parser: highlight .Changed() AST nodes in debug output * tools/internal/parser: make metadata validator diff-aware, add tests * tools/internal/parser: add more validations --- tools/internal/domain/domain.go | 12 ++ tools/internal/parser/clean.go | 22 +- tools/internal/parser/clean_test.go | 26 ++- tools/internal/parser/errors.go | 44 ++++ tools/internal/parser/parser_test.go | 10 + tools/internal/parser/validate.go | 82 ++++++- tools/internal/parser/validate_test.go | 287 +++++++++++++++++++++++++ tools/internal/parser/write.go | 18 +- 8 files changed, 488 insertions(+), 13 deletions(-) diff --git a/tools/internal/domain/domain.go b/tools/internal/domain/domain.go index 0657c3929..9bc04d6c3 100644 --- a/tools/internal/domain/domain.go +++ b/tools/internal/domain/domain.go @@ -142,6 +142,18 @@ func (d Name) CutSuffix(suffix Name) (rest []Label, found bool) { return ret, true } +// AddPrefix returns d prefixed with label. +// +// For example, AddPrefix of "bar" to "foo.com" is "bar.foo.com". +func (d Name) AddPrefix(label Label) (Name, error) { + // Due to total name length restrictions, we have to fully + // re-check the shape of the extended domain name. The simplest + // way to do that is to round-trip through a string and leverage + // Parse again. + retStr := label.String() + "." + d.String() + return Parse(retStr) +} + // Label is a domain name label. type Label struct { label string diff --git a/tools/internal/parser/clean.go b/tools/internal/parser/clean.go index db81571e8..24c45baea 100644 --- a/tools/internal/parser/clean.go +++ b/tools/internal/parser/clean.go @@ -54,7 +54,7 @@ func cleanBlock(b Block) []error { ret = append(ret, sortSuffixes(v)...) rewriteSuffixesMetadata(v) case *Wildcard: - slices.SortFunc(v.Exceptions, domain.Label.Compare) + cleanWildcard(v) case *Comment, *Suffix: // No cleaning required default: @@ -64,6 +64,12 @@ func cleanBlock(b Block) []error { return ret } +func cleanWildcard(w *Wildcard) { + // Sort and deduplicate exception domains. + slices.SortFunc(w.Exceptions, domain.Label.Compare) + w.Exceptions = slices.Compact(w.Exceptions) +} + func sortSection(s *Section) []error { // There are two difficult parts aspects to sorting a section: // free-floating comments, and Amazon. @@ -224,6 +230,9 @@ func sortSuffixes(s *Suffixes) []error { prevGroupEnd Block // last suffix/wildcard of previous group prevComment *Comment // last Comment seen, for error reporting thisGroupStart int + // We have to construct a new output slice, because the + // deduplicating sort below might shrink s.Blocks. + out = make([]Block, 0, len(s.Blocks)) ) sortAndCheck := func(group []Block) { @@ -231,7 +240,14 @@ func sortSuffixes(s *Suffixes) []error { return } + // Sort and deduplicate. Within a single group inside a suffix + // block, duplicate suffixes are semantically equivalent so + // it's safe to remove dupes. slices.SortFunc(group, compareSuffixAndWildcard) + group = slices.CompactFunc(group, func(a, b Block) bool { + return compareSuffixAndWildcard(a, b) == 0 + }) + out = append(out, group...) if prevGroupEnd == nil { // First group. @@ -258,6 +274,7 @@ func sortSuffixes(s *Suffixes) []error { sortAndCheck(group) prevComment = v } + out = append(out, v) thisGroupStart = i + 1 default: panic("unknown ast node") @@ -267,8 +284,7 @@ func sortSuffixes(s *Suffixes) []error { sortAndCheck(s.Blocks[thisGroupStart:]) } - // Unlike in sortSection, no need to construct a new s.Blocks, - // we've sorted the blocks in-place. + s.Blocks = out return errs } diff --git a/tools/internal/parser/clean_test.go b/tools/internal/parser/clean_test.go index 39010a2b6..cdcb982e2 100644 --- a/tools/internal/parser/clean_test.go +++ b/tools/internal/parser/clean_test.go @@ -107,6 +107,29 @@ func TestClean(t *testing.T) { ), }, + { + name: "sort_suffixes_dedup", + in: list( + suffixes(1, 1, noInfo, + suffix(1, "com"), + wildcard(2, 2, "foo.com"), + suffix(3, "zot.com"), + suffix(4, "qux.com"), + suffix(5, "qux.foo.com"), + suffix(6, "zot.com"), + ), + ), + want: list( + suffixes(1, 1, noInfo, + suffix(1, "com"), + wildcard(2, 2, "foo.com"), + suffix(5, "qux.foo.com"), + suffix(4, "qux.com"), + suffix(3, "zot.com"), + ), + ), + }, + { name: "sort_suffixes_with_nonblocking_comment", in: list( @@ -162,7 +185,8 @@ func TestClean(t *testing.T) { name: "sort_suffixes_wildcard_exceptions", in: list( suffixes(1, 1, noInfo, - wildcard(1, 1, "foo.com", "mmm", "aaa", "zzz"), + // Also has a duplicate exception that needs cleaning up + wildcard(1, 1, "foo.com", "mmm", "aaa", "zzz", "aaa"), suffix(2, "foo.com"), ), ), diff --git a/tools/internal/parser/errors.go b/tools/internal/parser/errors.go index f053bda1b..ee3871d84 100644 --- a/tools/internal/parser/errors.go +++ b/tools/internal/parser/errors.go @@ -137,3 +137,47 @@ type ErrCommentPreventsSectionSort struct { func (e ErrCommentPreventsSectionSort) Error() string { return fmt.Sprintf("%s: comment prevents full sorting of PSL section", e.SourceRange.LocationString()) } + +type ErrDuplicateSection struct { + *Section + FirstDefinition *Section +} + +func (e ErrDuplicateSection) Error() string { + return fmt.Sprintf("%s: duplicate section %q, first definition at %s", e.LocationString(), e.Name, e.FirstDefinition.LocationString()) +} + +type ErrUnknownSection struct { + *Section +} + +func (e ErrUnknownSection) Error() string { + return fmt.Sprintf("%s: unknown section %q, allowed sections are 'ICANN DOMAINS' and 'PRIVATE DOMAINS'", e.LocationString(), e.Name) +} + +type ErrMissingSection struct { + Name string +} + +func (e ErrMissingSection) Error() string { + return fmt.Sprintf("missing required section %q", e.Name) +} + +type ErrDuplicateSuffix struct { + Name string + Block // Suffix or Wildcard + FirstDefinition Block // Suffix or Wildcard +} + +func (e ErrDuplicateSuffix) Error() string { + return fmt.Sprintf("%s: duplicate suffix definition for %q, first definition at %s", e.SrcRange().LocationString(), e.Name, e.FirstDefinition.SrcRange().LocationString()) +} + +type ErrConflictingSuffixAndException struct { + *Suffix + Wildcard *Wildcard +} + +func (e ErrConflictingSuffixAndException) Error() string { + return fmt.Sprintf("%s: suffix %s conflicts with exception in wildcard at %s", e.LocationString(), e.Domain, e.Wildcard.LocationString()) +} diff --git a/tools/internal/parser/parser_test.go b/tools/internal/parser/parser_test.go index 7264d872b..a783cfc84 100644 --- a/tools/internal/parser/parser_test.go +++ b/tools/internal/parser/parser_test.go @@ -457,3 +457,13 @@ func zeroSourceRange(b Block) Block { } return b } + +// markUnchanged makes .Changed() return false for b. It does not +// touch parent or child blocks. +// +// It's generic so that it works in places that require a specific +// instance type, not just places that accept a Block interface. +func markUnchanged[T Block](b T) T { + b.info().isUnchanged = true + return b +} diff --git a/tools/internal/parser/validate.go b/tools/internal/parser/validate.go index 6342c830e..4c851ded8 100644 --- a/tools/internal/parser/validate.go +++ b/tools/internal/parser/validate.go @@ -1,5 +1,9 @@ package parser +import ( + "github.com/creachadair/mds/mapset" +) + // ValidateOffline runs offline validations on a parsed PSL. func ValidateOffline(l *List) []error { var ret []error @@ -10,20 +14,27 @@ func ValidateOffline(l *List) []error { break } } + validateExpectedSections(l) + validateSuffixUniqueness(l) return ret } // validateEntityMetadata verifies that all suffix blocks have some // kind of entity name. -func validateEntityMetadata(block *Section) []error { +func validateEntityMetadata(block Block) []error { var ret []error for _, block := range BlocksOfType[*Suffixes](block) { + if !block.Changed() { + continue + } + if block.Info.Name == "" { ret = append(ret, ErrMissingEntityName{ Suffixes: block, }) - } else if len(block.Info.Maintainers) == 0 && !exemptFromContactInfo(block.Info.Name) { + } + if len(block.Info.Maintainers) == 0 && !exemptFromContactInfo(block.Info.Name) { ret = append(ret, ErrMissingEntityEmail{ Suffixes: block, }) @@ -31,3 +42,70 @@ func validateEntityMetadata(block *Section) []error { } return ret } + +// validateExpectedSections verifies that the two top-level sections +// (ICANN and private domains) exist, are not duplicated, and that no +// other sections are present. +func validateExpectedSections(block Block) (errs []error) { + // Use an ordered set for the wanted sections, so that we can + // check section names in O(1) but also report missing sections in + // a deterministic order. + wanted := mapset.New("ICANN DOMAINS", "PRIVATE DOMAINS") + found := map[string]*Section{} + for _, section := range BlocksOfType[*Section](block) { + if !wanted.Has(section.Name) && section.Changed() { + errs = append(errs, ErrUnknownSection{section}) + } else if other, ok := found[section.Name]; ok && (section.Changed() || other.Changed()) { + errs = append(errs, ErrDuplicateSection{section, other}) + } else { + found[section.Name] = section + } + } + + for _, name := range wanted.Slice() { + if _, ok := found[name]; !ok { + errs = append(errs, ErrMissingSection{name}) + } + } + + return errs +} + +// validateSuffixUniqueness verifies that suffixes only appear once +// each. +func validateSuffixUniqueness(block Block) (errs []error) { + suffixes := map[string]*Suffix{} // domain.Name.String() -> Suffix + wildcards := map[string]*Wildcard{} // base domain.Name.String() -> Wildcard + + for _, suffix := range BlocksOfType[*Suffix](block) { + name := suffix.Domain.String() + if other, ok := suffixes[name]; ok && (suffix.Changed() || other.Changed()) { + errs = append(errs, ErrDuplicateSuffix{name, suffix, other}) + } else { + suffixes[name] = suffix + } + } + + for _, wildcard := range BlocksOfType[*Wildcard](block) { + name := wildcard.Domain.String() + if other, ok := wildcards[name]; ok && (wildcard.Changed() || other.Changed()) { + errs = append(errs, ErrDuplicateSuffix{"*." + name, wildcard, other}) + } else { + wildcards[name] = wildcard + } + + for _, exc := range wildcard.Exceptions { + fqdn, err := wildcard.Domain.AddPrefix(exc) + if err != nil && wildcard.Changed() { + errs = append(errs, err) + continue + } + name := fqdn.String() + if suffix, ok := suffixes[name]; ok && (wildcard.Changed() || suffix.Changed()) { + errs = append(errs, ErrConflictingSuffixAndException{suffix, wildcard}) + } + } + } + + return errs +} diff --git a/tools/internal/parser/validate_test.go b/tools/internal/parser/validate_test.go index 0bfe2c257..ebf40e1e1 100644 --- a/tools/internal/parser/validate_test.go +++ b/tools/internal/parser/validate_test.go @@ -1 +1,288 @@ package parser + +import ( + "testing" +) + +func TestValidateEntityMetadata(t *testing.T) { + in := list( + section(1, 1, "PRIVATE DOMAINS", + suffixes(1, 1, info("", nil, emails("Example", "example@example.com"), nil, true), + comment(1, "Submitted by Example "), + suffix(2, "example.com"), + ), + + suffixes(2, 2, info("Example Ltd", nil, nil, nil, true), + comment(1, "Example Ltd"), + suffix(2, "example.org"), + ), + + suffixes(3, 3, noInfo, + suffix(1, "example.net"), + ), + + suffixes(4, 4, info("Foo Ltd", nil, emails("Someone", "example@example.com"), nil, true), + comment(1, "Submitted by Someone "), + suffix(2, "blah.example.com"), + ), + ), + ) + want := []error{ + ErrMissingEntityName{ + Suffixes: suffixes(1, 1, + info("", nil, emails("Example", "example@example.com"), nil, true), + comment(1, "Submitted by Example "), + suffix(2, "example.com"), + ), + }, + ErrMissingEntityEmail{ + Suffixes: suffixes(2, 2, info("Example Ltd", nil, nil, nil, true), + comment(1, "Example Ltd"), + suffix(2, "example.org"), + ), + }, + ErrMissingEntityName{ + Suffixes: suffixes(3, 3, noInfo, + suffix(1, "example.net"), + ), + }, + ErrMissingEntityEmail{ + Suffixes: suffixes(3, 3, noInfo, + suffix(1, "example.net"), + ), + }, + } + + got := validateEntityMetadata(in) + checkDiff(t, "validateEntityMetadata", got, want) + + // Make the change be a diff and check the reduced error set. + prev := list( + section(1, 1, "PRIVATE DOMAINS", + suffixes(1, 1, info("", nil, emails("Example", "example@example.com"), nil, true), + comment(1, "Submitted by Example "), + suffix(2, "example.com"), + ), + + suffixes(2, 2, info("Example Ltd", nil, nil, nil, true), + comment(1, "Example Ltd"), + suffix(2, "example.org"), + ), + + suffixes(3, 3, info("Foo Ltd", nil, emails("Someone", "example@example.com"), nil, true), + comment(1, "Submitted by Someone "), + suffix(2, "blah.example.com"), + ), + ), + ) + + in.SetBaseVersion(prev, false) + got = validateEntityMetadata(in) + + // Second suffix block no longer reports any errors. First one + // still does, because its empty name is a dupe of the last block. + want = []error{ + ErrMissingEntityName{ + Suffixes: suffixes(1, 1, + info("", nil, emails("Example", "example@example.com"), nil, true), + markUnchanged(comment(1, "Submitted by Example ")), + markUnchanged(suffix(2, "example.com")), + ), + }, + ErrMissingEntityName{ + Suffixes: suffixes(3, 3, noInfo, + suffix(1, "example.net"), + ), + }, + ErrMissingEntityEmail{ + Suffixes: suffixes(3, 3, noInfo, + suffix(1, "example.net"), + ), + }, + } + + checkDiff(t, "validateEntityMetadata (changed blocks only)", got, want) +} + +func TestValidateExpectedSections(t *testing.T) { + tests := []struct { + name string + in *List + want []error + }{ + { + name: "ok", + in: list( + section(1, 1, "ICANN DOMAINS"), + section(2, 2, "PRIVATE DOMAINS"), + ), + want: nil, + }, + { + name: "all_missing", + in: list(), + want: []error{ + ErrMissingSection{"ICANN DOMAINS"}, + ErrMissingSection{"PRIVATE DOMAINS"}, + }, + }, + { + name: "one_missing", + in: list( + section(1, 1, "ICANN DOMAINS"), + ), + want: []error{ + ErrMissingSection{"PRIVATE DOMAINS"}, + }, + }, + { + name: "unknown", + in: list( + section(1, 1, "ICANN DOMAINS"), + section(2, 2, "PRIVATE DOMAINS"), + section(3, 3, "NON EUCLIDEAN DOMAINS"), + ), + want: []error{ + ErrUnknownSection{section(3, 3, "NON EUCLIDEAN DOMAINS")}, + }, + }, + { + name: "duplicate_known", + in: list( + section(1, 1, "ICANN DOMAINS"), + section(2, 2, "PRIVATE DOMAINS"), + section(3, 3, "ICANN DOMAINS"), + ), + want: []error{ + ErrDuplicateSection{ + section(3, 3, "ICANN DOMAINS"), + section(1, 1, "ICANN DOMAINS"), + }, + }, + }, + { + name: "duplicate_unknown", + in: list( + section(1, 1, "RIDICULOUS DOMAINS"), + section(2, 2, "ICANN DOMAINS"), + section(3, 3, "PRIVATE DOMAINS"), + section(4, 4, "RIDICULOUS DOMAINS"), + ), + want: []error{ + ErrUnknownSection{section(1, 1, "RIDICULOUS DOMAINS")}, + ErrUnknownSection{section(4, 4, "RIDICULOUS DOMAINS")}, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := validateExpectedSections(tc.in) + checkDiff(t, "validateExpectedSections output", got, tc.want) + }) + } +} + +func TestValidateSuffixUniqueness(t *testing.T) { + tests := []struct { + name string + in *List + want []error + }{ + { + name: "ok", + in: list( + section(1, 2, "PRIVATE DOMAINS", + suffixes(2, 3, noInfo, + suffix(3, "foo.com"), + suffix(4, "bar.com"), + ), + ), + ), + want: nil, + }, + + { + name: "dupe_suffixes", + in: list( + section(1, 2, "PRIVATE DOMAINS", + suffixes(2, 3, noInfo, + suffix(3, "foo.com"), + suffix(4, "bar.com"), + suffix(5, "foo.com"), + ), + ), + ), + want: []error{ + ErrDuplicateSuffix{"foo.com", suffix(5, "foo.com"), suffix(3, "foo.com")}, + }, + }, + + { + name: "dupe_wildcards", + in: list( + section(1, 2, "PRIVATE DOMAINS", + suffixes(2, 3, noInfo, + wildcard(3, 4, "foo.com"), + suffix(4, "bar.com"), + wildcard(5, 6, "foo.com"), + ), + ), + ), + want: []error{ + ErrDuplicateSuffix{"*.foo.com", wildcard(5, 6, "foo.com"), wildcard(3, 4, "foo.com")}, + }, + }, + + { + name: "dupe_wildcard_exceptions", + in: list( + section(1, 2, "PRIVATE DOMAINS", + suffixes(2, 3, noInfo, + wildcard(3, 4, "foo.com", "a", "b", "c", "a"), + suffix(4, "bar.com"), + suffix(5, "b.foo.com"), + ), + ), + ), + want: []error{ + ErrConflictingSuffixAndException{ + Suffix: suffix(5, "b.foo.com"), + Wildcard: wildcard(3, 4, "foo.com", "a", "b", "c", "a"), + }, + }, + }, + + { + name: "dupe_spanning_blocks_and_sections", + in: list( + section(1, 2, "PRIVATE DOMAINS", + suffixes(2, 3, noInfo, + suffix(3, "foo.com"), + suffix(4, "bar.com"), + ), + suffixes(5, 6, noInfo, + suffix(6, "foo.com"), + ), + ), + section(7, 8, "ICANN DOMAINS", + suffixes(8, 9, noInfo, + suffix(9, "qux.com"), + suffix(10, "foo.com"), + ), + ), + ), + want: []error{ + ErrDuplicateSuffix{"foo.com", suffix(6, "foo.com"), suffix(3, "foo.com")}, + ErrDuplicateSuffix{"foo.com", suffix(10, "foo.com"), suffix(3, "foo.com")}, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := validateSuffixUniqueness(tc.in) + checkDiff(t, "validateSuffixUniqueness", got, tc.want) + }) + } +} diff --git a/tools/internal/parser/write.go b/tools/internal/parser/write.go index 35d87226d..9ed01aa48 100644 --- a/tools/internal/parser/write.go +++ b/tools/internal/parser/write.go @@ -67,6 +67,10 @@ func (l *List) MarshalDebug() []byte { } func writeBlockDebug(w io.Writer, b Block, indent string) { + changemark := "" + if b.Changed() { + changemark = "!!" + } f := func(msg string, args ...any) { fmt.Fprintf(w, indent+msg+"\n", args...) } @@ -82,13 +86,13 @@ func writeBlockDebug(w io.Writer, b Block, indent string) { switch v := b.(type) { case *List: - f("List(%s) {", loc) + f("%sList(%s) {", changemark, loc) for _, child := range v.Blocks { writeBlockDebug(w, child, nextIndent) } f("} // List") case *Section: - f("Section(%s, name=%q) {", loc, v.Name) + f("%sSection(%s, name=%q) {", changemark, loc, v.Name) for _, child := range v.Blocks { writeBlockDebug(w, child, nextIndent) } @@ -111,22 +115,22 @@ func writeBlockDebug(w io.Writer, b Block, indent string) { const open = "SuffixBlock(" pad := strings.Repeat(" ", len(open)) - f("%s%s) {", open, strings.Join(items, fmt.Sprintf(",\n%s%s", indent, pad))) + f("%s%s%s) {", changemark, open, strings.Join(items, fmt.Sprintf(",\n%s%s", indent, pad))) for _, child := range v.Blocks { writeBlockDebug(w, child, nextIndent) } f("} // SuffixBlock(name=%q)", v.Info.Name) case *Suffix: - f("Suffix(%s, %q)", loc, v.Domain) + f("%sSuffix(%s, %q)", changemark, loc, v.Domain) case *Wildcard: w := fmt.Sprintf("*.%s", v.Domain) if len(v.Exceptions) > 0 { - f("Wildcard(%s, %q, except=%v)", loc, w, v.Exceptions) + f("%sWildcard(%s, %q, except=%v)", changemark, loc, w, v.Exceptions) } else { - f("Wildcard(%s, %q)", loc, w) + f("%sWildcard(%s, %q)", changemark, loc, w) } case *Comment: - f("Comment(%s) {", loc) + f("%sComment(%s) {", changemark, loc) for _, line := range v.Text { f("%s%s", extraIndent, line) }