Skip to content

Commit

Permalink
tools/internal/parser: add more offline, diff-aware validations (#2089)
Browse files Browse the repository at this point in the history
* tools/internal/parser: highlight .Changed() AST nodes in debug output

* tools/internal/parser: make metadata validator diff-aware, add tests

* tools/internal/parser: add more validations
  • Loading branch information
danderson authored Aug 8, 2024
1 parent 4f58803 commit f47fcb3
Show file tree
Hide file tree
Showing 8 changed files with 488 additions and 13 deletions.
12 changes: 12 additions & 0 deletions tools/internal/domain/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,18 @@ func (d Name) CutSuffix(suffix Name) (rest []Label, found bool) {
return ret, true
}

// AddPrefix returns d prefixed with label.
//
// For example, AddPrefix of "bar" to "foo.com" is "bar.foo.com".
func (d Name) AddPrefix(label Label) (Name, error) {
// Due to total name length restrictions, we have to fully
// re-check the shape of the extended domain name. The simplest
// way to do that is to round-trip through a string and leverage
// Parse again.
retStr := label.String() + "." + d.String()
return Parse(retStr)
}

// Label is a domain name label.
type Label struct {
label string
Expand Down
22 changes: 19 additions & 3 deletions tools/internal/parser/clean.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func cleanBlock(b Block) []error {
ret = append(ret, sortSuffixes(v)...)
rewriteSuffixesMetadata(v)
case *Wildcard:
slices.SortFunc(v.Exceptions, domain.Label.Compare)
cleanWildcard(v)
case *Comment, *Suffix:
// No cleaning required
default:
Expand All @@ -64,6 +64,12 @@ func cleanBlock(b Block) []error {
return ret
}

func cleanWildcard(w *Wildcard) {
// Sort and deduplicate exception domains.
slices.SortFunc(w.Exceptions, domain.Label.Compare)
w.Exceptions = slices.Compact(w.Exceptions)
}

func sortSection(s *Section) []error {
// There are two difficult parts aspects to sorting a section:
// free-floating comments, and Amazon.
Expand Down Expand Up @@ -224,14 +230,24 @@ func sortSuffixes(s *Suffixes) []error {
prevGroupEnd Block // last suffix/wildcard of previous group
prevComment *Comment // last Comment seen, for error reporting
thisGroupStart int
// We have to construct a new output slice, because the
// deduplicating sort below might shrink s.Blocks.
out = make([]Block, 0, len(s.Blocks))
)

sortAndCheck := func(group []Block) {
if len(group) == 0 {
return
}

// Sort and deduplicate. Within a single group inside a suffix
// block, duplicate suffixes are semantically equivalent so
// it's safe to remove dupes.
slices.SortFunc(group, compareSuffixAndWildcard)
group = slices.CompactFunc(group, func(a, b Block) bool {
return compareSuffixAndWildcard(a, b) == 0
})
out = append(out, group...)

if prevGroupEnd == nil {
// First group.
Expand All @@ -258,6 +274,7 @@ func sortSuffixes(s *Suffixes) []error {
sortAndCheck(group)
prevComment = v
}
out = append(out, v)
thisGroupStart = i + 1
default:
panic("unknown ast node")
Expand All @@ -267,8 +284,7 @@ func sortSuffixes(s *Suffixes) []error {
sortAndCheck(s.Blocks[thisGroupStart:])
}

// Unlike in sortSection, no need to construct a new s.Blocks,
// we've sorted the blocks in-place.
s.Blocks = out

return errs
}
Expand Down
26 changes: 25 additions & 1 deletion tools/internal/parser/clean_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,29 @@ func TestClean(t *testing.T) {
),
},

{
name: "sort_suffixes_dedup",
in: list(
suffixes(1, 1, noInfo,
suffix(1, "com"),
wildcard(2, 2, "foo.com"),
suffix(3, "zot.com"),
suffix(4, "qux.com"),
suffix(5, "qux.foo.com"),
suffix(6, "zot.com"),
),
),
want: list(
suffixes(1, 1, noInfo,
suffix(1, "com"),
wildcard(2, 2, "foo.com"),
suffix(5, "qux.foo.com"),
suffix(4, "qux.com"),
suffix(3, "zot.com"),
),
),
},

{
name: "sort_suffixes_with_nonblocking_comment",
in: list(
Expand Down Expand Up @@ -162,7 +185,8 @@ func TestClean(t *testing.T) {
name: "sort_suffixes_wildcard_exceptions",
in: list(
suffixes(1, 1, noInfo,
wildcard(1, 1, "foo.com", "mmm", "aaa", "zzz"),
// Also has a duplicate exception that needs cleaning up
wildcard(1, 1, "foo.com", "mmm", "aaa", "zzz", "aaa"),
suffix(2, "foo.com"),
),
),
Expand Down
44 changes: 44 additions & 0 deletions tools/internal/parser/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,47 @@ type ErrCommentPreventsSectionSort struct {
func (e ErrCommentPreventsSectionSort) Error() string {
return fmt.Sprintf("%s: comment prevents full sorting of PSL section", e.SourceRange.LocationString())
}

type ErrDuplicateSection struct {
*Section
FirstDefinition *Section
}

func (e ErrDuplicateSection) Error() string {
return fmt.Sprintf("%s: duplicate section %q, first definition at %s", e.LocationString(), e.Name, e.FirstDefinition.LocationString())
}

type ErrUnknownSection struct {
*Section
}

func (e ErrUnknownSection) Error() string {
return fmt.Sprintf("%s: unknown section %q, allowed sections are 'ICANN DOMAINS' and 'PRIVATE DOMAINS'", e.LocationString(), e.Name)
}

type ErrMissingSection struct {
Name string
}

func (e ErrMissingSection) Error() string {
return fmt.Sprintf("missing required section %q", e.Name)
}

type ErrDuplicateSuffix struct {
Name string
Block // Suffix or Wildcard
FirstDefinition Block // Suffix or Wildcard
}

func (e ErrDuplicateSuffix) Error() string {
return fmt.Sprintf("%s: duplicate suffix definition for %q, first definition at %s", e.SrcRange().LocationString(), e.Name, e.FirstDefinition.SrcRange().LocationString())
}

type ErrConflictingSuffixAndException struct {
*Suffix
Wildcard *Wildcard
}

func (e ErrConflictingSuffixAndException) Error() string {
return fmt.Sprintf("%s: suffix %s conflicts with exception in wildcard at %s", e.LocationString(), e.Domain, e.Wildcard.LocationString())
}
10 changes: 10 additions & 0 deletions tools/internal/parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,13 @@ func zeroSourceRange(b Block) Block {
}
return b
}

// markUnchanged makes .Changed() return false for b. It does not
// touch parent or child blocks.
//
// It's generic so that it works in places that require a specific
// instance type, not just places that accept a Block interface.
func markUnchanged[T Block](b T) T {
b.info().isUnchanged = true
return b
}
82 changes: 80 additions & 2 deletions tools/internal/parser/validate.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package parser

import (
"github.com/creachadair/mds/mapset"
)

// ValidateOffline runs offline validations on a parsed PSL.
func ValidateOffline(l *List) []error {
var ret []error
Expand All @@ -10,24 +14,98 @@ func ValidateOffline(l *List) []error {
break
}
}
validateExpectedSections(l)
validateSuffixUniqueness(l)

return ret
}

// validateEntityMetadata verifies that all suffix blocks have some
// kind of entity name.
func validateEntityMetadata(block *Section) []error {
func validateEntityMetadata(block Block) []error {
var ret []error
for _, block := range BlocksOfType[*Suffixes](block) {
if !block.Changed() {
continue
}

if block.Info.Name == "" {
ret = append(ret, ErrMissingEntityName{
Suffixes: block,
})
} else if len(block.Info.Maintainers) == 0 && !exemptFromContactInfo(block.Info.Name) {
}
if len(block.Info.Maintainers) == 0 && !exemptFromContactInfo(block.Info.Name) {
ret = append(ret, ErrMissingEntityEmail{
Suffixes: block,
})
}
}
return ret
}

// validateExpectedSections verifies that the two top-level sections
// (ICANN and private domains) exist, are not duplicated, and that no
// other sections are present.
func validateExpectedSections(block Block) (errs []error) {
// Use an ordered set for the wanted sections, so that we can
// check section names in O(1) but also report missing sections in
// a deterministic order.
wanted := mapset.New("ICANN DOMAINS", "PRIVATE DOMAINS")
found := map[string]*Section{}
for _, section := range BlocksOfType[*Section](block) {
if !wanted.Has(section.Name) && section.Changed() {
errs = append(errs, ErrUnknownSection{section})
} else if other, ok := found[section.Name]; ok && (section.Changed() || other.Changed()) {
errs = append(errs, ErrDuplicateSection{section, other})
} else {
found[section.Name] = section
}
}

for _, name := range wanted.Slice() {
if _, ok := found[name]; !ok {
errs = append(errs, ErrMissingSection{name})
}
}

return errs
}

// validateSuffixUniqueness verifies that suffixes only appear once
// each.
func validateSuffixUniqueness(block Block) (errs []error) {
suffixes := map[string]*Suffix{} // domain.Name.String() -> Suffix
wildcards := map[string]*Wildcard{} // base domain.Name.String() -> Wildcard

for _, suffix := range BlocksOfType[*Suffix](block) {
name := suffix.Domain.String()
if other, ok := suffixes[name]; ok && (suffix.Changed() || other.Changed()) {
errs = append(errs, ErrDuplicateSuffix{name, suffix, other})
} else {
suffixes[name] = suffix
}
}

for _, wildcard := range BlocksOfType[*Wildcard](block) {
name := wildcard.Domain.String()
if other, ok := wildcards[name]; ok && (wildcard.Changed() || other.Changed()) {
errs = append(errs, ErrDuplicateSuffix{"*." + name, wildcard, other})
} else {
wildcards[name] = wildcard
}

for _, exc := range wildcard.Exceptions {
fqdn, err := wildcard.Domain.AddPrefix(exc)
if err != nil && wildcard.Changed() {
errs = append(errs, err)
continue
}
name := fqdn.String()
if suffix, ok := suffixes[name]; ok && (wildcard.Changed() || suffix.Changed()) {
errs = append(errs, ErrConflictingSuffixAndException{suffix, wildcard})
}
}
}

return errs
}
Loading

0 comments on commit f47fcb3

Please sign in to comment.