diff --git a/.github/workflows/tld-update.yml.hold b/.github/workflows/tld-update.yml similarity index 96% rename from .github/workflows/tld-update.yml.hold rename to .github/workflows/tld-update.yml index 2248924061..7950067241 100644 --- a/.github/workflows/tld-update.yml.hold +++ b/.github/workflows/tld-update.yml @@ -19,6 +19,9 @@ jobs: with: go-version: ^1.15 + - name: Run unit tests + run: go test tools/*.go + - name: Set current date id: get-date run: echo "::set-output name=now::$(date +'%Y-%m-%dT%H:%M:%S %Z')" diff --git a/.gitignore b/.gitignore index 75ec50d1f5..86af741708 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ linter/log libpsl - +coverage.out diff --git a/.travis.yml b/.travis.yml index 3434bacae4..fd1be9d3be 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,12 @@ language: c compiler: gcc -script: make +script: + - make + - go test -v -coverprofile=coverage.out tools/*.go + +go: + - "1.15.x" addons: apt: @@ -18,4 +23,3 @@ addons: - libicu-dev - libunistring0 - libunistring-dev - diff --git a/tools/newgtlds.go b/tools/newgtlds.go index 33cf8e452b..750a661d7e 100644 --- a/tools/newgtlds.go +++ b/tools/newgtlds.go @@ -6,6 +6,7 @@ import ( "bytes" "encoding/json" "errors" + "flag" "fmt" "io" "io/ioutil" @@ -16,10 +17,18 @@ import ( "time" ) -// ICANN_GTLD_JSON_URL is the URL for the ICANN gTLD JSON registry (version -// 2). See https://www.icann.org/resources/pages/registries/registries-en for -// more information. -const ICANN_GTLD_JSON_URL = "https://www.icann.org/resources/registries/gtlds/v2/gtlds.json" +const ( + // ICANN_GTLD_JSON_URL is the URL for the ICANN gTLD JSON registry (version + // 2). See https://www.icann.org/resources/pages/registries/registries-en for + // more information. + ICANN_GTLD_JSON_URL = "https://www.icann.org/resources/registries/gtlds/v2/gtlds.json" + // PSL_GTLDS_SECTION_HEADER marks the start of the newGTLDs section of the + // overall public suffix dat file. + PSL_GTLDS_SECTION_HEADER = "// newGTLDs" + // PSL_GTLDS_SECTION_FOOTER marks the end of the newGTLDs section of the + // overall public suffix dat file. + PSL_GTLDS_SECTION_FOOTER = "// ===END ICANN DOMAINS===" +) var ( // legacyGTLDs are gTLDs that predate ICANN's new gTLD program. These legacy @@ -48,22 +57,31 @@ var ( "xxx": true, } + // pslHeaderTemplate is a parsed text/template instance for rendering the header + // before the data rendered with the pslTemplate. We use two separate templates + // so that we can avoid having a variable date stamp in the pslTemplate, allowing + // us to easily check that the data in the current .dat file is unchanged from + // what we render when there are no updates to add. + // + // Expected template data: + // URL - the string URL that the data was fetched from. + // Date - the time.Date that the data was fetched. + // DateFormat - the format string to use with the date. + pslHeaderTemplate = template.Must(template.New("public-suffix-list-gtlds-header").Parse(` +// List of new gTLDs imported from {{ .URL }} on {{ .Date.Format .DateFormat }} +// This list is auto-generated, don't edit it manually.`)) + // pslTemplate is a parsed text/template instance for rendering a list of pslEntry // objects in the format used by the public suffix list. // // It expects the following template data: - // URL - the string URL that the data was fetched from. - // Date - the time.Date that the data was fetched. // Entries - a list of pslEntry objects. - pslTemplate = template.Must(template.New("public-suffix-list-gtlds").Parse(` -// List of new gTLDs imported from {{ .URL }} on {{ .Date.Format "2006-01-02T15:04:05Z07:00" }} -// This list is auto-generated, don't edit it manually. - + pslTemplate = template.Must( + template.New("public-suffix-list-gtlds").Parse(` {{- range .Entries }} -{{ .Comment }} -{{ printf "%s\n" .ULabel}} -{{- end }} -`)) +{{- .Comment }} +{{ printf "%s\n" .ULabel }} +{{ end }}`)) ) // pslEntry is a struct matching a subset of the gTLD data fields present in @@ -131,6 +149,176 @@ func (e pslEntry) Comment() string { return strings.Join(parts, " ") } +// gTLDDatSpan represents the span between the PSL_GTLD_SECTION_HEADER and +// the PSL_GTLDS_SECTION_FOOTER in the PSL dat file. +type gTLDDatSpan struct { + startIndex int + endIndex int +} + +var ( + errNoHeader = fmt.Errorf("did not find expected header line %q", + PSL_GTLDS_SECTION_HEADER) + errMultipleHeaders = fmt.Errorf("found expected header line %q more than once", + PSL_GTLDS_SECTION_HEADER) + errNoFooter = fmt.Errorf("did not find expected footer line %q", + PSL_GTLDS_SECTION_FOOTER) +) + +type errInvertedSpan struct { + span gTLDDatSpan +} + +func (e errInvertedSpan) Error() string { + return fmt.Sprintf( + "found footer line %q before header line %q (index %d vs %d)", + PSL_GTLDS_SECTION_FOOTER, PSL_GTLDS_SECTION_HEADER, + e.span.endIndex, e.span.startIndex) +} + +// validate checks that a given gTLDDatSpan is sensible. It returns an err if +// the span is nil, if the start or end index haven't been set to > 0, or if the +// end index is <= the the start index. +func (s gTLDDatSpan) validate() error { + if s.startIndex <= 0 { + return errNoHeader + } + if s.endIndex <= 0 { + return errNoFooter + } + if s.endIndex <= s.startIndex { + return errInvertedSpan{span: s} + } + return nil +} + +// datFile holds the individual lines read from the public suffix list dat file and +// the span that holds the gTLD specific data section. It supports reading the +// gTLD specific data, and replacing it. +type datFile struct { + // lines holds the datfile contents split by "\n" + lines []string + // gTLDSpan holds the indexes where the gTLD data can be found in lines. + gTLDSpan gTLDDatSpan +} + +type errSpanOutOfBounds struct { + span gTLDDatSpan + numLines int +} + +func (e errSpanOutOfBounds) Error() string { + return fmt.Sprintf( + "span out of bounds: start index %d, end index %d, number of lines %d", + e.span.startIndex, e.span.endIndex, e.numLines) +} + +// validate validates the state of the datFile. It returns an error if +// the gTLD span validate() returns an error, or if gTLD span endIndex is >= the +// number of lines in the file. +func (d datFile) validate() error { + if err := d.gTLDSpan.validate(); err != nil { + return err + } + if d.gTLDSpan.endIndex >= len(d.lines) { + return errSpanOutOfBounds{span: d.gTLDSpan, numLines: len(d.lines)} + } + return nil +} + +// getGTLDLines returns the lines from the dat file within the gTLD data span, +// or an error if the span isn't valid for the dat file. +func (d datFile) getGTLDLines() ([]string, error) { + if err := d.validate(); err != nil { + return nil, err + } + return d.lines[d.gTLDSpan.startIndex:d.gTLDSpan.endIndex], nil +} + +// ReplaceGTLDContent updates the dat file's lines to replace the gTLD data span +// with new content. +func (d *datFile) ReplaceGTLDContent(content string) error { + if err := d.validate(); err != nil { + return err + } + + contentLines := strings.Split(content, "\n") + beforeLines := d.lines[0:d.gTLDSpan.startIndex] + afterLines := d.lines[d.gTLDSpan.endIndex:] + newLines := append(beforeLines, append(contentLines, afterLines...)...) + + // Update the span based on the new content length + d.gTLDSpan.endIndex = len(beforeLines) + len(contentLines) + // and update the data file lines + d.lines = newLines + return nil +} + +// String returns the dat file's lines joined together. +func (d datFile) String() string { + return strings.Join(d.lines, "\n") +} + +// readDatFile reads the contents of the PSL dat file from the provided path +// and returns a representation holding all of the lines and the span where the gTLD +// data is found within the dat file. An error is returned if the file can't be read +// or if the gTLD data span can't be found or is invalid. +func readDatFile(datFilePath string) (*datFile, error) { + pslDatBytes, err := ioutil.ReadFile(datFilePath) + if err != nil { + return nil, err + } + return readDatFileContent(string(pslDatBytes)) +} + +func readDatFileContent(pslData string) (*datFile, error) { + pslDatLines := strings.Split(pslData, "\n") + + headerIndex, footerIndex := 0, 0 + for i := 0; i < len(pslDatLines); i++ { + line := pslDatLines[i] + + if line == PSL_GTLDS_SECTION_HEADER && headerIndex == 0 { + // If the line matches the header and we haven't seen the header yet, capture + // the index + headerIndex = i + } else if line == PSL_GTLDS_SECTION_HEADER && headerIndex != 0 { + // If the line matches the header and we've already seen the header return + // an error. This is unexpected. + return nil, errMultipleHeaders + } else if line == PSL_GTLDS_SECTION_FOOTER && footerIndex == 0 { + // If the line matches the footer, capture the index. We don't need + // to consider the case where we've already seen a footer because we break + // below when we have both a header and footer index. + footerIndex = i + } + + // Break when we have found one header and one footer. + if headerIndex != 0 && footerIndex != 0 { + break + } + } + + if headerIndex == 0 { + return nil, errNoHeader + } else if footerIndex == 0 { + return nil, errNoFooter + } + + datFile := &datFile{ + lines: pslDatLines, + gTLDSpan: gTLDDatSpan{ + startIndex: headerIndex + 1, + endIndex: footerIndex, + }, + } + if err := datFile.validate(); err != nil { + return nil, err + } + + return datFile, nil +} + // getData performs a HTTP GET request to the given URL and returns the // response body bytes or returns an error. An HTTP response code other than // http.StatusOK (200) is considered to be an error. @@ -215,34 +403,121 @@ func getPSLEntries(url string) ([]*pslEntry, error) { return filtered, nil } +// renderTemplate renders the given template to the provided writer, using the +// templateData, or returns an error. +func renderTemplate(writer io.Writer, template *template.Template, templateData interface{}) error { + var buf bytes.Buffer + if err := template.Execute(&buf, templateData); err != nil { + return err + } + + _, err := writer.Write(buf.Bytes()) + if err != nil { + return err + } + return nil +} + +// clock is a small interface that lets us mock time in unit tests. +type clock interface { + Now() time.Time +} + +// realClock is an implementation of clock that uses time.Now() natively. +type realClock struct{} + +// Now returns the current time.Time using the system clock. +func (c realClock) Now() time.Time { + return time.Now() +} + +// renderHeader renders the pslHeaderTemplate to the writer or returns an error. The +// provided clock instance is used for the header last update timestamp. If no +// clk instance is provided realClock is used. +func renderHeader(writer io.Writer, clk clock) error { + if clk == nil { + clk = &realClock{} + } + templateData := struct { + URL string + Date time.Time + DateFormat string + }{ + URL: ICANN_GTLD_JSON_URL, + Date: clk.Now().UTC(), + DateFormat: time.RFC3339, + } + + return renderTemplate(writer, pslHeaderTemplate, templateData) +} + // renderData renders the given list of pslEntry objects using the pslTemplate. -// The rendered template data is written to the provided writer. -func renderData(entries []*pslEntry, writer io.Writer) error { +// The rendered template data is written to the provided writer or an error is +// returned. +func renderData(writer io.Writer, entries []*pslEntry) error { templateData := struct { - URL string - Date time.Time Entries []*pslEntry }{ - URL: ICANN_GTLD_JSON_URL, - Date: time.Now(), Entries: entries, } - var buf bytes.Buffer - if err := pslTemplate.Execute(&buf, templateData); err != nil { - return err + return renderTemplate(writer, pslTemplate, templateData) +} + +// Process handles updating a datFile with new gTLD content. If there are no +// gTLD updates the existing dat file's contents will be returned. If there are +// updates, the new updates will be spliced into place and the updated file contents +// returned. +func process(datFile *datFile, dataURL string, clk clock) (string, error) { + // Get the lines for the gTLD data span - this includes both the header with the + // date and the actual gTLD entries. + spanLines, err := datFile.getGTLDLines() + if err != nil { + return "", err } - _, err := writer.Write(buf.Bytes()) + // Render a new header for the gTLD data. + var newHeaderBuf strings.Builder + if err := renderHeader(&newHeaderBuf, clk); err != nil { + return "", err + } + + // Figure out how many lines the header with the dynamic date is. + newHeaderLines := strings.Split(newHeaderBuf.String(), "\n") + headerLen := len(newHeaderLines) + + // We should have at least that many lines in the existing span data. + if len(spanLines) <= headerLen { + return "", errors.New("gtld span data was too small, missing header?") + } + + // The gTLD data can be found by skipping the header lines + existingData := strings.Join(spanLines[headerLen:], "\n") + + // Fetch new PSL entries. + entries, err := getPSLEntries(dataURL) if err != nil { - return err + return "", err } - return nil + + // Render the new gTLD PSL section with the new entries. + var newDataBuf strings.Builder + if err := renderData(&newDataBuf, entries); err != nil { + return "", err + } + + // If the newly rendered data doesn't match the existing data then we want to + // update the dat file content by replacing the old span with the new content. + if newDataBuf.String() != existingData { + newContent := newHeaderBuf.String() + "\n" + newDataBuf.String() + if err := datFile.ReplaceGTLDContent(newContent); err != nil { + return "", err + } + } + + return datFile.String(), nil } -// main will fetch the PSL entires from the ICANN gTLD JSON registry, parse -// them, normalize them, remove legacy and terminated gTLDs, and finally render -// them with the pslTemplate, printing the results to standard out. func main() { ifErrQuit := func(err error) { if err != nil { @@ -251,9 +526,35 @@ func main() { } } - entries, err := getPSLEntries(ICANN_GTLD_JSON_URL) + pslDatFile := flag.String( + "psl-dat-file", + "public_suffix_list.dat", + "file path to the public_suffix.dat data file to be updated with new gTLDs") + + overwrite := flag.Bool( + "overwrite", + false, + "overwrite -psl-dat-file with the new data instead of printing to stdout") + + // Parse CLI flags. + flag.Parse() + + // Read the existing file content and find the span that contains the gTLD data. + datFile, err := readDatFile(*pslDatFile) ifErrQuit(err) - err = renderData(entries, os.Stdout) + // Process the dat file. + content, err := process(datFile, ICANN_GTLD_JSON_URL, nil) + ifErrQuit(err) + + // If we're not overwriting the file, print the content to stdout. + if !*overwrite { + fmt.Println(content) + os.Exit(0) + } + + // Otherwise print nothing to stdout and write the content over the exiting + // pslDatFile path we read earlier. + err = ioutil.WriteFile(*pslDatFile, []byte(content), 0644) ifErrQuit(err) } diff --git a/tools/newgtlds_test.go b/tools/newgtlds_test.go index b73ffb8d0a..cc2e9b005f 100644 --- a/tools/newgtlds_test.go +++ b/tools/newgtlds_test.go @@ -5,11 +5,14 @@ import ( "encoding/json" "fmt" "io" + "io/ioutil" "net/http" "net/http/httptest" + "os" "reflect" "strings" "testing" + "time" ) func TestEntryNormalize(t *testing.T) { @@ -124,7 +127,7 @@ type badStatusHandler struct{} func (h *badStatusHandler) ServeHTTP(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusUnavailableForLegalReasons) - w.Write([]byte("sorry")) + _, _ = w.Write([]byte("sorry")) } func TestGetData(t *testing.T) { @@ -151,7 +154,7 @@ type mockHandler struct { } func (h *mockHandler) ServeHTTP(w http.ResponseWriter, _ *http.Request) { - w.Write(h.respData) + _, _ = w.Write(h.respData) } func TestGetPSLEntries(t *testing.T) { @@ -320,23 +323,486 @@ ceepeeyou ` var buf bytes.Buffer - if err := renderData(entries, io.Writer(&buf)); err != nil { + if err := renderData(io.Writer(&buf), entries); err != nil { t.Fatalf("unexpected error from renderData: %v", err) } - rendered := buf.String() + if rendered := buf.String(); rendered != expectedList { + t.Errorf("expected rendered list content %q, got %q", + expectedList, rendered) + } +} - lines := strings.Split(rendered, "\n") - if len(lines) < 3 { - t.Fatalf("expected at least two header lines in rendered data. "+ - "Found only %d lines", len(lines)) +func TestErrInvertedSpan(t *testing.T) { + err := errInvertedSpan{gTLDDatSpan{startIndex: 50, endIndex: 10}} + expected := `found footer line "// ===END ICANN DOMAINS===" ` + + `before header line "// newGTLDs" (index 10 vs 50)` + if actual := err.Error(); actual != expected { + t.Errorf("expected %#v Error() to return %q got %q", err, expected, actual) } +} - listContent := strings.Join(lines[3:], "\n") - fmt.Printf("Got: \n%s\n", listContent) - fmt.Printf("Expected: \n%s\n", expectedList) - if listContent != expectedList { - t.Errorf("expected rendered list content %q, got %q", - expectedList, listContent) +func TestGTLDDatSpanValidate(t *testing.T) { + testCases := []struct { + name string + span gTLDDatSpan + expected error + }{ + { + name: "no header", + span: gTLDDatSpan{}, + expected: errNoHeader, + }, + { + name: "no footer", + span: gTLDDatSpan{startIndex: 10}, + expected: errNoFooter, + }, + { + name: "inverted", + span: gTLDDatSpan{startIndex: 50, endIndex: 10}, + expected: errInvertedSpan{gTLDDatSpan{startIndex: 50, endIndex: 10}}, + }, + { + name: "valid", + span: gTLDDatSpan{startIndex: 10, endIndex: 20}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + if actual := tc.span.validate(); actual != tc.expected { + t.Errorf("expected span %v validate to return %v got %v", + tc.span, tc.expected, actual) + } + }) + } +} + +func TestErrSpanOutOfBounds(t *testing.T) { + err := errSpanOutOfBounds{ + span: gTLDDatSpan{startIndex: 5, endIndex: 50}, + numLines: 20, + } + expected := `span out of bounds: start index 5, end index 50, number of lines 20` + if actual := err.Error(); actual != expected { + t.Errorf("expected %#v Error() to return %q got %q", err, expected, actual) + } +} + +func TestDatFileValidate(t *testing.T) { + testCases := []struct { + name string + file datFile + expected error + }{ + { + name: "bad gTLD span", + file: datFile{gTLDSpan: gTLDDatSpan{}}, + expected: errNoHeader, + }, + { + name: "out of bounds span", + file: datFile{ + lines: []string{"one line"}, + gTLDSpan: gTLDDatSpan{startIndex: 5, endIndex: 10}, + }, + expected: errSpanOutOfBounds{ + span: gTLDDatSpan{startIndex: 5, endIndex: 10}, + numLines: 1, + }, + }, + { + name: "valid", + file: datFile{ + lines: []string{"one line", "two line", "three line", "four"}, + gTLDSpan: gTLDDatSpan{startIndex: 2, endIndex: 3}}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + if actual := tc.file.validate(); actual != tc.expected { + t.Errorf("expected dat file %v validate to return %v got %v", + tc.file, tc.expected, actual) + } + }) + } +} + +func TestGetGTLDLines(t *testing.T) { + lines := []string{ + "some junk", // Index 0 + PSL_GTLDS_SECTION_HEADER, // Index 1 + "here be gTLDs", // Index 2 + "so many gTLDs", // Index 3 + PSL_GTLDS_SECTION_FOOTER, // Index 4 + "more junk", // Index 5 + } + file := datFile{ + lines: lines, + gTLDSpan: gTLDDatSpan{startIndex: 2, endIndex: 4}, + } + + expectedLines := []string{ + lines[2], lines[3], + } + + if actual, err := file.getGTLDLines(); err != nil { + t.Errorf("unexpected err: %v", err) + } else if !reflect.DeepEqual(actual, expectedLines) { + t.Errorf("expected %v got %v", expectedLines, actual) + } + + // Now update the gTLDSpan to be invalid and try again + file.gTLDSpan.endIndex = 99 + expectedErr := errSpanOutOfBounds{ + numLines: len(lines), + span: gTLDDatSpan{startIndex: 2, endIndex: 99}, + } + if _, err := file.getGTLDLines(); err != expectedErr { + t.Errorf("expected err %v got %v", expectedErr, err) + } +} + +func TestReplaceGTLDContent(t *testing.T) { + origLines := []string{ + "some junk", // Index 0 + PSL_GTLDS_SECTION_HEADER, // Index 1 + "here be gTLDs", // Index 2 + "so many gTLDs", // Index 3 + PSL_GTLDS_SECTION_FOOTER, // Index 4 + "more junk", // Index 5 + } + file := datFile{ + lines: origLines, + gTLDSpan: gTLDDatSpan{startIndex: 2, endIndex: 4}, + } + newLines := []string{ + "new gTLD A", // Index 0 + "new gTLD B", // Index 1 + "new gTLD C", // Index 2 + } + + newContent := strings.Join(newLines, "\n") + if err := file.ReplaceGTLDContent(newContent); err != nil { + t.Errorf("unexpected err %v", err) + } + + expectedLines := []string{ + origLines[0], + origLines[1], + newLines[0], + newLines[1], + newLines[2], + origLines[4], + origLines[5], + } + if !reflect.DeepEqual(file.lines, expectedLines) { + t.Errorf("expected lines to be updated to %v was %v", expectedLines, file.lines) + } + if file.gTLDSpan.endIndex != 5 { + t.Errorf("expected file to have gTLDSpan end updated to 5, was %d", + file.gTLDSpan.endIndex) + } + + // Now update the gTLDSpan to be invalid and try again + file.gTLDSpan.endIndex = 99 + expectedErr := errSpanOutOfBounds{ + numLines: len(expectedLines), + span: gTLDDatSpan{startIndex: 2, endIndex: 99}, + } + if err := file.ReplaceGTLDContent("ignored content"); err != expectedErr { + t.Errorf("expected err %v got %v", expectedErr, err) + } else if !reflect.DeepEqual(file.lines, expectedLines) { + t.Errorf("expected lines to still be %v was changed to %v", + expectedLines, file.lines) + } +} + +func TestDatFileString(t *testing.T) { + file := datFile{ + lines: []string{"hello", "world"}, + } + expected := "hello\nworld" + if actual := file.String(); actual != expected { + t.Errorf("expected file %v String() to be %q was %q", file, expected, actual) + } +} + +func TestReadDatFile(t *testing.T) { + mustWriteTemp := func(t *testing.T, content string) string { + tmpfile, err := ioutil.TempFile("", "dat") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + if _, err := tmpfile.Write([]byte(content)); err != nil { + t.Fatalf("Failed to write temp file: %v", err) + } + if err := tmpfile.Close(); err != nil { + t.Fatalf("Failed to close temp file: %v", err) + } + return tmpfile.Name() + } + + noHeaderContent := strings.Join([]string{ + "foo", + "bar", + }, "\n") + noHeaderFile := mustWriteTemp(t, noHeaderContent) + defer os.Remove(noHeaderFile) + + noFooterContent := strings.Join([]string{ + "foo", + PSL_GTLDS_SECTION_HEADER, + "bar", + }, "\n") + noFooterFile := mustWriteTemp(t, noFooterContent) + defer os.Remove(noFooterFile) + + multiHeaderContent := strings.Join([]string{ + "foo", + PSL_GTLDS_SECTION_HEADER, + "test", + PSL_GTLDS_SECTION_HEADER, + "test", + PSL_GTLDS_SECTION_FOOTER, + "bar", + }, "\n") + multiHeaderFile := mustWriteTemp(t, multiHeaderContent) + defer os.Remove(multiHeaderFile) + + invertedContent := strings.Join([]string{ + "foo", + PSL_GTLDS_SECTION_FOOTER, + "test", + PSL_GTLDS_SECTION_HEADER, + "bar", + }, "\n") + invertedFile := mustWriteTemp(t, invertedContent) + defer os.Remove(invertedFile) + + validContent := strings.Join([]string{ + "foo", // Index 0 + PSL_GTLDS_SECTION_HEADER, // Index 1 + "test", // Index 2 + PSL_GTLDS_SECTION_FOOTER, // Index 3 + "bar", // Index 4 + }, "\n") + validFile := mustWriteTemp(t, validContent) + defer os.Remove(validFile) + + testCases := []struct { + name string + path string + expectedErrMsg string + expectedDatFile *datFile + }{ + { + name: "no such file", + path: "", + expectedErrMsg: "open : no such file or directory", + }, + { + name: "no header", + path: noHeaderFile, + expectedErrMsg: errNoHeader.Error(), + }, + { + name: "no footer", + path: noFooterFile, + expectedErrMsg: errNoFooter.Error(), + }, + { + name: "multiple headers", + path: multiHeaderFile, + expectedErrMsg: errMultipleHeaders.Error(), + }, + { + name: "inverted header/footer", + path: invertedFile, + expectedErrMsg: (errInvertedSpan{gTLDDatSpan{startIndex: 4, endIndex: 1}}).Error(), + }, + { + name: "valid", + path: validFile, + expectedDatFile: &datFile{ + lines: strings.Split(validContent, "\n"), + gTLDSpan: gTLDDatSpan{ + startIndex: 2, + endIndex: 3, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + actual, err := readDatFile(tc.path) + if err != nil && tc.expectedErrMsg == "" { + t.Errorf("unexpected err: %v", err) + } else if err != nil && err.Error() != tc.expectedErrMsg { + t.Errorf("expected err: %q, got: %q", tc.expectedErrMsg, err.Error()) + } else if err == nil && tc.expectedErrMsg != "" { + t.Errorf("expected err: %q, got: nil", tc.expectedErrMsg) + } else if !reflect.DeepEqual(actual, tc.expectedDatFile) { + t.Errorf("expected dat file: %q, got %q", tc.expectedDatFile, actual) + } + }) + } +} + +type mockClock struct { + fakeUnixTime int64 +} + +func (m mockClock) Now() time.Time { + return time.Unix(m.fakeUnixTime, 0) +} + +func TestProcess(t *testing.T) { + mockHandler := func(content string) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + fmt.Fprintln(w, content) + } + } + + existingData := ` +... + +// newGTLDs + +// List of new gTLDs imported from https://www.icann.org/resources/registries/gtlds/v2/gtlds.json on 2021-02-07T13:25:56-05:00 +// This list is auto-generated, don't edit it manually. +// aaa : 2015-02-26 American Automobile Association, Inc. +aaa + + +// ===END ICANN DOMAINS=== + +... +` + existingJSON := ` +{ + "gTLDs": [ + { + "contractTerminated": false, + "dateOfContractSignature": "2015-02-26", + "gTLD": "aaa", + "registryOperator": "American Automobile Association, Inc.", + "removalDate": null, + "uLabel": null + } + ] +} +` + + newJSON := ` +{ + "gTLDs": [ + { + "contractTerminated": false, + "dateOfContractSignature": "2015-02-26", + "gTLD": "aaa", + "registryOperator": "American Automobile Association, Inc.", + "removalDate": null, + "uLabel": null + }, + { + "contractTerminated": false, + "dateOfContractSignature": "2014-03-20", + "gTLD": "accountants", + "registryOperator": "Binky Moon, LLC", + "removalDate": null, + "uLabel": null + } + ] +} +` + + fakeClock := mockClock{ + fakeUnixTime: 1612916654, + } + newData := ` +... + +// newGTLDs + +// List of new gTLDs imported from https://www.icann.org/resources/registries/gtlds/v2/gtlds.json on 2021-02-10T00:24:14Z +// This list is auto-generated, don't edit it manually. +// aaa : 2015-02-26 American Automobile Association, Inc. +aaa + +// accountants : 2014-03-20 Binky Moon, LLC +accountants + + +// ===END ICANN DOMAINS=== + +... +` + + mustReadDatFile := func(t *testing.T, content string) *datFile { + datFile, err := readDatFileContent(content) + if err != nil { + t.Fatalf("failed to readDatFileContent %q: %v", content, err) + } + return datFile + } + + testCases := []struct { + name string + file *datFile + pslJSON string + expectedErrMsg string + expectedContent string + }{ + { + name: "bad span", + file: &datFile{}, + expectedErrMsg: errNoHeader.Error(), + }, + { + name: "span too small", + file: &datFile{ + lines: []string{"a", "b", "c"}, + gTLDSpan: gTLDDatSpan{startIndex: 1, endIndex: 2}, + }, + expectedErrMsg: "gtld span data was too small, missing header?", + }, + { + name: "no change in data", + file: mustReadDatFile(t, existingData), + pslJSON: existingJSON, + expectedContent: existingData, + }, + { + name: "change in data", + file: mustReadDatFile(t, existingData), + pslJSON: newJSON, + expectedContent: newData, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + s := httptest.NewServer(mockHandler(tc.pslJSON)) + defer s.Close() + + content, err := process(tc.file, s.URL, fakeClock) + if err != nil && tc.expectedErrMsg == "" { + t.Errorf("unexpected err: %v", err) + } else if err != nil && err.Error() != tc.expectedErrMsg { + t.Errorf("expected err: %q, got: %q", tc.expectedErrMsg, err.Error()) + } else if err == nil && tc.expectedErrMsg != "" { + t.Errorf("expected err: %q, got: nil", tc.expectedErrMsg) + } else if content != tc.expectedContent { + fmt.Printf("got content:\n%s", content) + fmt.Printf("expected content:\n%s", tc.expectedContent) + t.Errorf("expected content: %q, got %q", tc.expectedContent, content) + } + }) } } diff --git a/tools/patchnewgtlds b/tools/patchnewgtlds index baecf2cbcd..ddc93a9007 100755 --- a/tools/patchnewgtlds +++ b/tools/patchnewgtlds @@ -11,8 +11,6 @@ fi BASEDIR=$(dirname "$0") -go run "$BASEDIR/newgtlds.go" | \ - "$BASEDIR/replace-between" \ - "$BASEDIR/../public_suffix_list.dat" \ - "// newGTLDs" \ - "// ===END ICANN DOMAINS" +go run "$BASEDIR/newgtlds.go" \ + -overwrite \ + -psl-dat-file="$BASEDIR/../public_suffix_list.dat" diff --git a/tools/replace-between b/tools/replace-between deleted file mode 100755 index d6ea40ad5f..0000000000 --- a/tools/replace-between +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/perl -w -# -# This script takes a target file, a start marker and an end marker, and -# replaces the text in that file between those two markers with some -# alternative text from another file or from STDIN. - -binmode STDIN, ':utf8'; - -usage() if (!$ARGV[2]); - -my ($base_filename, $start_marker, $end_marker, $insert_filename) = @ARGV; - -my $base = read_file_utf8($base_filename); - -my $new; -if ($insert_filename) { - $new = read_file_utf8($insert_filename); -} -else { - $new = do { local $/; }; -} - -$base =~ s/\Q$start_marker\E.*\Q$end_marker\E/$start_marker\n$new\n$end_marker/s; - -write_file_utf8($base_filename, $base); - -sub usage { - print "Usage: replace-between START_MARKER END_MARKER \n"; - print "Or, give data to insert on STDIN.\n"; - exit(1); -} - -sub read_file_utf8 { - my $name = shift; - open my $fh, '<:encoding(UTF-8)', $name - or die "Couldn't open '$name': $!"; - local $/; - my $data = <$fh>; - return $data; -}; - -sub write_file_utf8 { - my $name = shift; - open my $fh, '>:encoding(UTF-8)', $name - or die "Couldn't create '$name': $!"; - local $/; - print {$fh} $_ for @_; -};