diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..6798cb2 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +version: 2 +updates: +- package-ecosystem: gomod + directory: "/" + schedule: + interval: daily + open-pull-requests-limit: 10 +- package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: daily diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 0000000..eb093de --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,52 @@ +name: "Code scanning - action" + +on: + push: + branches-ignore: + - 'dependabot/**' + pull_request: + schedule: + - cron: '0 11 * * 2' + +jobs: + CodeQL-Build: + + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + # We must fetch at least the immediate parents so that if this is + # a pull request then we can checkout the head. + fetch-depth: 2 + + # If this run was triggered by a pull request event, then checkout + # the head of the pull request instead of the merge commit. + - run: git checkout HEAD^2 + if: ${{ github.event_name == 'pull_request' }} + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: go + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 https://git.io/JvXDl + + # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml new file mode 100644 index 0000000..6dcb362 --- /dev/null +++ b/.github/workflows/go.yml @@ -0,0 +1,38 @@ +name: Go + +on: + push: + pull_request: + schedule: + - cron: '6 15 * * SUN' + +jobs: + + build: + strategy: + matrix: + go-version: [1.20.x, 1.21.x] + # We don't test on macOS and windows as the database builds aren't + # repeatable there for some reason. As such, tests fail. It'd + # probably be worth looking into this at some point. + platform: [ubuntu-latest] + runs-on: ${{ matrix.platform }} + name: "Build ${{ matrix.go-version }} test on ${{ matrix.platform }}" + steps: + - name: Set up Go 1.x + uses: actions/setup-go@v4 + with: + go-version: ${{ matrix.go-version }} + id: go + + - name: Check out code into the Go module directory + uses: actions/checkout@v4 + + - name: Get dependencies + run: go get -v -t -d ./... + + - name: Build + run: go build -v ./... + + - name: Test + run: go test -race -v ./... 
diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml new file mode 100644 index 0000000..bd9ad1b --- /dev/null +++ b/.github/workflows/golangci-lint.yml @@ -0,0 +1,21 @@ +name: golangci-lint + +on: + push: + pull_request: + schedule: + - cron: '6 15 * * SUN' + +jobs: + golangci: + name: lint + runs-on: ubuntu-latest + steps: + - uses: actions/setup-go@v4 + with: + go-version: 1.21 + - uses: actions/checkout@v4 + - name: golangci-lint + uses: golangci/golangci-lint-action@v3 + with: + version: latest diff --git a/.gitignore b/.gitignore index 3d89953..1c752c2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /_site .tidyall.d *.swp +/cmd/write-test-data/write-test-data diff --git a/.golangci.toml b/.golangci.toml new file mode 100644 index 0000000..09b1486 --- /dev/null +++ b/.golangci.toml @@ -0,0 +1,708 @@ +[run] + deadline = "10m" + tests = true + +[linters] + disable-all = true + enable = [ + "asasalint", + "asciicheck", + "bidichk", + "bodyclose", + "containedctx", + "contextcheck", + "depguard", + # This is probably worthwhile, but there are a number of false positives + # that would need to be addressed. + # "dupword", + "durationcheck", + "errcheck", + "errchkjson", + "errname", + "errorlint", + # This doesn't seem to know about CTEs or DELETEs with RETURNING + # "execinquery", + "exhaustive", + # We often don't initialize all of the struct fields. This is fine + # generally + # "exhaustruct", + "exportloopref", + "forbidigo", + # We tried this linter but most places we do forced type asserts are + # pretty safe, e.g., an atomic.Value when everything is encapsulated + # in a small package. + # "forcetypeassert", + "goconst", + "gocyclo", + "gocritic", + "godot", + "gofumpt", + "gomodguard", + "gosec", + "gosimple", + # This only "caught" one thing, and it seemed like a reasonable use + # of Han script. Generally, I don't think we want to prevent the use + # of particulr scripts. The time.Local checks might be useful, but + # this didn't actually catch anything of note there. + # "gosmopolitan", + "govet", + "grouper", + "ineffassign", + "lll", + # We don't use these loggers + # "loggercheck", + "makezero", + # Maintainability Index. Seems like it could be a good idea, but a + # lot of things fail and we would need to make some decisions about + # what to allow. + # "maintidx", + "misspell", + # Causes panics, e.g., when processing mmerrors + # "musttag", + "nakedret", + "nilerr", + # Perhaps too opinionated. We do have some legitimate uses of "return nil, nil" + # "nilnil", + "noctx", + "nolintlint", + # We occasionally use named returns for documentation, which is helpful. + # Named returns are only really a problem when used in conjunction with + # a bare return statement. I _think_ Revive's bare-return covers that + # case. + # "nonamedreturns", + "nosprintfhostport", + "predeclared", + "revive", + "rowserrcheck", + # https://github.com/golangci/golangci-lint/issues/287 + # "safesql", + "sqlclosecheck", + "staticcheck", + "stylecheck", + # We have very few structs with multiple tags and for the couple we had, this + # actually made it harder to read. + # "tagalign", + "tenv", + "tparallel", + "typecheck", + "unconvert", + "unparam", + "unused", + "usestdlibvars", + "vetshadow", + "wastedassign", + # We don't currently wrap external errors in this module. + # "wrapcheck", + ] + +# Please note that we only use depguard for stdlib as gomodguard only +# supports modules currently. 
See https://github.com/ryancurrah/gomodguard/issues/12 +[[linters-settings.depguard.rules.main.deny]] +pkg = "io/ioutil" +desc = "Deprecated. Functions have been moved elsewhere." + +[[linters-settings.depguard.rules.main.deny]] +# golang.org/x/exp/slices has better alternatives. The proposal to add this +# to Go has been accepted, https://github.com/golang/go/issues/57433 +pkg = "sort" +desc = "Use golang.org/x/exp/slices instead" + +[linters-settings.errcheck] + # Don't allow setting of error to the blank identifier. If there is a legitimate + # reason, there should be a nolint with an explanation. + check-blank = true + + exclude-functions = [ + # If we are rolling back a transaction, we are often already in an error + # state. + '(*database/sql.Tx).Rollback', + + # It is reasonable to ignore errors if Cleanup fails in most cases. + '(*github.com/google/renameio/v2.PendingFile).Cleanup', + + # We often don't care if removing a file failed (e.g., it doesn't exist) + 'os.Remove', + 'os.RemoveAll', + ] + + # Ignoring Close so that we don't have to have a bunch of + # `defer func() { _ = r.Close() }()` constructs when we + # don't actually care about the error. + ignore = "Close,fmt:.*" + +[linters-settings.errorlint] + errorf = true + asserts = true + comparison = true + +[linters-settings.exhaustive] + default-signifies-exhaustive = true + +[linters-settings.forbidigo] + # Forbid the following identifiers + forbid = [ + "Geoip", # use "GeoIP" + "^geoIP", # use "geoip" + "^hubSpot", # use "hubspot" + "Maxmind", # use "MaxMind" + "^maxMind", # use "maxmind" + "Minfraud", # use "MinFraud" + "^minFraud", # use "minfraud" + "[Uu]ser[iI][dD]", # use "accountID" or "AccountID" + + # use netip.ParsePrefix unless you really need a *net.IPNet + "^net.ParseCIDR", + + # use netip.ParseAddr unless you really need a net.IP + "^net.ParseIP", + ] + +[linters-settings.gocritic] + enabled-checks = [ + "appendAssign", + "appendCombine", + "argOrder", + "assignOp", + "badCall", + "badCond", + "badLock", + "badRegexp", + "badSorting", + "boolExprSimplify", + "builtinShadow", + "builtinShadowDecl", + "captLocal", + "caseOrder", + "codegenComment", + "commentedOutCode", + "commentedOutImport", + "commentFormatting", + "defaultCaseOrder", + # Revive's defer rule already captures this. This caught no extra cases. + # "deferInLoop", + "deferUnlambda", + "deprecatedComment", + "docStub", + "dupArg", + "dupBranchBody", + "dupCase", + "dupImport", + "dupSubExpr", + "dynamicFmtString", + "elseif", + "emptyDecl", + "emptyFallthrough", + "emptyStringTest", + "equalFold", + "evalOrder", + "exitAfterDefer", + "exposedSyncMutex", + "externalErrorReassign", + # Given that all of our code runs on Linux and the / separate should + # work fine, this seems less important. + # "filepathJoin", + "flagDeref", + "flagName", + "hexLiteral", + # This seems like it could be good, but we would need to update current + # uses. It supports "--fix", but the fixing is a bit broken. + # "httpNoBody", + # This might be good, but we would have to revist a lot of code. 
+ # "hugeParam", + "ifElseChain", + "importShadow", + "indexAlloc", + "initClause", + "mapKey", + "methodExprCall", + "nestingReduce", + "newDeref", + "nilValReturn", + "octalLiteral", + "offBy1", + "paramTypeCombine", + "preferDecodeRune", + "preferFilepathJoin", + "preferFprint", + "preferStringWriter", + "preferWriteByte", + "ptrToRefParam", + "rangeExprCopy", + "rangeValCopy", + "redundantSprint", + "regexpMust", + "regexpPattern", + # This might be good, but I don't think we want to encourage + # significant changes to regexes as we port stuff from Perl. + # "regexpSimplify", + "returnAfterHttpError", + "ruleguard", + "singleCaseSwitch", + "sliceClear", + "sloppyLen", + # This seems like it might also be good, but a lot of existing code + # fails. + # "sloppyReassign", + # This complains about helper functions in tests. + # "sloppyTestFuncName", + "sloppyTypeAssert", + "sortSlice", + "sprintfQuotedString", + "sqlQuery", + "stringsCompare", + "stringConcatSimplify", + "stringXbytes", + "switchTrue", + "syncMapLoadAndDelete", + "timeExprSimplify", + "todoCommentWithoutDetail", + "tooManyResultsChecker", + "truncateCmp", + "typeAssertChain", + "typeDefFirst", + "typeSwitchVar", + "typeUnparen", + "underef", + "unlabelStmt", + "unlambda", + # I am not sure we would want this linter and a lot of existing + # code fails. + # "unnamedResult", + "unnecessaryBlock", + "unnecessaryDefer", + "unslice", + "valSwap", + "weakCond", + # Covered by nolintlint + # "whyNoLint" + "wrapperFunc", + "yodaStyleExpr", + ] + +[linters-settings.gofumpt] + extra-rules = true + lang-version = "1.18" + +[linters-settings.gomodguard] + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."github.com/avct/uasurfer"] + recommendations = ["github.com/xavivars/uasurfer"] + reason = "The original avct module appears abandoned." + + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."github.com/BurntSushi/toml"] + recommendations = ["github.com/pelletier/go-toml/v2"] + reason = "This library panics frequently on invalid input." + + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."github.com/gofrs/uuid"] + recommendations = ["github.com/google/uuid"] + + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."github.com/pelletier/go-toml"] + recommendations = ["github.com/pelletier/go-toml/v2"] + reason = "This is an outdated version." + + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."github.com/satori/go.uuid"] + recommendations = ["github.com/google/uuid"] + + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."github.com/lib/pq"] + recommendations = ["github.com/jackc/pgx"] + reason = "This library is no longer actively maintained." + + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."github.com/neilotoole/errgroup"] + recommendations = ["golang.org/x/sync/errgroup"] + reason = "This library can lead to subtle deadlocks in certain use cases." + + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."github.com/pariz/gountries"] + reason = "This library's data is not actively maintained. Use GeoInfo data." + + [linters-settings.gomodguard.blocked.modules."github.com/pkg/errors"] + reason = "pkg/errors is no longer maintained." 
+ + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."github.com/RackSec/srslog"] + recommendations = ["github.com/RackSec/srslog"] + reason = "This library's data is not actively maintained." + + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."github.com/ua-parser/uap-go/uaparser"] + recommendations = ["github.com/xavivars/uasurfer"] + reason = "The performance of this library is absolutely abysmal." + + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."github.com/ugorji/go/codec"] + recommendations = ["encoding/json", "github.com/mailru/easyjson"] + reason = "This library is poorly maintained. We should default to using encoding/json and use easyjson where performance really matters." + + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."io/ioutil"] + + [[linters-settings.gomodguard.blocked.versions]] + [linters-settings.gomodguard.blocked.versions."github.com/jackc/pgconn"] + reason = "Use github.com/jackc/pgx/v5" + + [[linters-settings.gomodguard.blocked.versions]] + [linters-settings.gomodguard.blocked.versions."github.com/jackc/pgtype"] + reason = "Use github.com/jackc/pgx/v5" + + [[linters-settings.gomodguard.blocked.versions]] + [linters-settings.gomodguard.blocked.versions."github.com/jackc/pgx"] + version = "< 5.0.0" + reason = "Use github.com/jackc/pgx/v5" + + [[linters-settings.gomodguard.blocked.versions]] + [linters-settings.gomodguard.blocked.versions."github.com/stretchr/testify/assert"] + reason = "Use github.com/stretchr/testify/assert" + + [[linters-settings.gomodguard.blocked.modules]] + [linters-settings.gomodguard.blocked.modules."inet.af/netaddr"] + recommendations = ["go4.org/netipx"] + reason = "inet.af/netaddr has been deprecated." + +[linters-settings.gosec] + excludes = [ + # G104 - "Audit errors not checked." We use errcheck for this. + "G104", + + # G306 - "Expect WriteFile permissions to be 0600 or less". + "G306", + + # Prohibits defer (*os.File).Close, which we allow when reading from file. + "G307", + ] + + +[linters-settings.govet] + # This seems to be duplicate setting, but enable it for good measure. + check-shadowing = true + "enable-all" = true + + # Although it is very useful in particular cases where we are trying to + # use as little memory as possible, there are even more cases where + # other organizations may make more sense. + disable = ["fieldalignment"] + + [linters-settings.govet.settings.shadow] + strict = true + +[linters-settings.lll] + line-length = 120 + tab-width = 4 + +[linters-settings.nolintlint] + allow-leading-space = false + allow-unused = false + allow-no-explanation = ["lll", "misspell"] + require-explanation = true + require-specific = true + +[linters-settings.revive] + ignore-generated-header = true + severity = "warning" + + # This might be nice but it is so common that it is hard + # to enable. 
+ # [[linters-settings.revive.rules]] + # name = "add-constant" + + # [[linters-settings.revive.rules]] + # name = "argument-limit" + + [[linters-settings.revive.rules]] + name = "atomic" + + [[linters-settings.revive.rules]] + name = "bare-return" + + [[linters-settings.revive.rules]] + name = "blank-imports" + + [[linters-settings.revive.rules]] + name = "bool-literal-in-expr" + + [[linters-settings.revive.rules]] + name = "call-to-gc" + + # [[linters-settings.revive.rules]] + # name = "cognitive-complexity" + + [[linters-settings.revive.rules]] + name = "comment-spacings" + arguments = ["easyjson", "nolint"] + + # Probably a good rule, but we have a lot of names that + # only have case differences. + # [[linters-settings.revive.rules]] + # name = "confusing-naming" + + [[linters-settings.revive.rules]] + name = "confusing-results" + + [[linters-settings.revive.rules]] + name = "constant-logical-expr" + + [[linters-settings.revive.rules]] + name = "context-as-argument" + + [[linters-settings.revive.rules]] + name = "context-keys-type" + + # [[linters-settings.revive.rules]] + # name = "cyclomatic" + + [[linters-settings.revive.rules]] + name = "datarace" + + [[linters-settings.revive.rules]] + name = "deep-exit" + + [[linters-settings.revive.rules]] + name = "defer" + + [[linters-settings.revive.rules]] + name = "dot-imports" + + [[linters-settings.revive.rules]] + name = "duplicated-imports" + + [[linters-settings.revive.rules]] + name = "early-return" + + [[linters-settings.revive.rules]] + name = "empty-block" + + [[linters-settings.revive.rules]] + name = "empty-lines" + + [[linters-settings.revive.rules]] + name = "errorf" + + [[linters-settings.revive.rules]] + name = "error-naming" + + [[linters-settings.revive.rules]] + name = "error-return" + + [[linters-settings.revive.rules]] + name = "error-strings" + + [[linters-settings.revive.rules]] + name = "exported" + + # [[linters-settings.revive.rules]] + # name = "file-header" + + # We have a lot of flag parameters. This linter probably makes + # a good point, but we would need some cleanup or a lot of nolints. + # [[linters-settings.revive.rules]] + # name = "flag-parameter" + + # [[linters-settings.revive.rules]] + # name = "function-result-limit" + + [[linters-settings.revive.rules]] + name = "get-return" + + [[linters-settings.revive.rules]] + name = "identical-branches" + + [[linters-settings.revive.rules]] + name = "if-return" + + [[linters-settings.revive.rules]] + name = "imports-blacklist" + + [[linters-settings.revive.rules]] + name = "import-shadowing" + + [[linters-settings.revive.rules]] + name = "increment-decrement" + + [[linters-settings.revive.rules]] + name = "indent-error-flow" + + # [[linters-settings.revive.rules]] + # name = "line-length-limit" + + # [[linters-settings.revive.rules]] + # name = "max-public-structs" + + [[linters-settings.revive.rules]] + name = "modifies-parameter" + + [[linters-settings.revive.rules]] + name = "modifies-value-receiver" + + # We frequently use nested structs, particularly in tests. 
+ # [[linters-settings.revive.rules]] + # name = "nested-structs" + + [[linters-settings.revive.rules]] + name = "optimize-operands-order" + + [[linters-settings.revive.rules]] + name = "package-comments" + + [[linters-settings.revive.rules]] + name = "range" + + [[linters-settings.revive.rules]] + name = "range-val-address" + + [[linters-settings.revive.rules]] + name = "range-val-in-closure" + + [[linters-settings.revive.rules]] + name = "receiver-naming" + + [[linters-settings.revive.rules]] + name = "redefines-builtin-id" + + [[linters-settings.revive.rules]] + name = "string-of-int" + + [[linters-settings.revive.rules]] + name = "struct-tag" + + [[linters-settings.revive.rules]] + name = "superfluous-else" + + [[linters-settings.revive.rules]] + name = "time-equal" + + [[linters-settings.revive.rules]] + name = "time-naming" + + [[linters-settings.revive.rules]] + name = "unconditional-recursion" + + [[linters-settings.revive.rules]] + name = "unexported-naming" + + [[linters-settings.revive.rules]] + name = "unexported-return" + + # This is covered elsewhere and we want to ignore some + # functions such as fmt.Fprintf. + # [[linters-settings.revive.rules]] + # name = "unhandled-error" + + [[linters-settings.revive.rules]] + name = "unnecessary-stmt" + + [[linters-settings.revive.rules]] + name = "unreachable-code" + + [[linters-settings.revive.rules]] + name = "unused-parameter" + + # We generally have unused receivers in tests for meeting the + # requirements of an interface. + # [[linters-settings.revive.rules]] + # name = "unused-receiver" + + [[linters-settings.revive.rules]] + name = "use-any" + + [[linters-settings.revive.rules]] + name = "useless-break" + + [[linters-settings.revive.rules]] + name = "var-declaration" + + [[linters-settings.revive.rules]] + name = "var-naming" + + [[linters-settings.revive.rules]] + name = "waitgroup-by-value" + +[linters-settings.unparam] + check-exported = true + +[issues] +exclude-use-default = false + + # This goes off for MD5 usage, which we use heavily + [[issues.exclude-rules]] + text = "weak cryptographic primitive" + linters = ["gosec"] + + [[issues.exclude-rules]] + linters = [ + "bodyclose" + ] + # This rule doesn't really make sense for tests where we don't have an open + # connection and we might be passing around the response for other reasons. + path = "_test.go" + + [[issues.exclude-rules]] + linters = [ + "forbidigo" + ] + # This refers to a minFraud field, not the MaxMind Account ID + text = "AccountUserID|Account\\.UserID" + + [[issues.exclude-rules]] + linters = [ + "gocritic" + ] + # For some reason the imports stuff in ruleguard doesn't work in golangci-lint. + # Perhaps it has an outdated version or something + path = "_test.go" + text = "ruleguard: Prefer the alternative Context method instead" + + [[issues.exclude-rules]] + linters = [ + "gocritic" + ] + # The nolintlint linter behaves oddly with ruleguard rules + source = "// *no-ruleguard" + + [[issues.exclude-rules]] + linters = [ + "govet" + ] + # These are usually fine to shadow and not allowing shadowing for them can + # make the code unnecessarily verbose. + text = 'shadow: declaration of "(ctx|err|ok)" shadows declaration' + + [[issues.exclude-rules]] + linters = [ + "contextcheck", + "nilerr", + "wrapcheck", + ] + path = "_test.go" + + [[issues.exclude-rules]] + linters = [ + "stylecheck", + ] + # ST1016 - methods on the same type should have the same receiver name. + # easyjson doesn't interact well with this. 
+ text = "ST1016" + + [[issues.exclude-rules]] + linters = [ + "staticcheck", + ] + # SA5008: unknown JSON option "intern" - easyjson specific option. + text = 'SA5008: unknown JSON option "intern"' + + [[issues.exclude-rules]] + linters = [ + "wrapcheck", + ] + path = "_easyjson.go" + + [[issues.exclude-rules]] + linters = [ + "gocritic", + ] + source = "Chmod|WriteFile" + text = "octalLiteral" diff --git a/cmd/write-test-data/main.go b/cmd/write-test-data/main.go new file mode 100644 index 0000000..48d00a0 --- /dev/null +++ b/cmd/write-test-data/main.go @@ -0,0 +1,68 @@ +// write-test-data generates test mmdb files. +package main + +import ( + "flag" + "fmt" + "os" + + "github.com/maxmind/MaxMind-DB/pkg/writer" +) + +func main() { + source := flag.String("source", "", "Source data directory") + target := flag.String("target", "", "Destination directory for the generated mmdb files") + + flag.Parse() + + w, err := writer.New(*source, *target) + if err != nil { + fmt.Printf("creating writer: %+v\n", err) + os.Exit(1) + } + + if err := w.WriteIPv4TestDB(); err != nil { + fmt.Printf("writing IPv4 test databases: %+v\n", err) + os.Exit(1) + } + + if err := w.WriteIPv6TestDB(); err != nil { + fmt.Printf("writing IPv6 test databases: %+v\n", err) + os.Exit(1) + } + + if err := w.WriteMixedIPTestDB(); err != nil { + fmt.Printf("writing IPv6 test databases: %+v\n", err) + os.Exit(1) + } + + if err := w.WriteNoIPv4TestDB(); err != nil { + fmt.Printf("writing no IPv4 test databases: %+v\n", err) + os.Exit(1) + } + + if err := w.WriteNoMapTestDB(); err != nil { + fmt.Printf("writing no map test databases: %+v\n", err) + os.Exit(1) + } + + if err := w.WriteMetadataPointersTestDB(); err != nil { + fmt.Printf("writing metadata pointers test databases: %+v\n", err) + os.Exit(1) + } + + if err := w.WriteDecoderTestDB(); err != nil { + fmt.Printf("writing decoder test databases: %+v\n", err) + os.Exit(1) + } + + if err := w.WriteDeeplyNestedStructuresTestDB(); err != nil { + fmt.Printf("writing decoder test databases: %+v\n", err) + os.Exit(1) + } + + if err := w.WriteGeoIP2TestDB(); err != nil { + fmt.Printf("writing GeoIP2 test databases: %+v\n", err) + os.Exit(1) + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..9e0dd23 --- /dev/null +++ b/go.mod @@ -0,0 +1,13 @@ +module github.com/maxmind/MaxMind-DB + +go 1.21 + +require ( + github.com/maxmind/mmdbwriter v1.0.0 + go4.org/netipx v0.0.0-20230824141953-6213f710f925 +) + +require ( + github.com/oschwald/maxminddb-golang v1.12.0 // indirect + golang.org/x/sys v0.10.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..7c8e649 --- /dev/null +++ b/go.sum @@ -0,0 +1,16 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/maxmind/mmdbwriter v1.0.0 h1:bieL4P6yaYaHvbtLSwnKtEvScUKKD6jcKaLiTM3WSMw= +github.com/maxmind/mmdbwriter v1.0.0/go.mod h1:noBMCUtyN5PUQ4H8ikkOvGSHhzhLok51fON2hcrpKj8= +github.com/oschwald/maxminddb-golang v1.12.0 h1:9FnTOD0YOhP7DGxGsq4glzpGy5+w7pq50AS6wALUMYs= +github.com/oschwald/maxminddb-golang v1.12.0/go.mod h1:q0Nob5lTCqyQ8WT6FYgS1L7PXKVVbgiymefNwIjPzgY= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod 
h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +go4.org/netipx v0.0.0-20230824141953-6213f710f925 h1:eeQDDVKFkx0g4Hyy8pHgmZaK0EqB4SD6rvKbUdN3ziQ= +go4.org/netipx v0.0.0-20230824141953-6213f710f925/go.mod h1:PLyyIXexvUFg3Owu6p/WfdlivPbZJsZdgWZlrGope/Y= +golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/writer/decoder.go b/pkg/writer/decoder.go new file mode 100644 index 0000000..c6c29c4 --- /dev/null +++ b/pkg/writer/decoder.go @@ -0,0 +1,178 @@ +package writer + +import ( + "bytes" + "encoding/binary" + "fmt" + "math" + "math/big" + "net/netip" + + "github.com/maxmind/mmdbwriter" + "github.com/maxmind/mmdbwriter/mmdbtype" + "go4.org/netipx" +) + +// WriteDecoderTestDB writes an mmdb file with all possible record value types. +func (w *Writer) WriteDecoderTestDB() error { + dbWriter, err := mmdbwriter.New( + mmdbwriter.Options{ + DatabaseType: "MaxMind DB Decoder Test", + Description: map[string]string{ + "en": "MaxMind DB Decoder Test database - contains every MaxMind DB data type", + }, + DisableIPv4Aliasing: false, + IncludeReservedNetworks: true, + IPVersion: 6, + Languages: []string{"en"}, + RecordSize: 24, + }, + ) + if err != nil { + return fmt.Errorf("creating mmdbwriter: %w", err) + } + + addrs, err := parseIPSlice(ipSample) + if err != nil { + return fmt.Errorf("parsing ip addresses: %w", err) + } + if err := insertAllTypes(dbWriter, addrs); err != nil { + return fmt.Errorf("inserting all types records: %w", err) + } + + zeroAddr, err := netip.ParsePrefix("::0.0.0.0/128") + if err != nil { + return fmt.Errorf("parsing ip: %w", err) + } + if err := insertAllTypesZero(dbWriter, []netip.Prefix{zeroAddr}); err != nil { + return fmt.Errorf("inserting all types records: %w", err) + } + + maxAddr, err := netip.ParsePrefix("::255.255.255.255/128") + if err != nil { + return fmt.Errorf("parsing ip: %w", err) + } + if err := insertNumericMax(dbWriter, []netip.Prefix{maxAddr}); err != nil { + return fmt.Errorf("inserting all types records: %w", err) + } + + if err := w.write(dbWriter, "MaxMind-DB-test-decoder.mmdb"); err != nil { + return fmt.Errorf("writing database: %w", err) + } + return nil +} + +// insertAllTypes inserts records with all possible value types. 
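+// The same record, containing strings, bytes, the numeric types, a slice, a
+// nested map, a boolean, and a uint128, is inserted for every given prefix.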
+func insertAllTypes(w *mmdbwriter.Tree, ipAddresses []netip.Prefix) error { + buf := new(bytes.Buffer) + if err := binary.Write(buf, binary.BigEndian, uint32(42)); err != nil { + return fmt.Errorf("creating buffer for all types record: %w", err) + } + + ui64 := big.Int{} + ui64.Lsh(big.NewInt(1), 60) + + ui128 := big.Int{} + ui128.Lsh(big.NewInt(1), 120) + mmdbUint128 := mmdbtype.Uint128(ui128) + + allTypes := mmdbtype.Map{ + "array": mmdbtype.Slice{ + mmdbtype.Uint32(1), + mmdbtype.Uint32(2), + mmdbtype.Uint32(3), + }, + "bytes": mmdbtype.Bytes(buf.Bytes()), + "boolean": mmdbtype.Bool(true), + "double": mmdbtype.Float64(42.123456), + "float": mmdbtype.Float32(1.1), + "int32": mmdbtype.Int32(-1 * math.Pow(2, 28)), + "map": mmdbtype.Map{ + "mapX": mmdbtype.Map{ + "utf8_stringX": mmdbtype.String("hello"), + "arrayX": mmdbtype.Slice{ + mmdbtype.Uint32(7), + mmdbtype.Uint32(8), + mmdbtype.Uint32(9), + }, + }, + }, + "uint16": mmdbtype.Uint16(100), + "uint32": mmdbtype.Uint32(math.Pow(2, 28)), + "uint64": mmdbtype.Uint64(ui64.Uint64()), + "uint128": mmdbUint128.Copy(), + "utf8_string": mmdbtype.String("unicode! ☯ - ♫"), + } + + for _, addr := range ipAddresses { + err := w.Insert( + netipx.PrefixIPNet(addr), + allTypes, + ) + if err != nil { + return fmt.Errorf("inserting ip: %w", err) + } + } + return nil +} + +// insertAllTypesZero inserts records with all possible value types with zero values. +func insertAllTypesZero(w *mmdbwriter.Tree, ipAddresses []netip.Prefix) error { + var uint128 big.Int + mmdbUint128 := mmdbtype.Uint128(uint128) + + zeroValues := mmdbtype.Map{ + "array": mmdbtype.Slice{}, + "bytes": mmdbtype.Bytes([]byte{}), + "boolean": mmdbtype.Bool(false), + "double": mmdbtype.Float64(0), + "float": mmdbtype.Float32(0), + "int32": mmdbtype.Int32(0), + "map": mmdbtype.Map{}, + "uint16": mmdbtype.Uint16(0), + "uint32": mmdbtype.Uint32(0), + "uint64": mmdbtype.Uint64(0), + "uint128": mmdbUint128.Copy(), + "utf8_string": mmdbtype.String(""), + } + + for _, addr := range ipAddresses { + err := w.Insert( + netipx.PrefixIPNet(addr), + zeroValues, + ) + if err != nil { + return fmt.Errorf("inserting ip: %w", err) + } + } + return nil +} + +// insertNumericMax inserts records with numeric types maxed out. +func insertNumericMax(w *mmdbwriter.Tree, ipAddresses []netip.Prefix) error { + var uint128Max big.Int + uint128Max.Exp(big.NewInt(2), big.NewInt(128), nil) + uint128Max.Sub(&uint128Max, big.NewInt(1)) + mmdbUint128 := mmdbtype.Uint128(uint128Max) + + numMax := mmdbtype.Map{ + "double": mmdbtype.Float64(math.Inf(1)), + "float": mmdbtype.Float32(float32(math.Inf(1))), + "int32": mmdbtype.Int32(1<<31 - 1), + "uint16": mmdbtype.Uint16(0xffff), + "uint32": mmdbtype.Uint32(0xffffffff), + "uint64": mmdbtype.Uint64(0xffffffffffffffff), + "uint128": mmdbUint128.Copy(), + } + + for _, addr := range ipAddresses { + err := w.Insert( + netipx.PrefixIPNet(addr), + numMax, + ) + if err != nil { + return fmt.Errorf("inserting ip: %w", err) + } + } + return nil +} diff --git a/pkg/writer/geoip2.go b/pkg/writer/geoip2.go new file mode 100644 index 0000000..b42633d --- /dev/null +++ b/pkg/writer/geoip2.go @@ -0,0 +1,182 @@ +package writer + +import ( + "encoding/json" + "fmt" + "net/netip" + "os" + "path/filepath" + "strings" + + "github.com/maxmind/mmdbwriter" + "github.com/maxmind/mmdbwriter/mmdbtype" + "go4.org/netipx" +) + +// WriteGeoIP2TestDB writes GeoIP2 test mmdb files. 
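+// For each database type below, it reads <type>-Test.json from the source
+// directory and writes <type>-Test.mmdb to the target directory.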
+func (w *Writer) WriteGeoIP2TestDB() error { + dbTypes := []string{ + "GeoIP2-Anonymous-IP", + "GeoIP2-City", + "GeoIP2-Connection-Type", + "GeoIP2-Country", + "GeoIP2-DensityIncome", + "GeoIP2-Domain", + "GeoIP2-Enterprise", + "GeoIP2-ISP", + "GeoIP2-Precision-Enterprise", + "GeoIP2-Static-IP-Score", + "GeoIP2-User-Count", + "GeoLite2-ASN", + "GeoLite2-City", + "GeoLite2-Country", + } + + for _, dbType := range dbTypes { + languages := []string{"en"} + description := map[string]string{ + "en": strings.ReplaceAll(dbType, "-", " ") + + " Test Database (fake GeoIP2 data, for example purposes only)", + } + + if dbType == "GeoIP2-City" { + languages = append(languages, "zh") + description["zh"] = "小型数据库" + } + + dbWriter, err := mmdbwriter.New( + mmdbwriter.Options{ + DatabaseType: dbType, + Description: description, + DisableIPv4Aliasing: false, + IPVersion: 6, + Languages: languages, + RecordSize: 28, + }, + ) + if err != nil { + return fmt.Errorf("creating mmdbwriter: %w", err) + } + + if dbType == "GeoIP2-Anonymous-IP" { + if err := populateAllNetworks(dbWriter); err != nil { + return fmt.Errorf("inserting all networks: %w", err) + } + } + + jsonFileName := fmt.Sprintf("%s-Test.json", dbType) + if err := w.insertJSON(dbWriter, jsonFileName); err != nil { + return fmt.Errorf("inserting json: %w", err) + } + + dbFileName := fmt.Sprintf("%s-Test.mmdb", dbType) + if err := w.write(dbWriter, dbFileName); err != nil { + return fmt.Errorf("writing database: %w", err) + } + } + + return nil +} + +// insertJSON reads and parses a json file into mmdbtypes values and inserts +// them into the mmdbwriter tree. +func (w *Writer) insertJSON(dbWriter *mmdbwriter.Tree, fileName string) error { + file, err := os.Open(filepath.Clean(filepath.Join(w.source, fileName))) + if err != nil { + return fmt.Errorf("opening json file: %w", err) + } + defer file.Close() + + var data []map[string]any + if err := json.NewDecoder(file).Decode(&data); err != nil { + return fmt.Errorf("decoding json file: %w", err) + } + + for _, record := range data { + for k, v := range record { + prefix, err := netip.ParsePrefix(k) + if err != nil { + return fmt.Errorf("parsing ip: %w", err) + } + + mmdbValue, err := toMMDBType(prefix.String(), v) + if err != nil { + return fmt.Errorf("converting value to mmdbtype: %w", err) + } + + err = dbWriter.Insert( + netipx.PrefixIPNet(prefix), + mmdbValue, + ) + if err != nil { + return fmt.Errorf("inserting ip: %w", err) + } + } + } + return nil +} + +// toMMDBType key converts field values read from json into their corresponding mmdbtype.DataType. +// It makes some assumptions for numeric types based on previous knowledge about field types. 
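+// For example, a JSON number under the key "latitude" is stored as an
+// mmdbtype.Float64, while one under "geoname_id" becomes an mmdbtype.Uint32.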
+func toMMDBType(key string, value any) (mmdbtype.DataType, error) {
+    switch v := value.(type) {
+    case bool:
+        return mmdbtype.Bool(v), nil
+    case string:
+        return mmdbtype.String(v), nil
+    case map[string]any:
+        m := mmdbtype.Map{}
+        for innerKey, val := range v {
+            innerVal, err := toMMDBType(innerKey, val)
+            if err != nil {
+                return nil, fmt.Errorf("parsing mmdbtype.Map for key %q: %w", key, err)
+            }
+            m[mmdbtype.String(innerKey)] = innerVal
+        }
+        return m, nil
+    case []any:
+        s := mmdbtype.Slice{}
+        for _, val := range v {
+            innerVal, err := toMMDBType(key, val)
+            if err != nil {
+                return nil, fmt.Errorf("parsing mmdbtype.Slice for key %q: %w", key, err)
+            }
+            s = append(s, innerVal)
+        }
+        return s, nil
+    case float64:
+        switch key {
+        case "accuracy_radius", "confidence", "metro_code":
+            return mmdbtype.Uint16(v), nil
+        case "autonomous_system_number", "average_income",
+            "geoname_id", "ipv4_24", "ipv4_32", "ipv6_32",
+            "ipv6_48", "ipv6_64", "population_density":
+            return mmdbtype.Uint32(v), nil
+        case "ip_risk", "latitude", "longitude", "score",
+            "static_ip_score":
+            return mmdbtype.Float64(v), nil
+        default:
+            return nil, fmt.Errorf("unsupported numeric type for key %q: %T", key, value)
+        }
+    default:
+        return nil, fmt.Errorf("unsupported type for key %q: %T", key, value)
+    }
+}
+
+// populateAllNetworks inserts all networks into the writer with an empty map value.
+func populateAllNetworks(w *mmdbwriter.Tree) error {
+    defaultNet, err := netip.ParsePrefix("::/0")
+    if err != nil {
+        return fmt.Errorf("parsing ip: %w", err)
+    }
+
+    err = w.Insert(
+        netipx.PrefixIPNet(defaultNet),
+        mmdbtype.Map{},
+    )
+    if err != nil {
+        return fmt.Errorf("inserting ip: %w", err)
+    }
+
+    return nil
+}
diff --git a/pkg/writer/ip.go b/pkg/writer/ip.go
new file mode 100644
index 0000000..8b6b919
--- /dev/null
+++ b/pkg/writer/ip.go
@@ -0,0 +1,39 @@
+package writer
+
+import (
+    "fmt"
+    "net/netip"
+
+    "go4.org/netipx"
+)
+
+// parseIPRange takes IP addresses in string presentation form that represent a
+// range and returns an IP range.
+func parseIPRange(from, to string) (netipx.IPRange, error) {
+    startIP, err := netip.ParseAddr(from)
+    if err != nil {
+        return netipx.IPRange{}, fmt.Errorf("parsing %s as an IP: %w", from, err)
+    }
+    endIP, err := netip.ParseAddr(to)
+    if err != nil {
+        return netipx.IPRange{}, fmt.Errorf("parsing %s as an IP: %w", to, err)
+    }
+    ipRange := netipx.IPRangeFrom(startIP, endIP)
+    if !ipRange.IsValid() {
+        return netipx.IPRange{}, fmt.Errorf("%s-%s is an invalid IP range", startIP, endIP)
+    }
+    return ipRange, nil
+}
+
+// parseIPSlice parses a slice of IP address strings and returns a slice of netip.Prefix.
+func parseIPSlice(ipAddresses []string) ([]netip.Prefix, error) {
+    var addrs []netip.Prefix
+    for _, ip := range ipAddresses {
+        addr, err := netip.ParsePrefix(ip)
+        if err != nil {
+            return nil, fmt.Errorf("parsing %s as an IP: %w", ip, err)
+        }
+        addrs = append(addrs, addr)
+    }
+    return addrs, nil
+}
diff --git a/pkg/writer/maxmind.go b/pkg/writer/maxmind.go
new file mode 100644
index 0000000..683db9e
--- /dev/null
+++ b/pkg/writer/maxmind.go
@@ -0,0 +1,245 @@
+package writer
+
+import (
+    "fmt"
+    "net/netip"
+
+    "github.com/maxmind/mmdbwriter"
+    "github.com/maxmind/mmdbwriter/mmdbtype"
+    "go4.org/netipx"
+)
+
+// WriteIPv4TestDB writes mmdb files for an ip range between 1.1.1.1 and 1.1.1.32
+// with various record sizes.
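+// One file is written per record size (24, 28, and 32 bit), named
+// MaxMind-DB-test-ipv4-<record size>.mmdb.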
+func (w *Writer) WriteIPv4TestDB() error { + ipRange, err := parseIPRange("1.1.1.1", "1.1.1.32") + if err != nil { + return fmt.Errorf("parsing ip range: %w", err) + } + + for _, recordSize := range []int{24, 28, 32} { + err := w.writeMaxMindTestDB( + recordSize, + []netipx.IPRange{ipRange}, + "ipv4", + ) + if err != nil { + return fmt.Errorf("writing test database: %w", err) + } + } + + return nil +} + +// WriteIPv6TestDB writes mmdb files for an ip range between ::1:ffff:ffff and ::2:0000:0059 +// with various record sizes. +func (w *Writer) WriteIPv6TestDB() error { + ipRange, err := parseIPRange("::1:ffff:ffff", "::2:0000:0059") + if err != nil { + return fmt.Errorf("parsing ip range: %w", err) + } + + for _, recordSize := range []int{24, 28, 32} { + err := w.writeMaxMindTestDB( + recordSize, + []netipx.IPRange{ipRange}, + "ipv6", + ) + if err != nil { + return fmt.Errorf("writing test database: %w", err) + } + } + + return nil +} + +// WriteMixedIPTestDB writes mmdb files for a mixed ip version range between ::1:ffff:ffff and ::2:0000:0059 +// with various record sizes. +func (w *Writer) WriteMixedIPTestDB() error { + ipv6Range, err := parseIPRange("::1:ffff:ffff", "::2:0000:0059") + if err != nil { + return fmt.Errorf("parsing ip range: %w", err) + } + + ipv4Range, err := parseIPRange("1.1.1.1", "1.1.1.32") + if err != nil { + return fmt.Errorf("parsing ip range: %w", err) + } + + for _, recordSize := range []int{24, 28, 32} { + err := w.writeMaxMindTestDB( + recordSize, + []netipx.IPRange{ipv6Range, ipv4Range}, + "mixed", + ) + if err != nil { + return fmt.Errorf("writing test database: %w", err) + } + } + + return nil +} + +// writeMaxMindTestDB writes test mmdb files. +func (w *Writer) writeMaxMindTestDB( + recordSize int, + ipRange []netipx.IPRange, + ipVersionName string, +) error { + ipVersion := 6 + if ipRange[0].From().Is4() { + ipVersion = 4 + } + + metadata := map[string]string{} + metadata["en"] = "Test Database" + metadata["zh"] = "Test Database Chinese" + + dbWriter, err := mmdbwriter.New( + mmdbwriter.Options{ + DatabaseType: "Test", + Description: metadata, + DisableIPv4Aliasing: ipVersion == 4, + IPVersion: ipVersion, + Languages: []string{"en", "zh"}, + RecordSize: recordSize, + }, + ) + if err != nil { + return fmt.Errorf("creating mmdbwriter: %w", err) + } + + for _, ir := range ipRange { + for _, prefix := range ir.Prefixes() { + ipString := prefix.Addr().String() + if ipVersion == 6 && prefix.Addr().Is4() { + ipString = "::" + ipString + } + + err := dbWriter.Insert( + netipx.PrefixIPNet(prefix), + mmdbtype.Map{ + "ip": mmdbtype.String(ipString), + }, + ) + if err != nil { + return fmt.Errorf("inserting ip: %w", err) + } + } + } + + fileName := fmt.Sprintf("MaxMind-DB-test-%s-%d.mmdb", ipVersionName, recordSize) + if err := w.write(dbWriter, fileName); err != nil { + return fmt.Errorf("writing database: %w", err) + } + + return nil +} + +// WriteNoIPv4TestDB writes an mmdb file with no ipv4 records. 
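+// IPv4 aliasing is disabled and only a single ::/64 record, whose value is
+// the network in string form, is inserted.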
+func (w *Writer) WriteNoIPv4TestDB() error { + dbWriter, err := mmdbwriter.New( + mmdbwriter.Options{ + DatabaseType: "MaxMind DB No IPv4 Search Tree", + Description: map[string]string{ + "en": "MaxMind DB No IPv4 Search Tree", + }, + DisableIPv4Aliasing: true, + IncludeReservedNetworks: true, + IPVersion: 6, + Languages: []string{"en"}, + RecordSize: 24, + }, + ) + if err != nil { + return fmt.Errorf("creating mmdbwriter: %w", err) + } + + addr, err := netip.ParsePrefix("::/64") + if err != nil { + return fmt.Errorf("parsing ip: %w", err) + } + + err = dbWriter.Insert( + netipx.PrefixIPNet(addr), + mmdbtype.String(addr.String()), + ) + if err != nil { + return fmt.Errorf("inserting ip: %w", err) + } + + if err := w.write(dbWriter, "MaxMind-DB-no-ipv4-search-tree.mmdb"); err != nil { + return fmt.Errorf("writing database: %w", err) + } + return nil +} + +// WriteNoMapTestDB writes an mmdb file where each record points to +// a string value. +func (w *Writer) WriteNoMapTestDB() error { + dbWriter, err := mmdbwriter.New( + mmdbwriter.Options{ + DatabaseType: "MaxMind DB String Value Entries", + Description: map[string]string{ + "en": "MaxMind DB String Value Entries (no maps or arrays as values)", + }, + IPVersion: 4, + Languages: []string{"en"}, + RecordSize: 24, + }, + ) + if err != nil { + return fmt.Errorf("creating mmdbwriter: %w", err) + } + + ipRange, err := parseIPRange("1.1.1.1", "1.1.1.32") + if err != nil { + return fmt.Errorf("parsing ip range: %w", err) + } + + for _, prefix := range ipRange.Prefixes() { + err := dbWriter.Insert( + netipx.PrefixIPNet(prefix), + mmdbtype.String(prefix.String()), + ) + if err != nil { + return fmt.Errorf("inserting ip: %w", err) + } + } + + if err := w.write(dbWriter, "MaxMind-DB-string-value-entries.mmdb"); err != nil { + return fmt.Errorf("writing database: %w", err) + } + return nil +} + +// WriteMetadataPointersTestDB writes an mmdb file with metadata pointers allowed. +func (w *Writer) WriteMetadataPointersTestDB() error { + repeatedString := "Lots of pointers in metadata" + dbWriter, err := mmdbwriter.New( + mmdbwriter.Options{ + DatabaseType: repeatedString, + Description: map[string]string{ + "en": repeatedString, + "es": repeatedString, + "zh": repeatedString, + }, + DisableIPv4Aliasing: true, + IPVersion: 6, + Languages: []string{"en", "es", "zh"}, + RecordSize: 24, + }, + ) + if err != nil { + return fmt.Errorf("creating mmdbwriter: %w", err) + } + + if err := populateAllNetworks(dbWriter); err != nil { + return fmt.Errorf("inserting all networks: %w", err) + } + + if err := w.write(dbWriter, "MaxMind-DB-test-metadata-pointers.mmdb"); err != nil { + return fmt.Errorf("writing database: %w", err) + } + return nil +} diff --git a/pkg/writer/nestedstructures.go b/pkg/writer/nestedstructures.go new file mode 100644 index 0000000..793bebb --- /dev/null +++ b/pkg/writer/nestedstructures.go @@ -0,0 +1,73 @@ +package writer + +import ( + "fmt" + "net/netip" + + "github.com/maxmind/mmdbwriter" + "github.com/maxmind/mmdbwriter/mmdbtype" + "go4.org/netipx" +) + +// WriteDeeplyNestedStructuresTestDB writes an mmdb file with deeply nested record value types. 
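+// The output file is MaxMind-DB-test-nested.mmdb.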
+func (w *Writer) WriteDeeplyNestedStructuresTestDB() error { + dbWriter, err := mmdbwriter.New( + mmdbwriter.Options{ + DatabaseType: "MaxMind DB Nested Data Structures", + Description: map[string]string{ + "en": "MaxMind DB Nested Data Structures Test database - contains deeply nested map/array structures", + }, + DisableIPv4Aliasing: false, + IncludeReservedNetworks: true, + IPVersion: 6, + Languages: []string{"en"}, + RecordSize: 24, + }, + ) + if err != nil { + return fmt.Errorf("creating mmdbwriter: %w", err) + } + + addrs, err := parseIPSlice(ipSample) + if err != nil { + return fmt.Errorf("parsing ip addresses: %w", err) + } + if err := insertNestedStructure(dbWriter, addrs); err != nil { + return fmt.Errorf("inserting all types records: %w", err) + } + + if err := w.write(dbWriter, "MaxMind-DB-test-nested.mmdb"); err != nil { + return fmt.Errorf("writing database: %w", err) + } + return nil +} + +// insertNestedStructure inserts records with deeply nested structures. +func insertNestedStructure(w *mmdbwriter.Tree, ipAddresses []netip.Prefix) error { + nestedStruct := mmdbtype.Map{ + "map1": mmdbtype.Map{ + "map2": mmdbtype.Map{ + "array": mmdbtype.Slice{ + mmdbtype.Map{ + "map3": mmdbtype.Map{ + "a": mmdbtype.Uint32(1), + "b": mmdbtype.Uint32(2), + "c": mmdbtype.Uint32(3), + }, + }, + }, + }, + }, + } + + for _, addr := range ipAddresses { + err := w.Insert( + netipx.PrefixIPNet(addr), + nestedStruct, + ) + if err != nil { + return fmt.Errorf("inserting ip: %w", err) + } + } + return nil +} diff --git a/pkg/writer/writer.go b/pkg/writer/writer.go new file mode 100644 index 0000000..0362e48 --- /dev/null +++ b/pkg/writer/writer.go @@ -0,0 +1,58 @@ +// Package writer defines database writers responsible +// for generating test mmdb files. +package writer + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/maxmind/mmdbwriter" +) + +var ipSample = []string{ + "::1.1.1.0/120", + "::2.2.0.0/112", + "::3.0.0.0/104", + "::4.5.6.7/128", + "abcd::/64", + "1000::1234:0000/112", +} + +// Writer is responsible for writing test mmdb databases +// based on the provided data sources. +type Writer struct { + source string + target string +} + +// New initializes a new test database writer struct. 
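+// It verifies that the source directory exists and creates the target
+// directory if it does not already exist.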
+func New(source, target string) (*Writer, error) { + s := filepath.Clean(source) + if _, err := os.Stat(s); os.IsNotExist(err) { + return nil, fmt.Errorf("source directory does not exist: %w", err) + } + + t := filepath.Clean(target) + if err := os.MkdirAll(t, os.ModePerm); err != nil { + return nil, fmt.Errorf("creating target directory: %w", err) + } + + return &Writer{ + source: s, + target: t, + }, nil +} + +func (w *Writer) write(dbWriter *mmdbwriter.Tree, fileName string) error { + outputFile, err := os.Create(filepath.Clean(filepath.Join(w.target, fileName))) + if err != nil { + return fmt.Errorf("creating mmdb file: %w", err) + } + defer outputFile.Close() + + if _, err := dbWriter.WriteTo(outputFile); err != nil { + return fmt.Errorf("writing mmdb file: %w", err) + } + return nil +} diff --git a/test-data/GeoIP2-Anonymous-IP-Test.mmdb b/test-data/GeoIP2-Anonymous-IP-Test.mmdb index afb1857..7d0dd61 100644 Binary files a/test-data/GeoIP2-Anonymous-IP-Test.mmdb and b/test-data/GeoIP2-Anonymous-IP-Test.mmdb differ diff --git a/test-data/GeoIP2-City-Test.mmdb b/test-data/GeoIP2-City-Test.mmdb index 5353c20..1213dce 100644 Binary files a/test-data/GeoIP2-City-Test.mmdb and b/test-data/GeoIP2-City-Test.mmdb differ diff --git a/test-data/GeoIP2-Connection-Type-Test.mmdb b/test-data/GeoIP2-Connection-Type-Test.mmdb index 36c4783..28259a8 100644 Binary files a/test-data/GeoIP2-Connection-Type-Test.mmdb and b/test-data/GeoIP2-Connection-Type-Test.mmdb differ diff --git a/test-data/GeoIP2-Country-Test.mmdb b/test-data/GeoIP2-Country-Test.mmdb index 2951ac6..6b3c317 100644 Binary files a/test-data/GeoIP2-Country-Test.mmdb and b/test-data/GeoIP2-Country-Test.mmdb differ diff --git a/test-data/GeoIP2-DensityIncome-Test.mmdb b/test-data/GeoIP2-DensityIncome-Test.mmdb index c994bf0..73ac5ca 100644 Binary files a/test-data/GeoIP2-DensityIncome-Test.mmdb and b/test-data/GeoIP2-DensityIncome-Test.mmdb differ diff --git a/test-data/GeoIP2-Domain-Test.mmdb b/test-data/GeoIP2-Domain-Test.mmdb index 5fc446b..4805715 100644 Binary files a/test-data/GeoIP2-Domain-Test.mmdb and b/test-data/GeoIP2-Domain-Test.mmdb differ diff --git a/test-data/GeoIP2-Enterprise-Test.mmdb b/test-data/GeoIP2-Enterprise-Test.mmdb index 7fff1b9..e1f7975 100644 Binary files a/test-data/GeoIP2-Enterprise-Test.mmdb and b/test-data/GeoIP2-Enterprise-Test.mmdb differ diff --git a/test-data/GeoIP2-ISP-Test.mmdb b/test-data/GeoIP2-ISP-Test.mmdb index 26d5db5..0809ae6 100644 Binary files a/test-data/GeoIP2-ISP-Test.mmdb and b/test-data/GeoIP2-ISP-Test.mmdb differ diff --git a/test-data/GeoIP2-Precision-Enterprise-Test.mmdb b/test-data/GeoIP2-Precision-Enterprise-Test.mmdb index bbfa811..bd4d031 100644 Binary files a/test-data/GeoIP2-Precision-Enterprise-Test.mmdb and b/test-data/GeoIP2-Precision-Enterprise-Test.mmdb differ diff --git a/test-data/GeoIP2-Static-IP-Score-Test.mmdb b/test-data/GeoIP2-Static-IP-Score-Test.mmdb index 63f6fc3..a77984f 100644 Binary files a/test-data/GeoIP2-Static-IP-Score-Test.mmdb and b/test-data/GeoIP2-Static-IP-Score-Test.mmdb differ diff --git a/test-data/GeoIP2-User-Count-Test.mmdb b/test-data/GeoIP2-User-Count-Test.mmdb index a62e381..a2d3ec9 100644 Binary files a/test-data/GeoIP2-User-Count-Test.mmdb and b/test-data/GeoIP2-User-Count-Test.mmdb differ diff --git a/test-data/GeoLite2-ASN-Test.mmdb b/test-data/GeoLite2-ASN-Test.mmdb index 97997e8..b3dba2b 100644 Binary files a/test-data/GeoLite2-ASN-Test.mmdb and b/test-data/GeoLite2-ASN-Test.mmdb differ diff --git a/test-data/GeoLite2-City-Test.mmdb 
b/test-data/GeoLite2-City-Test.mmdb index 3c48b29..c71aea1 100644 Binary files a/test-data/GeoLite2-City-Test.mmdb and b/test-data/GeoLite2-City-Test.mmdb differ diff --git a/test-data/GeoLite2-Country-Test.mmdb b/test-data/GeoLite2-Country-Test.mmdb index 40b1697..b562b34 100644 Binary files a/test-data/GeoLite2-Country-Test.mmdb and b/test-data/GeoLite2-Country-Test.mmdb differ diff --git a/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb b/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb index 7cbded1..61e1a37 100644 Binary files a/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb and b/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb differ diff --git a/test-data/MaxMind-DB-string-value-entries.mmdb b/test-data/MaxMind-DB-string-value-entries.mmdb index 0f2233e..688398d 100644 Binary files a/test-data/MaxMind-DB-string-value-entries.mmdb and b/test-data/MaxMind-DB-string-value-entries.mmdb differ diff --git a/test-data/MaxMind-DB-test-decoder.mmdb b/test-data/MaxMind-DB-test-decoder.mmdb index 775e744..ce38d5e 100644 Binary files a/test-data/MaxMind-DB-test-decoder.mmdb and b/test-data/MaxMind-DB-test-decoder.mmdb differ diff --git a/test-data/MaxMind-DB-test-ipv4-24.mmdb b/test-data/MaxMind-DB-test-ipv4-24.mmdb index 00c1a37..d5979ad 100644 Binary files a/test-data/MaxMind-DB-test-ipv4-24.mmdb and b/test-data/MaxMind-DB-test-ipv4-24.mmdb differ diff --git a/test-data/MaxMind-DB-test-ipv4-28.mmdb b/test-data/MaxMind-DB-test-ipv4-28.mmdb index daff941..c2b4910 100644 Binary files a/test-data/MaxMind-DB-test-ipv4-28.mmdb and b/test-data/MaxMind-DB-test-ipv4-28.mmdb differ diff --git a/test-data/MaxMind-DB-test-ipv4-32.mmdb b/test-data/MaxMind-DB-test-ipv4-32.mmdb index 114babd..9ba4577 100644 Binary files a/test-data/MaxMind-DB-test-ipv4-32.mmdb and b/test-data/MaxMind-DB-test-ipv4-32.mmdb differ diff --git a/test-data/MaxMind-DB-test-ipv6-24.mmdb b/test-data/MaxMind-DB-test-ipv6-24.mmdb index 22eece0..03e6546 100644 Binary files a/test-data/MaxMind-DB-test-ipv6-24.mmdb and b/test-data/MaxMind-DB-test-ipv6-24.mmdb differ diff --git a/test-data/MaxMind-DB-test-ipv6-28.mmdb b/test-data/MaxMind-DB-test-ipv6-28.mmdb index 00e94c0..da61c7b 100644 Binary files a/test-data/MaxMind-DB-test-ipv6-28.mmdb and b/test-data/MaxMind-DB-test-ipv6-28.mmdb differ diff --git a/test-data/MaxMind-DB-test-ipv6-32.mmdb b/test-data/MaxMind-DB-test-ipv6-32.mmdb index dea4bb1..4635693 100644 Binary files a/test-data/MaxMind-DB-test-ipv6-32.mmdb and b/test-data/MaxMind-DB-test-ipv6-32.mmdb differ diff --git a/test-data/MaxMind-DB-test-metadata-pointers.mmdb b/test-data/MaxMind-DB-test-metadata-pointers.mmdb index bd62740..d1996b2 100644 Binary files a/test-data/MaxMind-DB-test-metadata-pointers.mmdb and b/test-data/MaxMind-DB-test-metadata-pointers.mmdb differ diff --git a/test-data/MaxMind-DB-test-mixed-24.mmdb b/test-data/MaxMind-DB-test-mixed-24.mmdb index 735328e..ec462eb 100644 Binary files a/test-data/MaxMind-DB-test-mixed-24.mmdb and b/test-data/MaxMind-DB-test-mixed-24.mmdb differ diff --git a/test-data/MaxMind-DB-test-mixed-28.mmdb b/test-data/MaxMind-DB-test-mixed-28.mmdb index 3b3db5e..3c16b9c 100644 Binary files a/test-data/MaxMind-DB-test-mixed-28.mmdb and b/test-data/MaxMind-DB-test-mixed-28.mmdb differ diff --git a/test-data/MaxMind-DB-test-mixed-32.mmdb b/test-data/MaxMind-DB-test-mixed-32.mmdb index 4797721..4fd79d1 100644 Binary files a/test-data/MaxMind-DB-test-mixed-32.mmdb and b/test-data/MaxMind-DB-test-mixed-32.mmdb differ diff --git a/test-data/MaxMind-DB-test-nested.mmdb 
b/test-data/MaxMind-DB-test-nested.mmdb index 1a22175..7d5fc3c 100644 Binary files a/test-data/MaxMind-DB-test-nested.mmdb and b/test-data/MaxMind-DB-test-nested.mmdb differ diff --git a/test-data/README.md b/test-data/README.md index 596bd76..7931168 100644 --- a/test-data/README.md +++ b/test-data/README.md @@ -1,15 +1,10 @@ -The -[write-test-data.pl](https://github.com/maxmind/MaxMind-DB/blob/main/test-data/write-test-data.pl) -script will create a small set of test databases with a variety of data and -record sizes (24, 28, & 32 bit). +## How to generate test data +Use the [write-test-data](https://github.com/maxmind/MaxMind-DB/blob/main/cmd/write-test-data) +go tool to create a small set of test databases with a variety of data and +record sizes. These test databases are useful for testing code that reads MaxMind DB files. -There is also a `maps-with-pointers.raw` file. This contains the raw output of -the MaxMind::DB::Writer::Serializer module, when given a series of maps which -share some keys and values. It is used to test that decoder code can handle -pointers to map keys and values, as well as to the whole map. - There are several ways to figure out what IP addresses are actually in the test databases. You can take a look at the [source-data directory](https://github.com/maxmind/MaxMind-DB/tree/main/source-data) @@ -21,8 +16,27 @@ You can also use the in the [MaxMind-DB-Reader-perl repository](https://github.com/maxmind/MaxMind-DB-Reader-perl). -Some databases are intentionally broken and cannot be dumped. You can look at -the -[script which generates these databases](https://github.com/maxmind/MaxMind-DB/blob/main/test-data/write-test-data.pl) -to see what IP addresses they include, which will be necessary for those -databases which cannot be dumped because they contain intentional errors. +## Static test data +Some of the test files are remnants of the +[old perl test data writer](https://github.com/maxmind/MaxMind-DB/blob/f0a85c671c5b6e9c5e514bd66162724ee1dedea3/test-data/write-test-data.pl) +and cannot be generated with the go tool. 
These databases are either intentionally broken
+or exploit functionality that is simply not available in the Go mmdbwriter:
+
+- MaxMind-DB-test-broken-pointers-24.mmdb
+- MaxMind-DB-test-broken-search-tree-24.mmdb
+- MaxMind-DB-test-pointer-decoder.mmdb
+- GeoIP2-City-Test-Broken-Double-Format.mmdb
+- GeoIP2-City-Test-Invalid-Node-Count.mmdb
+- maps-with-pointers.raw
+
+## Usage
+```
+Usage of ./write-test-data:
+  -source string
+        Source data directory
+  -target string
+        Destination directory for the generated mmdb files
+```
+
+Example:
+`./write-test-data --source ../../source-data --target ../../test-data`
diff --git a/test-data/write-test-data.pl b/test-data/write-test-data.pl
deleted file mode 100755
index ee34943..0000000
--- a/test-data/write-test-data.pl
+++ /dev/null
@@ -1,681 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-use autodie;
-use utf8;
-
-use Cwd qw( abs_path );
-use File::Basename qw( dirname );
-use File::Slurper qw( read_binary write_binary );
-use Cpanel::JSON::XS 4.16 qw( decode_json );
-use Math::Int128 qw( MAX_UINT128 string_to_uint128 uint128 );
-use MaxMind::DB::Writer::Serializer 0.100004;
-use MaxMind::DB::Writer::Tree 0.100004;
-use MaxMind::DB::Writer::Util qw( key_for_data );
-use Net::Works::Network ();
-use Test::MaxMind::DB::Common::Util qw( standard_test_metadata );
-
-my $Dir = dirname( abs_path($0) );
-
-sub main {
-    my @sizes = ( 24, 28, 32 );
-    my @ipv4_range = ( '1.1.1.1', '1.1.1.32' );
-
-    my @ipv4_subnets = Net::Works::Network->range_as_subnets(@ipv4_range);
-    for my $record_size (@sizes) {
-        write_test_db(
-            $record_size, \@ipv4_subnets, { ip_version => 4 },
-            'ipv4',
-        );
-    }
-
-    write_broken_pointers_test_db(
-        24, \@ipv4_subnets, { ip_version => 4 },
-        'broken-pointers',
-    );
-
-    write_broken_search_tree_db(
-        24, \@ipv4_subnets, { ip_version => 4 },
-        'broken-search-tree',
-    );
-
-    my @ipv6_subnets = Net::Works::Network->range_as_subnets(
-        '::1:ffff:ffff',
-        '::2:0000:0059'
-    );
-
-    for my $record_size (@sizes) {
-        write_test_db(
-            $record_size, \@ipv6_subnets, { ip_version => 6 },
-            'ipv6',
-        );
-
-        write_test_db(
-            $record_size,
-            [
-                @ipv6_subnets,
-                Net::Works::Network->range_as_subnets( @ipv4_range, 6 ),
-            ],
-            { ip_version => 6 },
-            'mixed',
-        );
-    }
-
-    write_decoder_test_db();
-    write_pointer_decoder_test_db();
-    write_deeply_nested_structures_db();
-
-    write_geoip2_dbs();
-    write_broken_geoip2_city_db();
-    write_invalid_node_count();
-
-    write_no_ipv4_tree_db();
-
-    write_no_map_db( \@ipv4_subnets );
-
-    write_test_serialization_data();
-
-    write_db_with_metadata_pointers();
-}
-
-sub write_broken_pointers_test_db {
-    no warnings 'redefine';
-
-    my $orig_store_data = MaxMind::DB::Writer::Serializer->can('store_data');
-
-    # This breaks the value of the record for the 1.1.1.32 network, causing it
-    # to point outside the database.
-    local *MaxMind::DB::Writer::Serializer::store_data = sub {
-        my $data_pointer = shift->$orig_store_data(@_);
-        my $value = $_[1];
-        if ( ref($value) eq 'HASH'
-            && exists $value->{ip}
-            && $value->{ip} eq '1.1.1.32' ) {
-
-            $data_pointer += 100_000;
-        }
-        return $data_pointer;
-    };
-
-    # The next hack will poison the data section for the 1.1.16/28 subnet
-    # value. It's value will be a pointer that resolves to an offset outside
-    # the database.
- - my $key_to_poison = key_for_data( { ip => '1.1.1.16' } ); - - my $orig_position_for_data - = MaxMind::DB::Writer::Serializer->can('_position_for_data'); - local *MaxMind::DB::Writer::Serializer::_position_for_data = sub { - my $key = $_[1]; - - if ( $key eq $key_to_poison ) { - return 1_000_000; - } - else { - return shift->$orig_position_for_data(@_); - } - }; - - write_test_db(@_); - - return; -} - -sub write_broken_search_tree_db { - my $filename = ( write_test_db(@_) )[1]; - - my $content = read_binary($filename); - - # This causes the right record of the first node to be 0, meaning it - # points back to the top of the tree. This should never happen in a - # database that follows the spec. - substr( $content, 5, 1 ) = "\0"; - write_binary( $filename, $content ); - - return; -} - -sub write_test_db { - my $record_size = shift; - my $subnets = shift; - my $metadata = shift; - my $ip_version_name = shift; - - my $writer = MaxMind::DB::Writer::Tree->new( - ip_version => $subnets->[0]->version(), - record_size => $record_size, - alias_ipv6_to_ipv4 => ( $subnets->[0]->version() == 6 ? 1 : 0 ), - map_key_type_callback => sub { 'utf8_string' }, - standard_test_metadata(), - %{$metadata}, - ); - - for my $subnet ( @{$subnets} ) { - $writer->insert_network( - $subnet, - { ip => $subnet->first()->as_string() } - ); - } - - my $filename = sprintf( - "$Dir/MaxMind-DB-test-%s-%i.mmdb", - $ip_version_name, $record_size, - ); - open my $fh, '>', $filename; - - $writer->write_tree($fh); - - close $fh; - - return ( $writer, $filename ); -} - -{ - # We will store this once for each subnet so we will also be testing - # pointers, since the serializer will generate a pointer to this - # structure. - my %all_types = ( - utf8_string => 'unicode! ☯ - ♫', - double => 42.123456, - bytes => pack( 'N', 42 ), - uint16 => 100, - uint32 => 2**28, - int32 => -1 * ( 2**28 ), - uint64 => uint128(1) << 60, - uint128 => uint128(1) << 120, - array => [ 1, 2, 3, ], - map => { - mapX => { - utf8_stringX => 'hello', - arrayX => [ 7, 8, 9 ], - }, - }, - boolean => 1, - float => 1.1, - ); - - my %all_types_0 = ( - utf8_string => q{}, - double => 0, - bytes => q{}, - uint16 => 0, - uint32 => 0, - int32 => 0, - uint64 => uint128(0), - uint128 => uint128(0), - array => [], - map => {}, - boolean => 0, - float => 0, - ); - - # We limit this to numeric types as the other types would generate - # very large databases - my %numeric_types_max = ( - double => 'Inf', - float => 'Inf', - int32 => 0x7fffffff, - uint16 => 0xffff, - uint32 => string_to_uint128('0xffff_ffff'), - uint64 => string_to_uint128('0xffff_ffff_ffff_ffff'), - uint128 => MAX_UINT128, - ); - - sub write_decoder_test_db { - my $writer = _decoder_writer(); - - my @subnets - = map { Net::Works::Network->new_from_string( string => $_ ) } - qw( - ::1.1.1.0/120 - ::2.2.0.0/112 - ::3.0.0.0/104 - ::4.5.6.7/128 - abcd::/64 - 1000::1234:0000/112 - ); - - for my $subnet (@subnets) { - $writer->insert_network( $subnet, \%all_types, ); - } - - $writer->insert_network( - Net::Works::Network->new_from_string( string => '::0.0.0.0/128' ), - \%all_types_0, - ); - - $writer->insert_network( - Net::Works::Network->new_from_string( - string => '::255.255.255.255/128' - ), - \%numeric_types_max, - ); - - open my $fh, '>', "$Dir/MaxMind-DB-test-decoder.mmdb"; - $writer->write_tree($fh); - close $fh; - - return; - } - - sub write_pointer_decoder_test_db { - - # We want to create a database where most values are pointers - no warnings 'redefine'; - local 
*MaxMind::DB::Writer::Serializer::_should_cache_value - = sub { 1 }; - my $writer = _decoder_writer(); - - # We add these slightly different records so that we end up with - # pointers for the individual values in the maps, not just pointers - # to the map - $writer->insert_network( - '1.0.0.0/32', - { - %all_types, - booleanX => 0, - arrayX => [ 1, 2, 3, 4, ], - mapXX => { - utf8_stringX => 'hello', - arrayX => [ 7, 8, 9, 10 ], - booleanX => 0, - }, - }, - ); - - $writer->insert_network( - '1.1.1.0/32', - { - %all_types, - - # This has to be 0 rather than 1 as otherwise the buggy - # Perl writer will think it is the same as an uint32 value of - # 1 and make a pointer to a value of a different type. - boolean => 0, - }, - ); - - open my $fh, '>', "$Dir/MaxMind-DB-test-pointer-decoder.mmdb"; - $writer->write_tree($fh); - close $fh; - - return; - } - - sub _decoder_writer { - return MaxMind::DB::Writer::Tree->new( - ip_version => 6, - record_size => 24, - database_type => 'MaxMind DB Decoder Test', - languages => ['en'], - description => { - en => - 'MaxMind DB Decoder Test database - contains every MaxMind DB data type', - }, - alias_ipv6_to_ipv4 => 1, - remove_reserved_networks => 0, - map_key_type_callback => sub { - my $key = $_[0]; - $key =~ s/X*$//; - return $key eq 'array' ? [ 'array', 'uint32' ] : $key; - }, - ); - } -} - -{ - my %nested = ( - map1 => { - map2 => { - array => [ - { - map3 => { a => 1, b => 2, c => 3 }, - }, - ], - }, - }, - ); - - sub write_deeply_nested_structures_db { - my $writer = MaxMind::DB::Writer::Tree->new( - ip_version => 6, - record_size => 24, - ip_version => 6, - database_type => 'MaxMind DB Nested Data Structures', - languages => ['en'], - description => { - en => - 'MaxMind DB Nested Data Structures Test database - contains deeply nested map/array structures', - }, - alias_ipv6_to_ipv4 => 1, - map_key_type_callback => sub { - my $key = shift; - return - $key =~ /^map/ ? 'map' - : $key eq 'array' ? [ 'array', 'map' ] - : 'uint32'; - } - ); - - my @subnets - = map { Net::Works::Network->new_from_string( string => $_ ) } - qw( - ::1.1.1.0/120 - ::2.2.0.0/112 - ::3.0.0.0/104 - ::4.5.6.7/128 - abcd::/64 - 1000::1234:0000/112 - ); - - for my $subnet (@subnets) { - $writer->insert_network( $subnet, \%nested, ); - } - - open my $fh, '>', "$Dir/MaxMind-DB-test-nested.mmdb"; - $writer->write_tree($fh); - close $fh; - - return; - } -} - -sub write_geoip2_dbs { - _write_geoip2_db( @{$_}[ 0, 1 ], 'Test' ) - for ( - [ 'GeoIP2-Anonymous-IP', {} ], - ['GeoIP2-City'], - ['GeoIP2-Connection-Type'], - ['GeoIP2-Country'], - ['GeoIP2-DensityIncome'], - ['GeoIP2-Domain'], - ['GeoIP2-Enterprise'], - ['GeoIP2-ISP'], - ['GeoIP2-Precision-Enterprise'], - ['GeoIP2-Static-IP-Score'], - ['GeoIP2-User-Count'], - ['GeoLite2-ASN'], - ['GeoLite2-City'], - ['GeoLite2-Country'], - ); -} - -sub write_broken_geoip2_city_db { - no warnings 'redefine'; - - # This is how we _used_ to encode doubles. Storing them this way with the - # current reader tools can lead to weird errors. This broken database is a - # good way to test the robustness of reader code in the face of broken - # databases. 
- local *MaxMind::DB::Writer::Serializer::_encode_double = sub { - my $self = shift; - my $value = shift; - - $self->_simple_encode( double => $value ); - }; - - _write_geoip2_db( 'GeoIP2-City', 0, 'Test Broken Double Format' ); -} - -sub write_invalid_node_count { - no warnings 'redefine'; - local *MaxMind::DB::Writer::Tree::node_count = sub { 100000 }; - - _write_geoip2_db( 'GeoIP2-City', 0, 'Test Invalid Node Count' ); -} - -sub _universal_map_key_type_callback { - my $map = { - - # languages - de => 'utf8_string', - en => 'utf8_string', - es => 'utf8_string', - fr => 'utf8_string', - ja => 'utf8_string', - 'pt-BR' => 'utf8_string', - ru => 'utf8_string', - 'zh-CN' => 'utf8_string', - - # production - accuracy_radius => 'uint16', - autonomous_system_number => 'uint32', - autonomous_system_organization => 'utf8_string', - average_income => 'uint32', - city => 'map', - code => 'utf8_string', - confidence => 'uint16', - connection_type => 'utf8_string', - continent => 'map', - country => 'map', - domain => 'utf8_string', - geoname_id => 'uint32', - ip_risk => 'double', - ipv4_24 => 'uint32', - ipv4_32 => 'uint32', - ipv6_32 => 'uint32', - ipv6_48 => 'uint32', - ipv6_64 => 'uint32', - is_anonymous => 'boolean', - is_anonymous_proxy => 'boolean', - is_anonymous_vpn => 'boolean', - is_hosting_provider => 'boolean', - is_in_european_union => 'boolean', - is_legitimate_proxy => 'boolean', - is_public_proxy => 'boolean', - is_residential_proxy => 'boolean', - is_satellite_provider => 'boolean', - is_tor_exit_node => 'boolean', - iso_code => 'utf8_string', - isp => 'utf8_string', - latitude => 'double', - location => 'map', - longitude => 'double', - metro_code => 'uint16', - mobile_country_code => 'utf8_string', - mobile_network_code => 'utf8_string', - names => 'map', - organization => 'utf8_string', - population_density => 'uint32', - postal => 'map', - registered_country => 'map', - represented_country => 'map', - score => 'double', - static_ip_score => 'double', - subdivisions => [ 'array', 'map' ], - time_zone => 'utf8_string', - traits => 'map', - traits => 'map', - type => 'utf8_string', - user_type => 'utf8_string', - - # for testing only - foo => 'utf8_string', - bar => 'utf8_string', - buzz => 'utf8_string', - our_value => 'utf8_string', - }; - - my $callback = sub { - my $key = shift; - - return $map->{$key} || die <<"ERROR"; -Unknown tree key '$key'. - -The universal_map_key_type_callback doesn't know what type to use for the passed -key. If you are adding a new key that will be used in a frozen tree / mmdb then -you should update the mapping in both our internal code and here. -ERROR - }; - - return $callback; -} - -sub _write_geoip2_db { - my $type = shift; - my $populate_all_networks_with_data = shift; - my $description = shift; - - my $writer = MaxMind::DB::Writer::Tree->new( - ip_version => 6, - record_size => 28, - ip_version => 6, - database_type => $type, - languages => [ 'en', $type eq 'GeoIP2-City' ? ('zh') : () ], - description => { - en => ( $type =~ s/-/ /gr ) - . " $description Database (fake GeoIP2 data, for example purposes only)", - $type eq 'GeoIP2-City' ? 
( zh => '小型数据库' ) : (), - }, - alias_ipv6_to_ipv4 => 1, - map_key_type_callback => _universal_map_key_type_callback(), - ); - - _populate_all_networks( $writer, $populate_all_networks_with_data ) - if $populate_all_networks_with_data; - - my $value = shift; - my $nodes - = decode_json( read_binary("$Dir/../source-data/$type-Test.json") ); - - for my $node (@$nodes) { - for my $network ( keys %$node ) { - $writer->insert_network( - Net::Works::Network->new_from_string( string => $network ), - $node->{$network} - ); - } - } - - my $suffix = $description =~ s/ /-/gr; - open my $output_fh, '>', "$Dir/$type-$suffix.mmdb"; - $writer->write_tree($output_fh); - close $output_fh; - - return; -} - -sub _populate_all_networks { - my $writer = shift; - my $data = shift; - - my $max_uint128 = uint128(0) - 1; - my @networks = Net::Works::Network->range_as_subnets( - Net::Works::Address->new_from_integer( - integer => 0, - version => 6, - ), - Net::Works::Address->new_from_integer( - integer => $max_uint128, - version => 6, - ), - ); - - for my $network (@networks) { - $writer->insert_network( $network => $data ); - } -} - -sub write_no_ipv4_tree_db { - my $subnets = shift; - - my $writer = MaxMind::DB::Writer::Tree->new( - ip_version => 6, - record_size => 24, - ip_version => 6, - database_type => 'MaxMind DB No IPv4 Search Tree', - languages => ['en'], - description => { - en => 'MaxMind DB No IPv4 Search Tree', - }, - remove_reserved_networks => 0, - root_data_type => 'utf8_string', - map_key_type_callback => sub { {} }, - ); - - my $subnet = Net::Works::Network->new_from_string( string => '::/64' ); - $writer->insert_network( $subnet, $subnet->as_string() ); - - open my $output_fh, '>', "$Dir/MaxMind-DB-no-ipv4-search-tree.mmdb"; - $writer->write_tree($output_fh); - close $output_fh; - - return; -} - -# The point of this database is to provide something where we can test looking -# up a single value. In other words, each IP address points to a non-compound -# value, a string rather than a map or array. 
-sub write_no_map_db { - my $subnets = shift; - - my $writer = MaxMind::DB::Writer::Tree->new( - ip_version => 4, - record_size => 24, - database_type => 'MaxMind DB String Value Entries', - languages => ['en'], - description => { - en => - 'MaxMind DB String Value Entries (no maps or arrays as values)', - }, - root_data_type => 'utf8_string', - map_key_type_callback => sub { {} }, - ); - - for my $subnet ( @{$subnets} ) { - $writer->insert_network( $subnet, $subnet->as_string() ); - } - - open my $output_fh, '>', "$Dir/MaxMind-DB-string-value-entries.mmdb"; - $writer->write_tree($output_fh); - close $output_fh; - - return; -} - -sub write_test_serialization_data { - my $serializer = MaxMind::DB::Writer::Serializer->new( - map_key_type_callback => sub { 'utf8_string' } ); - - $serializer->store_data( map => { long_key => 'long_value1' } ); - $serializer->store_data( map => { long_key => 'long_value2' } ); - $serializer->store_data( map => { long_key2 => 'long_value1' } ); - $serializer->store_data( map => { long_key2 => 'long_value2' } ); - $serializer->store_data( map => { long_key => 'long_value1' } ); - $serializer->store_data( map => { long_key2 => 'long_value2' } ); - - open my $fh, '>', "$Dir/maps-with-pointers.raw"; - print {$fh} ${ $serializer->buffer() } - or die "Cannot write to maps-with-pointers.raw: $!"; - close $fh; - - return; -} - -sub write_db_with_metadata_pointers { - my $repeated_string = 'Lots of pointers in metadata'; - my $writer = MaxMind::DB::Writer::Tree->new( - ip_version => 6, - record_size => 24, - map_key_type_callback => sub { 'utf8_string' }, - database_type => $repeated_string, - languages => [ 'en', 'es', 'zh' ], - description => { - en => $repeated_string, - es => $repeated_string, - zh => $repeated_string, - }, - - ); - - _populate_all_networks( $writer, {} ); - - open my $fh, '>', "$Dir/MaxMind-DB-test-metadata-pointers.mmdb"; - - $writer->write_tree($fh); - - close $fh; -} - -main();
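For reference, the databases that the deleted script built with `write_test_db()` are now produced by the Go tool described in the README. The following is a minimal, illustrative sketch of that approach, assuming the `github.com/maxmind/mmdbwriter` package the README refers to; it is not part of this patch, and the file name, network, and record values are examples only.

```go
package main

import (
	"log"
	"net"
	"os"

	"github.com/maxmind/mmdbwriter"
	"github.com/maxmind/mmdbwriter/mmdbtype"
)

func main() {
	// Build a small IPv4 tree with 24-bit records, roughly analogous to the
	// old write_test_db() output. (Illustrative options only.)
	tree, err := mmdbwriter.New(mmdbwriter.Options{
		DatabaseType: "Test",
		RecordSize:   24,
		IPVersion:    4,
	})
	if err != nil {
		log.Fatal(err)
	}

	// Store a map whose "ip" key is the first address of the network,
	// mirroring what the Perl writer inserted for each subnet.
	_, network, err := net.ParseCIDR("1.1.1.0/28")
	if err != nil {
		log.Fatal(err)
	}
	if err := tree.Insert(network, mmdbtype.Map{
		"ip": mmdbtype.String("1.1.1.0"),
	}); err != nil {
		log.Fatal(err)
	}

	// Write the finished search tree and data section to an .mmdb file.
	fh, err := os.Create("MaxMind-DB-test-ipv4-24.mmdb")
	if err != nil {
		log.Fatal(err)
	}
	defer fh.Close()

	if _, err := tree.WriteTo(fh); err != nil {
		log.Fatal(err)
	}
}
```

The intentionally broken and pointer-heavy databases listed in the README cannot be reproduced this way, since they rely on the monkey-patched serializer hooks shown in the deleted script.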