diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..571e0a3 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +testdata/ linguist-generated=true diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..8232ae8 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,17 @@ +# Please see the documentation for all configuration options: +# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + reviewers: + - "rstudio/ppm" + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "monthly" + reviewers: + - "rstudio/ppm" diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml new file mode 100644 index 0000000..d908d2d --- /dev/null +++ b/.github/workflows/go.yml @@ -0,0 +1,43 @@ +name: Go CI + +on: + push: + branches: + - main + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: install go + uses: actions/setup-go@v4 + with: + go-version: 'stable' + - name: install python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + # Rust is required to build some packages + - name: install rust + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + - name: install python dependencies + run: pip install twine build wheel + - name: install go dependencies + run: go install gotest.tools/gotestsum@latest + - name: run golangci-lint + uses: reviewdog/action-golangci-lint@v2 + - name: build + run: go build + - name: test + run: gotestsum --jsonfile unit-tests.json + - name: annotate tests + if: always() + uses: guyarb/golang-test-annotations@v0.5.1 + with: + test-results: unit-tests.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b278b62 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +testdata/repositories/ 
+!testdata/repositories/.gitkeep +.idea +*.iml diff --git a/README.md b/README.md index 1b5ce95..3129dad 100644 --- a/README.md +++ b/README.md @@ -113,4 +113,16 @@ And here is example output: "blake2_256_digest": "2a8c9ca4a072d43e00503f16d08db68d690644949ac3e4704ba6c2e7028e8402" } ] -``` \ No newline at end of file +``` + +## Linting + +```bash +golangci-lint run --fix ./... +``` + +## Testing + +```bash +go test ./... +``` diff --git a/go.mod b/go.mod index ed09057..918063a 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,17 @@ module github.com/rstudio/python-distribution-parser go 1.21 require ( - golang.org/x/crypto v0.13.0 + github.com/google/go-cmp v0.6.0 + github.com/samber/lo v1.38.1 + github.com/stretchr/testify v1.8.4 + golang.org/x/crypto v0.14.0 golang.org/x/text v0.13.0 ) -require golang.org/x/sys v0.12.0 // indirect +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect + golang.org/x/sys v0.13.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum index 4e44bbc..f959a47 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,40 @@ +github.com/bradleyjkemp/cupaloy v2.3.0+incompatible h1:UafIjBvWQmS9i/xRg+CamMrnLTKNzo+bdmT/oH34c2Y= +github.com/bradleyjkemp/cupaloy v2.3.0+incompatible/go.mod h1:Au1Xw1sgaJ5iSFktEhYsS0dbQiS1B0/XMXl+42y9Ilk= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= +github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.1 h1:4VhoImhV/Bm0ToFkXFi8hXNXwpDRZ/ynw3amt82mzq0= +github.com/stretchr/objx v0.5.1/go.mod h1:/iHQpkQwBD6DLUmQ4pE+s1TXdob1mORJ4/UFdrifcy0= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck= golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= +golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= +golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= +golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM= +golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= 
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/archiver/archive_reader.go b/internal/archiver/archive_reader.go similarity index 78% rename from archiver/archive_reader.go rename to internal/archiver/archive_reader.go index 725b337..aaa0003 100644 --- a/archiver/archive_reader.go +++ b/internal/archiver/archive_reader.go @@ -7,12 +7,13 @@ import ( "errors" "fmt" "io" + "log" "os" "strings" ) -// archiveReader is an interface to abstract the behavior of different archive types. -type archiveReader interface { +// ArchiveReader is an interface to abstract the behavior of different archive types. 
+type ArchiveReader interface { FileNames() ([]string, error) ReadFile(name string) ([]byte, error) Close() error @@ -37,7 +38,12 @@ func (z *zipReader) ReadFile(name string) ([]byte, error) { if err != nil { return nil, err } - defer rc.Close() + defer func(rc io.ReadCloser) { + err := rc.Close() + if err != nil { + log.Printf("error closing reader: %v", err) + } + }(rc) return io.ReadAll(rc) } } @@ -51,7 +57,10 @@ type tarReader struct { } func (t *tarReader) resetReader() error { - t.Close() + err := t.Close() + if err != nil { + return err + } // Reopen the file f, err := os.Open(t.filename) @@ -62,7 +71,10 @@ func (t *tarReader) resetReader() error { if strings.HasSuffix(t.filename, ".tar.gz") || strings.HasSuffix(t.filename, ".tgz") { gzr, err := gzip.NewReader(f) if err != nil { - f.Close() + cerr := f.Close() + if cerr != nil { + return cerr + } return err } t.Reader = tar.NewReader(gzr) // Reset the tar reader with new gzip reader @@ -72,7 +84,12 @@ func (t *tarReader) resetReader() error { } func (t *tarReader) FileNames() ([]string, error) { - defer t.resetReader() + defer func(t *tarReader) { + err := t.resetReader() + if err != nil { + log.Printf("error resetting reader: %v", err) + } + }(t) var names []string for { hdr, err := t.Next() @@ -106,7 +123,7 @@ func (t *tarReader) Close() error { return t.closer.Close() } -func NewArchiveReader(fqn string) (archiveReader, error) { +func NewArchiveReader(fqn string) (ArchiveReader, error) { _, err := os.Stat(fqn) if errors.Is(err, os.ErrNotExist) { return nil, fmt.Errorf("no such file: %s", fqn) @@ -127,7 +144,10 @@ func NewArchiveReader(fqn string) (archiveReader, error) { if strings.HasSuffix(fqn, ".tar.gz") || strings.HasSuffix(fqn, ".tgz") { gzr, err := gzip.NewReader(f) if err != nil { - f.Close() + err := f.Close() + if err != nil { + log.Printf("error closing file: %v", err) + } return nil, err } r := tar.NewReader(gzr) @@ -138,7 +158,10 @@ func NewArchiveReader(fqn string) (archiveReader, error) { 
return &tarReader{fqn, r, f}, nil } - f.Close() + err = f.Close() + if err != nil { + log.Printf("error closing file: %v", err) + } return nil, fmt.Errorf("not a known archive format: %s", fqn) } diff --git a/distributions/distribution.go b/internal/distributions/distribution.go similarity index 85% rename from distributions/distribution.go rename to internal/distributions/distribution.go index dfdf4f2..54e027e 100644 --- a/distributions/distribution.go +++ b/internal/distributions/distribution.go @@ -5,6 +5,7 @@ import ( "io" "net/mail" "reflect" + "regexp" "strings" "unicode" @@ -51,7 +52,7 @@ func mustDecode(value interface{}) (string, error) { } func collapseLeadingWS(header, txt string) string { - if strings.ToLower(header) == "description" { // preserve newlines + if strings.ToLower(header) == "description" || strings.ToLower(header) == "license" { // preserve newlines lines := strings.Split(strings.TrimSpace(txt), "\n") for i, line := range lines { if strings.HasPrefix(line, " ") { // 8 spaces @@ -77,8 +78,8 @@ var HeaderAttrs1_0 = []HeaderAttr{ // PEP 241 {"Metadata-Version", "metadata_version", false}, {"Name", "name", false}, {"Version", "version", false}, - {"Platform", "platforms", true}, - {"Supported-Platform", "supported_platforms", true}, + {"Platform", "platform", true}, + {"Supported-Platform", "supported_platform", true}, {"Summary", "summary", false}, {"Description", "description", false}, {"Keywords", "keywords", false}, @@ -90,7 +91,7 @@ var HeaderAttrs1_0 = []HeaderAttr{ // PEP 241 var HeaderAttrs1_1 = append(HeaderAttrs1_0, []HeaderAttr{ // PEP 314 {"Classifier", "classifiers", true}, - {"Download-URL", "download_url", false}, + {"Download-Url", "download_url", false}, {"Requires", "requires", true}, {"Provides", "provides", true}, {"Obsoletes", "obsoletes", true}, @@ -104,7 +105,7 @@ var HeaderAttrs1_2 = append(HeaderAttrs1_1, []HeaderAttr{ // PEP 345 {"Requires-Dist", "requires_dist", true}, {"Provides-Dist", "provides_dist", true}, 
{"Obsoletes-Dist", "obsoletes_dist", true}, - {"Project-URL", "project_urls", true}, + {"Project-Url", "project_urls", true}, }...) var HeaderAttrs2_0 = HeaderAttrs1_2 //XXX PEP 426? @@ -129,12 +130,12 @@ type Distribution interface { ExtractMetadata() error Parse(data []byte) error - // Helper method to get values + // GetName is a helper method to get values GetName() string GetVersion() string GetPythonVersion() string - // This is used to return a map of all the metadata, + // MetadataMap is used to return a map of all the metadata, // similar to how twine passes the metadata in a multipart // form request. MetadataMap() map[string][]string @@ -145,8 +146,8 @@ type BaseDistribution struct { // version 1.0 Name string `json:"name"` Version string `json:"version"` - Platforms []string `json:"platforms"` - SupportedPlatforms []string `json:"supported_platforms"` + Platforms []string `json:"platform"` + SupportedPlatforms []string `json:"supported_platform"` Summary string `json:"summary"` Description string `json:"description"` Keywords string `json:"keywords"` @@ -207,11 +208,18 @@ func (bd *BaseDistribution) Parse(data []byte) error { headerValues := getAllHeaderValues(msg, headerAttr.HeaderName) if len(headerValues) != 0 { if headerAttr.Multiple { - bd.setJSONValue(headerAttr.AttrName, headerValues) + err := bd.setJSONValue(headerAttr.AttrName, headerValues) + if err != nil { + return err + } } else if headerValues[0] != "UNKNOWN" { - bd.setJSONValue(headerAttr.AttrName, headerValues[0]) + err := bd.setJSONValue(headerAttr.AttrName, headerValues[0]) + if err != nil { + return err + } } } + } body, err := io.ReadAll(msg.Body) @@ -220,7 +228,10 @@ func (bd *BaseDistribution) Parse(data []byte) error { } if body != nil { - bd.setJSONValue("description", string(body)) + err := bd.setJSONValue("description", string(body)) + if err != nil { + return err + } } return nil } @@ -228,12 +239,12 @@ func (bd *BaseDistribution) Parse(data []byte) error { func (bd 
*BaseDistribution) GetName() string { return bd.Name } + func (bd *BaseDistribution) GetVersion() string { return bd.Version } -// Remember to implement this for other distributions -// if they need something more specific (e.g. Wheels) +// TODO: remember to implement this for other distributions if they need something more specific (e.g. Wheels) func (bd *BaseDistribution) GetPythonVersion() string { return "" } @@ -303,3 +314,12 @@ func StructToMap(input interface{}) map[string][]string { return result } + +// Convert an arbitrary string to a standard distribution name. +// Any runs of non-alphanumeric/. characters are replaced with a single '-'. +// Copied from pkg_resources.safe_name for compatibility with warehouse. +// See https://github.com/pypa/twine/issues/743. +func SafeName(name string) string { + reg := regexp.MustCompile("[^A-Za-z0-9.]+") + return reg.ReplaceAllString(name, "-") +} diff --git a/distributions/metadata.go b/internal/distributions/metadata.go similarity index 100% rename from distributions/metadata.go rename to internal/distributions/metadata.go diff --git a/distributions/sdist.go b/internal/distributions/sdist.go similarity index 84% rename from distributions/sdist.go rename to internal/distributions/sdist.go index 2dc969d..18e7807 100644 --- a/distributions/sdist.go +++ b/internal/distributions/sdist.go @@ -3,11 +3,11 @@ package distributions import ( "bytes" "fmt" + "github.com/rstudio/python-distribution-parser/internal/archiver" + "log" "path/filepath" "sort" "strings" - - "github.com/rstudio/python-distribution-parser/archiver" ) type SDist struct { @@ -52,7 +52,12 @@ func (sd *SDist) read() ([]byte, error) { if err != nil { return nil, fmt.Errorf("error getting archive: %w", err) } - defer archiveReader.Close() // Ensure the archive is closed after reading + defer func(archiveReader archiver.ArchiveReader) { + err := archiveReader.Close() + if err != nil { + log.Printf("error closing reader: %v", err) + } + }(archiveReader) // 
Ensure the archive is closed after reading fileNames, err := archiveReader.FileNames() if err != nil { diff --git a/distributions/wheel.go b/internal/distributions/wheel.go similarity index 88% rename from distributions/wheel.go rename to internal/distributions/wheel.go index 47b931f..ca08571 100644 --- a/distributions/wheel.go +++ b/internal/distributions/wheel.go @@ -3,12 +3,12 @@ package distributions import ( "bytes" "fmt" + "github.com/rstudio/python-distribution-parser/internal/archiver" + "log" "path/filepath" "regexp" "sort" "strings" - - "github.com/rstudio/python-distribution-parser/archiver" ) var wheelFileRe = regexp.MustCompile(`^(?P<namever>(?P<name>.+?)(-(?P<ver>\d.+?))?)(?:(-(?P<build>\d.*?))?-(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?)\.whl|\.dist-info)$`) @@ -57,7 +57,12 @@ func (whl *Wheel) read() ([]byte, error) { if err != nil { return nil, fmt.Errorf("error getting archive: %w", err) } - defer archiveReader.Close() // Ensure the archive is closed after reading + defer func(archiveReader archiver.ArchiveReader) { + err := archiveReader.Close() + if err != nil { + log.Printf("error closing reader: %v", err) + } + }(archiveReader) // Ensure the archive is closed after reading fileNames, err := archiveReader.FileNames() if err != nil { diff --git a/packages/hash_manager.go b/internal/packages/hash_manager.go similarity index 94% rename from packages/hash_manager.go rename to internal/packages/hash_manager.go index a343881..63dc4de 100644 --- a/packages/hash_manager.go +++ b/internal/packages/hash_manager.go @@ -6,6 +6,7 @@ import ( "encoding/hex" "hash" "io" + "log" "os" "golang.org/x/crypto/blake2b" @@ -88,7 +89,12 @@ func (hm *HashManager) Hash() error { if err != nil { return err } - defer file.Close() + defer func(file *os.File) { + err := file.Close() + if err != nil { + log.Printf("error closing file: %v", err) + } + }(file) buffer := make([]byte, 64*1024) for { diff --git a/packages/package.go b/internal/packages/package.go similarity index 71% rename from packages/package.go rename to 
internal/packages/package.go index ff866b3..5f0330f 100644 --- a/packages/package.go +++ b/internal/packages/package.go @@ -2,12 +2,12 @@ package packages import ( "errors" + "github.com/rstudio/python-distribution-parser/internal/distributions" + "github.com/samber/lo" "io" + "log" "os" "path/filepath" - "regexp" - - "github.com/rstudio/python-distribution-parser/distributions" ) type PackageFile struct { @@ -31,15 +31,6 @@ type Signature struct { Bytes []byte `json:"signed_bytes"` } -// Convert an arbitrary string to a standard distribution name. -// Any runs of non-alphanumeric/. characters are replaced with a single '-'. -// Copied from pkg_resources.safe_name for compatibility with warehouse. -// See https://github.com/pypa/twine/issues/743. -func safeName(name string) string { - reg := regexp.MustCompile("[^A-Za-z0-9.]+") - return reg.ReplaceAllString(name, "-") -} - func NewPackageFile(filename string) (*PackageFile, error) { metadata, pythonVersion, fileType, err := distributions.NewDistributionMetadata(filename) if err != nil { @@ -47,7 +38,7 @@ func NewPackageFile(filename string) (*PackageFile, error) { } baseFilename := filepath.Base(filename) - safeName := safeName(metadata.GetName()) + safeName := distributions.SafeName(metadata.GetName()) signedFilename := filename + ".asc" signedBaseFilename := baseFilename + ".asc" @@ -56,7 +47,10 @@ func NewPackageFile(filename string) (*PackageFile, error) { if err != nil { return nil, err } - hashManager.Hash() + err = hashManager.Hash() + if err != nil { + return nil, err + } hexdigest := hashManager.HexDigest() return &PackageFile{ @@ -87,9 +81,46 @@ func (pf *PackageFile) MetadataMap() map[string][]string { result[mk] = mv } + result["name"] = result["safe_name"] // This makes the request look more like Twine result["protocol_version"] = []string{"1"} - delete(result, "metadata") + result[":action"] = []string{"file_upload"} + + ignoredKeys := []string{ + "base_filename", + "file_name", + "safe_name", + 
"signed_base_filename", + "signed_filename", + "metadata", + } + + for _, key := range ignoredKeys { + delete(result, key) + } + + allowedBlankValues := []string{ + "author", + "author_email", + "comment", + "download_url", + "home_page", + "keywords", + "license", + "maintainer", + "pyversion", + "description_content_type", + "maintainer_email", + "requires_python", + } + + // remove any keys that are an empty value, unless twine expects them + result = lo.OmitBy(result, func(key string, value []string) bool { + if lo.Contains(allowedBlankValues, key) { + return false + } + return value == nil || len(value) == 1 && (value[0] == "" || value[0] == "<nil>") + }) return result } @@ -103,7 +134,12 @@ func (pf *PackageFile) AddGPGSignature(signatureFilepath string, signatureFilena if err != nil { return err } - defer gpg.Close() + defer func(gpg *os.File) { + err := gpg.Close() + if err != nil { + log.Printf("error closing file: %v", err) + } + }(gpg) bytes, err := io.ReadAll(gpg) if err != nil { diff --git a/packages/utils.go b/internal/packages/utils.go similarity index 59% rename from packages/utils.go rename to internal/packages/utils.go index 843ef9f..ff55ce9 100644 --- a/packages/utils.go +++ b/internal/packages/utils.go @@ -5,15 +5,19 @@ import ( "os" ) -// GetFileSize returns the size of a file in KB, or MB if >= 1024 KB. +// GetFileSize returns the size of a file in KB, or MB if > 1024 KB. 
func GetFileSize(filename string) (string, error) { file, err := os.Stat(filename) if err != nil { return "", err } + return SizeToString(file.Size()), nil +} + +func SizeToString(size int64) string { // convert file size to KB - fileSize := float64(file.Size()) / 1024 + fileSize := float64(size) / 1024 sizeUnit := "KB" if fileSize > 1024 { @@ -22,5 +26,5 @@ func GetFileSize(filename string) (string, error) { sizeUnit = "MB" } - return fmt.Sprintf("%.1f %s", fileSize, sizeUnit), nil + return fmt.Sprintf("%.1f %s", fileSize, sizeUnit) } diff --git a/internal/packages/utils_test.go b/internal/packages/utils_test.go new file mode 100644 index 0000000..180a13e --- /dev/null +++ b/internal/packages/utils_test.go @@ -0,0 +1,40 @@ +package packages_test + +import ( + "fmt" + "github.com/rstudio/python-distribution-parser/internal/packages" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestSizeToString(t *testing.T) { + tests := []struct { + in int64 + out string + }{ + { + 0, "0.0 KB", + }, + { + 512, "0.5 KB", + }, + { + 1024, "1.0 KB", + }, + { + 1024 * 1024, "1024.0 KB", // exactly 1024 KB + }, + { + 1024*1024 + 1, "1.0 MB", // 1024 KB + 1 + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("%d is %s", test.in, test.out), func(t *testing.T) { + in := test.in + out := test.out + t.Parallel() + assert.EqualValues(t, out, packages.SizeToString(in)) + }) + } +} diff --git a/parse.go b/parse.go index c610655..961df84 100644 --- a/parse.go +++ b/parse.go @@ -3,12 +3,11 @@ package parse import ( "errors" "fmt" + "github.com/rstudio/python-distribution-parser/internal/packages" "os" "path/filepath" "sort" "strings" - - "github.com/rstudio/python-distribution-parser/packages" ) func endsWith(str, suffix string) bool { @@ -48,7 +47,7 @@ func findDistributions(dists []string) ([]string, error) { if err != nil { return nil, err } - if files == nil || len(files) == 0 { + if len(files) == 0 { return nil, fmt.Errorf("cannot find file (or expand pattern): 
%s", filename) } @@ -66,7 +65,10 @@ func makePackage(filename string, signatures map[string]string) (*packages.Packa signedName := packageFile.SignedBaseFilename if signature, exists := signatures[signedName]; exists { - packageFile.AddGPGSignature(signature, signedName) + err := packageFile.AddGPGSignature(signature, signedName) + if err != nil { + return nil, err + } } _, err = packages.GetFileSize(packageFile.Filename) diff --git a/parse_test.go b/parse_test.go new file mode 100644 index 0000000..4b8cf3b --- /dev/null +++ b/parse_test.go @@ -0,0 +1,239 @@ +package parse_test + +import ( + "fmt" + "github.com/google/go-cmp/cmp" + "github.com/rstudio/python-distribution-parser" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "math" + "net/http" + "net/http/httptest" + "net/url" + "path" + + "os" + "os/exec" + "path/filepath" + "testing" +) + +// testdata is the path that we should store cloned repositories +var testdata = "testdata/repositories/" + +// repositoryUrls is the list of repositories that we should test +var repositoryUrls = []string{ + "https://github.com/ActiveState/appdirs", + "https://github.com/pallets/click", + "https://github.com/python/importlib_metadata", + "https://github.com/matplotlib/matplotlib", + "https://github.com/sdispater/pendulum", + "https://github.com/pytest-dev/pytest", + "https://github.com/tkem/cachetools/", + "https://github.com/certifi/python-certifi", + "https://github.com/chardet/chardet", + "https://github.com/jaraco/configparser/", + "https://github.com/nedbat/coveragepy", + "https://github.com/micheles/decorator", + "https://github.com/tiran/defusedxml", +} + +// toRepositoryName converts a repository name to the name of the folder the repository will be cloned in +// for example, https://github.com/ActiveState/appdirs => appdirs +func toRepositoryName(repositoryUrl string) (string, error) { + result, err := url.Parse(repositoryUrl) + if err != nil { + return "", err + } + return 
path.Base(result.Path), nil +} + +// getRepositoryPath returns the path that a repository is cloned at +func getRepositoryPath(repository string) string { + return fmt.Sprintf("%s%s/", testdata, repository) +} + +// getDistributionPath returns the path where distribution tarballs are kept +func getDistributionPath(repository string) string { + return fmt.Sprintf("%sdist/", getRepositoryPath(repository)) +} + +// getTarballPath returns the path to a built tarball for a repository +func getTarballPath(repository string) (string, error) { + distributionPath := getDistributionPath(repository) + files, err := os.ReadDir(distributionPath) + if err != nil { + return "", err + } + + var tarballs []string + + for _, file := range files { + if filepath.Ext(file.Name()) == ".gz" { + tarballs = append(tarballs, file.Name()) + } + } + + if len(tarballs) != 1 { + return "", fmt.Errorf("unexpected number of .gz files in %s: %d", distributionPath, len(tarballs)) + } + return fmt.Sprintf("%s%s", distributionPath, tarballs[0]), nil +} + +// clone will clone a Git repository to disk if it does not already exist +func clone(repositoryUrl string) error { + repositoryName, err := toRepositoryName(repositoryUrl) + if err != nil { + return err + } + + _, err = os.Stat(getRepositoryPath(repositoryName)) + if os.IsNotExist(err) { + cmd := exec.Command("git", "clone", repositoryUrl) + cmd.Dir = testdata + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + err := cmd.Run() + return err + } + + return nil +} + +// buildDistribution builds a Python package with python -m build +func buildDistribution(repository string) error { + _, err := os.Stat(getDistributionPath(repository)) + if os.IsNotExist(err) { + cmd := exec.Command("python", "-m", "build") + cmd.Dir = getRepositoryPath(repository) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + err := cmd.Run() + return err + } + + return nil +} + +// getTwineMetadata returns the metadata that Twine generates +func getTwineMetadata(repository 
string) (map[string][]string, error) { + var metadata map[string][]string + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = r.ParseMultipartForm(math.MaxInt64) + metadata = r.MultipartForm.Value + })) + defer ts.Close() + + tarball, err := getTarballPath(repository) + if err != nil { + return nil, err + } + + cmd := exec.Command("twine", "upload", tarball) + cmd.Env = append(cmd.Env, fmt.Sprintf("TWINE_REPOSITORY_URL=%s", ts.URL)) + // twine requires these variables to be set + cmd.Env = append(cmd.Env, "TWINE_USERNAME=user") + cmd.Env = append(cmd.Env, "TWINE_PASSWORD=password") + + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + err = cmd.Run() + if err != nil { + return nil, err + } + + return metadata, nil +} + +// getParserMetadata runs the Go parser and returns the resulting metadata +func getParserMetadata(repository string) (map[string][]string, error) { + tarball, err := getTarballPath(repository) + if err != nil { + return nil, err + } + + result, err := parse.Parse(tarball) + if err != nil { + return nil, err + } + + if len(result) != 1 { + return nil, fmt.Errorf("unexpected length: %d", len(result)) + } + + distribution := result[0] + metadata := distribution.MetadataMap() + + return metadata, nil +} + +// checkRequirements ensures that all test requirements are installed +func checkRequirements() error { + _, err := exec.LookPath("twine") + if err != nil { + return err + } + + _, err = exec.LookPath("python") + if err != nil { + return err + } + + _, err = exec.LookPath("cargo") + if err != nil { + return err + } + + _, err = exec.LookPath("git") + if err != nil { + return err + } + + cmd := exec.Command("pip", "show", "build") + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + err = cmd.Run() + if err != nil { + return err + } + + cmd = exec.Command("pip", "show", "wheel") + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + err = cmd.Run() + if err != nil { + return err + } + + return nil +} + +func 
TestParse(t *testing.T) { + err := checkRequirements() + require.NoError(t, err) + + for _, repositoryUrl := range repositoryUrls { + t.Run(repositoryUrl, func(t *testing.T) { + url := repositoryUrl + repositoryName, err := toRepositoryName(url) + assert.NoError(t, err) + + t.Parallel() + err = clone(url) + assert.NoError(t, err) + + err = buildDistribution(repositoryName) + assert.NoError(t, err) + + expectedMetadata, err := getTwineMetadata(repositoryName) + assert.NoError(t, err) + + actualMetadata, err := getParserMetadata(repositoryName) + assert.NoError(t, err) + + // compare against the normalized outputs to account for expected differences between the two parsers + assert.Empty(t, cmp.Diff(expectedMetadata, actualMetadata)) + }) + } +} diff --git a/testdata/repositories/.gitkeep b/testdata/repositories/.gitkeep new file mode 100644 index 0000000..e69de29