diff --git a/pkg/spdx/builder.go b/pkg/spdx/builder.go index 1e2d6a47..045a4363 100644 --- a/pkg/spdx/builder.go +++ b/pkg/spdx/builder.go @@ -191,6 +191,7 @@ func (builder *defaultDocBuilderImpl) GenerateDoc( if err != nil { return nil, errors.Wrap(err, "generating package from directory") } + doc.ensureUniqueElementID(pkg) if err := doc.AddPackage(pkg); err != nil { return nil, errors.Wrap(err, "adding directory package to document") @@ -204,6 +205,8 @@ func (builder *defaultDocBuilderImpl) GenerateDoc( if err != nil { return nil, errors.Wrapf(err, "generating SPDX package from image ref %s", i) } + doc.ensureUniqueElementID(p) + doc.ensureUniquePeerIDs(p.GetRelationships()) if err := doc.AddPackage(p); err != nil { return nil, errors.Wrap(err, "adding package to document") } @@ -211,11 +214,13 @@ func (builder *defaultDocBuilderImpl) GenerateDoc( // Process OCI image archives for _, tb := range genopts.Tarballs { - logrus.Infof("Processing tarball %s", tb) + logrus.Infof("Processing image archive %s", tb) p, err := spdx.PackageFromImageTarball(tb) if err != nil { return nil, errors.Wrap(err, "generating tarball package") } + doc.ensureUniqueElementID(p) + doc.ensureUniquePeerIDs(p.GetRelationships()) if err := doc.AddPackage(p); err != nil { return nil, errors.Wrap(err, "adding package to document") } @@ -228,6 +233,8 @@ func (builder *defaultDocBuilderImpl) GenerateDoc( if err != nil { return nil, errors.Wrap(err, "creating spdx package from archive") } + doc.ensureUniqueElementID(p) + doc.ensureUniquePeerIDs(p.GetRelationships()) if err := doc.AddPackage(p); err != nil { return nil, errors.Wrap(err, "adding package to document") } @@ -240,6 +247,7 @@ func (builder *defaultDocBuilderImpl) GenerateDoc( if err != nil { return nil, errors.Wrap(err, "adding file") } + doc.ensureUniqueElementID(f) if err := doc.AddFile(f); err != nil { return nil, errors.Wrap(err, "adding file to document") } diff --git a/pkg/spdx/document.go b/pkg/spdx/document.go index a4bb9b9f..f763b36c 100644 --- a/pkg/spdx/document.go +++ b/pkg/spdx/document.go @@ -175,12 +175,13 @@ func (d *Document) AddPackage(pkg *Package) error { if pkg.SPDXID() == "" { pkg.BuildID(pkg.Name) + d.ensureUniqueElementID(pkg) } if pkg.SPDXID() == "" { - return errors.New("package id is needed to add a new package") + return errors.New("package ID is needed to add a new package") } if _, ok := d.Packages[pkg.SPDXID()]; ok { - return errors.New("a package named " + pkg.SPDXID() + " already exists in the document") + return errors.Errorf("a package with ID %s already exists in the document", pkg.SPDXID()) } d.Packages[pkg.SPDXID()] = pkg @@ -278,6 +279,7 @@ func (d *Document) AddFile(file *File) error { } file.ID = "SPDXRef-File-" + fmt.Sprintf("%x", h.Sum(nil)) } + d.ensureUniqueElementID(file) d.Files[file.ID] = file return nil } @@ -398,3 +400,75 @@ func (d *Document) WriteProvenanceStatement(opts *ProvenanceOptions, path string "writing sbom as provenance statement", ) } + +// ensureUniquePackageID takes a string and checks if +// there is another string with the same name in the document. +// If there is one, it will append a digit until a unique name +// is found. +func (d *Document) ensureUniqueElementID(o Object) { + newID := o.SPDXID() + i := 0 + for { + // Check if there us already an element with the same ID + if el := d.GetElementByID(newID); el == nil { + if o.SPDXID() != newID { + logrus.Infof( + "Element name changed from %s to %s to ensure it is unique", + o.SPDXID(), newID, + ) + } + o.SetSPDXID(newID) + break + } + i++ + newID = fmt.Sprintf("%s-%04d", o.SPDXID(), i) + } +} + +// ensureUniquePeerIDs gets a relationship collection and ensures all peers +// have unique IDs +func (d *Document) ensureUniquePeerIDs(rels *[]*Relationship) { + // First, ensure peer names are unique among themselves + seen := map[string]struct{}{} + for _, rel := range *rels { + if rel.Peer == nil || rel.Peer.SPDXID() == "" { + continue + } + testName := rel.Peer.SPDXID() + i := 0 + for { + if _, ok := seen[testName]; !ok { + rel.Peer.SetSPDXID(testName) + seen[testName] = struct{}{} + break + } + i++ + testName = fmt.Sprintf("%s-%04d", rel.Peer.SPDXID(), i) + } + } + + // And then check against the document + for _, rel := range *rels { + if rel.Peer == nil { + continue + } + d.ensureUniqueElementID(rel.Peer) + } +} + +// GetPackageByID queries the packages to search for a specific entity by name +// note that this method returns a copy of the entity if found. +func (d *Document) GetElementByID(id string) Object { + seen := map[string]struct{}{} + for _, p := range d.Packages { + if sub := recursiveSearch(id, p, &seen); sub != nil { + return sub + } + } + for _, f := range d.Files { + if sub := recursiveSearch(id, f, &seen); sub != nil { + return sub + } + } + return nil +} diff --git a/pkg/spdx/document_unit_test.go b/pkg/spdx/document_unit_test.go index 2d6e8f8f..719212b6 100644 --- a/pkg/spdx/document_unit_test.go +++ b/pkg/spdx/document_unit_test.go @@ -118,3 +118,54 @@ func compareSubjects(t *testing.T, statement1, statement2 *provenance.Statement) } } } + +func TestEnsureUniqueElementID(t *testing.T) { + doc := NewDocument() + name := "same-name" + for i := 0; i < 3; i++ { + subp := NewPackage() + subp.SetSPDXID(name) + + // Passing the subpackages through nsureUniquePackageID + // should rename the last two, but not the first one + doc.ensureUniqueElementID(subp) + require.NoError(t, doc.AddPackage(subp)) + + if i == 0 { + require.Equal(t, name, subp.SPDXID()) + } else { + require.NotEqual(t, name, subp.SPDXID()) + } + } +} + +func TestEnsureUniquePeerIDs(t *testing.T) { + doc := NewDocument() + name := "same-name" + + // Add one node with the name + namePkg := NewPackage() + namePkg.SetSPDXID(name) + + // Build a package with 3 peers all with the same name + p := NewPackage() + p.SetSPDXID("parentNode") + for i := 0; i < 3; i++ { + subp := NewPackage() + subp.SetSPDXID(name) + require.NoError(t, p.AddPackage(subp)) + } + + // Pass the package with its peers through ensureUniquePeerIDs + doc.ensureUniquePeerIDs(p.GetRelationships()) + + // Now, they should all be different + seenNames := map[string]struct{}{} + rels := p.GetRelationships() + for _, rel := range *rels { + logrus.Info("Checking " + rel.Peer.SPDXID()) + _, ok := seenNames[rel.Peer.SPDXID()] + require.False(t, ok, rel.Peer.SPDXID()+" should be unique") + seenNames[rel.Peer.SPDXID()] = struct{}{} + } +} diff --git a/pkg/spdx/file.go b/pkg/spdx/file.go index 1e784f36..f3171032 100644 --- a/pkg/spdx/file.go +++ b/pkg/spdx/file.go @@ -190,3 +190,13 @@ func getFileContentType(path string) (string, error) { contentType := http.DetectContentType(buffer) return contentType, nil } + +// GetElementByID search the file and its peers looking for the +// specified SPDX id. If found, the function returns a copy of +// the object identified by the SPDX-ID provided +func (f *File) GetElementByID(id string) Object { + if f.SPDXID() == id { + return f + } + return recursiveSearch(id, f, &map[string]struct{}{}) +} diff --git a/pkg/spdx/object.go b/pkg/spdx/object.go index ed76c031..b7a100ee 100644 --- a/pkg/spdx/object.go +++ b/pkg/spdx/object.go @@ -40,6 +40,7 @@ import ( // objects. Currently this includes files and packages. type Object interface { SPDXID() string + SetSPDXID(string) ReadSourceFile(string) error Render() (string, error) BuildID(seeds ...string) @@ -48,6 +49,7 @@ type Object interface { GetRelationships() *[]*Relationship ToProvenanceSubject() *intoto.Subject getProvenanceSubjects(opts *ProvenanceOptions, seen *map[string]struct{}) []intoto.Subject + GetElementByID(string) Object } type Entity struct { @@ -77,6 +79,11 @@ func (e *Entity) SPDXID() string { return e.ID } +// SPDXID returns the SPDX reference string for the object +func (e *Entity) SetSPDXID(id string) { + e.ID = id +} + // BuildID sets the file ID, optionally from a series of strings func (e *Entity) BuildID(seeds ...string) { if len(seeds) <= 1 { @@ -269,3 +276,6 @@ mloop: } return ret } + +// GetElementByID nil function to be overridden by package and file +func (e *Entity) GetElementByID(string) Object { return nil } diff --git a/pkg/spdx/package.go b/pkg/spdx/package.go index 040c1be7..897fa9e3 100644 --- a/pkg/spdx/package.go +++ b/pkg/spdx/package.go @@ -388,6 +388,8 @@ func (p *Package) Draw(builder *strings.Builder, o *DrawingOptions, depth int, s } } +// ReadSourceFile reads a file from the filesystem and assigns its properties +// to the package metadata func (p *Package) ReadSourceFile(path string) error { if err := p.Entity.ReadSourceFile(path); err != nil { return err @@ -397,3 +399,12 @@ func (p *Package) ReadSourceFile(path string) error { } return nil } + +// GetElementByID search the package and its peers looking for the specified SPDX +// id. If found, the function returns a copy of the object +func (p *Package) GetElementByID(id string) Object { + if p.SPDXID() == id { + return p + } + return recursiveSearch(id, p, &map[string]struct{}{}) +} diff --git a/pkg/spdx/spdx.go b/pkg/spdx/spdx.go index d5ad1167..cb0c520e 100644 --- a/pkg/spdx/spdx.go +++ b/pkg/spdx/spdx.go @@ -246,3 +246,30 @@ func Banner() string { } return string(d) } + +// recursiveSearch is a function that recursively searches an object's peers +// to find the specified SPDX ID. If found, returns a copy of the object. +// nolint:gocritic // seen is a pointer recursively populated +func recursiveSearch(id string, o Object, seen *map[string]struct{}) Object { + if o.SPDXID() == id { + return o + } + for _, rel := range *o.GetRelationships() { + if rel.Peer == nil { + continue + } + + if _, ok := (*seen)[o.SPDXID()]; ok { + continue + } + + if rel.Peer.SPDXID() == id { + return rel.Peer + } + + if peerObject := recursiveSearch(id, rel.Peer, seen); peerObject != nil { + return peerObject + } + } + return nil +} diff --git a/pkg/spdx/spdx_unit_test.go b/pkg/spdx/spdx_unit_test.go index f7369847..374ca897 100644 --- a/pkg/spdx/spdx_unit_test.go +++ b/pkg/spdx/spdx_unit_test.go @@ -402,3 +402,53 @@ func TestIgnorePatterns(t *testing.T) { require.NoError(t, err) require.Len(t, p, 4) } + +func TestRecursiveSearch(t *testing.T) { + p := NewPackage() + p.SetSPDXID("p-top") + + // Lets nest 3 packages + packages := []*Package{} + for i := 0; i < 3; i++ { + subp := NewPackage() + subp.SetSPDXID(fmt.Sprintf("subpackage-%d", i)) + packages = append(packages, subp) + } + for i, sp := range packages { + if i > 0 { + require.NoError(t, packages[i-1].AddPackage(sp)) + } + } + require.NoError(t, p.AddPackage(packages[0])) + + // This functions searches 3 packages with the same prefix: + checkSubPackages := func(p *Package, radix string) { + for i := 0; i < 3; i++ { + require.NotNil(t, p.GetElementByID(fmt.Sprintf("%s-%d", radix, i)), + fmt.Sprintf("searching for %s", radix), + ) + } + } + + checkSubPackages(p, "subpackage") + // Non existent packages should not return an element + require.Nil(t, p.GetElementByID("subpackage-10000000")) + + // Now bifurcating the document structure adding dependencies should + // not alter those conditions. + + // Lets add 3 dependencies to one of the nested packages + for i := 0; i < 3; i++ { + subp := NewPackage() + subp.SetSPDXID(fmt.Sprintf("dep-%d", i)) + require.NoError(t, p.GetElementByID("subpackage-1").(*Package).AddPackage(subp)) + } + + // Same tests should still pass: + checkSubPackages(p, "subpackage") + // Non existent packages should not return an element + require.Nil(t, p.GetElementByID("subpackage-10000000")) + + // But also dependencies should be found + checkSubPackages(p, "dep") +}