From cf98ff478930dae649168b53ae983b2fc958031f Mon Sep 17 00:00:00 2001
From: Ivana Atanasova
Date: Thu, 8 Sep 2022 17:54:08 +0300
Subject: [PATCH] Remove duplicates

This change removes duplicates of files, packages, other licenses,
external document references and relationships, based on checksums for
files, packages and external document references, license ID for other
licenses and identical data for relationships

Signed-off-by: Ivana Atanasova
---
 parser/save.go       | 36 ++++++++++++++---
 parser/spdx_utils.go | 95 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+), 5 deletions(-)
 create mode 100644 parser/spdx_utils.go

diff --git a/parser/save.go b/parser/save.go
index db0535c..8511211 100644
--- a/parser/save.go
+++ b/parser/save.go
@@ -57,11 +57,37 @@ func Save(doc *Document, composableDocs []*Document, output string, outFormat st
 
 func AppendComposableDocument(res *Document, cdoc *Document, w io.Writer, outFormat string) {
 	res.SPDXDocRef.Annotations = append(res.SPDXDocRef.Annotations, cdoc.SPDXDocRef.Annotations...)
-	res.SPDXDocRef.ExternalDocumentReferences = append(res.SPDXDocRef.ExternalDocumentReferences, cdoc.SPDXDocRef.ExternalDocumentReferences...)
-	res.SPDXDocRef.Files = append(res.SPDXDocRef.Files, cdoc.SPDXDocRef.Files...)
-	res.SPDXDocRef.OtherLicenses = append(res.SPDXDocRef.OtherLicenses, cdoc.SPDXDocRef.OtherLicenses...)
-	res.SPDXDocRef.Packages = append(res.SPDXDocRef.Packages, cdoc.SPDXDocRef.Packages...)
-	res.SPDXDocRef.Relationships = append(res.SPDXDocRef.Relationships, cdoc.SPDXDocRef.Relationships...)
+
+	for _, e := range cdoc.SPDXDocRef.ExternalDocumentReferences {
+		if isNotDuplicate(e.Checksum.Value, res, EDR) {
+			res.SPDXDocRef.ExternalDocumentReferences = append(res.SPDXDocRef.ExternalDocumentReferences, e)
+		}
+	}
+	for _, f := range cdoc.SPDXDocRef.Files {
+		if areNotIdenticalChecksums(f.Checksums, res, FL) {
+			res.SPDXDocRef.Files = append(res.SPDXDocRef.Files, f)
+		}
+	}
+	for _, ol := range cdoc.SPDXDocRef.OtherLicenses {
+		if isNotDuplicate(ol.LicenseIdentifier, res, OL) {
+			res.SPDXDocRef.OtherLicenses = append(res.SPDXDocRef.OtherLicenses, ol)
+		}
+	}
+	for _, p := range cdoc.SPDXDocRef.Packages {
+		if areNotIdenticalChecksums(p.PackageChecksums, res, PKG) {
+			res.SPDXDocRef.Packages = append(res.SPDXDocRef.Packages, p)
+		}
+	}
+	for _, r := range cdoc.SPDXDocRef.Relationships {
+		relStr := fmt.Sprintf("%s_%s_%s_%s_%s_%s_%s_%s",
+			r.RefA.DocumentRefID, r.RefA.ElementRefID, r.RefA.SpecialID,
+			r.Relationship,
+			r.RefB.DocumentRefID, r.RefB.ElementRefID, r.RefB.SpecialID,
+			r.RelationshipComment)
+		if isNotDuplicate(relStr, res, RL) {
+			res.SPDXDocRef.Relationships = append(res.SPDXDocRef.Relationships, r)
+		}
+	}
 	res.SPDXDocRef.Reviews = append(res.SPDXDocRef.Reviews, cdoc.SPDXDocRef.Reviews...)
 	res.SPDXDocRef.Snippets = append(res.SPDXDocRef.Snippets, cdoc.SPDXDocRef.Snippets...)
 }
diff --git a/parser/spdx_utils.go b/parser/spdx_utils.go
new file mode 100644
index 0000000..3ee3785
--- /dev/null
+++ b/parser/spdx_utils.go
@@ -0,0 +1,95 @@
+// Copyright (c) 2022 VMware, Inc. All Rights Reserved.
+// SPDX-License-Identifier: BSD-2-Clause
+
+package parser
+
+import (
+	"fmt"
+	"reflect"
+
+	"github.com/spdx/tools-golang/spdx"
+)
+
+// Element kinds accepted as the t argument of areNotIdenticalChecksums and
+// isNotDuplicate; each one selects the collection of the document to search.
+const (
+	EDR string = "external_document_reference"
+	FL         = "file"
+	OL         = "other_license"
+	PKG        = "package"
+	RL         = "relationship"
+)
+
+// areNotIdenticalChecksums reports whether checksums differs from the checksum
+// list of every element of kind t (FL or PKG) already present in doc.
+//
+// Lists are compared with reflect.DeepEqual, so they must match in length,
+// order and content to count as identical. An empty checksums list and any
+// other kind are never treated as duplicates: the function returns true.
+func areNotIdenticalChecksums(checksums []spdx.Checksum, doc *Document, t string) bool {
+	// Elements without checksums cannot be matched, so they are always kept.
+	if len(checksums) == 0 {
+		return true
+	}
+
+	switch t {
+	case FL:
+		for _, f := range doc.SPDXDocRef.Files {
+			if reflect.DeepEqual(checksums, f.Checksums) {
+				return false
+			}
+		}
+	case PKG:
+		for _, p := range doc.SPDXDocRef.Packages {
+			if reflect.DeepEqual(checksums, p.PackageChecksums) {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+// isNotDuplicate reports whether data identifies an element of kind t that is
+// not yet present in doc.
+//
+// The meaning of data depends on t:
+//   - EDR: the checksum value of an external document reference;
+//   - OL: the identifier of an other license;
+//   - RL: the relationship key, formatted as in AppendComposableDocument.
+//
+// Empty data and any other kind are never treated as duplicates: the function
+// returns true.
+func isNotDuplicate(data string, doc *Document, t string) bool {
+	// An empty key cannot be matched, so such elements are always kept.
+	if data == "" {
+		return true
+	}
+
+	switch t {
+	case EDR:
+		for _, edr := range doc.SPDXDocRef.ExternalDocumentReferences {
+			if edr.Checksum.Value == data {
+				return false
+			}
+		}
+	case OL:
+		for _, ol := range doc.SPDXDocRef.OtherLicenses {
+			if ol.LicenseIdentifier == data {
+				return false
+			}
+		}
+	case RL:
+		for _, r := range doc.SPDXDocRef.Relationships {
+			// Keep this format in sync with the key built in save.go.
+			relStr := fmt.Sprintf("%s_%s_%s_%s_%s_%s_%s_%s",
+				r.RefA.DocumentRefID, r.RefA.ElementRefID, r.RefA.SpecialID,
+				r.Relationship,
+				r.RefB.DocumentRefID, r.RefB.ElementRefID, r.RefB.SpecialID,
+				r.RelationshipComment)
+			if relStr == data {
+				return false
+			}
+		}
+	}
+	return true
+}