cos-customizer: Automatically fetch COS image SBOM

When generating an SBOM with a COS image as the base image,
cos-customizer can automatically fetch the image's SBOM from
the GCS bucket cos-tools.

Updated the spdx golang tool version so that the field
"filesAnalyzed" is output correctly.

Fixed Document reference in field relationships.

SBOM input and output:
https://paste.googleplex.com/5880780147392512

BUG=b/277125638
TEST=Test run a build with COS base image.
RELEASE_NOTE=None

Change-Id: I163a4d36990a26ce66a8cbc2bffde104f9fc3c11
Reviewed-on: https://cos-review.googlesource.com/c/cos/tools/+/49829
Cloud-Build: GCB Service account <228075978874@cloudbuild.gserviceaccount.com>
Tested-by: He Gao <hegao@google.com>
Reviewed-by: Robert Kolchmeyer <rkolchmeyer@google.com>
diff --git a/deps.bzl b/deps.bzl
index 8346e65..0e4bb16 100644
--- a/deps.bzl
+++ b/deps.bzl
@@ -232,8 +232,8 @@
     go_repository(
         name = "com_github_spdx_tools_golang",
         importpath = "github.com/spdx/tools-golang",
-        sum = "h1:/fqihV2Jna7fmow65dHpgKNsilgLK7ICpd2tkCnPEyY=",
-        version = "v0.5.0",
+        sum = "h1:fJg3SVOGG+eIva9ZUBm/hvyA7PIPVFjRxUKe6fdAgwE=",
+        version = "v0.5.1",
     )
     go_repository(
         name = "com_github_stretchr_objx",
diff --git a/src/cmd/cos_customizer/README.md b/src/cmd/cos_customizer/README.md
index 9247669..c8665df 100644
--- a/src/cmd/cos_customizer/README.md
+++ b/src/cmd/cos_customizer/README.md
@@ -342,6 +342,8 @@
 Note: `SPDXPackages` has the same schema as field `packages` in
 [SPDX JSON Schema](https://github.com/spdx/spdx-spec/blob/master/schemas/spdx-schema.json).
 Field `filesAnalyzed` in `SPDXPackages` can be ambiguous in JSON format so it should be set explicitly.
+If a COS image from project `cos-cloud` is used as the base image, cos-customizer will automatically
+fetch its public SBOM from GCS bucket `cos-tools`, if available.
 
 `-sbom-output-path`: The GCS path for storing the output SBOM. The complete output file path is
 `<sbom-output-path>/<outputImageName>-<outputImageVersion>_sbom.json`. If `outputImageName` is not
diff --git a/src/cmd/cos_customizer/finish_image_build.go b/src/cmd/cos_customizer/finish_image_build.go
index 40c2e3d..ff90b62 100644
--- a/src/cmd/cos_customizer/finish_image_build.go
+++ b/src/cmd/cos_customizer/finish_image_build.go
@@ -389,7 +389,7 @@
 			log.Printf("failed to parse SBOM input file at %q, err: %v", f.sbomInputPath, err)
 			return subcommands.ExitFailure
 		}
-		if err := sbom.GenerateSBOM(outputImage.Image.Name); err != nil {
+		if err := sbom.GenerateSBOM(sourceImage, outputImage); err != nil {
 			log.Printf("failed to generate SBOM, err: %v", err)
 			return subcommands.ExitFailure
 		}
diff --git a/src/pkg/tools/sbomutil/BUILD.bazel b/src/pkg/tools/sbomutil/BUILD.bazel
index 0df4863..5d04e01 100644
--- a/src/pkg/tools/sbomutil/BUILD.bazel
+++ b/src/pkg/tools/sbomutil/BUILD.bazel
@@ -20,6 +20,7 @@
     importpath = "cos.googlesource.com/cos/tools.git/src/pkg/tools/sbomutil",
     visibility = ["//visibility:public"],
     deps = [
+        "//src/pkg/config",
         "//src/pkg/fs",
         "//src/pkg/gcs",
         "@com_github_spdx_tools_golang//spdx/v2/v2_2:go_default_library",
diff --git a/src/pkg/tools/sbomutil/sbomutil.go b/src/pkg/tools/sbomutil/sbomutil.go
index accf3eb..bcfd051 100644
--- a/src/pkg/tools/sbomutil/sbomutil.go
+++ b/src/pkg/tools/sbomutil/sbomutil.go
@@ -16,12 +16,16 @@
 
 import (
 	"context"
+	"crypto/sha256"
 	"encoding/json"
 	"fmt"
+	"io"
+	"log"
 	"strings"
 	"time"
 
 	"cloud.google.com/go/storage"
+	"cos.googlesource.com/cos/tools.git/src/pkg/config"
 	"cos.googlesource.com/cos/tools.git/src/pkg/fs"
 	"cos.googlesource.com/cos/tools.git/src/pkg/gcs"
 	spdx_common "github.com/spdx/tools-golang/spdx/v2/common"
@@ -29,23 +33,25 @@
 )
 
 const (
-	spdxDocID               = "SPDXRef-DOCUMENT"
-	spdxDocRef              = "DocumentRef-%s"
-	spdxRef                 = "SPDXRef-%s"
-	spdxNoAssert            = "NOASSERTION"
-	docNameSuffix           = "sbom.json"
-	creatorToolName         = "gcr.io/cos-cloud/cos-customizer"
-	spdxJsonFileNotAnalyzed = `      "filesAnalyzed": false,`
-	defaultRootPkgVersion   = "0"
+	spdxDocID             = "SPDXRef-DOCUMENT"
+	spdxDocRef            = "DocumentRef-%s"
+	spdxRef               = "SPDXRef-%s"
+	spdxNoAssert          = "NOASSERTION"
+	docNameSuffix         = "sbom.spdx.json"
+	creatorToolName       = "gcr.io/cos-cloud/cos-customizer"
+	defaultRootPkgVersion = "0"
+	cosCloud              = "cos-cloud"
+	cosTools              = "cos-tools"
+	cosToolsPublicURL     = "https://storage.googleapis.com/" + cosTools
+	cosImageSBOMName      = "sbom.spdx.json"
 )
 
 type SBOMCreator struct {
-	sbomInput               *SBOMInput
-	sbomOutput              *spdx2_2.Document
-	ctx                     context.Context
-	gcsClient               *storage.Client
-	files                   *fs.Files
-	filesNotAnalyzedPkgRefs []string
+	sbomInput  *SBOMInput
+	sbomOutput *spdx2_2.Document
+	ctx        context.Context
+	gcsClient  *storage.Client
+	files      *fs.Files
 }
 
 // NewSBOMCreator creates a new SBOMCreator.
@@ -104,6 +110,60 @@
 	return nil
 }
 
+// findCOSImageSBOM looks up the public SBOM of a COS base image in GCS bucket
+// cos-tools and returns an SBOMPackage that references it.
+//
+// The object path is <build number>/<subdir>/sbom.spdx.json. The build number is
+// the last three "-"-separated fields of sourceImage.Name joined with "."; subdir
+// is "lakitu-arm64" when the name contains "arm64" and "lakitu" otherwise. The
+// object is streamed through SHA256 to produce the package checksum.
+//
+// An error is returned when the name has fewer than three fields or when the
+// object cannot be opened or read.
+func (s *SBOMCreator) findCOSImageSBOM(sourceImage *config.Image) (*SBOMPackage, error) {
+	parts := strings.Split(sourceImage.Name, "-")
+	// Guard the slice expression below: a short name would otherwise panic
+	// instead of letting the caller continue without a base image SBOM.
+	if len(parts) < 3 {
+		return nil, fmt.Errorf("failed to parse build number from image name %q", sourceImage.Name)
+	}
+	buildNumber := strings.Join(parts[len(parts)-3:], ".")
+	subdir := "lakitu"
+	if strings.Contains(sourceImage.Name, "arm64") {
+		subdir = "lakitu-arm64"
+	}
+	sbomPath := fmt.Sprintf("%s/%s/%s", buildNumber, subdir, cosImageSBOMName)
+	sbomReader, err := s.gcsClient.Bucket(cosTools).Object(sbomPath).NewReader(s.ctx)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create gcs object reader for gs://%s/%s, err: %v", cosTools, sbomPath, err)
+	}
+	defer sbomReader.Close()
+	h := sha256.New()
+	if _, err := io.Copy(h, sbomReader); err != nil {
+		return nil, fmt.Errorf("failed to copy SBOM reader to SHA writer, err: %v", err)
+	}
+
+	return &SBOMPackage{
+		Name:          sourceImage.Name,
+		SpdxDocument:  fmt.Sprintf("%s/%s", cosToolsPublicURL, sbomPath),
+		Algorithm:     spdx_common.SHA256,
+		ChecksumValue: fmt.Sprintf("%x", h.Sum(nil)),
+	}, nil
+}
+
+// addExternalRef registers pkg as an external document reference and records
+// that rootPkg CONTAINS the SPDXRef-DOCUMENT element of that external document.
+func (s *SBOMCreator) addExternalRef(pkg *SBOMPackage, rootPkg *spdx2_2.Package) {
+	s.sbomOutput.ExternalDocumentReferences = append(s.sbomOutput.ExternalDocumentReferences, pkg.toExternalRef())
+	from := spdx_common.DocElementID{ElementRefID: rootPkg.PackageSPDXIdentifier}
+	to := spdx_common.DocElementID{DocumentRefID: pkg.Name, ElementRefID: spdx_common.ElementID(spdxDocID)}
+	s.sbomOutput.Relationships = append(s.sbomOutput.Relationships, &spdx2_2.Relationship{
+		RefA:         from,
+		RefB:         to,
+		Relationship: spdx_common.TypeRelationshipContains,
+	})
+}
+
 // Use NOASSERTION to fill required but empty fields.
 func (s *SBOMCreator) fillNoAssertion() {
 	for _, pkg := range s.sbomOutput.Packages {
@@ -127,26 +172,8 @@
 	return fmt.Sprintf("%v", time.Now().UTC().Format(time.RFC3339))
 }
 
-// There is a bug in unmarshaling field "filesAnalyzed"
-// https://github.com/spdx/tools-golang/issues/209
-// Solve the issue manually until it is fixed upstream.
-func (s *SBOMCreator) addFilesNotAnalyzed(content string) string {
-	lines := strings.Split(content, "\n")
-	for _, ref := range s.filesNotAnalyzedPkgRefs {
-		for idx, line := range lines {
-			if line != ref {
-				continue
-			}
-			lines = append(lines[:idx+1], lines[idx:]...)
-			lines[idx] = spdxJsonFileNotAnalyzed
-			break
-		}
-	}
-	return strings.Join(lines, "\n")
-}
-
 // GenerateSBOM uses the parsed input to generate an SPDX SBOM.
-func (s *SBOMCreator) GenerateSBOM(actualOutputImageName string) error {
+func (s *SBOMCreator) GenerateSBOM(sourceImage, actualOutputImage *config.Image) error {
 	// Add SBOM creation info.
 	s.sbomOutput.CreationInfo = &spdx2_2.CreationInfo{
 		Created: timeNow(),
@@ -170,7 +197,7 @@
 
 	// Use the actual output image name as SBOM output image name.
 	if s.sbomInput.OutputImageName == "" {
-		s.sbomInput.OutputImageName = actualOutputImageName
+		s.sbomInput.OutputImageName = actualOutputImage.Name
 		s.sbomInput.OutputImageVersion = ""
 	}
 
@@ -199,8 +226,6 @@
 		s.sbomOutput.DocumentName = fmt.Sprintf("%s-%s_%s", s.sbomInput.OutputImageName, s.sbomInput.OutputImageVersion, docNameSuffix)
 	}
 
-	s.filesNotAnalyzedPkgRefs = append(s.filesNotAnalyzedPkgRefs, fmt.Sprintf("      \"SPDXID\": \"%s\",", rootPkg.PackageSPDXIdentifier))
-
 	// Add root package and relationship for doc describing root package.
 	s.sbomOutput.Packages = append(s.sbomOutput.Packages, rootPkg)
 	s.sbomOutput.Relationships = append(s.sbomOutput.Relationships, &spdx2_2.Relationship{
@@ -209,6 +234,19 @@
 		Relationship: spdx_common.TypeRelationshipDescribe,
 	})
 
+	// Add base image if using public COS images.
+	if sourceImage.Project == cosCloud {
+		log.Println("Using COS image from cos-cloud, trying to find image SBOM...")
+		baseImagePkg, err := s.findCOSImageSBOM(sourceImage)
+		if err != nil {
+			// Allow the workflow to continue when using COS images without public SBOMs.
+			log.Printf("Failed to find COS image SBOM, please add base image as a package to SBOM input file, err: %v", err)
+		} else {
+			s.addExternalRef(baseImagePkg, rootPkg)
+			log.Println("Successfully added COS image SBOM.")
+		}
+	}
+
 	// Add SPDX packages and relationship for root package containing all those pacakges.
 	for _, pkg := range s.sbomInput.SPDXPackages {
 		s.sbomOutput.Packages = append(s.sbomOutput.Packages, pkg)
@@ -217,20 +255,11 @@
 			RefB:         spdx_common.DocElementID{ElementRefID: pkg.PackageSPDXIdentifier},
 			Relationship: spdx_common.TypeRelationshipContains,
 		})
-		if !pkg.FilesAnalyzed {
-			s.filesNotAnalyzedPkgRefs = append(s.filesNotAnalyzedPkgRefs, fmt.Sprintf("      \"SPDXID\": \"%s\",", fmt.Sprintf(spdxRef, pkg.PackageSPDXIdentifier)))
-		}
 	}
 
 	// Add SBOM packages and relationship for root package containing all those pacakges.
 	for _, pkg := range s.sbomInput.SBOMPackages {
-		extRef := pkg.toExternalRef()
-		s.sbomOutput.ExternalDocumentReferences = append(s.sbomOutput.ExternalDocumentReferences, extRef)
-		s.sbomOutput.Relationships = append(s.sbomOutput.Relationships, &spdx2_2.Relationship{
-			RefA:         spdx_common.DocElementID{ElementRefID: rootPkg.PackageSPDXIdentifier},
-			RefB:         spdx_common.DocElementID{ElementRefID: spdx_common.ElementID(extRef.DocumentRefID)},
-			Relationship: spdx_common.TypeRelationshipContains,
-		})
+		s.addExternalRef(pkg, rootPkg)
 	}
 
 	// Add extracted license info.
@@ -246,10 +275,8 @@
 	if err != nil {
 		return fmt.Errorf("failed to convert SBOM document into json: %v", err)
 	}
-	sbomOutputString := s.addFilesNotAnalyzed(string(sbomOutputBytes))
-
 	sbomOutputURL := fmt.Sprintf("%s/%s", outputGCSPath, s.sbomOutput.DocumentName)
-	if err := gcs.UploadGCSObjectString(s.ctx, s.gcsClient, sbomOutputString, sbomOutputURL); err != nil {
+	if err := gcs.UploadGCSObjectString(s.ctx, s.gcsClient, string(sbomOutputBytes), sbomOutputURL); err != nil {
 		return fmt.Errorf("Failed to upload SBOM to GCS %q, err: %v", outputGCSPath, err)
 	}
 	return nil
diff --git a/src/pkg/tools/sbomutil/sbomutil_test.go b/src/pkg/tools/sbomutil/sbomutil_test.go
index 4abe55a..3700557 100644
--- a/src/pkg/tools/sbomutil/sbomutil_test.go
+++ b/src/pkg/tools/sbomutil/sbomutil_test.go
@@ -19,6 +19,7 @@
 	"io/ioutil"
 	"testing"
 
+	"cos.googlesource.com/cos/tools.git/src/pkg/config"
 	"cos.googlesource.com/cos/tools.git/src/pkg/fakes"
 	"github.com/google/go-cmp/cmp"
 	spdx_common "github.com/spdx/tools-golang/spdx/v2/common"
@@ -88,7 +89,7 @@
 				SPDXVersion:       "SPDX-2.2",
 				DataLicense:       "CC0-1.0",
 				SPDXIdentifier:    spdx_common.ElementID("SPDXRef-DOCUMENT"),
-				DocumentName:      "image1_sbom.json",
+				DocumentName:      "image1-123_sbom.spdx.json",
 				DocumentNamespace: "NOASSERTION",
 				ExternalDocumentReferences: []spdx2_2.ExternalDocumentRef{
 					{
@@ -206,7 +207,9 @@
 			t.Parallel()
 			sbom := NewSBOMCreator(nil, nil, nil)
 			sbom.sbomInput = test.sbomInput
-			if err := sbom.GenerateSBOM(); (err != nil) != test.wantErr {
+			srcImage := &config.Image{}
+			outImage := &config.Image{}
+			if err := sbom.GenerateSBOM(srcImage, outImage); (err != nil) != test.wantErr {
 				t.Fatalf("Unexpected error status, want err: %v, got err: %v", test.wantErr, err)
 			}
 			if !test.wantErr {
@@ -250,7 +253,6 @@
   "name": "image1_sbom.json",
   "documentNamespace": "NOASSERTION",
   "creationInfo": {
-    "licenseListVersion": "",
     "creators": [
       "Tool: gcr.io/cos-cloud/cos-customizer",
       "Organization: G"