Skip to content

Commit dcc5ccd

Browse files
committed
sbom/spdx: add SPDX decoder
Adds a Decoder interface to the sbom package and implements an SPDX JSON decoder that converts SPDX documents back to IndexReport format. Includes round-trip tests and test coverage with real-world SPDX documents from Konflux. Signed-off-by: Brad Lugo <blugo@redhat.com>
1 parent a24dd16 commit dcc5ccd

15 files changed

+96468
-13
lines changed

sbom/sbom.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,8 @@ import (
1111
type Encoder interface {
1212
Encode(ctx context.Context, w io.Writer, ir *claircore.IndexReport) error
1313
}
14+
15+
// Decoder is an interface to convert an encoded SBOM into a [claircore.IndexReport].
16+
type Decoder interface {
17+
Decode(ctx context.Context, r io.Reader) (*claircore.IndexReport, error)
18+
}

sbom/spdx/decoder.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,18 @@ import (
99
"strconv"
1010

1111
"github.com/package-url/packageurl-go"
12-
"github.com/quay/claircore/sbom"
1312
spdxjson "github.com/spdx/tools-golang/json"
1413
"github.com/spdx/tools-golang/spdx/v2/v2_3"
1514

1615
"github.com/quay/claircore"
1716
"github.com/quay/claircore/purl"
17+
"github.com/quay/claircore/sbom"
1818
)
1919

2020
// DecoderOption is a type for setting optional fields for the Decoder.
2121
type DecoderOption func(*Decoder)
2222

23-
// Decoder defines an SPDX decoder that converts SPDX documents to IndexReports.
23+
// Decoder defines an SPDX decoder that converts SPDX documents to [claircore.IndexReport].
2424
type Decoder struct {
2525
// The data format to decode.
2626
Format Format
@@ -58,10 +58,10 @@ func WithDecoderPURLConverter(registry purl.Converter) DecoderOption {
5858
}
5959
}
6060

61-
// Decode decodes an SPDX document from r and returns an IndexReport.
61+
// Decode decodes an SPDX document from r and returns a [claircore.IndexReport].
6262
//
6363
// Known limitations:
64-
// - Only package indexing via PURL ExternalRefs is supported.
64+
// - Only package indexing via PURL ExternalRefs is supported.
6565
func (d *Decoder) Decode(ctx context.Context, r io.Reader) (*claircore.IndexReport, error) {
6666
var doc *v2_3.Document
6767
var err error
@@ -115,7 +115,7 @@ func (d *Decoder) parseDocument(ctx context.Context, doc *v2_3.Document) (*clair
115115
if err != nil {
116116
slog.WarnContext(ctx, "failed to parse PURL string to PURL",
117117
"purl", ref.Locator,
118-
"error", err)
118+
"reason", err)
119119
continue
120120
}
121121

@@ -130,7 +130,7 @@ func (d *Decoder) parseDocument(ctx context.Context, doc *v2_3.Document) (*clair
130130
} else {
131131
slog.WarnContext(ctx, "failed to parse PURL to IndexRecords",
132132
"purl", ref.Locator,
133-
"error", err)
133+
"reason", err)
134134
}
135135
continue
136136
}

sbom/spdx/decoder_test.go

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
package spdx
2+
3+
import (
4+
"context"
5+
"os"
6+
"strings"
7+
"testing"
8+
9+
"github.com/package-url/packageurl-go"
10+
"github.com/quay/claircore/purl"
11+
"github.com/quay/claircore/python"
12+
"github.com/quay/claircore/rhel"
13+
)
14+
15+
func TestDecoder(t *testing.T) {
16+
ctx := context.Background()
17+
18+
t.Run("konflux-manifest", func(t *testing.T) {
19+
// konflux-manifest.spdx.json contains only OCI PURLs (5 total)
20+
// There's no standalone OCI PURL parser in the ecosystem packages,
21+
// so this tests that the decoder handles unknown PURL types gracefully.
22+
reg := purl.NewRegistry()
23+
24+
d := NewDefaultDecoder(WithDecoderPURLConverter(reg))
25+
26+
f, err := os.Open("testdata/decoder/konflux-manifest.spdx.json")
27+
if err != nil {
28+
t.Skip("testdata file not available:", err)
29+
}
30+
defer f.Close()
31+
32+
ir, err := d.Decode(ctx, f)
33+
if err != nil {
34+
t.Fatal(err)
35+
}
36+
37+
// OCI PURLs are not registered, so we expect no packages
38+
t.Logf("decoded %d packages from konflux-manifest.spdx.json (OCI PURLs not registered)", len(ir.Packages))
39+
})
40+
41+
t.Run("konflux-syft+hermeto", func(t *testing.T) {
42+
// konflux-syft+hermeto.spdx.json contains:
43+
// - rpm/redhat PURLs (requires repository_cpes qualifier to parse)
44+
// - pypi PURLs (should parse successfully)
45+
// - oci PURLs (no parser registered)
46+
repoMap := map[string][]string{
47+
"rhel-9-for-aarch64-appstream-rpms": {"cpe:/a:redhat:enterprise_linux:9::appstream"},
48+
"rhel-9-for-aarch64-appstream-source-rpms": {"cpe:/a:redhat:enterprise_linux:9::appstream"},
49+
"rhel-9-for-aarch64-baseos-rpms": {"cpe:/o:redhat:enterprise_linux:9::baseos"},
50+
"rhel-9-for-aarch64-baseos-source-rpms": {"cpe:/o:redhat:enterprise_linux:9::baseos"},
51+
"rhel-9-for-ppc64le-appstream-rpms": {"cpe:/a:redhat:enterprise_linux:9::appstream"},
52+
"rhel-9-for-ppc64le-appstream-source-rpms": {"cpe:/a:redhat:enterprise_linux:9::appstream"},
53+
"rhel-9-for-ppc64le-baseos-rpms": {"cpe:/o:redhat:enterprise_linux:9::baseos"},
54+
"rhel-9-for-ppc64le-baseos-source-rpms": {"cpe:/o:redhat:enterprise_linux:9::baseos"},
55+
"rhel-9-for-s390x-appstream-rpms": {"cpe:/a:redhat:enterprise_linux:9::appstream"},
56+
"rhel-9-for-s390x-appstream-source-rpms": {"cpe:/a:redhat:enterprise_linux:9::appstream"},
57+
"rhel-9-for-s390x-baseos-rpms": {"cpe:/o:redhat:enterprise_linux:9::baseos"},
58+
"rhel-9-for-s390x-baseos-source-rpms": {"cpe:/o:redhat:enterprise_linux:9::baseos"},
59+
"rhel-9-for-x86_64-appstream-rpms": {"cpe:/a:redhat:enterprise_linux:9::appstream"},
60+
"rhel-9-for-x86_64-appstream-source-rpms": {"cpe:/a:redhat:enterprise_linux:9::appstream"},
61+
"rhel-9-for-x86_64-baseos-rpms": {"cpe:/o:redhat:enterprise_linux:9::baseos"},
62+
"rhel-9-for-x86_64-baseos-source-rpms": {"cpe:/o:redhat:enterprise_linux:9::baseos"},
63+
}
64+
reg := purl.NewRegistry()
65+
reg.RegisterPurlType(python.PURLType, purl.NoneNamespace, python.ParsePURL)
66+
reg.RegisterPurlType(rhel.PURLType, rhel.PURLNamespace, rhel.ParseRPMPURL, mockTransformer(repoMap))
67+
68+
d := NewDefaultDecoder(WithDecoderPURLConverter(reg))
69+
70+
f, err := os.Open("testdata/decoder/konflux-syft+hermeto.spdx.json")
71+
if err != nil {
72+
t.Skip("testdata file not available:", err)
73+
}
74+
defer f.Close()
75+
76+
ir, err := d.Decode(ctx, f)
77+
if err != nil {
78+
t.Fatal(err)
79+
}
80+
81+
// 1511 valid rpm/redhat PURLs
82+
// - 188 invalid valid rpm/redhat PURLs (no repository_id)
83+
// + 18 valid pypi PURLs
84+
// = 1341 packages
85+
if len(ir.Packages) != 1341 {
86+
t.Errorf("expected %d packages, got %d", 1341, len(ir.Packages))
87+
}
88+
t.Logf("decoded %d packages, %d distributions, %d repositories",
89+
len(ir.Packages), len(ir.Distributions), len(ir.Repositories))
90+
})
91+
}
92+
93+
func TestDecoderNoPURLConverter(t *testing.T) {
94+
ctx := context.Background()
95+
96+
// Decoder without a PURL converter should return an empty IndexReport.
97+
d := NewDefaultDecoder()
98+
99+
f, err := os.Open("testdata/decoder/konflux-manifest.spdx.json")
100+
if err != nil {
101+
t.Fatal(err)
102+
}
103+
defer f.Close()
104+
105+
ir, err := d.Decode(ctx, f)
106+
if err != nil {
107+
t.Fatal(err)
108+
}
109+
110+
// Without PURL converter, should have no packages
111+
if len(ir.Packages) != 0 {
112+
t.Errorf("expected no packages without PURL converter, got %d", len(ir.Packages))
113+
}
114+
}
115+
116+
func mockTransformer(repoMap map[string][]string) func(ctx context.Context, p *packageurl.PackageURL) error {
117+
return func(ctx context.Context, p *packageurl.PackageURL) error {
118+
// It has already been transformed, or doesn't need to be.
119+
if _, ok := p.Qualifiers.Map()["repository_cpes"]; ok {
120+
return nil
121+
}
122+
repoid, ok := p.Qualifiers.Map()["repository_id"]
123+
if !ok {
124+
return nil
125+
}
126+
if cpes, ok := repoMap[repoid]; ok {
127+
cpesStr := strings.Join(cpes, ",")
128+
p.Qualifiers = append(p.Qualifiers, packageurl.Qualifier{Key: "repository_cpes", Value: cpesStr})
129+
}
130+
return nil
131+
}
132+
}

sbom/spdx/encoder_test.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@ import (
44
"bytes"
55
"context"
66
"encoding/json"
7-
"fmt"
87
"io/fs"
98
"os"
10-
"path"
119
"strings"
1210
"testing"
1311

@@ -42,14 +40,14 @@ func TestEncoder(t *testing.T) {
4240
WithPURLConverter(pr)(e)
4341

4442
ctx := context.Background()
45-
td := os.DirFS("testdata")
43+
td := os.DirFS("testdata/round-trip")
4644
de, err := fs.ReadDir(td, ".")
4745
if err != nil {
4846
t.Fatal(err)
4947
}
5048
for _, de := range de {
5149
n := de.Name()
52-
if strings.HasSuffix(n, ".want.json") {
50+
if de.IsDir() || !strings.HasSuffix(n, ".ir.json") {
5351
continue
5452
}
5553
t.Run(n, func(t *testing.T) {
@@ -58,9 +56,8 @@ func TestEncoder(t *testing.T) {
5856
t.Fatal(err)
5957
}
6058
defer f.Close()
61-
ext := path.Ext(n)
62-
base := strings.TrimSuffix(n, ext)
63-
wantPath := fmt.Sprintf("%s.want%s", base, ext)
59+
base := strings.TrimSuffix(n, ".ir.json")
60+
wantPath := base + ".spdx.json"
6461
w, err := td.Open(wantPath)
6562
if err != nil {
6663
t.Fatal(err)

sbom/spdx/round_trip_test.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
package spdx
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"encoding/json"
7+
"io/fs"
8+
"os"
9+
"strings"
10+
"testing"
11+
12+
"github.com/quay/claircore"
13+
"github.com/quay/claircore/gobin"
14+
"github.com/quay/claircore/purl"
15+
)
16+
17+
// TestRoundTrip tests encoding an IndexReport to SPDX and decoding it back.
18+
func TestRoundTrip(t *testing.T) {
19+
ctx := context.Background()
20+
21+
// Create a registry for both encoding and decoding.
22+
// For golang PURLs, we need to register parsers for all namespaces we might encounter.
23+
// The golang PURL generator splits package names like "github.com/example/pkg" into:
24+
// namespace: github.com, name: example, subpath: pkg
25+
// So we need to register the parser for the "github.com" namespace.
26+
reg := purl.NewRegistry()
27+
reg.RegisterDetector(gobin.Detector{}, gobin.GeneratePURL)
28+
reg.RegisterPurlType(gobin.PURLType, purl.NoneNamespace, gobin.ParsePURL)
29+
reg.RegisterPurlType(gobin.PURLType, "github.com", gobin.ParsePURL)
30+
31+
original := &claircore.IndexReport{
32+
State: "IndexFinished",
33+
Success: true,
34+
Packages: map[string]*claircore.Package{
35+
"1": {
36+
ID: "1",
37+
Name: "github.com/example/pkg",
38+
Version: "v1.2.3",
39+
Kind: claircore.BINARY,
40+
Detector: &claircore.Detector{
41+
Name: "gobin",
42+
Version: "1",
43+
Kind: "package",
44+
},
45+
},
46+
},
47+
Distributions: map[string]*claircore.Distribution{},
48+
Repositories: map[string]*claircore.Repository{},
49+
Environments: map[string][]*claircore.Environment{
50+
"1": {{PackageDB: "go.sum"}},
51+
},
52+
}
53+
54+
// Encode to SPDX.
55+
encoder := NewDefaultEncoder(
56+
WithPURLConverter(reg),
57+
WithDocumentName("test"),
58+
WithDocumentNamespace("test-ns"),
59+
)
60+
61+
var buf bytes.Buffer
62+
if err := encoder.Encode(ctx, &buf, original); err != nil {
63+
t.Fatalf("encode failed: %v", err)
64+
}
65+
66+
// Decode back to IndexReport.
67+
decoder := NewDefaultDecoder(WithDecoderPURLConverter(reg))
68+
decoded, err := decoder.Decode(ctx, &buf)
69+
if err != nil {
70+
t.Fatalf("decode failed: %v", err)
71+
}
72+
73+
// Compare key fields.
74+
if len(decoded.Packages) != len(original.Packages) {
75+
t.Errorf("package count mismatch: got %d, want %d", len(decoded.Packages), len(original.Packages))
76+
}
77+
78+
// Check that we got a package with the right name and version.
79+
foundPkg := false
80+
for _, pkg := range decoded.Packages {
81+
if pkg.Name == "github.com/example/pkg" && pkg.Version == "v1.2.3" {
82+
foundPkg = true
83+
break
84+
}
85+
}
86+
if !foundPkg {
87+
t.Error("expected to find package github.com/example/pkg@v1.2.3 after round-trip")
88+
}
89+
}
90+
91+
// TestRoundTripTestdata tests round-tripping the testdata files.
92+
func TestRoundTripTestdata(t *testing.T) {
93+
ctx := context.Background()
94+
95+
// Read all .ir.json files from testdata/round-trip.
96+
td := os.DirFS("testdata/round-trip")
97+
entries, err := fs.ReadDir(td, ".")
98+
if err != nil {
99+
t.Fatal(err)
100+
}
101+
102+
reg := purl.NewRegistry()
103+
reg.RegisterPurlType(gobin.PURLType, purl.NoneNamespace, gobin.ParsePURL)
104+
105+
for _, entry := range entries {
106+
name := entry.Name()
107+
if entry.IsDir() || !strings.HasSuffix(name, ".ir.json") {
108+
continue
109+
}
110+
111+
t.Run(name, func(t *testing.T) {
112+
// Read the original IndexReport.
113+
f, err := td.Open(name)
114+
if err != nil {
115+
t.Fatal(err)
116+
}
117+
defer f.Close()
118+
119+
var original claircore.IndexReport
120+
if err := json.NewDecoder(f).Decode(&original); err != nil {
121+
t.Fatal(err)
122+
}
123+
124+
// Encode to SPDX.
125+
encoder := NewDefaultEncoder(
126+
WithDocumentName("test"),
127+
WithDocumentNamespace("test-ns"),
128+
)
129+
130+
var buf bytes.Buffer
131+
if err := encoder.Encode(ctx, &buf, &original); err != nil {
132+
t.Fatal(err)
133+
}
134+
135+
// Decode back.
136+
decoder := NewDefaultDecoder(WithDecoderPURLConverter(reg))
137+
decoded, err := decoder.Decode(ctx, &buf)
138+
if err != nil {
139+
t.Fatal(err)
140+
}
141+
142+
t.Logf("original: %d packages, decoded: %d packages",
143+
len(original.Packages), len(decoded.Packages))
144+
})
145+
}
146+
}

sbom/spdx/testdata/decoder/konflux-manifest.ir.json

Whitespace-only changes.

0 commit comments

Comments
 (0)