diff --git a/extractor/filesystem/language/java/pomxmlnet/pomxmlnet.go b/extractor/filesystem/language/java/pomxmlnet/pomxmlnet.go new file mode 100644 index 00000000..2869789e --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/pomxmlnet.go @@ -0,0 +1,204 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package pomxmlnet extracts Maven's pom.xml format with transitive dependency resolution. +package pomxmlnet + +import ( + "context" + "fmt" + "path/filepath" + "strings" + + "golang.org/x/exp/maps" + + "deps.dev/util/maven" + "deps.dev/util/resolve" + mavenresolve "deps.dev/util/resolve/maven" + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/osv" + "github.com/google/osv-scalibr/internal/datasource" + "github.com/google/osv-scalibr/internal/mavenutil" + "github.com/google/osv-scalibr/internal/resolution/client" + "github.com/google/osv-scalibr/plugin" + "github.com/google/osv-scalibr/purl" +) + +// Extractor extracts Maven packages with transitive dependency resolution. +type Extractor struct { + client.DependencyClient + *datasource.MavenRegistryAPIClient +} + +// Name of the extractor. +func (e Extractor) Name() string { return "java/pomxmlnet" } + +// Version of the extractor. +func (e Extractor) Version() int { return 0 } + +// Requirements of the extractor. 
+func (e Extractor) Requirements() *plugin.Capabilities { + return &plugin.Capabilities{ + Network: true, + DirectFS: true, + } +} + +// FileRequired returns true if the specified file is named pom.xml. +func (e Extractor) FileRequired(fapi filesystem.FileAPI) bool { + return filepath.Base(fapi.Path()) == "pom.xml" +} + +// Extract extracts the direct and transitive packages of the pom.xml file passed through the scan input. +func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) { + var project maven.Project + if err := datasource.NewMavenDecoder(input.Reader).Decode(&project); err != nil { + return nil, fmt.Errorf("could not extract from %s: %w", input.Path, err) + } + // Empty JDK and ActivationOS indicate merging the default profiles. + if err := project.MergeProfiles("", maven.ActivationOS{}); err != nil { + return nil, fmt.Errorf("failed to merge profiles: %w", err) + } + for _, repo := range project.Repositories { + if err := e.MavenRegistryAPIClient.AddRegistry(datasource.MavenRegistry{ + URL: string(repo.URL), + ID: string(repo.ID), + ReleasesEnabled: repo.Releases.Enabled.Boolean(), + SnapshotsEnabled: repo.Snapshots.Enabled.Boolean(), + }); err != nil { + return nil, fmt.Errorf("failed to add registry %s: %w", repo.URL, err) + } + } + // Merging parents data by parsing local parent pom.xml or fetching from upstream. 
+ if err := mavenutil.MergeParents(ctx, input, e.MavenRegistryAPIClient, &project, project.Parent, 1, true); err != nil { + return nil, fmt.Errorf("failed to merge parents: %w", err) + } + // Process the dependencies: + // - dedupe dependencies and dependency management + // - import dependency management + // - fill in missing dependency version requirement + project.ProcessDependencies(func(groupID, artifactID, version maven.String) (maven.DependencyManagement, error) { + return mavenutil.GetDependencyManagement(ctx, e.MavenRegistryAPIClient, groupID, artifactID, version) + }) + + if registries := e.MavenRegistryAPIClient.GetRegistries(); len(registries) > 0 { + clientRegs := make([]client.Registry, len(registries)) + for i, reg := range registries { + clientRegs[i] = reg + } + if err := e.DependencyClient.AddRegistries(clientRegs); err != nil { + return nil, err + } + } + + overrideClient := client.NewOverrideClient(e.DependencyClient) + resolver := mavenresolve.NewResolver(overrideClient) + + // Resolve the dependencies. 
+ root := resolve.Version{ + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: project.ProjectKey.Name(), + }, + VersionType: resolve.Concrete, + Version: string(project.Version), + }} + reqs := make([]resolve.RequirementVersion, len(project.Dependencies)+len(project.DependencyManagement.Dependencies)) + for i, d := range project.Dependencies { + reqs[i] = resolve.RequirementVersion{ + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: d.Name(), + }, + VersionType: resolve.Requirement, + Version: string(d.Version), + }, + Type: resolve.MavenDepType(d, ""), + } + } + for i, d := range project.DependencyManagement.Dependencies { + reqs[len(project.Dependencies)+i] = resolve.RequirementVersion{ + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: d.Name(), + }, + VersionType: resolve.Requirement, + Version: string(d.Version), + }, + Type: resolve.MavenDepType(d, mavenutil.OriginManagement), + } + } + overrideClient.AddVersion(root, reqs) + + g, err := resolver.Resolve(ctx, root.VersionKey) + if err != nil { + return nil, fmt.Errorf("failed resolving %v: %w", root, err) + } + for i, e := range g.Edges { + g.Edges[i] = e + } + + details := map[string]*extractor.Inventory{} + for i := 1; i < len(g.Nodes); i++ { + // Ignore the first node which is the root. + node := g.Nodes[i] + depGroups := []string{} + inventory := extractor.Inventory{ + Name: node.Version.Name, + Version: node.Version.Version, + // TODO(#408): Add merged paths in here as well + Locations: []string{input.Path}, + } + // We are only able to know dependency groups of direct dependencies but + // not transitive dependencies because the nodes in the resolve graph does + // not have the scope information. 
+ for _, dep := range project.Dependencies { + if dep.Name() != inventory.Name { + continue + } + if dep.Scope != "" && dep.Scope != "compile" { + depGroups = append(depGroups, string(dep.Scope)) + } + } + inventory.Metadata = osv.DepGroupMetadata{ + DepGroupVals: depGroups, + } + details[inventory.Name] = &inventory + } + + return maps.Values(details), nil +} + +// ToPURL converts an inventory created by this extractor into a PURL. +func (e Extractor) ToPURL(i *extractor.Inventory) *purl.PackageURL { + g, a, _ := strings.Cut(i.Name, ":") + return &purl.PackageURL{ + Type: purl.TypeMaven, + Namespace: g, + Name: a, + Version: i.Version, + // TODO(#426): add Maven classifier and type to PURL. + } +} + +// Ecosystem returns the OSV ecosystem ('Maven') of the software extracted by this extractor. +func (e Extractor) Ecosystem(_ *extractor.Inventory) string { + return "Maven" +} + +var _ filesystem.Extractor = Extractor{} diff --git a/extractor/filesystem/language/java/pomxmlnet/pomxmlnet_test.go b/extractor/filesystem/language/java/pomxmlnet/pomxmlnet_test.go new file mode 100644 index 00000000..8e580058 --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/pomxmlnet_test.go @@ -0,0 +1,371 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package pomxmlnet_test + +import ( + "context" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem/language/java/pomxmlnet" + "github.com/google/osv-scalibr/extractor/filesystem/osv" + "github.com/google/osv-scalibr/extractor/filesystem/simplefileapi" + "github.com/google/osv-scalibr/internal/datasource" + "github.com/google/osv-scalibr/internal/resolution/clienttest" + "github.com/google/osv-scalibr/testing/extracttest" +) + +func TestMavenResolverExtractor_FileRequired(t *testing.T) { + tests := []struct { + path string + want bool + }{ + { + path: "", + want: false, + }, + { + path: "pom.xml", + want: true, + }, + { + path: "path/to/my/pom.xml", + want: true, + }, + { + path: "path/to/my/pom.xml/file", + want: false, + }, + { + path: "path/to/my/pom.xml.file", + want: false, + }, + { + path: "path.to.my.pom.xml", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + e := pomxmlnet.Extractor{} + got := e.FileRequired(simplefileapi.New(tt.path, nil)) + if got != tt.want { + t.Errorf("FileRequired() got = %v, want %v", got, tt.want) + } + }) + } +} + +func TestExtractor_Extract(t *testing.T) { + tests := []extracttest.TestTableEntry{ + { + Name: "Not a pom file", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/not-pom.txt", + }, + WantErr: extracttest.ContainsErrStr{Str: "could not extract from"}, + }, + { + Name: "invalid xml syntax", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/invalid-syntax.xml", + }, + WantErr: extracttest.ContainsErrStr{Str: "XML syntax error"}, + }, + { + Name: "empty", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/empty.xml", + }, + WantInventory: []*extractor.Inventory{}, + }, + { + Name: "one package", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/one-package.xml", + }, 
+ WantInventory: []*extractor.Inventory{ + { + Name: "org.apache.maven:maven-artifact", + Version: "1.0.0", + Locations: []string{"testdata/maven/one-package.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + }, + }, + { + Name: "two packages", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/two-packages.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "io.netty:netty-all", + Version: "4.1.42.Final", + Locations: []string{"testdata/maven/two-packages.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.slf4j:slf4j-log4j12", + Version: "1.7.25", + Locations: []string{"testdata/maven/two-packages.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + }, + }, + { + Name: "with dependency management", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/with-dependency-management.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "io.netty:netty-all", + Version: "4.1.9", + Locations: []string{"testdata/maven/with-dependency-management.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.slf4j:slf4j-log4j12", + Version: "1.7.25", + Locations: []string{"testdata/maven/with-dependency-management.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + }, + }, + { + Name: "interpolation", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/interpolation.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "org.mine:mypackage", + Version: "1.0.0", + Locations: []string{"testdata/maven/interpolation.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.mine:my.package", + Version: "2.3.4", + Locations: []string{"testdata/maven/interpolation.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.mine:ranged-package", + Version: "9.4.37", + Locations: 
[]string{"testdata/maven/interpolation.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + }, + }, + { + Name: "with scope / dep groups", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/with-scope.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "junit:junit", + Version: "4.12", + Locations: []string{"testdata/maven/with-scope.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{"runtime"}}, + }, + }, + }, + { + Name: "transitive dependencies", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/transitive.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "org.direct:alice", + Version: "1.0.0", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.direct:bob", + Version: "2.0.0", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.direct:chris", + Version: "3.0.0", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.transitive:chuck", + Version: "1.1.1", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.transitive:dave", + Version: "2.2.2", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.transitive:eve", + Version: "3.3.3", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.transitive:frank", + Version: "4.4.4", + Locations: []string{"testdata/maven/transitive.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + resolutionClient := 
clienttest.NewMockResolutionClient(t, "testdata/universe/basic-universe.yaml") + extr := pomxmlnet.Extractor{ + DependencyClient: resolutionClient, + MavenRegistryAPIClient: &datasource.MavenRegistryAPIClient{}, + } + + scanInput := extracttest.GenerateScanInputMock(t, tt.InputConfig) + defer extracttest.CloseTestScanInput(t, scanInput) + + got, err := extr.Extract(context.Background(), &scanInput) + + if diff := cmp.Diff(tt.WantErr, err, cmpopts.EquateErrors()); diff != "" { + t.Errorf("%s.Extract(%q) error diff (-want +got):\n%s", extr.Name(), tt.InputConfig.Path, diff) + return + } + + if diff := cmp.Diff(tt.WantInventory, got, cmpopts.SortSlices(extracttest.InventoryCmpLess)); diff != "" { + t.Errorf("%s.Extract(%q) diff (-want +got):\n%s", extr.Name(), tt.InputConfig.Path, diff) + } + }) + } +} + +func TestExtractor_Extract_WithMockServer(t *testing.T) { + tt := extracttest.TestTableEntry{ + // Name: "with parent", + InputConfig: extracttest.ScanInputMockConfig{ + Path: "testdata/maven/with-parent.xml", + }, + WantInventory: []*extractor.Inventory{ + { + Name: "org.alice:alice", + Version: "1.0.0", + Locations: []string{"testdata/maven/with-parent.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.bob:bob", + Version: "2.0.0", + Locations: []string{"testdata/maven/with-parent.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.chuck:chuck", + Version: "3.0.0", + Locations: []string{"testdata/maven/with-parent.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.dave:dave", + Version: "4.0.0", + Locations: []string{"testdata/maven/with-parent.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.eve:eve", + Version: "5.0.0", + Locations: []string{"testdata/maven/with-parent.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + { + Name: "org.frank:frank", + Version: "6.0.0", + Locations: 
[]string{"testdata/maven/with-parent.xml"}, + Metadata: osv.DepGroupMetadata{DepGroupVals: []string{}}, + }, + }, + } + + srv := clienttest.NewMockHTTPServer(t) + srv.SetResponse(t, "org/upstream/parent-pom/1.0/parent-pom-1.0.pom", []byte(` + + org.upstream + parent-pom + 1.0 + pom + + + org.eve + eve + 5.0.0 + + + + `)) + srv.SetResponse(t, "org/import/import/1.2.3/import-1.2.3.pom", []byte(` + + org.import + import + 1.2.3 + pom + + + + org.frank + frank + 6.0.0 + + + + + `)) + + apiClient, err := datasource.NewMavenRegistryAPIClient(datasource.MavenRegistry{URL: srv.URL, ReleasesEnabled: true}) + if err != nil { + t.Fatalf("%v", err) + } + + resolutionClient := clienttest.NewMockResolutionClient(t, "testdata/universe/basic-universe.yaml") + extr := pomxmlnet.Extractor{ + DependencyClient: resolutionClient, + MavenRegistryAPIClient: apiClient, + } + + scanInput := extracttest.GenerateScanInputMock(t, tt.InputConfig) + defer extracttest.CloseTestScanInput(t, scanInput) + + got, err := extr.Extract(context.Background(), &scanInput) + + if diff := cmp.Diff(tt.WantErr, err, cmpopts.EquateErrors()); diff != "" { + t.Errorf("%s.Extract(%q) error diff (-want +got):\n%s", extr.Name(), tt.InputConfig.Path, diff) + return + } + + if diff := cmp.Diff(tt.WantInventory, got, cmpopts.SortSlices(extracttest.InventoryCmpLess)); diff != "" { + t.Errorf("%s.Extract(%q) diff (-want +got):\n%s", extr.Name(), tt.InputConfig.Path, diff) + } +} diff --git a/extractor/filesystem/language/java/pomxmlnet/testdata/maven/empty.xml b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/empty.xml new file mode 100644 index 00000000..8cfeebaa --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/empty.xml @@ -0,0 +1,7 @@ + + 4.0.0 + + com.mycompany.app + my-app + 1 + diff --git a/extractor/filesystem/language/java/pomxmlnet/testdata/maven/interpolation.xml b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/interpolation.xml new file mode 100644 index 
00000000..6b7f761a --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/interpolation.xml @@ -0,0 +1,37 @@ + + + 4.0.0 + + io.library + my-library + 1.0-SNAPSHOT + jar + + + 1.0.0 + 2.3.4 + [9.4.35.v20201120,9.5) + + + + + org.mine + mypackage + ${mypackageVersion} + + + + org.mine + my.package + ${my.package.version} + + + + org.mine + ranged-package + ${version-range} + + + + diff --git a/extractor/filesystem/language/java/pomxmlnet/testdata/maven/invalid-syntax.xml b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/invalid-syntax.xml new file mode 100644 index 00000000..761a32c1 --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/invalid-syntax.xml @@ -0,0 +1,13 @@ + + + <${Id}.version>${project.version} + + + + + io.netty + netty-all + 4.1.42.Final + + + diff --git a/extractor/filesystem/language/java/pomxmlnet/testdata/maven/not-pom.txt b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/not-pom.txt new file mode 100644 index 00000000..f9df712b --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/not-pom.txt @@ -0,0 +1 @@ +this is not a pom.xml file! 
diff --git a/extractor/filesystem/language/java/pomxmlnet/testdata/maven/one-package.xml b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/one-package.xml new file mode 100644 index 00000000..bbb1359e --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/one-package.xml @@ -0,0 +1,17 @@ + + com.mycompany.app + my-app + 1.0 + + + 3.0 + + + + + org.apache.maven + maven-artifact + 1.0.0 + + + diff --git a/extractor/filesystem/language/java/pomxmlnet/testdata/maven/parent/pom.xml b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/parent/pom.xml new file mode 100644 index 00000000..3751df6b --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/parent/pom.xml @@ -0,0 +1,21 @@ + + org.local + parent-pom + 1.0 + + pom + + + org.upstream + parent-pom + 1.0 + + + + + org.dave + dave + 4.0.0 + + + diff --git a/extractor/filesystem/language/java/pomxmlnet/testdata/maven/transitive.xml b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/transitive.xml new file mode 100644 index 00000000..52e416a0 --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/transitive.xml @@ -0,0 +1,33 @@ + + com.mycompany.app + my-app + 1.0 + + + + + org.transitive + frank + 4.4.4 + + + + + + + org.direct + alice + 1.0.0 + + + org.direct + bob + 2.0.0 + + + org.direct + chris + 3.0.0 + + + diff --git a/extractor/filesystem/language/java/pomxmlnet/testdata/maven/two-packages.xml b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/two-packages.xml new file mode 100644 index 00000000..897f648a --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/two-packages.xml @@ -0,0 +1,22 @@ + + com.mycompany.app + my-app + 1.0 + + + 3.0 + + + + + io.netty + netty-all + 4.1.42.Final + + + org.slf4j + slf4j-log4j12 + 1.7.25 + + + diff --git a/extractor/filesystem/language/java/pomxmlnet/testdata/maven/with-dependency-management.xml 
b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/with-dependency-management.xml new file mode 100644 index 00000000..1928688e --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/with-dependency-management.xml @@ -0,0 +1,37 @@ + + com.mycompany.app + my-app + 1.0 + + + 3.0 + + + + + io.netty + netty-all + 4.1.9 + + + org.slf4j + slf4j-log4j12 + 1.7.25 + + + + + + + io.netty + netty-all + 4.1.42.Final + + + com.google.code.findbugs + jsr305 + 3.0.2 + + + + diff --git a/extractor/filesystem/language/java/pomxmlnet/testdata/maven/with-parent.xml b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/with-parent.xml new file mode 100644 index 00000000..602b8b87 --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/with-parent.xml @@ -0,0 +1,54 @@ + + com.mycompany.app + my-app + 1.0 + + + org.local + parent-pom + 1.0 + ./parent/pom.xml + + + + 2.0.0 + + + + + org.alice + alice + 1.0.0 + + + org.bob + bob + ${bob.version} + + + org.chuck + chuck + + + org.frank + frank + + + + + + + org.chuck + chuck + 3.0.0 + + + org.import + import + 1.2.3 + pom + import + + + + diff --git a/extractor/filesystem/language/java/pomxmlnet/testdata/maven/with-scope.xml b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/with-scope.xml new file mode 100644 index 00000000..688c6bb7 --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/testdata/maven/with-scope.xml @@ -0,0 +1,14 @@ + + com.mycompany.app + my-app + 1.0 + + + + junit + junit + 4.12 + runtime + + + diff --git a/extractor/filesystem/language/java/pomxmlnet/testdata/universe/basic-universe.yaml b/extractor/filesystem/language/java/pomxmlnet/testdata/universe/basic-universe.yaml new file mode 100644 index 00000000..2bf2b327 --- /dev/null +++ b/extractor/filesystem/language/java/pomxmlnet/testdata/universe/basic-universe.yaml @@ -0,0 +1,60 @@ +system: maven +schema: | + com.google.code.findbugs:jsr305 + 3.0.2 + io.netty:netty-all + 4.1.9 + 
4.1.42.Final + junit:junit + 4.12 + org.alice:alice + 1.0.0 + org.apache.maven:maven-artifact + 1.0.0 + org.bob:bob + 2.0.0 + org.chuck:chuck + 3.0.0 + org.dave:dave + 4.0.0 + org.direct:alice + 1.0.0 + org.transitive:chuck@1.1.1 + org.transitive:dave@2.2.2 + org.direct:bob + 2.0.0 + org.transitive:eve@3.3.3 + org.direct:chris + 3.0.0 + org.transitive:frank@3.3.3 + org.eve:eve + 5.0.0 + org.frank:frank + 6.0.0 + org.mine:my.package + 2.3.4 + org.mine:mypackage + 1.0.0 + org.mine:ranged-package + 9.4.35 + 9.4.36 + 9.4.37 + 9.5 + org.slf4j:slf4j-log4j12 + 1.7.25 + org.transitive:chuck + 1.1.1 + 2.2.2 + org.transitive:eve@2.2.2 + 3.3.3 + org.transitive:dave + 1.1.1 + 2.2.2 + 3.3.3 + org.transitive:eve + 1.1.1 + 2.2.2 + 3.3.3 + org.transitive:frank + 3.3.3 + 4.4.4 diff --git a/go.mod b/go.mod index cd9b73a4..930e2304 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,11 @@ module github.com/google/osv-scalibr -go 1.22 +go 1.23.4 require ( + deps.dev/util/maven v0.0.0-20250114022823-c1ebdca3d00a + deps.dev/util/resolve v0.0.0-20250114022823-c1ebdca3d00a + deps.dev/util/semver v0.0.0-20250114022823-c1ebdca3d00a github.com/BurntSushi/toml v1.3.2 github.com/CycloneDX/cyclonedx-go v0.9.0 github.com/GehirnInc/crypt v0.0.0-20230320061759-8cc1b52080c5 @@ -24,16 +27,18 @@ require ( golang.org/x/crypto v0.31.0 golang.org/x/exp v0.0.0-20240707233637-46b078467d37 golang.org/x/mod v0.19.0 + golang.org/x/net v0.33.0 golang.org/x/sys v0.28.0 golang.org/x/text v0.21.0 golang.org/x/vuln v1.0.4 - google.golang.org/grpc v1.62.1 - google.golang.org/protobuf v1.33.0 + google.golang.org/grpc v1.69.4 + google.golang.org/protobuf v1.35.1 gopkg.in/yaml.v3 v3.0.1 www.velocidex.com/golang/regparser v0.0.0-20240404115756-2169ac0e3c09 ) require ( + deps.dev/api/v3 v3.0.0-20240311054650-e1e6a3d70fb7 // indirect github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 // indirect 
github.com/Microsoft/go-winio v0.6.2 // indirect @@ -54,11 +59,10 @@ require ( github.com/docker/docker-credential-helpers v0.8.1 // indirect github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect github.com/felixge/httpsnoop v1.0.3 // indirect - github.com/go-logr/logr v1.2.4 // indirect + github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/golang/protobuf v1.5.4 // indirect github.com/klauspost/compress v1.17.7 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/mattn/go-isatty v0.0.20 // indirect @@ -76,14 +80,14 @@ require ( github.com/vbatts/tar-split v0.11.5 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect - go.opentelemetry.io/otel v1.19.0 // indirect - go.opentelemetry.io/otel/metric v1.19.0 // indirect - go.opentelemetry.io/otel/trace v1.19.0 // indirect - golang.org/x/net v0.33.0 // indirect + go.opentelemetry.io/otel v1.31.0 // indirect + go.opentelemetry.io/otel/metric v1.31.0 // indirect + go.opentelemetry.io/otel/trace v1.31.0 // indirect golang.org/x/sync v0.10.0 // indirect golang.org/x/tools v0.23.0 // indirect golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240304212257-790db918fca8 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241015192408-796eee8c2d53 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53 // indirect sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/go.sum b/go.sum index 0cc56f63..46780777 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,12 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +deps.dev/api/v3 
v3.0.0-20240311054650-e1e6a3d70fb7 h1:dleK4xoNCfxlfknQNPR1DmSdVErIAWlEzxtTImCqWXI= +deps.dev/api/v3 v3.0.0-20240311054650-e1e6a3d70fb7/go.mod h1:k3RHZwAw7ijqoXmVDvcO7ikeTwTC4jtmhCDathV+IKE= +deps.dev/util/maven v0.0.0-20250114022823-c1ebdca3d00a h1:YM06Hmw6MzLUf7L55XqoO5LPgB8suxfIdxKfmJarF4g= +deps.dev/util/maven v0.0.0-20250114022823-c1ebdca3d00a/go.mod h1:95XAmYKjcTdXRC6BRVwH0sqVZ35SafZq/9jgnus0nYw= +deps.dev/util/resolve v0.0.0-20250114022823-c1ebdca3d00a h1:jNmpFTY3jQBwqIgjI1m0BKDsTTu/mau2+T5SHeJjf5Q= +deps.dev/util/resolve v0.0.0-20250114022823-c1ebdca3d00a/go.mod h1:5J7t7bInhJoA6mnt7txtGIxbxXQnx1jYOxj90N/4DiU= +deps.dev/util/semver v0.0.0-20250114022823-c1ebdca3d00a h1:mA8DyBAuaCfaUETZsopof0MFqmNU7/sSL0x6zKCECdI= +deps.dev/util/semver v0.0.0-20250114022823-c1ebdca3d00a/go.mod h1:jjJweVqtuMQ7Q4zlTQ/kCHpboojkRvpMYlhy/c93DVU= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 h1:59MxjQVfjXsBpLy+dbd2/ELV5ofnUkUZBvWSC85sheA= @@ -67,8 +75,8 @@ github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSw github.com/glebarez/go-sqlite v1.20.3 h1:89BkqGOXR9oRmG58ZrzgoY/Fhy5x0M+/WV48U5zVrZ4= github.com/glebarez/go-sqlite v1.20.3/go.mod h1:u3N6D/wftiAzIOJtZl6BmedqxmmkDfH3q+ihjqxC9u0= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= -github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= 
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= @@ -195,12 +203,16 @@ go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 h1:x8Z78aZx8cOF0+Kkazoc7lwUNMGy0LrzEMxTm4BbTxg= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0/go.mod h1:62CPTSry9QZtOaSsE3tOzhx6LzDhHnXJ6xHeMNNiM6Q= -go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= -go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY= -go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE= -go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8= -go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg= -go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo= +go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= +go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= +go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= +go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= +go.opentelemetry.io/otel/sdk v1.31.0 h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk= +go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0= +go.opentelemetry.io/otel/sdk/metric v1.31.0 h1:i9hxxLJF/9kkvfHppyLL55aW7iIJz4JjxTeYusH7zMc= +go.opentelemetry.io/otel/sdk/metric v1.31.0/go.mod h1:CRInTMVvNhUKgSAMbKyTMxqOBC0zgyxzW55lZzX43Y8= +go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= +go.opentelemetry.io/otel/trace v1.31.0/go.mod 
h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -276,15 +288,17 @@ google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98 google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 h1:KAeGQVN3M9nD0/bQXnr/ClcEMJ968gUXJQ9pwfSynuQ= google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80/go.mod h1:cc8bqMqtv9gMOr0zHg2Vzff5ULhhL2IXP4sbcn32Dro= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240304212257-790db918fca8 h1:IR+hp6ypxjH24bkMfEJ0yHR21+gwPWdV+/IBrPQyn3k= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240304212257-790db918fca8/go.mod h1:UCOku4NytXMJuLQE5VuqA5lX3PcHCBo8pxNyvkf4xBs= +google.golang.org/genproto/googleapis/api v0.0.0-20241015192408-796eee8c2d53 h1:fVoAXEKA4+yufmbdVYv+SE73+cPZbbbe8paLsHfkK+U= +google.golang.org/genproto/googleapis/api v0.0.0-20241015192408-796eee8c2d53/go.mod h1:riSXTwQ4+nqmPGtobMFyW5FqVAmIs0St6VPp4Ug7CE4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53 h1:X58yt85/IXCx0Y3ZwN6sEIKZzQtDEYaBWrDvErdXrRE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc 
v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk= -google.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= +google.golang.org/grpc v1.69.4 h1:MF5TftSMkd8GLw/m0KM6V8CMOCY6NZ1NQDPGFgbTt4A= +google.golang.org/grpc v1.69.4/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -296,8 +310,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/internal/datasource/cache.go b/internal/datasource/cache.go new file mode 100644 index 00000000..97b8ad5a --- /dev/null +++ b/internal/datasource/cache.go @@ -0,0 +1,123 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you 
// cacheExpiry is the age at which a persisted cache is treated as expired.
const cacheExpiry = 6 * time.Hour

// gobMarshal serialises v with encoding/gob and returns the encoded bytes.
func gobMarshal(v any) ([]byte, error) {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(v); err != nil {
		return nil, err
	}

	return buf.Bytes(), nil
}

// gobUnmarshal decodes gob-encoded bytes into v, which must be a pointer.
func gobUnmarshal(b []byte, v any) error {
	return gob.NewDecoder(bytes.NewReader(b)).Decode(v)
}

// requestCacheCall tracks a single in-flight invocation of a Get callback.
// Goroutines that arrive while the call is running block on wg and then read
// val and err once it completes.
type requestCacheCall[V any] struct {
	wg  sync.WaitGroup
	val V
	err error
}

// RequestCache is a map to cache the results of expensive functions that are called concurrently.
type RequestCache[K comparable, V any] struct {
	cache map[K]V                    // completed, successful results
	calls map[K]*requestCacheCall[V] // requests that are currently running
	mu    sync.Mutex                 // guards cache and calls
}

// NewRequestCache creates a new, empty RequestCache.
func NewRequestCache[K comparable, V any]() *RequestCache[K, V] {
	rq := new(RequestCache[K, V])
	rq.cache = make(map[K]V)
	rq.calls = make(map[K]*requestCacheCall[V])

	return rq
}

// Get returns the cached value for key if there is one. Otherwise it calls fn,
// stores the result when fn succeeds, and returns whatever fn returned.
// Callers that ask for the same key while fn is still running wait for that
// call and share its result instead of invoking fn again. Errors are returned
// to every waiter but are not cached.
func (rq *RequestCache[K, V]) Get(key K, fn func() (V, error)) (V, error) {
	rq.mu.Lock()

	// Fast path: the value has already been computed and stored.
	if cached, hit := rq.cache[key]; hit {
		rq.mu.Unlock()
		return cached, nil
	}

	// Someone else is already computing this key: wait for them.
	if pending, inFlight := rq.calls[key]; inFlight {
		rq.mu.Unlock()
		pending.wg.Wait()

		return pending.val, pending.err
	}

	// Nobody is working on this key yet: register the call so that later
	// callers wait on it, then run fn without holding the lock.
	call := &requestCacheCall[V]{}
	call.wg.Add(1)
	rq.calls[key] = call
	rq.mu.Unlock()

	call.val, call.err = fn()

	rq.mu.Lock()
	defer rq.mu.Unlock()

	// Release the goroutines waiting on this call.
	call.wg.Done()

	// Only successful results are kept.
	if call.err == nil {
		rq.cache[key] = call.val
	}

	// The call is finished, so drop it from the in-flight set.
	if rq.calls[key] == call {
		delete(rq.calls, key)
	}

	return call.val, call.err
}

// GetMap gets a shallow clone of the stored cache map.
func (rq *RequestCache[K, V]) GetMap() map[K]V {
	rq.mu.Lock()
	snapshot := maps.Clone(rq.cache)
	rq.mu.Unlock()

	return snapshot
}

// SetMap loads (a shallow clone of) the provided map into the cache map.
func (rq *RequestCache[K, V]) SetMap(m map[K]V) {
	replacement := maps.Clone(m)

	rq.mu.Lock()
	rq.cache = replacement
	rq.mu.Unlock()
}
+ +package datasource_test + +import ( + "maps" + "sync" + "sync/atomic" + "testing" + + "github.com/google/osv-scalibr/internal/datasource" +) + +func TestRequestCache(t *testing.T) { + // Test that RequestCache calls each function exactly once per key. + requestCache := datasource.NewRequestCache[int, int]() + + const numKeys = 20 + const requestsPerKey = 50 + + var wg sync.WaitGroup + var fnCalls [numKeys]int32 + + for i := range numKeys { + for range requestsPerKey { + wg.Add(1) + go func() { + t.Helper() + //nolint:errcheck + requestCache.Get(i, func() (int, error) { + // Count how many times this function gets called for this key, + // then return the key as the value. + atomic.AddInt32(&fnCalls[i], 1) + return i, nil + }) + wg.Done() + }() + } + } + + wg.Wait() // Make sure all the goroutines are finished + + for i, c := range fnCalls { + if c != 1 { + t.Errorf("RequestCache Get(%d) function called %d times", i, c) + } + } + + cacheMap := requestCache.GetMap() + if len(cacheMap) != numKeys { + t.Errorf("RequestCache GetMap length was %d, expected %d", len(cacheMap), numKeys) + } + + for k, v := range cacheMap { + if k != v { + t.Errorf("RequestCache GetMap key %d has unexpected value %d", k, v) + } + } +} + +func TestRequestCacheSetMap(t *testing.T) { + requestCache := datasource.NewRequestCache[string, string]() + requestCache.SetMap(map[string]string{"foo": "foo1", "bar": "bar2"}) + fn := func() (string, error) { return "CACHE MISS", nil } + + want := map[string]string{ + "foo": "foo1", + "bar": "bar2", + "baz": "CACHE MISS", + "FOO": "CACHE MISS", + } + + for k, v := range want { + got, err := requestCache.Get(k, fn) + if err != nil { + t.Errorf("Get(%v) returned an error: %v", v, err) + } else if got != v { + t.Errorf("Get(%v) got: %v, want %v", k, got, v) + } + } + + gotMap := requestCache.GetMap() + if !maps.Equal(want, gotMap) { + t.Errorf("GetMap() got %v, want %v", gotMap, want) + } +} diff --git 
// HTTPAuthMethod identifies an HTTP authentication scheme that HTTPAuthentication can use.
type HTTPAuthMethod int

// Authentication schemes that can be listed in HTTPAuthentication.SupportedMethods.
const (
	// AuthBasic is the "Basic" scheme: base64-encoded username:password.
	AuthBasic HTTPAuthMethod = iota
	// AuthBearer is the "Bearer" scheme: a token sent verbatim.
	AuthBearer
	// AuthDigest is the "Digest" scheme: an MD5 response to a server challenge.
	AuthDigest
)

// HTTPAuthentication holds the information needed for general HTTP Authentication support.
// Requests made through this will automatically populate the relevant info in the Authorization headers.
// This is a general implementation and should be suitable for use with any ecosystem.
type HTTPAuthentication struct {
	SupportedMethods []HTTPAuthMethod // In order of preference, only one method will be attempted.

	// AlwaysAuth determines whether to always send auth headers.
	// If false, the server must respond with a WWW-Authenticate header which will be checked for supported methods.
	// Must be set to false to use Digest authentication.
	AlwaysAuth bool

	// Shared
	Username string // Basic & Digest, plain text.
	Password string // Basic & Digest, plain text.
	// Basic
	BasicAuth string // Base64-encoded username:password. Overrides Username & Password fields if set.
	// Bearer
	BearerToken string
	// Digest
	CnonceFunc func() string // Function used to generate cnonce string for Digest. OK to leave unassigned. Mostly for use in tests.

	lastUsed atomic.Value // The last-used authentication method - used when AlwaysAuth is false to automatically send Basic auth.
}

// Get makes an http GET request with the given http.Client.
// The Authorization header is populated from the fields of the HTTPAuthentication.
//
// A nil receiver makes a plain unauthenticated request. With AlwaysAuth set,
// the first supported Basic/Bearer method that has credentials is sent
// up front. Otherwise the request is first sent without new credentials (or
// with the Basic/Bearer method that last succeeded) and, on a 401 response, is
// retried once using the first supported method offered in WWW-Authenticate.
// If no supported method matches, the original 401 response is returned.
//
// On a nil error the caller is responsible for closing the response body.
func (auth *HTTPAuthentication) Get(ctx context.Context, httpClient *http.Client, url string) (*http.Response, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}

	// For convenience, have the nil HTTPAuthentication just make an unauthenticated request.
	if auth == nil {
		return httpClient.Do(req)
	}

	if auth.AlwaysAuth {
		for _, method := range auth.SupportedMethods {
			added := false
			switch method {
			case AuthBasic:
				added = auth.addBasic(req)
			case AuthBearer:
				added = auth.addBearer(req)
			case AuthDigest:
				// AuthDigest needs a challenge from WWW-Authenticate, so we cannot always add the auth.
			}
			if added {
				break
			}
		}

		return httpClient.Do(req)
	}

	// If the last request we made to this server used Basic or Bearer auth, send the header with this request.
	if lastUsed, ok := auth.lastUsed.Load().(HTTPAuthMethod); ok {
		switch lastUsed {
		case AuthBasic:
			auth.addBasic(req)
		case AuthBearer:
			auth.addBearer(req)
		case AuthDigest:
			// Cannot add AuthDigest without the challenge from the initial request.
		}
	}

	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode != http.StatusUnauthorized {
		return resp, nil
	}

	wwwAuth := resp.Header.Values("WWW-Authenticate")

	retry, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		resp.Body.Close()
		return nil, err
	}

	// Pick the first supported method that the server offered and that we have credentials for.
	ok := false
	var usedMethod HTTPAuthMethod
	for _, method := range auth.SupportedMethods {
		switch method {
		case AuthBasic:
			if auth.authIndex(wwwAuth, "Basic") >= 0 {
				ok = auth.addBasic(retry)
			}
		case AuthBearer:
			if auth.authIndex(wwwAuth, "Bearer") >= 0 {
				ok = auth.addBearer(retry)
			}
		case AuthDigest:
			if idx := auth.authIndex(wwwAuth, "Digest"); idx >= 0 {
				ok = auth.addDigest(retry, wwwAuth[idx])
			}
		}
		if ok {
			usedMethod = method
			break
		}
	}

	if !ok {
		// No matching method: hand the original 401 response back to the caller.
		return resp, nil
	}

	// Close the original response before we discard it.
	resp.Body.Close()

	resp, err = httpClient.Do(retry)
	if err != nil {
		// resp is nil here, so it must not be inspected.
		return nil, err
	}
	if resp.StatusCode == http.StatusOK {
		auth.lastUsed.Store(usedMethod)
	}

	return resp, nil
}

// authIndex returns the index of the first WWW-Authenticate value whose scheme
// (the text before the first space) equals authScheme, or -1 if there is none.
func (auth *HTTPAuthentication) authIndex(wwwAuth []string, authScheme string) int {
	return slices.IndexFunc(wwwAuth, func(s string) bool {
		scheme, _, _ := strings.Cut(s, " ")
		return scheme == authScheme
	})
}

// addBasic sets a Basic Authorization header on req and reports whether it did.
// BasicAuth takes precedence; otherwise both Username and Password must be set.
func (auth *HTTPAuthentication) addBasic(req *http.Request) bool {
	if auth.BasicAuth != "" {
		req.Header.Set("Authorization", "Basic "+auth.BasicAuth)

		return true
	}

	if auth.Username != "" && auth.Password != "" {
		authStr := base64.StdEncoding.EncodeToString([]byte(auth.Username + ":" + auth.Password))
		req.Header.Set("Authorization", "Basic "+authStr)

		return true
	}

	return false
}

// addBearer sets a Bearer Authorization header on req and reports whether it
// did. It does nothing when BearerToken is empty.
func (auth *HTTPAuthentication) addBearer(req *http.Request) bool {
	if auth.BearerToken != "" {
		req.Header.Set("Authorization", "Bearer "+auth.BearerToken)

		return true
	}

	return false
}

// addDigest answers a Digest challenge by setting the Authorization header on
// req, and reports whether it did. It returns false when credentials are
// missing, when the challenge lacks realm or nonce, when the algorithm is not
// MD5 / MD5-sess, when a qop list is present but does not offer "auth", or
// when a cnonce is needed and cannot be generated.
func (auth *HTTPAuthentication) addDigest(req *http.Request, challenge string) bool {
	// Mostly following the algorithm as outlined in https://en.wikipedia.org/wiki/Digest_access_authentication
	// And also https://datatracker.ietf.org/doc/html/rfc2617
	if auth.Username == "" || auth.Password == "" {
		return false
	}
	params := auth.parseChallenge(challenge)
	realm, ok := params["realm"]
	if !ok {
		return false
	}

	nonce, ok := params["nonce"]
	if !ok {
		return false
	}
	var cnonce string

	ha1 := md5.Sum([]byte(auth.Username + ":" + realm + ":" + auth.Password)) //nolint:gosec
	switch params["algorithm"] {
	case "MD5-sess":
		cnonce = auth.cnonce()
		if cnonce == "" {
			return false
		}
		var b bytes.Buffer
		fmt.Fprintf(&b, "%x:%s:%s", ha1, nonce, cnonce)
		ha1 = md5.Sum(b.Bytes()) //nolint:gosec
	case "MD5", "":
		// Plain MD5 (the default when no algorithm is given): ha1 is used as is.
	default:
		return false
	}

	// Only support "auth" qop. The list may contain spaces after the commas.
	qop, hasQop := params["qop"]
	if hasQop {
		offersAuth := slices.ContainsFunc(strings.Split(qop, ","), func(tok string) bool {
			return strings.TrimSpace(tok) == "auth"
		})
		if !offersAuth {
			return false
		}
	}

	// The digest must cover the request target as sent on the request line,
	// which includes the query string and is "/" for an empty path.
	uri := req.URL.RequestURI()

	ha2 := md5.Sum([]byte(req.Method + ":" + uri)) //nolint:gosec

	// hard-coding nonceCount to 1 since we don't make a request more than once
	nonceCount := "00000001"

	var b bytes.Buffer
	if hasQop {
		if cnonce == "" {
			cnonce = auth.cnonce()
			if cnonce == "" {
				return false
			}
		}
		fmt.Fprintf(&b, "%x:%s:%s:%s:%s:%x", ha1, nonce, nonceCount, cnonce, "auth", ha2)
	} else {
		fmt.Fprintf(&b, "%x:%s:%x", ha1, nonce, ha2)
	}
	response := md5.Sum(b.Bytes()) //nolint:gosec

	var sb strings.Builder
	fmt.Fprintf(&sb, "Digest username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\"",
		auth.Username, realm, nonce, uri)
	if hasQop {
		fmt.Fprintf(&sb, ", qop=auth, nc=%s, cnonce=\"%s\"", nonceCount, cnonce)
	}
	if alg, ok := params["algorithm"]; ok {
		fmt.Fprintf(&sb, ", algorithm=%s", alg)
	}
	fmt.Fprintf(&sb, ", response=\"%x\"", response)
	// opaque is echoed back only when the server supplied one.
	if opaque, ok := params["opaque"]; ok {
		fmt.Fprintf(&sb, ", opaque=\"%s\"", opaque)
	}

	req.Header.Set("Authorization", sb.String())

	return true
}

// parseChallenge parses the parameters out of an auth challenge header.
// e.g. Digest realm="testrealm@host.com", qop="auth,auth-int" ->
// {"realm": "testrealm@host.com", "qop": "auth,auth-int"}
//
// This isn't perfectly robust - some edge cases / weird headers may parse incorrectly.
func (auth *HTTPAuthentication) parseChallenge(challenge string) map[string]string {
	// Get rid of "Digest" prefix
	_, challenge, _ = strings.Cut(challenge, " ")

	parts := strings.Split(challenge, ",")
	// parts may have had a quoted comma, recombine if there's an unclosed quote.
	for i := 0; i < len(parts); {
		if strings.Count(parts[i], "\"")%2 == 1 && len(parts) > i+1 {
			parts[i] = parts[i] + "," + parts[i+1]
			parts = append(parts[:i+1], parts[i+2:]...)

			// Re-check the merged part: it may still have an unclosed quote.
			continue
		}
		i++
	}

	m := make(map[string]string, len(parts))
	for _, part := range parts {
		key, val, _ := strings.Cut(part, "=")
		key = strings.Trim(key, " ")
		val = strings.Trim(val, " ")
		// remove quotes from quoted string
		val = strings.Trim(val, "\"")
		m[key] = val
	}

	return m
}

// cnonce returns the client nonce for a Digest response: the result of
// CnonceFunc when it is set, otherwise 8 random bytes hex-encoded. It returns
// the empty string if random bytes cannot be read.
func (auth *HTTPAuthentication) cnonce() string {
	if auth.CnonceFunc != nil {
		return auth.CnonceFunc()
	}

	// for a default nonce use a random 8 bytes
	b := make([]byte, 8)
	if _, err := rand.Read(b); err != nil {
		return ""
	}

	return hex.EncodeToString(b)
}
// RoundTrip records req and returns a canned response: UnauthedResponse when
// the request has no Authorization header, AuthedReponse otherwise. If the
// selected response is nil, a bare 200 OK response is returned instead.
// The content of the Authorization header is not validated.
func (mt *mockTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	mt.Requests = append(mt.Requests, req)
	var resp *http.Response
	if req.Header.Get("Authorization") == "" {
		resp = mt.UnauthedResponse
	} else {
		resp = mt.AuthedReponse
	}
	if resp == nil {
		resp = &http.Response{StatusCode: http.StatusOK}
	}

	return resp, nil
}

// TestHTTPAuthentication drives HTTPAuthentication.Get against a mockTransport
// and checks, for each case, the Authorization header of every request that
// reached the transport and the status code of each response returned by Get.
// Get is called once per entry in expectedResponseCodes, so a case with two
// codes exercises behaviour across consecutive requests.
func TestHTTPAuthentication(t *testing.T) {
	tests := []struct {
		name                  string
		httpAuth              *datasource.HTTPAuthentication
		requestURL            string
		wwwAuth               []string // WWW-Authenticate values sent with the 401 for unauthenticated requests.
		expectedAuths         []string // expected Authentication headers received.
		expectedResponseCodes []int    // expected final response codes received (length may be less than expectedAuths)
	}{
		{
			name:                  "nil auth",
			httpAuth:              nil,
			requestURL:            "http://127.0.0.1/",
			wwwAuth:               []string{"Basic"},
			expectedAuths:         []string{""},
			expectedResponseCodes: []int{http.StatusUnauthorized},
		},
		{
			name:                  "default auth",
			httpAuth:              &datasource.HTTPAuthentication{},
			requestURL:            "http://127.0.0.1/",
			wwwAuth:               []string{"Basic"},
			expectedAuths:         []string{""},
			expectedResponseCodes: []int{http.StatusUnauthorized},
		},
		{
			name: "basic auth",
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthBasic},
				AlwaysAuth:       true,
				Username:         "Aladdin",
				Password:         "open sesame",
			},
			requestURL:            "http://127.0.0.1/",
			expectedAuths:         []string{"Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="},
			expectedResponseCodes: []int{http.StatusOK},
		},
		{
			name: "basic auth from token",
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthBasic},
				AlwaysAuth:       true,
				Username:         "ignored",
				Password:         "ignored",
				BasicAuth:        "QWxhZGRpbjpvcGVuIHNlc2FtZQ==",
			},
			requestURL:            "http://127.0.0.1/",
			expectedAuths:         []string{"Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="},
			expectedResponseCodes: []int{http.StatusOK},
		},
		{
			name: "basic auth missing username",
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthBasic},
				AlwaysAuth:       true,
				Username:         "",
				Password:         "ignored",
			},
			requestURL:            "http://127.0.0.1/",
			expectedAuths:         []string{""},
			expectedResponseCodes: []int{http.StatusOK},
		},
		{
			name: "basic auth missing password",
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthBasic},
				AlwaysAuth:       true,
				Username:         "ignored",
				Password:         "",
			},
			requestURL:            "http://127.0.0.1/",
			expectedAuths:         []string{""},
			expectedResponseCodes: []int{http.StatusOK},
		},
		{
			name: "basic auth not always",
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthBasic},
				AlwaysAuth:       false,
				BasicAuth:        "YTph",
			},
			requestURL:            "http://127.0.0.1/",
			wwwAuth:               []string{"Basic realm=\"User Visible Realm\""},
			expectedAuths:         []string{"", "Basic YTph"},
			expectedResponseCodes: []int{http.StatusOK},
		},
		{
			name: "bearer auth",
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthBearer},
				AlwaysAuth:       true,
				BearerToken:      "abcdefgh",
			},
			requestURL:            "http://127.0.0.1/",
			expectedAuths:         []string{"Bearer abcdefgh"},
			expectedResponseCodes: []int{http.StatusOK},
		},
		{
			name: "bearer auth not always",
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthBearer},
				AlwaysAuth:       false,
				BearerToken:      "abcdefgh",
			},
			requestURL:            "http://127.0.0.1/",
			wwwAuth:               []string{"Bearer"},
			expectedAuths:         []string{"", "Bearer abcdefgh"},
			expectedResponseCodes: []int{http.StatusOK},
		},
		{
			name: "always auth priority",
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthBasic, datasource.AuthBearer},
				AlwaysAuth:       true,
				BasicAuth:        "UseThisOne",
				BearerToken:      "NotThisOne",
			},
			requestURL:            "http://127.0.0.1/",
			expectedAuths:         []string{"Basic UseThisOne"},
			expectedResponseCodes: []int{http.StatusOK},
		},
		{
			name: "not always auth priority",
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthBearer, datasource.AuthDigest, datasource.AuthBasic},
				AlwaysAuth:       false,
				Username:         "DoNotUse",
				Password:         "ThisField",
				BearerToken:      "PleaseUseThis",
			},
			requestURL:            "http://127.0.0.1/",
			wwwAuth:               []string{"Basic", "Bearer"},
			expectedAuths:         []string{"", "Bearer PleaseUseThis"},
			expectedResponseCodes: []int{http.StatusOK},
		},
		{
			name: "digest auth",
			// Example from https://en.wikipedia.org/wiki/Digest_access_authentication#Example_with_explanation
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthDigest},
				AlwaysAuth:       false,
				Username:         "Mufasa",
				Password:         "Circle Of Life",
				CnonceFunc:       func() string { return "0a4f113b" },
			},
			requestURL: "https://127.0.0.1/dir/index.html",
			wwwAuth: []string{
				"Digest realm=\"testrealm@host.com\", " +
					"qop=\"auth,auth-int\", " +
					"nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", " +
					"opaque=\"5ccc069c403ebaf9f0171e9517f40e41\"",
			},
			expectedAuths: []string{
				"",
				// The order of these fields shouldn't actually matter
				"Digest username=\"Mufasa\", " +
					"realm=\"testrealm@host.com\", " +
					"nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", " +
					"uri=\"/dir/index.html\", " +
					"qop=auth, " +
					"nc=00000001, " +
					"cnonce=\"0a4f113b\", " +
					"response=\"6629fae49393a05397450978507c4ef1\", " +
					"opaque=\"5ccc069c403ebaf9f0171e9517f40e41\"",
			},
			expectedResponseCodes: []int{http.StatusOK},
		},
		{
			name: "digest auth rfc2069", // old spec, without qop header
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthDigest},
				AlwaysAuth:       false,
				Username:         "Mufasa",
				Password:         "Circle Of Life",
			},
			requestURL: "https://127.0.0.1/dir/index.html",
			wwwAuth: []string{
				"Digest realm=\"testrealm@host.com\", " +
					"nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", " +
					"opaque=\"5ccc069c403ebaf9f0171e9517f40e41\"",
			},
			expectedAuths: []string{
				"",
				// The order of these fields shouldn't actually matter
				"Digest username=\"Mufasa\", " +
					"realm=\"testrealm@host.com\", " +
					"nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", " +
					"uri=\"/dir/index.html\", " +
					"response=\"670fd8c2df070c60b045671b8b24ff02\", " +
					"opaque=\"5ccc069c403ebaf9f0171e9517f40e41\"",
			},
			expectedResponseCodes: []int{http.StatusOK},
		},
		{
			name: "digest auth mvn",
			// From what mvn sends.
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthDigest},
				AlwaysAuth:       false,
				Username:         "my-username",
				Password:         "cool-password",
				CnonceFunc:       func() string { return "f7ef2d457dabcd54" },
			},
			requestURL: "https://127.0.0.1:41565/commons-io/commons-io/1.0/commons-io-1.0.pom",
			wwwAuth: []string{
				"Digest realm=\"test@osv.dev\"," +
					"qop=\"auth\"," +
					"nonce=\"deadbeef\"," +
					"opaque=\"aaaa\"," +
					"algorithm=\"MD5-sess\"," +
					"domain=\"/test\"",
			},
			expectedAuths: []string{
				"",
				// The order of these fields shouldn't actually matter
				"Digest username=\"my-username\", " +
					"realm=\"test@osv.dev\", " +
					"nonce=\"deadbeef\", " +
					"uri=\"/commons-io/commons-io/1.0/commons-io-1.0.pom\", " +
					"qop=auth, " +
					"nc=00000001, " +
					"cnonce=\"f7ef2d457dabcd54\", " +
					"algorithm=MD5-sess, " +
					"response=\"15a35e7018a0fc7db05d31185e0d2c9e\", " +
					"opaque=\"aaaa\"",
			},
			expectedResponseCodes: []int{http.StatusOK},
		},
		{
			// Two Get calls: the first is challenged and retried with Basic,
			// the second is expected to send Basic straight away.
			name: "basic auth reuse on subsequent",
			httpAuth: &datasource.HTTPAuthentication{
				SupportedMethods: []datasource.HTTPAuthMethod{datasource.AuthDigest, datasource.AuthBasic},
				AlwaysAuth:       false,
				Username:         "user",
				Password:         "pass",
			},
			requestURL:            "http://127.0.0.1/",
			wwwAuth:               []string{"Basic realm=\"Realm\""},
			expectedAuths:         []string{"", "Basic dXNlcjpwYXNz", "Basic dXNlcjpwYXNz"},
			expectedResponseCodes: []int{http.StatusOK, http.StatusOK},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			mt := &mockTransport{}
			// Only cases that supply WWW-Authenticate values get a 401 for
			// unauthenticated requests; all other requests get the default 200.
			if len(tt.wwwAuth) > 0 {
				mt.UnauthedResponse = &http.Response{
					StatusCode: http.StatusUnauthorized,
					Header:     make(http.Header),
				}
				for _, v := range tt.wwwAuth {
					mt.UnauthedResponse.Header.Add("WWW-Authenticate", v)
				}
			}
			httpClient := &http.Client{Transport: mt}
			// One Get call per expected final response code.
			for _, want := range tt.expectedResponseCodes {
				resp, err := tt.httpAuth.Get(context.Background(), httpClient, tt.requestURL)
				if err != nil {
					t.Fatalf("error making request: %v", err)
				}
				defer resp.Body.Close()
				if resp.StatusCode != want {
					t.Errorf("authorization response status code got = %d, want %d", resp.StatusCode, want)
				}
			}
			// Every request seen by the transport, including challenge retries, is checked.
			if len(mt.Requests) != len(tt.expectedAuths) {
				t.Fatalf("unexpected number of requests got = %d, want %d", len(mt.Requests), len(tt.expectedAuths))
			}
			for i, want := range tt.expectedAuths {
				got := mt.Requests[i].Header.Get("Authorization")
				if got != want {
					t.Errorf("authorization header got = \"%s\", want \"%s\"", got, want)
				}
			}
		})
	}
}
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Package datasource provides clients to fetch data from different APIs. +package datasource + +import ( + "context" + "crypto/x509" + "fmt" + "sync" + "time" + + pb "deps.dev/api/v3" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" +) + +// CachedInsightsClient is a wrapper for InsightsClient that caches requests. +type CachedInsightsClient struct { + pb.InsightsClient + + // cache fields + mu sync.Mutex + cacheTimestamp *time.Time + packageCache *RequestCache[packageKey, *pb.Package] + versionCache *RequestCache[versionKey, *pb.Version] + requirementsCache *RequestCache[versionKey, *pb.Requirements] +} + +// Comparable types to use as map keys for cache. +type packageKey struct { + System pb.System + Name string +} + +func makePackageKey(k *pb.PackageKey) packageKey { + return packageKey{ + System: k.GetSystem(), + Name: k.GetName(), + } +} + +type versionKey struct { + System pb.System + Name string + Version string +} + +func makeVersionKey(k *pb.VersionKey) versionKey { + return versionKey{ + System: k.GetSystem(), + Name: k.GetName(), + Version: k.GetVersion(), + } +} + +// NewCachedInsightClient creates a CachedInsightsClient. +func NewCachedInsightsClient(addr string, userAgent string) (*CachedInsightsClient, error) { + certPool, err := x509.SystemCertPool() + if err != nil { + return nil, fmt.Errorf("getting system cert pool: %w", err) + } + creds := credentials.NewClientTLSFromCert(certPool, "") + dialOpts := []grpc.DialOption{grpc.WithTransportCredentials(creds)} + + if userAgent != "" { + dialOpts = append(dialOpts, grpc.WithUserAgent(userAgent)) + } + + conn, err := grpc.NewClient(addr, dialOpts...) 
+ if err != nil { + return nil, fmt.Errorf("dialling %q: %w", addr, err) + } + + return &CachedInsightsClient{ + InsightsClient: pb.NewInsightsClient(conn), + packageCache: NewRequestCache[packageKey, *pb.Package](), + versionCache: NewRequestCache[versionKey, *pb.Version](), + requirementsCache: NewRequestCache[versionKey, *pb.Requirements](), + }, nil +} + +// GetPackage returns metadata about a package by querying deps.dev API. +func (c *CachedInsightsClient) GetPackage(ctx context.Context, in *pb.GetPackageRequest, opts ...grpc.CallOption) (*pb.Package, error) { + return c.packageCache.Get(makePackageKey(in.GetPackageKey()), func() (*pb.Package, error) { + return c.InsightsClient.GetPackage(ctx, in, opts...) + }) +} + +// GetVersion returns metadata about a version by querying deps.dev API. +func (c *CachedInsightsClient) GetVersion(ctx context.Context, in *pb.GetVersionRequest, opts ...grpc.CallOption) (*pb.Version, error) { + return c.versionCache.Get(makeVersionKey(in.GetVersionKey()), func() (*pb.Version, error) { + return c.InsightsClient.GetVersion(ctx, in, opts...) + }) +} + +// GetRequirements returns requirements of the given version by querying deps.dev API. +func (c *CachedInsightsClient) GetRequirements(ctx context.Context, in *pb.GetRequirementsRequest, opts ...grpc.CallOption) (*pb.Requirements, error) { + return c.requirementsCache.Get(makeVersionKey(in.GetVersionKey()), func() (*pb.Requirements, error) { + return c.InsightsClient.GetRequirements(ctx, in, opts...) + }) +} diff --git a/internal/datasource/insights_cache.go b/internal/datasource/insights_cache.go new file mode 100644 index 00000000..977a651e --- /dev/null +++ b/internal/datasource/insights_cache.go @@ -0,0 +1,127 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package datasource provides clients to fetch data from different APIs. +package datasource + +import ( + "time" + + pb "deps.dev/api/v3" + "google.golang.org/protobuf/proto" +) + +type depsdevAPICache struct { + Timestamp *time.Time + PackageCache map[packageKey][]byte + VersionCache map[versionKey][]byte + RequirementsCache map[versionKey][]byte +} + +func protoMarshalCache[K comparable, V proto.Message](protoMap map[K]V) (map[K][]byte, error) { + byteMap := make(map[K][]byte) + for k, v := range protoMap { + b, err := proto.Marshal(v) + if err != nil { + return nil, err + } + byteMap[k] = b + } + + return byteMap, nil +} + +func protoUnmarshalCache[K comparable, V any, PV interface { + proto.Message + *V +}](byteMap map[K][]byte, protoMap *map[K]PV) error { + *protoMap = make(map[K]PV) + for k, b := range byteMap { + v := PV(new(V)) + if err := proto.Unmarshal(b, v); err != nil { + return err + } + (*protoMap)[k] = v + } + + return nil +} + +// GobEncode encodes cache to bytes. 
+func (c *CachedInsightsClient) GobEncode() ([]byte, error) { + var cache depsdevAPICache + c.mu.Lock() + defer c.mu.Unlock() + + if c.cacheTimestamp == nil { + now := time.Now().UTC() + c.cacheTimestamp = &now + } + + cache.Timestamp = c.cacheTimestamp + var err error + cache.PackageCache, err = protoMarshalCache(c.packageCache.GetMap()) + if err != nil { + return nil, err + } + cache.VersionCache, err = protoMarshalCache(c.versionCache.GetMap()) + if err != nil { + return nil, err + } + cache.RequirementsCache, err = protoMarshalCache(c.requirementsCache.GetMap()) + if err != nil { + return nil, err + } + + return gobMarshal(cache) +} + +// GobDecode decodes bytes to cache. +func (c *CachedInsightsClient) GobDecode(b []byte) error { + var cache depsdevAPICache + if err := gobUnmarshal(b, &cache); err != nil { + return err + } + + if cache.Timestamp != nil && time.Since(*cache.Timestamp) >= cacheExpiry { + // Cache expired + return nil + } + + c.mu.Lock() + defer c.mu.Unlock() + + c.cacheTimestamp = cache.Timestamp + + var pkgMap map[packageKey]*pb.Package + if err := protoUnmarshalCache(cache.PackageCache, &pkgMap); err != nil { + return err + } + + var verMap map[versionKey]*pb.Version + if err := protoUnmarshalCache(cache.VersionCache, &verMap); err != nil { + return err + } + + var reqMap map[versionKey]*pb.Requirements + if err := protoUnmarshalCache(cache.RequirementsCache, &reqMap); err != nil { + return err + } + + c.packageCache.SetMap(pkgMap) + c.versionCache.SetMap(verMap) + c.requirementsCache.SetMap(reqMap) + + return nil +} diff --git a/internal/datasource/maven_registry.go b/internal/datasource/maven_registry.go new file mode 100644 index 00000000..eac2479f --- /dev/null +++ b/internal/datasource/maven_registry.go @@ -0,0 +1,271 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package datasource provides clients to fetch data from different APIs.
package datasource

import (
	"bytes"
	"context"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"slices"
	"strings"
	"sync"
	"time"

	"deps.dev/util/maven"
	"deps.dev/util/semver"
	"golang.org/x/net/html/charset"
)

// MavenCentral is the URL of the Maven Central repository. It is the registry
// used by NewMavenRegistryAPIClient when no registry URL is given.
const MavenCentral = "https://repo.maven.apache.org/maven2"

// errAPIFailed is wrapped by the errors returned when a registry request
// fails or comes back with a status other than 200 OK.
var errAPIFailed = errors.New("API query failed")

// MavenRegistryAPIClient defines a client to fetch metadata from a Maven registry.
type MavenRegistryAPIClient struct {
	defaultRegistry MavenRegistry                  // The default registry that we are making requests
	registries      []MavenRegistry                // Additional registries specified to fetch projects
	registryAuths   map[string]*HTTPAuthentication // Authentication for the registries keyed by registry ID. From settings.xml

	// Cache fields
	mu             *sync.Mutex
	cacheTimestamp *time.Time // If set, this means we loaded from a cache
	responses      *RequestCache[string, response]
}

// response is the part of an HTTP response that is kept in the request cache:
// the status code and the full body.
type response struct {
	StatusCode int
	Body       []byte
}

// MavenRegistry describes a Maven registry to fetch projects and metadata from.
type MavenRegistry struct {
	URL    string
	Parsed *url.URL // URL parsed by NewMavenRegistryAPIClient or AddRegistry

	// Information from pom.xml
	ID               string
	ReleasesEnabled  bool
	SnapshotsEnabled bool
}

// NewMavenRegistryAPIClient returns a new MavenRegistryAPIClient.
+func NewMavenRegistryAPIClient(registry MavenRegistry) (*MavenRegistryAPIClient, error) { + if registry.URL == "" { + registry.URL = MavenCentral + registry.ID = "central" + } + u, err := url.Parse(registry.URL) + if err != nil { + return nil, fmt.Errorf("invalid Maven registry %s: %w", registry.URL, err) + } + registry.Parsed = u + + // TODO: allow for manual specification of settings files + globalSettings := ParseMavenSettings(globalMavenSettingsFile()) + userSettings := ParseMavenSettings(userMavenSettingsFile()) + + return &MavenRegistryAPIClient{ + // We assume only downloading releases is allowed on the default registry. + defaultRegistry: registry, + mu: &sync.Mutex{}, + responses: NewRequestCache[string, response](), + registryAuths: MakeMavenAuth(globalSettings, userSettings), + }, nil +} + +// WithoutRegistries makes MavenRegistryAPIClient including its cache but not registries. +func (m *MavenRegistryAPIClient) WithoutRegistries() *MavenRegistryAPIClient { + return &MavenRegistryAPIClient{ + defaultRegistry: m.defaultRegistry, + mu: m.mu, + cacheTimestamp: m.cacheTimestamp, + responses: m.responses, + } +} + +// AddRegistry adds the given registry to the list of registries if it has not been added. +func (m *MavenRegistryAPIClient) AddRegistry(registry MavenRegistry) error { + for _, reg := range m.registries { + if reg.ID == registry.ID { + return nil + } + } + + u, err := url.Parse(registry.URL) + if err != nil { + return err + } + + registry.Parsed = u + m.registries = append(m.registries, registry) + + return nil +} + +// GetRegistries returns the registries added to this client. +func (m *MavenRegistryAPIClient) GetRegistries() (registries []MavenRegistry) { + return m.registries +} + +// GetProject fetches a pom.xml specified by groupID, artifactID and version and parses it to maven.Project. +// Each registry in the list is tried until we find the project. 
+// For a snapshot version, version level metadata is used to find the extact version string. +// More about Maven Repository Metadata Model: https://maven.apache.org/ref/3.9.9/maven-repository-metadata/ +// More about Maven Metadata: https://maven.apache.org/repositories/metadata.html +func (m *MavenRegistryAPIClient) GetProject(ctx context.Context, groupID, artifactID, version string) (maven.Project, error) { + if !strings.HasSuffix(version, "-SNAPSHOT") { + for _, registry := range append(m.registries, m.defaultRegistry) { + if !registry.ReleasesEnabled { + continue + } + project, err := m.getProject(ctx, registry, groupID, artifactID, version, "") + if err == nil { + return project, nil + } + } + + return maven.Project{}, fmt.Errorf("failed to fetch Maven project %s:%s@%s", groupID, artifactID, version) + } + + for _, registry := range append(m.registries, m.defaultRegistry) { + // Fetch version metadata for snapshot versions from the registries enabling that. + if !registry.SnapshotsEnabled { + continue + } + metadata, err := m.getVersionMetadata(ctx, registry, groupID, artifactID, version) + if err != nil { + continue + } + + snapshot := "" + for _, sv := range metadata.Versioning.SnapshotVersions { + if sv.Extension == "pom" { + // We only look for pom.xml for project metadata. + snapshot = string(sv.Value) + break + } + } + + project, err := m.getProject(ctx, registry, groupID, artifactID, version, snapshot) + if err == nil { + return project, nil + } + } + + return maven.Project{}, fmt.Errorf("failed to fetch Maven project %s:%s@%s", groupID, artifactID, version) +} + +// GetVersions returns the list of available versions of a Maven package specified by groupID and artifactID. +// Versions found in all registries are unioned, then sorted by semver. 
+func (m *MavenRegistryAPIClient) GetVersions(ctx context.Context, groupID, artifactID string) ([]maven.String, error) { + var versions []maven.String + for _, registry := range append(m.registries, m.defaultRegistry) { + metadata, err := m.getArtifactMetadata(ctx, registry, groupID, artifactID) + if err != nil { + continue + } + versions = append(versions, metadata.Versioning.Versions...) + } + slices.SortFunc(versions, func(a, b maven.String) int { return semver.Maven.Compare(string(a), string(b)) }) + + return slices.Compact(versions), nil +} + +// getProject fetches a pom.xml specified by groupID, artifactID and version and parses it to maven.Project. +// For snapshot versions, the exact version value is specified by snapshot. +func (m *MavenRegistryAPIClient) getProject(ctx context.Context, registry MavenRegistry, groupID, artifactID, version, snapshot string) (maven.Project, error) { + if snapshot == "" { + snapshot = version + } + u := registry.Parsed.JoinPath(strings.ReplaceAll(groupID, ".", "/"), artifactID, version, fmt.Sprintf("%s-%s.pom", artifactID, snapshot)).String() + + var project maven.Project + if err := m.get(ctx, m.registryAuths[registry.ID], u, &project); err != nil { + return maven.Project{}, err + } + + return project, nil +} + +// getVersionMetadata fetches a version level maven-metadata.xml and parses it to maven.Metadata. +func (m *MavenRegistryAPIClient) getVersionMetadata(ctx context.Context, registry MavenRegistry, groupID, artifactID, version string) (maven.Metadata, error) { + u := registry.Parsed.JoinPath(strings.ReplaceAll(groupID, ".", "/"), artifactID, version, "maven-metadata.xml").String() + + var metadata maven.Metadata + if err := m.get(ctx, m.registryAuths[registry.ID], u, &metadata); err != nil { + return maven.Metadata{}, err + } + + return metadata, nil +} + +// GetArtifactMetadata fetches an artifact level maven-metadata.xml and parses it to maven.Metadata. 
+func (m *MavenRegistryAPIClient) getArtifactMetadata(ctx context.Context, registry MavenRegistry, groupID, artifactID string) (maven.Metadata, error) { + u := registry.Parsed.JoinPath(strings.ReplaceAll(groupID, ".", "/"), artifactID, "maven-metadata.xml").String() + + var metadata maven.Metadata + if err := m.get(ctx, m.registryAuths[registry.ID], u, &metadata); err != nil { + return maven.Metadata{}, err + } + + return metadata, nil +} + +func (m *MavenRegistryAPIClient) get(ctx context.Context, auth *HTTPAuthentication, url string, dst interface{}) error { + resp, err := m.responses.Get(url, func() (response, error) { + resp, err := auth.Get(ctx, http.DefaultClient, url) + if err != nil { + return response{}, fmt.Errorf("%w: Maven registry query failed: %w", errAPIFailed, err) + } + defer resp.Body.Close() + + if !slices.Contains([]int{http.StatusOK, http.StatusNotFound, http.StatusUnauthorized}, resp.StatusCode) { + // Only cache responses with Status OK, NotFound, or Unauthorized + return response{}, fmt.Errorf("%w: Maven registry query status: %d", errAPIFailed, resp.StatusCode) + } + + if b, err := io.ReadAll(resp.Body); err == nil { + return response{StatusCode: resp.StatusCode, Body: b}, nil + } + + return response{}, fmt.Errorf("failed to read body: %w", err) + }) + if err != nil { + return err + } + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("%w: Maven registry query status: %d", errAPIFailed, resp.StatusCode) + } + + return NewMavenDecoder(bytes.NewReader(resp.Body)).Decode(dst) +} + +// NewMavenDecoder returns an xml decoder with CharsetReader and Entity set. +func NewMavenDecoder(reader io.Reader) *xml.Decoder { + decoder := xml.NewDecoder(reader) + // Set charset reader for conversion from non-UTF-8 charset into UTF-8. + decoder.CharsetReader = charset.NewReaderLabel + // Set HTML entity map for translation between non-standard entity names + // and string replacements. 
+ decoder.Entity = xml.HTMLEntity + + return decoder +} diff --git a/internal/datasource/maven_registry_cache.go b/internal/datasource/maven_registry_cache.go new file mode 100644 index 00000000..e5c861b6 --- /dev/null +++ b/internal/datasource/maven_registry_cache.go @@ -0,0 +1,64 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package datasource provides clients to fetch data from different APIs. +package datasource + +import ( + "time" +) + +type mavenRegistryCache struct { + Timestamp *time.Time + Responses map[string]response // url -> response +} + +// GobEncode encodes cache to bytes. +func (m *MavenRegistryAPIClient) GobEncode() ([]byte, error) { + m.mu.Lock() + defer m.mu.Unlock() + + if m.cacheTimestamp == nil { + now := time.Now().UTC() + m.cacheTimestamp = &now + } + + cache := mavenRegistryCache{ + Timestamp: m.cacheTimestamp, + Responses: m.responses.GetMap(), + } + + return gobMarshal(&cache) +} + +// GobDecode encodes bytes to cache. 
+func (m *MavenRegistryAPIClient) GobDecode(b []byte) error { + var cache mavenRegistryCache + if err := gobUnmarshal(b, &cache); err != nil { + return err + } + + if cache.Timestamp != nil && time.Since(*cache.Timestamp) >= cacheExpiry { + // Cache expired + return nil + } + + m.mu.Lock() + defer m.mu.Unlock() + + m.cacheTimestamp = cache.Timestamp + m.responses.SetMap(cache.Responses) + + return nil +} diff --git a/internal/datasource/maven_registry_test.go b/internal/datasource/maven_registry_test.go new file mode 100644 index 00000000..de03cd4a --- /dev/null +++ b/internal/datasource/maven_registry_test.go @@ -0,0 +1,194 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package datasource_test + +import ( + "context" + "reflect" + "testing" + + "deps.dev/util/maven" + "github.com/google/osv-scalibr/internal/datasource" + "github.com/google/osv-scalibr/internal/resolution/clienttest" +) + +func TestGetProject(t *testing.T) { + srv := clienttest.NewMockHTTPServer(t) + client, _ := datasource.NewMavenRegistryAPIClient(datasource.MavenRegistry{URL: srv.URL, ReleasesEnabled: true}) + srv.SetResponse(t, "org/example/x.y.z/1.0.0/x.y.z-1.0.0.pom", []byte(` + + org.example + x.y.z + 1.0.0 + + `)) + + got, err := client.GetProject(context.Background(), "org.example", "x.y.z", "1.0.0") + if err != nil { + t.Fatalf("failed to get Maven project %s:%s verion %s: %v", "org.example", "x.y.z", "1.0.0", err) + } + want := maven.Project{ + ProjectKey: maven.ProjectKey{ + GroupID: "org.example", + ArtifactID: "x.y.z", + Version: "1.0.0", + }, + } + if !reflect.DeepEqual(got, want) { + t.Errorf("GetProject(%s, %s, %s):\ngot %v\nwant %v\n", "org.example", "x.y.z", "1.0.0", got, want) + } +} + +func TestGetProjectSnapshot(t *testing.T) { + srv := clienttest.NewMockHTTPServer(t) + client, _ := datasource.NewMavenRegistryAPIClient(datasource.MavenRegistry{URL: srv.URL, SnapshotsEnabled: true}) + srv.SetResponse(t, "org/example/x.y.z/3.3.1-SNAPSHOT/maven-metadata.xml", []byte(` + + org.example + x.y.z + + + 20230302.052731 + 9 + + 20230302052731 + + + jar + 3.3.1-20230302.052731-9 + 20230302052731 + + + pom + 3.3.1-20230302.052731-9 + 20230302052731 + + + + + `)) + srv.SetResponse(t, "org/example/x.y.z/3.3.1-SNAPSHOT/x.y.z-3.3.1-20230302.052731-9.pom", []byte(` + + org.example + x.y.z + 3.3.1-SNAPSHOT + + `)) + + got, err := client.GetProject(context.Background(), "org.example", "x.y.z", "3.3.1-SNAPSHOT") + if err != nil { + t.Fatalf("failed to get Maven project %s:%s verion %s: %v", "org.example", "x.y.z", "3.3.1-SNAPSHOT", err) + } + want := maven.Project{ + ProjectKey: maven.ProjectKey{ + GroupID: "org.example", + ArtifactID: "x.y.z", + Version: 
"3.3.1-SNAPSHOT", + }, + } + if !reflect.DeepEqual(got, want) { + t.Errorf("GetProject(%s, %s, %s):\ngot %v\nwant %v\n", "org.example", "x.y.z", "3.3.1-SNAPSHOT", got, want) + } +} + +func TestMultipleRegistry(t *testing.T) { + dft := clienttest.NewMockHTTPServer(t) + client, _ := datasource.NewMavenRegistryAPIClient(datasource.MavenRegistry{URL: dft.URL, ReleasesEnabled: true}) + dft.SetResponse(t, "org/example/x.y.z/maven-metadata.xml", []byte(` + + org.example + x.y.z + + 3.0.0 + 3.0.0 + + 2.0.0 + 3.0.0 + + + + `)) + dft.SetResponse(t, "org/example/x.y.z/2.0.0/x.y.z-2.0.0.pom", []byte(` + + org.example + x.y.z + 2.0.0 + + `)) + dft.SetResponse(t, "org/example/x.y.z/3.0.0/x.y.z-3.0.0.pom", []byte(` + + org.example + x.y.z + 3.0.0 + + `)) + + srv := clienttest.NewMockHTTPServer(t) + if err := client.AddRegistry(datasource.MavenRegistry{URL: srv.URL, ReleasesEnabled: true}); err != nil { + t.Fatalf("failed to add registry %s: %v", srv.URL, err) + } + srv.SetResponse(t, "org/example/x.y.z/maven-metadata.xml", []byte(` + + org.example + x.y.z + + 2.0.0 + 2.0.0 + + 1.0.0 + 2.0.0 + + + + `)) + srv.SetResponse(t, "org/example/x.y.z/1.0.0/x.y.z-1.0.0.pom", []byte(` + + org.example + x.y.z + 1.0.0 + + `)) + srv.SetResponse(t, "org/example/x.y.z/2.0.0/x.y.z-2.0.0.pom", []byte(` + + org.example + x.y.z + 2.0.0 + + `)) + + gotProj, err := client.GetProject(context.Background(), "org.example", "x.y.z", "1.0.0") + if err != nil { + t.Fatalf("failed to get Maven project %s:%s verion %s: %v", "org.example", "x.y.z", "1.0.0", err) + } + wantProj := maven.Project{ + ProjectKey: maven.ProjectKey{ + GroupID: "org.example", + ArtifactID: "x.y.z", + Version: "1.0.0", + }, + } + if !reflect.DeepEqual(gotProj, wantProj) { + t.Errorf("GetProject(%s, %s, %s):\ngot %v\nwant %v\n", "org.example", "x.y.z", "1.0.0", gotProj, wantProj) + } + + gotVersions, err := client.GetVersions(context.Background(), "org.example", "x.y.z") + if err != nil { + t.Fatalf("failed to get versions for Maven 
package %s:%s: %v", "org.example", "x.y.z", err) + } + wantVersions := []maven.String{"1.0.0", "2.0.0", "3.0.0"} + if !reflect.DeepEqual(gotVersions, wantVersions) { + t.Errorf("GetVersions(%s, %s):\ngot %v\nwant %v\n", "org.example", "x.y.z", gotVersions, wantVersions) + } +} diff --git a/internal/datasource/maven_settings.go b/internal/datasource/maven_settings.go new file mode 100644 index 00000000..d268ca4a --- /dev/null +++ b/internal/datasource/maven_settings.go @@ -0,0 +1,140 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package datasource provides clients to fetch data from different APIs. +package datasource + +import ( + "encoding/xml" + "os" + "os/exec" + "path/filepath" + "regexp" + "runtime" + "strings" + "unicode" +) + +// Maven settings.xml file parsing for registry authentication. +// https://maven.apache.org/settings.html + +type MavenSettingsXML struct { + Servers []MavenSettingsXMLServer `xml:"servers>server"` +} + +type MavenSettingsXMLServer struct { + ID string `xml:"id"` + Username string `xml:"username"` + Password string `xml:"password"` +} + +// ParseMavenSettings parses Maven settings at the given path. 
+func ParseMavenSettings(path string) MavenSettingsXML { + f, err := os.Open(path) + if err != nil { + return MavenSettingsXML{} + } + defer f.Close() + + var settings MavenSettingsXML + if err := xml.NewDecoder(f).Decode(&settings); err != nil { + return MavenSettingsXML{} + } + + // interpolate strings with environment variables only + // system properties are too hard to determine. + re := regexp.MustCompile(`\${env\.[^}]*}`) + replFn := func(match string) string { + // grab just the environment variable string + env := match[len("${env.") : len(match)-1] + + // Environment variables on Windows are case-insensitive, + // but Maven will only replace them if they are in all-caps. + if runtime.GOOS == "windows" && strings.ContainsFunc(env, unicode.IsLower) { + return match // No replacement. + } + + if val, ok := os.LookupEnv(env); ok { + return val + } + + // Don't do any replacement if the environment variable isn't set + return match + } + for i := range settings.Servers { + settings.Servers[i].ID = re.ReplaceAllStringFunc(settings.Servers[i].ID, replFn) + settings.Servers[i].Username = re.ReplaceAllStringFunc(settings.Servers[i].Username, replFn) + settings.Servers[i].Password = re.ReplaceAllStringFunc(settings.Servers[i].Password, replFn) + } + + return settings +} + +// TODO(#409): How to use with virtual filesystem + environment variables. 
+func globalMavenSettingsFile() string { + // ${maven.home}/conf/settings.xml + // Find ${maven.home} from the installed mvn binary + mvnExec, err := exec.LookPath("mvn") + if err != nil { + return "" + } + mvnExec, err = filepath.EvalSymlinks(mvnExec) + if err != nil { + return "" + } + + settings := filepath.Join(filepath.Dir(mvnExec), "..", "conf", "settings.xml") + settings, err = filepath.Abs(settings) + if err != nil { + return "" + } + + return settings +} + +func userMavenSettingsFile() string { + // ${user.home}/.m2/settings.xml + home, err := os.UserHomeDir() + if err != nil { + return "" + } + + return filepath.Join(home, ".m2", "settings.xml") +} + +var mavenSupportedAuths = []HTTPAuthMethod{AuthDigest, AuthBasic} + +// MakeMavenAuth returns a map of Maven authentication information index by repository ID. +func MakeMavenAuth(globalSettings, userSettings MavenSettingsXML) map[string]*HTTPAuthentication { + auth := make(map[string]*HTTPAuthentication) + for _, s := range globalSettings.Servers { + auth[s.ID] = &HTTPAuthentication{ + SupportedMethods: mavenSupportedAuths, + AlwaysAuth: false, + Username: s.Username, + Password: s.Password, + } + } + + for _, s := range userSettings.Servers { + auth[s.ID] = &HTTPAuthentication{ + SupportedMethods: mavenSupportedAuths, + AlwaysAuth: false, + Username: s.Username, + Password: s.Password, + } + } + + return auth +} diff --git a/internal/datasource/maven_settings_test.go b/internal/datasource/maven_settings_test.go new file mode 100644 index 00000000..4e44c87d --- /dev/null +++ b/internal/datasource/maven_settings_test.go @@ -0,0 +1,129 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package datasource_test + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/google/osv-scalibr/internal/datasource" +) + +func TestParseMavenSettings(t *testing.T) { + t.Setenv("MAVEN_SETTINGS_TEST_USR", "UsErNaMe") + t.Setenv("MAVEN_SETTINGS_TEST_PWD", "P455W0RD") + t.Setenv("MAVEN_SETTINGS_TEST_SID", "my-cool-server") + t.Setenv("MAVEN_SETTINGS_TEST_NIL", "") + want := datasource.MavenSettingsXML{ + Servers: []datasource.MavenSettingsXMLServer{ + { + ID: "server1", + Username: "user", + Password: "pass", + }, + { + ID: "server2", + Username: "UsErNaMe", + Password: "~~P455W0RD~~", + }, + { + ID: "my-cool-server", + Username: "${env.maven_settings_test_usr}-", + Password: "${env.MAVEN_SETTINGS_TEST_BAD}", + }, + }, + } + + got := datasource.ParseMavenSettings("./fixtures/maven_settings/settings.xml") + + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("ParseMavenSettings() (-want +got):\n%s", diff) + } +} + +func TestMakeMavenAuth(t *testing.T) { + globalSettings := datasource.MavenSettingsXML{ + Servers: []datasource.MavenSettingsXMLServer{ + { + ID: "global", + Username: "global-user", + Password: "global-pass", + }, + { + ID: "overwrite1", + Username: "original-user", + Password: "original-pass", + }, + { + ID: "overwrite2", + Username: "user-to-be-deleted", + // no password + }, + }, + } + userSettings := datasource.MavenSettingsXML{ + Servers: []datasource.MavenSettingsXMLServer{ + { + ID: "user", + Username: "user", + Password: "pass", + }, + { + ID: "overwrite1", + 
Username: "new-user", + Password: "new-pass", + }, + { + ID: "overwrite2", + // no username + Password: "lone-password", + }, + }, + } + + wantSupportedMethods := []datasource.HTTPAuthMethod{datasource.AuthDigest, datasource.AuthBasic} + want := map[string]*datasource.HTTPAuthentication{ + "global": { + SupportedMethods: wantSupportedMethods, + AlwaysAuth: false, + Username: "global-user", + Password: "global-pass", + }, + "user": { + SupportedMethods: wantSupportedMethods, + AlwaysAuth: false, + Username: "user", + Password: "pass", + }, + "overwrite1": { + SupportedMethods: wantSupportedMethods, + AlwaysAuth: false, + Username: "new-user", + Password: "new-pass", + }, + "overwrite2": { + SupportedMethods: wantSupportedMethods, + AlwaysAuth: false, + Username: "", + Password: "lone-password", + }, + } + + got := datasource.MakeMavenAuth(globalSettings, userSettings) + if diff := cmp.Diff(want, got, cmpopts.IgnoreUnexported(datasource.HTTPAuthentication{})); diff != "" { + t.Errorf("MakeMavenAuth() (-want +got):\n%s", diff) + } +} diff --git a/internal/mavenutil/fixtures/my-app/pom.xml b/internal/mavenutil/fixtures/my-app/pom.xml new file mode 100644 index 00000000..d01d0478 --- /dev/null +++ b/internal/mavenutil/fixtures/my-app/pom.xml @@ -0,0 +1,8 @@ + + + + org.test + my-app + 1.0.0 + + diff --git a/internal/mavenutil/fixtures/parent/pom.xml b/internal/mavenutil/fixtures/parent/pom.xml new file mode 100644 index 00000000..fe2e50f5 --- /dev/null +++ b/internal/mavenutil/fixtures/parent/pom.xml @@ -0,0 +1,8 @@ + + + + org.test + parent-pom + 1.0.0 + + diff --git a/internal/mavenutil/fixtures/pom.xml b/internal/mavenutil/fixtures/pom.xml new file mode 100644 index 00000000..1d289685 --- /dev/null +++ b/internal/mavenutil/fixtures/pom.xml @@ -0,0 +1,8 @@ + + + + org.test + test + 1.0.0 + + diff --git a/internal/mavenutil/maven.go b/internal/mavenutil/maven.go new file mode 100644 index 00000000..e31a9f52 --- /dev/null +++ b/internal/mavenutil/maven.go @@ -0,0 +1,196 
@@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mavenutil provides utilities for merging Maven pom/xml. +package mavenutil + +import ( + "context" + "errors" + "fmt" + "path/filepath" + + "deps.dev/util/maven" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/internal/datasource" +) + +const ( + OriginManagement = "management" + OriginParent = "parent" + OriginPlugin = "plugin" + OriginProfile = "profile" +) + +// MaxParent sets a limit on the number of parents to avoid indefinite loop. +const MaxParent = 100 + +// MergeParents parses local accessible parent pom.xml or fetches it from +// upstream, merges into root project, then interpolate the properties. +// - result holds the Maven project to merge into, this is modified in place. +// - current holds the current parent project to merge. +// - parentIndex indicates the index of the current parent project, which is +// used to check if the packaging has to be `pom`. +// - allowLocal indicates whether parsing local parent pom.xml is allowed. +// - path holds the path to the current pom.xml, which is used to compute the +// relative path of parent. 
+func MergeParents(ctx context.Context, input *filesystem.ScanInput, mavenClient *datasource.MavenRegistryAPIClient, result *maven.Project, current maven.Parent, initialParentIndex int, allowLocal bool) error { + currentPath := "" + if input != nil { + currentPath = input.Path + } + + visited := make(map[maven.ProjectKey]struct{}, MaxParent) + for n := initialParentIndex; n < MaxParent; n++ { + if current.GroupID == "" || current.ArtifactID == "" || current.Version == "" { + break + } + if _, ok := visited[current.ProjectKey]; ok { + // A cycle of parents is detected + return errors.New("a cycle of parents is detected") + } + visited[current.ProjectKey] = struct{}{} + + var proj maven.Project + parentFoundLocally := false + if allowLocal { + var parentPath string + var err error + parentFoundLocally, parentPath, err = loadParentLocal(input, current, currentPath, &proj) + if err != nil { + return fmt.Errorf("failed to load parent at %s: %w", currentPath, err) + } + if parentPath != "" { + currentPath = parentPath + } + } + if !parentFoundLocally { + // Once we fetch a parent pom.xml from upstream, we should not + // allow parsing parent pom.xml locally anymore. + allowLocal = false + var err error + proj, err = loadParentRemote(ctx, mavenClient, current, n) + if err != nil { + return fmt.Errorf("failed to load parent from remote: %w", err) + } + } + // Use an empty JDK string and ActivationOS here to merge the default profiles. 
+ if err := result.MergeProfiles("", maven.ActivationOS{}); err != nil { + return fmt.Errorf("failed to merge default profiles: %w", err) + } + for _, repo := range proj.Repositories { + if err := mavenClient.AddRegistry(datasource.MavenRegistry{ + URL: string(repo.URL), + ID: string(repo.ID), + ReleasesEnabled: repo.Releases.Enabled.Boolean(), + SnapshotsEnabled: repo.Snapshots.Enabled.Boolean(), + }); err != nil { + return fmt.Errorf("failed to add registry %s: %w", repo.URL, err) + } + } + result.MergeParent(proj) + current = proj.Parent + } + // Interpolate the project to resolve the properties. + return result.Interpolate() +} + +// loadParentLocal loads a parent Maven project from local file system +// and returns whether parent is found locally as well as parent path. +func loadParentLocal(input *filesystem.ScanInput, parent maven.Parent, path string, result *maven.Project) (bool, string, error) { + parentPath := parentPOMPath(input, path, string(parent.RelativePath)) + if parentPath == "" { + return false, "", nil + } + f, err := input.FS.Open(parentPath) + if err != nil { + return false, "", fmt.Errorf("failed to open parent file %s: %w", parentPath, err) + } + err = datasource.NewMavenDecoder(f).Decode(result) + if closeErr := f.Close(); closeErr != nil { + return false, "", fmt.Errorf("failed to close file: %w", err) + } + if err != nil { + return false, "", fmt.Errorf("failed to unmarshal project: %w", err) + } + return true, parentPath, nil +} + +// loadParentRemote loads a parent from remote registry. 
+func loadParentRemote(ctx context.Context, mavenClient *datasource.MavenRegistryAPIClient, parent maven.Parent, parentIndex int) (maven.Project, error) { + proj, err := mavenClient.GetProject(ctx, string(parent.GroupID), string(parent.ArtifactID), string(parent.Version)) + if err != nil { + return maven.Project{}, fmt.Errorf("failed to get Maven project %s:%s:%s: %w", parent.GroupID, parent.ArtifactID, parent.Version, err) + } + if parentIndex > 0 && proj.Packaging != "pom" { + // A parent project should only be of "pom" packaging type. + return maven.Project{}, fmt.Errorf("invalid packaging for parent project %s", proj.Packaging) + } + if ProjectKey(proj) != parent.ProjectKey { + // The identifiers in parent does not match what we want. + return maven.Project{}, fmt.Errorf("parent identifiers mismatch: %v, expect %v", proj.ProjectKey, parent.ProjectKey) + } + return proj, nil +} + +// ProjectKey returns a project key with empty groupId/version +// filled by corresponding fields in parent. +func ProjectKey(proj maven.Project) maven.ProjectKey { + if proj.GroupID == "" { + proj.GroupID = proj.Parent.GroupID + } + if proj.Version == "" { + proj.Version = proj.Parent.Version + } + + return proj.ProjectKey +} + +// parentPOMPath returns the path of a parent pom.xml. +// Maven looks for the parent POM first in 'relativePath', then +// the local repository '../pom.xml', and lastly in the remote repo. +// An empty string is returned if failed to resolve the parent path. +func parentPOMPath(input *filesystem.ScanInput, currentPath, relativePath string) string { + if relativePath == "" { + relativePath = "../pom.xml" + } + + path := filepath.ToSlash(filepath.Join(filepath.Dir(currentPath), relativePath)) + if info, err := input.FS.Stat(path); err == nil { + if !info.IsDir() { + return path + } + // Current path is a directory, so look for pom.xml in the directory. 
+ path = filepath.ToSlash(filepath.Join(path, "pom.xml")) + if _, err := input.FS.Stat(path); err == nil { + return path + } + } + + return "" +} + +// GetDependencyManagement returns managed dependencies in the specified Maven project by fetching remote pom.xml. +func GetDependencyManagement(ctx context.Context, client *datasource.MavenRegistryAPIClient, groupID, artifactID, version maven.String) (maven.DependencyManagement, error) { + root := maven.Parent{ProjectKey: maven.ProjectKey{GroupID: groupID, ArtifactID: artifactID, Version: version}} + var result maven.Project + // To get dependency management from another project, we need the + // project with parents merged, so we call MergeParents by passing + // an empty project. + if err := MergeParents(ctx, nil, client.WithoutRegistries(), &result, root, 0, false); err != nil { + return maven.DependencyManagement{}, err + } + + return result.DependencyManagement, nil +} diff --git a/internal/mavenutil/maven_test.go b/internal/mavenutil/maven_test.go new file mode 100644 index 00000000..50b0a26e --- /dev/null +++ b/internal/mavenutil/maven_test.go @@ -0,0 +1,84 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mavenutil + +import ( + "path/filepath" + "testing" + + "github.com/google/osv-scalibr/testing/extracttest" +) + +func TestParentPOMPath(t *testing.T) { + input := extracttest.GenerateScanInputMock(t, extracttest.ScanInputMockConfig{ + Path: filepath.Join("fixtures", "my-app", "pom.xml"), + }) + defer extracttest.CloseTestScanInput(t, input) + + tests := []struct { + currentPath, relativePath string + want string + }{ + // fixtures + // |- maven + // | |- my-app + // | | |- pom.xml + // | |- parent + // | | |- pom.xml + // |- pom.xml + { + // Parent path is specified correctly. + currentPath: filepath.Join("fixtures", "my-app", "pom.xml"), + relativePath: "../parent/pom.xml", + want: filepath.Join("fixtures", "parent", "pom.xml"), + }, + { + // Wrong file name is specified in relative path. + currentPath: filepath.Join("fixtures", "my-app", "pom.xml"), + relativePath: "../parent/abc.xml", + want: "", + }, + { + // Wrong directory is specified in relative path. + currentPath: filepath.Join("fixtures", "my-app", "pom.xml"), + relativePath: "../not-found/pom.xml", + want: "", + }, + { + // Only directory is specified. + currentPath: filepath.Join("fixtures", "my-app", "pom.xml"), + relativePath: "../parent", + want: filepath.Join("fixtures", "parent", "pom.xml"), + }, + { + // Parent relative path is default to '../pom.xml'. + currentPath: filepath.Join("fixtures", "my-app", "pom.xml"), + relativePath: "", + want: filepath.Join("fixtures", "pom.xml"), + }, + { + // No pom.xml is found even in the default path. 
+ currentPath: filepath.Join("fixtures", "pom.xml"), + relativePath: "", + want: "", + }, + } + for _, tt := range tests { + got := parentPOMPath(&input, tt.currentPath, tt.relativePath) + if got != filepath.ToSlash(tt.want) { + t.Errorf("ParentPOMPath(%s, %s): got %s, want %s", tt.currentPath, tt.relativePath, got, tt.want) + } + } +} diff --git a/internal/resolution/client/client.go b/internal/resolution/client/client.go new file mode 100644 index 00000000..2dec0623 --- /dev/null +++ b/internal/resolution/client/client.go @@ -0,0 +1,33 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package client provides clients required by dependency resolution. +package client + +import ( + "deps.dev/util/resolve" +) + +// DependencyClient is the interface of the client required by dependency resolution. +type DependencyClient interface { + resolve.Client + // WriteCache writes a manifest-specific resolution cache. + WriteCache(filepath string) error + // LoadCache loads a manifest-specific resolution cache. + LoadCache(filepath string) error + // AddRegistries adds the specified registries to fetch data. 
+ AddRegistries(registries []Registry) error +} + +type Registry interface{} diff --git a/internal/resolution/client/depsdev_client.go b/internal/resolution/client/depsdev_client.go new file mode 100644 index 00000000..a383b011 --- /dev/null +++ b/internal/resolution/client/depsdev_client.go @@ -0,0 +1,67 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package client provides clients required by dependency resolution. +package client + +import ( + "encoding/gob" + "os" + + "deps.dev/util/resolve" + "github.com/google/osv-scalibr/internal/datasource" +) + +const depsDevCacheExt = ".resolve.deps" + +// DepsDevClient is a ResolutionClient wrapping the official resolve.APIClient +type DepsDevClient struct { + resolve.APIClient + c *datasource.CachedInsightsClient +} + +// NewDepsDevClient creates a new DepsDevClient. +func NewDepsDevClient(addr string, userAgent string) (*DepsDevClient, error) { + c, err := datasource.NewCachedInsightsClient(addr, userAgent) + if err != nil { + return nil, err + } + + return &DepsDevClient{APIClient: *resolve.NewAPIClient(c), c: c}, nil +} + +// AddRegistries is a placeholder here for DepsDevClient. +func (d *DepsDevClient) AddRegistries(_ []Registry) error { return nil } + +// WriteCache writes cache at the given path. 
+func (d *DepsDevClient) WriteCache(path string) error { + f, err := os.Create(path + depsDevCacheExt) + if err != nil { + return err + } + defer f.Close() + + return gob.NewEncoder(f).Encode(d.c) +} + +// LoadCache loads the cache at the given path. +func (d *DepsDevClient) LoadCache(path string) error { + f, err := os.Open(path + depsDevCacheExt) + if err != nil { + return err + } + defer f.Close() + + return gob.NewDecoder(f).Decode(&d.c) +} diff --git a/internal/resolution/client/maven_registry_client.go b/internal/resolution/client/maven_registry_client.go new file mode 100644 index 00000000..dfcd4aec --- /dev/null +++ b/internal/resolution/client/maven_registry_client.go @@ -0,0 +1,181 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package client provides clients required by dependency resolution. +package client + +import ( + "context" + "errors" + "fmt" + "strings" + + "deps.dev/util/maven" + "deps.dev/util/resolve" + "deps.dev/util/resolve/version" + "github.com/google/osv-scalibr/internal/datasource" + "github.com/google/osv-scalibr/internal/mavenutil" +) + +type MavenRegistryClient struct { + api *datasource.MavenRegistryAPIClient +} + +// NewMavenRegistryClient makes a new MavenRegistryClient. 
+func NewMavenRegistryClient(registry string) (*MavenRegistryClient, error) { + client, err := datasource.NewMavenRegistryAPIClient(datasource.MavenRegistry{URL: registry, ReleasesEnabled: true}) + if err != nil { + return nil, err + } + + return &MavenRegistryClient{api: client}, nil +} + +// Version returns metadata of a version specified by the VersionKey. +func (c *MavenRegistryClient) Version(ctx context.Context, vk resolve.VersionKey) (resolve.Version, error) { + g, a, found := strings.Cut(vk.Name, ":") + if !found { + return resolve.Version{}, fmt.Errorf("invalid Maven package name %s", vk.Name) + } + proj, err := c.api.GetProject(ctx, g, a, vk.Version) + if err != nil { + return resolve.Version{}, err + } + + regs := make([]string, len(proj.Repositories)) + // Repositories are served as dependency registries. + // https://github.com/google/deps.dev/blob/main/util/resolve/api.go#L106 + for i, repo := range proj.Repositories { + regs[i] = "dep:" + string(repo.URL) + } + var attr version.AttrSet + if len(regs) > 0 { + attr.SetAttr(version.Registries, strings.Join(regs, "|")) + } + + return resolve.Version{VersionKey: vk, AttrSet: attr}, nil +} + +// Versions returns all the available versions of the package specified by the given PackageKey. +// TODO: we should also include versions not listed in the metadata file +// There exist versions in the repository but not listed in the metada file, +// for example version 20030203.000550 of package commons-io:commons-io +// https://repo1.maven.org/maven2/commons-io/commons-io/20030203.000550/. +// A package may depend on such version if a soft requirement of this version +// is declared. +// We need to find out if there are such versions and include them in the +// returned versions. 
+func (c *MavenRegistryClient) Versions(ctx context.Context, pk resolve.PackageKey) ([]resolve.Version, error) { + if pk.System != resolve.Maven { + return nil, fmt.Errorf("wrong system: %v", pk.System) + } + + g, a, found := strings.Cut(pk.Name, ":") + if !found { + return nil, fmt.Errorf("invalid Maven package name %s", pk.Name) + } + versions, err := c.api.GetVersions(ctx, g, a) + if err != nil { + return nil, err + } + + vks := make([]resolve.Version, len(versions)) + for i, v := range versions { + vks[i] = resolve.Version{ + VersionKey: resolve.VersionKey{ + PackageKey: pk, + Version: string(v), + VersionType: resolve.Concrete, + }} + } + + return vks, nil +} + +// Requirements returns requirements of a version specified by the VersionKey. +func (c *MavenRegistryClient) Requirements(ctx context.Context, vk resolve.VersionKey) ([]resolve.RequirementVersion, error) { + if vk.System != resolve.Maven { + return nil, fmt.Errorf("wrong system: %v", vk.System) + } + + g, a, found := strings.Cut(vk.Name, ":") + if !found { + return nil, fmt.Errorf("invalid Maven package name %s", vk.Name) + } + proj, err := c.api.GetProject(ctx, g, a, vk.Version) + if err != nil { + return nil, err + } + + // Only merge default profiles by passing empty JDK and OS information. + if err := proj.MergeProfiles("", maven.ActivationOS{}); err != nil { + return nil, err + } + + // We should not add registries defined in dependencies pom.xml files. + apiWithoutRegistries := c.api.WithoutRegistries() + // We need to merge parents for potential dependencies in parents. 
+ if err := mavenutil.MergeParents(ctx, nil, apiWithoutRegistries, &proj, proj.Parent, 1, false); err != nil { + return nil, err + } + proj.ProcessDependencies(func(groupID, artifactID, version maven.String) (maven.DependencyManagement, error) { + return mavenutil.GetDependencyManagement(ctx, apiWithoutRegistries, groupID, artifactID, version) + }) + + reqs := make([]resolve.RequirementVersion, 0, len(proj.Dependencies)) + for _, d := range proj.Dependencies { + reqs = append(reqs, resolve.RequirementVersion{ + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: d.Name(), + }, + VersionType: resolve.Requirement, + Version: string(d.Version), + }, + Type: resolve.MavenDepType(d, ""), + }) + } + + return reqs, nil +} + +// MatchingVersions returns versions matching the requirement specified by the VersionKey. +func (c *MavenRegistryClient) MatchingVersions(ctx context.Context, vk resolve.VersionKey) ([]resolve.Version, error) { + if vk.System != resolve.Maven { + return nil, fmt.Errorf("wrong system: %v", vk.System) + } + + versions, err := c.Versions(ctx, vk.PackageKey) + if err != nil { + return nil, err + } + + return resolve.MatchRequirement(vk, versions), nil +} + +// AddRegistries adds registries to the MavenRegistryClient. 
+func (c *MavenRegistryClient) AddRegistries(registries []Registry) error { + for _, reg := range registries { + specific, ok := reg.(datasource.MavenRegistry) + if !ok { + return errors.New("invalid Maven registry information") + } + if err := c.api.AddRegistry(specific); err != nil { + return err + } + } + + return nil +} diff --git a/internal/resolution/client/override_client.go b/internal/resolution/client/override_client.go new file mode 100644 index 00000000..876df715 --- /dev/null +++ b/internal/resolution/client/override_client.go @@ -0,0 +1,94 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package client provides clients required by dependency resolution. +package client + +import ( + "context" + "slices" + + "deps.dev/util/resolve" +) + +// OverrideClient wraps a DependencyClient, allowing for custom packages & versions to be added +type OverrideClient struct { + DependencyClient + // Can't quite reuse resolve.LocalClient because it automatically creates dependencies + pkgVers map[resolve.PackageKey][]resolve.Version // versions of a package + verDeps map[resolve.VersionKey][]resolve.RequirementVersion // dependencies of a version +} + +// NewOverrideClient makes a new OverrideClient. 
+func NewOverrideClient(c DependencyClient) *OverrideClient { + return &OverrideClient{ + DependencyClient: c, + pkgVers: make(map[resolve.PackageKey][]resolve.Version), + verDeps: make(map[resolve.VersionKey][]resolve.RequirementVersion), + } +} + +// AddVersion adds the specified version and dependencies to the client. +func (c *OverrideClient) AddVersion(v resolve.Version, deps []resolve.RequirementVersion) { + // TODO: Inserting multiple co-dependent requirements may not work, depending on order + versions := c.pkgVers[v.PackageKey] + sem := v.Semver() + // Only add it to the versions if not already there (and keep versions sorted) + idx, ok := slices.BinarySearchFunc(versions, v, func(a, b resolve.Version) int { + return sem.Compare(a.Version, b.Version) + }) + if !ok { + versions = slices.Insert(versions, idx, v) + } + c.pkgVers[v.PackageKey] = versions + c.verDeps[v.VersionKey] = slices.Clone(deps) // overwrites dependencies if called multiple times with same version +} + +// Version returns the version specified by the VersionKey. +func (c *OverrideClient) Version(ctx context.Context, vk resolve.VersionKey) (resolve.Version, error) { + for _, v := range c.pkgVers[vk.PackageKey] { + if v.VersionKey == vk { + return v, nil + } + } + + return c.DependencyClient.Version(ctx, vk) +} + +// Versions returns the versions of a package specified by the PackageKey. +func (c *OverrideClient) Versions(ctx context.Context, pk resolve.PackageKey) ([]resolve.Version, error) { + if vers, ok := c.pkgVers[pk]; ok { + return vers, nil + } + + return c.DependencyClient.Versions(ctx, pk) +} + +// Requirements returns the requirement versions of the version specified by the VersionKey. 
+func (c *OverrideClient) Requirements(ctx context.Context, vk resolve.VersionKey) ([]resolve.RequirementVersion, error) { + if deps, ok := c.verDeps[vk]; ok { + return deps, nil + } + + return c.DependencyClient.Requirements(ctx, vk) +} + +// MatchingVersions returns the versions matching the requirement specified by the VersionKey. +func (c *OverrideClient) MatchingVersions(ctx context.Context, vk resolve.VersionKey) ([]resolve.Version, error) { + if vs, ok := c.pkgVers[vk.PackageKey]; ok { + return resolve.MatchRequirement(vk, vs), nil + } + + return c.DependencyClient.MatchingVersions(ctx, vk) +} diff --git a/internal/resolution/clienttest/mock_http.go b/internal/resolution/clienttest/mock_http.go new file mode 100644 index 00000000..eb6164cd --- /dev/null +++ b/internal/resolution/clienttest/mock_http.go @@ -0,0 +1,95 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package clienttest + +import ( + "log" + "net/http" + "net/http/httptest" + "os" + "strings" + "sync" + "testing" +) + +type MockHTTPServer struct { + *httptest.Server + mu sync.Mutex + response map[string][]byte // path -> response + authorization string // expected Authorization header contents +} + +// NewMockHTTPServer starts and returns a new simple HTTP Server for mocking basic requests. +// The Server will automatically be shut down with Close() in the test Cleanup function. 
+// +// Use the SetResponse / SetResponseFromFile to set the responses for specific URL paths. +func NewMockHTTPServer(t *testing.T) *MockHTTPServer { + t.Helper() + mock := &MockHTTPServer{response: make(map[string][]byte)} + mock.Server = httptest.NewServer(mock) + t.Cleanup(func() { mock.Server.Close() }) + + return mock +} + +// SetResponse sets the Server's response for the URL path to be response bytes. +func (m *MockHTTPServer) SetResponse(t *testing.T, path string, response []byte) { + t.Helper() + m.mu.Lock() + defer m.mu.Unlock() + path = strings.TrimPrefix(path, "/") + m.response[path] = response +} + +// SetResponseFromFile sets the Server's response for the URL path to be the contents of the file at filename. +func (m *MockHTTPServer) SetResponseFromFile(t *testing.T, path string, filename string) { + t.Helper() + b, err := os.ReadFile(filename) + if err != nil { + t.Fatalf("failed to read response file: %v", err) + } + m.SetResponse(t, path, b) +} + +// SetAuthorization sets the contents of the 'Authorization' header the server expects for all endpoints. +// +// The incoming requests' headers must match the auth string exactly, otherwise the server will response with 401 Unauthorized. +// If authorization is unset or empty, the server will not require authorization. +func (m *MockHTTPServer) SetAuthorization(t *testing.T, auth string) { + t.Helper() + m.mu.Lock() + defer m.mu.Unlock() + m.authorization = auth +} + +// ServeHTTP is the http.Handler for the underlying httptest.Server. 
+func (m *MockHTTPServer) ServeHTTP(w http.ResponseWriter, r *http.Request) { + m.mu.Lock() + wantAuth := m.authorization + resp, ok := m.response[strings.TrimPrefix(r.URL.EscapedPath(), "/")] + m.mu.Unlock() + + if wantAuth != "" && r.Header.Get("Authorization") != wantAuth { + w.WriteHeader(http.StatusUnauthorized) + resp = []byte("unauthorized") + } else if !ok { + w.WriteHeader(http.StatusNotFound) + resp = []byte("not found") + } + + if _, err := w.Write(resp); err != nil { + log.Fatalf("Write: %v", err) + } +} diff --git a/internal/resolution/clienttest/mock_resolution_client.go b/internal/resolution/clienttest/mock_resolution_client.go new file mode 100644 index 00000000..eba4bf0a --- /dev/null +++ b/internal/resolution/clienttest/mock_resolution_client.go @@ -0,0 +1,74 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package clienttest + +import ( + "os" + "strings" + "testing" + + "deps.dev/util/resolve" + "deps.dev/util/resolve/schema" + "github.com/google/osv-scalibr/internal/resolution/client" + "gopkg.in/yaml.v3" +) + +type ResolutionUniverse struct { + System string `yaml:"system"` + Schema string `yaml:"schema"` +} + +type mockDependencyClient struct { + *resolve.LocalClient +} + +func (mdc mockDependencyClient) LoadCache(string) error { return nil } +func (mdc mockDependencyClient) WriteCache(string) error { return nil } +func (mdc mockDependencyClient) AddRegistries(_ []client.Registry) error { return nil } + +func NewMockResolutionClient(t *testing.T, universeYAML string) client.DependencyClient { + t.Helper() + f, err := os.Open(universeYAML) + if err != nil { + t.Fatalf("failed opening mock universe: %v", err) + } + defer f.Close() + dec := yaml.NewDecoder(f) + + var universe ResolutionUniverse + if err := dec.Decode(&universe); err != nil { + t.Fatalf("failed decoding mock universe: %v", err) + } + + var sys resolve.System + switch strings.ToLower(universe.System) { + case "npm": + sys = resolve.NPM + case "maven": + sys = resolve.Maven + default: + t.Fatalf("unknown ecosystem in universe: %s", universe.System) + } + + // schema needs a strict tab indentation, which is awkward to do within the YAML. + // Replace double space from yaml with single tab + universe.Schema = strings.ReplaceAll(universe.Schema, " ", "\t") + sch, err := schema.New(universe.Schema, sys) + if err != nil { + t.Fatalf("failed parsing schema: %v", err) + } + + return mockDependencyClient{sch.NewClient()} +}