Skip to content

Commit

Permalink
refactor: Vendored extractor osvdev (#1498)
Browse files Browse the repository at this point in the history
I am attempting to split a big refactor PR into several small pieces;
this is part 5.
This PR can be reviewed independently from part 4 (#1497) by just
looking at the latest commit.

Update the vendored extractor to take and use the osvdev Client rather
than calling the global osv function.
  • Loading branch information
another-rex authored Jan 15, 2025
1 parent 404b7f3 commit aa7435a
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 13 deletions.
23 changes: 12 additions & 11 deletions internal/scalibrextract/filesystem/vendored/vendored.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
scalibrfs "github.com/google/osv-scalibr/fs"
"github.com/google/osv-scalibr/plugin"
"github.com/google/osv-scalibr/purl"
"github.com/google/osv-scanner/pkg/osv"
"github.com/google/osv-scanner/internal/osvdev"
)

var (
Expand Down Expand Up @@ -58,8 +58,7 @@ type Extractor struct {
// ScanGitDir determines whether a vendored library with a git directory is scanned or not,
// this is used to avoid duplicate results, once from git scanning, once from vendoredDir scanning
ScanGitDir bool
// TODO(v2): Client rework
// determineVersionsClient
OSVClient *osvdev.OSVClient
}

var _ filesystem.Extractor = Extractor{}
Expand Down Expand Up @@ -96,12 +95,10 @@ func (e Extractor) FileRequired(fapi filesystem.FileAPI) bool {

// Extract determines the most likely package version from the directory and returns them as
// commit hash inventory entries
func (e Extractor) Extract(_ context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) {
func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) {
var packages []*extractor.Inventory

// r.Infof("Scanning potential vendored dir: %s\n", libPath)
// TODO: make this a goroutine to parallelize this operation
results, err := queryDetermineVersions(input.Path, input.FS, e.ScanGitDir)
results, err := e.queryDetermineVersions(ctx, input.Path, input.FS, e.ScanGitDir)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -130,8 +127,8 @@ func (e Extractor) Ecosystem(_ *extractor.Inventory) string {
return ""
}

func queryDetermineVersions(repoDir string, fsys scalibrfs.FS, scanGitDir bool) (*osv.DetermineVersionResponse, error) {
var hashes []osv.DetermineVersionHash
func (e Extractor) queryDetermineVersions(ctx context.Context, repoDir string, fsys scalibrfs.FS, scanGitDir bool) (*osvdev.DetermineVersionResponse, error) {
var hashes []osvdev.DetermineVersionHash

err := fs.WalkDir(fsys, repoDir, func(p string, d fs.DirEntry, _ error) error {
if d.IsDir() {
Expand Down Expand Up @@ -165,7 +162,7 @@ func queryDetermineVersions(repoDir string, fsys scalibrfs.FS, scanGitDir bool)
return err
}
hash := md5.Sum(buf.Bytes()) //nolint:gosec
hashes = append(hashes, osv.DetermineVersionHash{
hashes = append(hashes, osvdev.DetermineVersionHash{
Path: strings.ReplaceAll(p, repoDir, ""),
Hash: hash[:],
})
Expand All @@ -180,7 +177,11 @@ func queryDetermineVersions(repoDir string, fsys scalibrfs.FS, scanGitDir bool)
return nil, fmt.Errorf("failed during hashing: %w", err)
}

result, err := osv.MakeDetermineVersionRequest(filepath.Base(repoDir), hashes)
result, err := e.OSVClient.ExperimentalDetermineVersion(ctx, &osvdev.DetermineVersionsRequest{
Name: filepath.Base(repoDir),
FileHashes: hashes,
})

if err != nil {
return nil, fmt.Errorf("failed to determine versions: %w", err)
}
Expand Down
5 changes: 4 additions & 1 deletion internal/scalibrextract/filesystem/vendored/vendored_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/google/osv-scalibr/extractor/filesystem/simplefileapi"
"github.com/google/osv-scalibr/testing/extracttest"
"github.com/google/osv-scalibr/testing/fakefs"
"github.com/google/osv-scanner/internal/osvdev"
"github.com/google/osv-scanner/internal/scalibrextract/filesystem/vendored"
"github.com/google/osv-scanner/internal/testutility"
)
Expand Down Expand Up @@ -127,7 +128,9 @@ func TestExtractor_Extract(t *testing.T) {
for _, tt := range tests {
t.Run(tt.Name, func(t *testing.T) {
t.Parallel()
extr := vendored.Extractor{}
extr := vendored.Extractor{
OSVClient: osvdev.DefaultClient(),
}

scanInput := extracttest.GenerateScanInputMock(t, tt.InputConfig)
defer extracttest.CloseTestScanInput(t, scanInput)
Expand Down
7 changes: 6 additions & 1 deletion pkg/osvscanner/internal/scanners/walker.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/google/osv-scalibr/extractor/filesystem"
"github.com/google/osv-scalibr/extractor/filesystem/language/java/pomxml"
"github.com/google/osv-scanner/internal/customgitignore"
"github.com/google/osv-scanner/internal/osvdev"
"github.com/google/osv-scanner/internal/output"
"github.com/google/osv-scanner/internal/scalibrextract"
"github.com/google/osv-scanner/internal/scalibrextract/filesystem/vendored"
Expand Down Expand Up @@ -47,7 +48,11 @@ func ScanDir(r reporter.Reporter, dir string, skipGit bool, recursive bool, useG
relevantExtractors = append(relevantExtractors, lockfileExtractors...)
relevantExtractors = append(relevantExtractors, SBOMExtractors...)
// Only scan git directories if we are skipping the git extractor
relevantExtractors = append(relevantExtractors, vendored.Extractor{ScanGitDir: skipGit})
// TODO: If in offline mode, don't create a vendoredExtractor
relevantExtractors = append(relevantExtractors, vendored.Extractor{
ScanGitDir: skipGit,
OSVClient: osvdev.DefaultClient(),
})
if pomExtractor != nil {
relevantExtractors = append(relevantExtractors, pomExtractor)
} else {
Expand Down

0 comments on commit aa7435a

Please sign in to comment.