Skip to content

Commit 126d3e3

Browse files
Mario Leyva, copybara-github
Mario Leyva
authored and committed Feb 10, 2025 ·
[Layer Scanning] Add FileRequirer to the image Config object to allow users to specify files that should be unpacked during Image object creation. Only regular files are handled by the FileRequirer in this cl. Symlinks will be handled in another cl.
PiperOrigin-RevId: 725214345
1 parent 43dea65 commit 126d3e3

File tree

2 files changed

+86
-13
lines changed

2 files changed

+86
-13
lines changed
 

‎artifact/image/layerscanning/image/image.go

+37-12
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ import (
3333
"github.com/google/go-containerregistry/pkg/v1/tarball"
3434
scalibrImage "github.com/google/osv-scalibr/artifact/image"
3535
"github.com/google/osv-scalibr/artifact/image/pathtree"
36+
"github.com/google/osv-scalibr/artifact/image/require"
3637
"github.com/google/osv-scalibr/artifact/image/symlink"
3738
"github.com/google/osv-scalibr/artifact/image/whiteout"
3839
"github.com/google/osv-scalibr/log"
@@ -50,6 +51,8 @@ var (
5051
ErrFileReadLimitExceeded = errors.New("file exceeds read limit")
5152
// ErrSymlinkPointsOutsideRoot is returned when a symlink points outside the root.
5253
ErrSymlinkPointsOutsideRoot = errors.New("symlink points outside the root")
54+
// ErrInvalidConfig is returned when the image config is invalid.
55+
ErrInvalidConfig = errors.New("invalid image config")
5356
)
5457

5558
// ========================================================
@@ -59,20 +62,32 @@ var (
5962
// Config contains the configuration to load an Image.
//
// Both fields are checked by validateConfig before an image is loaded: MaxFileBytes must be
// positive and Requirer must be non-nil.
6063
type Config struct {
6164
// MaxFileBytes caps how many bytes are copied out of the tar for a single file. A file whose
// copied size reaches this cap is reported as ErrFileReadLimitExceeded.
MaxFileBytes int64
65+
// Requirer decides, per tar entry, whether the entry is unpacked. Entries for which
// FileRequired returns false are skipped while filling the chain layers.
Requirer require.FileRequirer
6266
}
6367

6468
// DefaultConfig returns the default configuration to load an Image.
//
// The returned config uses DefaultMaxFileBytes as the per-file limit and a
// require.FileRequirerAll as the requirer (presumably one that requires every file; confirm
// against the require package).
6569
func DefaultConfig() *Config {
6670
return &Config{
6771
MaxFileBytes: DefaultMaxFileBytes,
72+
Requirer: &require.FileRequirerAll{},
6873
}
6974
}
7075

76+
func validateConfig(config *Config) error {
77+
if config.MaxFileBytes <= 0 {
78+
return fmt.Errorf("%w: max file bytes must be positive: %d", ErrInvalidConfig, config.MaxFileBytes)
79+
}
80+
if config.Requirer == nil {
81+
return fmt.Errorf("%w: requirer must be specified", ErrInvalidConfig)
82+
}
83+
return nil
84+
}
85+
7186
// Image is a container image. It is composed of a set of layers that can be scanned for software
7287
// inventory. It contains the proper metadata to attribute inventory to layers.
7388
type Image struct {
7489
// chainLayers holds the chain layers that FromV1Image fills in while unpacking the image.
chainLayers []*chainLayer
75-
maxFileBytes int64
90+
// config is the Config the image was loaded with; FromV1Image validates it before use.
config *Config
7691
// ExtractDir is the temporary path created by FromV1Image to store the image files.
ExtractDir string
7792
// BaseImageIndex is computed in FromV1Image; NOTE(review): its exact meaning is not visible
// here — confirm before relying on it.
BaseImageIndex int
7893
}
@@ -113,11 +128,16 @@ func FromTarball(tarPath string, config *Config) (*Image, error) {
113128
// FromV1Image takes a v1.Image and produces a layer-scannable Image. The steps taken are as
114129
// follows:
115130
//
116-
// (1) Retrieves v1.Layers, configFile. Creates tempPath to store the image files.
117-
// (2) Initializes the output image and the chain layers.
118-
// (3) Unpacks the layers by looping through the layers in reverse, while filling in the files
131+
// (1) Validates the user input image config object.
132+
// (2) Retrieves v1.Layers, configFile. Creates tempPath to store the image files.
133+
// (3) Initializes the output image and the chain layers.
134+
// (4) Unpacks the layers by looping through the layers in reverse, while filling in the files
119135
// into the appropriate chain layer.
120136
func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
137+
if err := validateConfig(config); err != nil {
138+
return nil, fmt.Errorf("invalid image config: %w", err)
139+
}
140+
121141
configFile, err := v1Image.ConfigFile()
122142
if err != nil {
123143
return nil, fmt.Errorf("failed to load config file: %w", err)
@@ -145,9 +165,9 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
145165

146166
outputImage := Image{
147167
chainLayers: chainLayers,
168+
config: config,
148169
ExtractDir: tempPath,
149170
BaseImageIndex: baseImageIndex,
150-
maxFileBytes: config.MaxFileBytes,
151171
}
152172

153173
// Add the root directory to each chain layer. If this is not done, then the virtual paths won't
@@ -291,18 +311,18 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
291311
if err != nil {
292312
return fmt.Errorf("could not read tar: %w", err)
293313
}
294-
// Some tools prepend everything with "./", so if we don't Clean the
295-
// name, we may have duplicate entries, which angers tar-split.
296-
// Using path instead of filepath to keep `/` and deterministic behavior
314+
// Some tools prepend everything with "./", so if we don't path.Clean the name, we may have
315+
// duplicate entries, which angers tar-split. Using path instead of filepath to keep `/` and
316+
// deterministic behavior.
297317
cleanedFilePath := path.Clean(filepath.ToSlash(header.Name))
298318

299319
// Prevent "Zip Slip"
300320
if strings.HasPrefix(cleanedFilePath, "../") {
301321
continue
302322
}
303323

304-
// Force PAX format to remove Name/Linkname length limit of 100 characters required by USTAR
305-
// and to not depend on internal tar package guess which prefers USTAR over PAX.
324+
// Force PAX format to remove Name/Linkname length limit of 100 characters required by USTAR and
325+
// to not depend on internal tar package guess which prefers USTAR over PAX.
306326
header.Format = tar.FormatPAX
307327

308328
// There is a difference between the filepath and path modules. The filepath module will handle
@@ -343,6 +363,11 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
343363
// any forward slashes to the appropriate OS specific path separator.
344364
realFilePath := filepath.Join(dirPath, filepath.FromSlash(cleanedFilePath))
345365

366+
// If the file is not required, then skip it.
367+
if !img.config.Requirer.FileRequired(virtualPath, header.FileInfo()) {
368+
continue
369+
}
370+
346371
var newNode *fileNode
347372
switch header.Typeflag {
348373
case tar.TypeDir:
@@ -437,8 +462,8 @@ func (img *Image) handleFile(realFilePath, virtualPath, originLayerID string, ta
437462
}
438463
defer f.Close()
439464

440-
numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.maxFileBytes))
441-
if numBytes >= img.maxFileBytes || errors.Is(err, io.EOF) {
465+
numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.config.MaxFileBytes))
466+
if numBytes >= img.config.MaxFileBytes || errors.Is(err, io.EOF) {
442467
return nil, ErrFileReadLimitExceeded
443468
}
444469

‎artifact/image/layerscanning/image/image_test.go

+49-1
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ import (
2727
v1 "github.com/google/go-containerregistry/pkg/v1"
2828
"github.com/google/go-containerregistry/pkg/v1/types"
2929
"github.com/google/osv-scalibr/artifact/image"
30+
"github.com/google/osv-scalibr/artifact/image/require"
3031
)
3132

3233
const testdataDir = "testdata"
@@ -132,6 +133,23 @@ func TestFromTarball(t *testing.T) {
132133
wantErrDuringImageCreation error
133134
wantErrWhileReadingFiles error
134135
}{
136+
{
137+
name: "invalid config - non positive maxFileBytes",
138+
tarPath: filepath.Join(testdataDir, "single-file.tar"),
139+
config: &Config{
140+
Requirer: &require.FileRequirerAll{},
141+
MaxFileBytes: 0,
142+
},
143+
wantErrDuringImageCreation: ErrInvalidConfig,
144+
},
145+
{
146+
name: "invalid config - missing requirer",
147+
tarPath: filepath.Join(testdataDir, "single-file.tar"),
148+
config: &Config{
149+
MaxFileBytes: DefaultMaxFileBytes,
150+
},
151+
wantErrDuringImageCreation: ErrInvalidConfig,
152+
},
135153
{
136154
name: "image with one file",
137155
tarPath: filepath.Join(testdataDir, "single-file.tar"),
@@ -294,6 +312,7 @@ func TestFromTarball(t *testing.T) {
294312
tarPath: filepath.Join(testdataDir, "single-file.tar"),
295313
config: &Config{
296314
MaxFileBytes: 1,
315+
Requirer: &require.FileRequirerAll{},
297316
},
298317
wantChainLayerEntries: []chainLayerEntries{
299318
{
@@ -427,12 +446,39 @@ func TestFromTarball(t *testing.T) {
427446
config: DefaultConfig(),
428447
wantErrDuringImageCreation: ErrSymlinkPointsOutsideRoot,
429448
},
449+
{
450+
name: "require single file from images",
451+
tarPath: filepath.Join(testdataDir, "multiple-files.tar"),
452+
config: &Config{
453+
MaxFileBytes: DefaultMaxFileBytes,
454+
// Only require foo.txt.
455+
Requirer: require.NewFileRequirerPaths([]string{"/foo.txt"}),
456+
},
457+
wantChainLayerEntries: []chainLayerEntries{
458+
{
459+
filepathContentPairs: []filepathContentPair{
460+
{
461+
filepath: "foo.txt",
462+
content: "foo\n",
463+
},
464+
},
465+
},
466+
{
467+
// dir1/bar.txt and dir1/baz.txt are ignored in the second layer.
468+
filepathContentPairs: []filepathContentPair{
469+
{
470+
filepath: "foo.txt",
471+
content: "foo\n",
472+
},
473+
},
474+
},
475+
},
476+
},
430477
}
431478

432479
for _, tc := range tests {
433480
t.Run(tc.name, func(t *testing.T) {
434481
gotImage, gotErr := FromTarball(tc.tarPath, tc.config)
435-
defer gotImage.CleanUp()
436482

437483
if tc.wantErrDuringImageCreation != nil {
438484
if errors.Is(gotErr, tc.wantErrDuringImageCreation) {
@@ -444,6 +490,8 @@ func TestFromTarball(t *testing.T) {
444490
if gotErr != nil {
445491
t.Fatalf("FromTarball(%v) returned unexpected error: %v", tc.tarPath, gotErr)
446492
}
493+
// Only defer call to CleanUp if the image was created successfully.
494+
defer gotImage.CleanUp()
447495

448496
chainLayers, err := gotImage.ChainLayers()
449497
if err != nil {

0 commit comments

Comments
 (0)
Please sign in to comment.