Skip to content

Commit

Permalink
[Layer Scanning] Add FileRequirer to the image Config object to a…
Browse files Browse the repository at this point in the history
…llow users to specify files that should be unpacked during `Image` object creation.

PiperOrigin-RevId: 715492638
  • Loading branch information
Mario Leyva authored and copybara-github committed Jan 14, 2025
1 parent 9491c94 commit 681317a
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 23 deletions.
65 changes: 44 additions & 21 deletions artifact/image/layerscanning/image/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/google/go-containerregistry/pkg/v1/tarball"
scalibrImage "github.com/google/osv-scalibr/artifact/image"
"github.com/google/osv-scalibr/artifact/image/pathtree"
"github.com/google/osv-scalibr/artifact/image/require"
"github.com/google/osv-scalibr/artifact/image/symlink"
"github.com/google/osv-scalibr/artifact/image/whiteout"
"github.com/google/osv-scalibr/log"
Expand All @@ -50,6 +51,8 @@ var (
ErrFileReadLimitExceeded = errors.New("file exceeds read limit")
// ErrSymlinkPointsOutsideRoot is returned when a symlink points outside the root.
ErrSymlinkPointsOutsideRoot = errors.New("symlink points outside the root")
// ErrInvalidConfig is returned when the image config is invalid.
ErrInvalidConfig = errors.New("invalid image config")
)

// ========================================================
Expand All @@ -59,20 +62,32 @@ var (
// Config contains the configuration to load an Image.
type Config struct {
	// MaxFileBytes is the per-file size limit in bytes; a file whose copied
	// size reaches this limit fails extraction with ErrFileReadLimitExceeded.
	// Must be positive (enforced by validateConfig).
	MaxFileBytes int64
	// Requirer decides which files are unpacked during Image creation; files
	// it does not require are skipped entirely. Must be non-nil (enforced by
	// validateConfig); DefaultConfig uses require.FileRequirerAll.
	Requirer require.FileRequirer
}

// DefaultConfig returns the default configuration to load an Image: every
// file is required, and the per-file size cap is DefaultMaxFileBytes.
func DefaultConfig() *Config {
	cfg := &Config{}
	cfg.MaxFileBytes = DefaultMaxFileBytes
	cfg.Requirer = &require.FileRequirerAll{}
	return cfg
}

// validateConfig checks that config is usable for loading an image. It
// returns an error wrapping ErrInvalidConfig when MaxFileBytes is not
// positive or when no Requirer is set, and nil otherwise.
func validateConfig(config *Config) error {
	switch {
	case config.MaxFileBytes <= 0:
		return fmt.Errorf("%w: max file bytes must be positive: %d", ErrInvalidConfig, config.MaxFileBytes)
	case config.Requirer == nil:
		return fmt.Errorf("%w: requirer must be specified", ErrInvalidConfig)
	default:
		return nil
	}
}

// Image is a container image. It is composed of a set of layers that can be scanned for software
// inventory. It contains the proper metadata to attribute inventory to layers.
type Image struct {
	// chainLayers are the chain layers built from the image's v1 layers.
	chainLayers []*chainLayer
	// config is the (validated) configuration the image was loaded with; its
	// MaxFileBytes and Requirer are consulted while unpacking files.
	config *Config
	// ExtractDir is the temporary directory the image's files are unpacked into.
	ExtractDir string
	// BaseImageIndex indexes the base image within the layers — presumably the
	// boundary of the base image's layers; TODO confirm against where
	// baseImageIndex is computed.
	BaseImageIndex int
}
Expand Down Expand Up @@ -113,11 +128,16 @@ func FromTarball(tarPath string, config *Config) (*Image, error) {
// FromV1Image takes a v1.Image and produces a layer-scannable Image. The steps taken are as
// follows:
//
// (1) Retrieves v1.Layers, configFile. Creates tempPath to store the image files.
// (2) Initializes the output image and the chain layers.
// (3) Unpacks the layers by looping through the layers in reverse, while filling in the files
// (1) Validates the user input image config object.
// (2) Retrieves v1.Layers, configFile. Creates tempPath to store the image files.
// (3) Initializes the output image and the chain layers.
// (4) Unpacks the layers by looping through the layers in reverse, while filling in the files
// into the appropriate chain layer.
func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
if err := validateConfig(config); err != nil {
return nil, fmt.Errorf("invalid image config: %w", err)
}

configFile, err := v1Image.ConfigFile()
if err != nil {
return nil, fmt.Errorf("failed to load config file: %w", err)
Expand Down Expand Up @@ -145,9 +165,9 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {

outputImage := Image{
chainLayers: chainLayers,
config: config,
ExtractDir: tempPath,
BaseImageIndex: baseImageIndex,
maxFileBytes: config.MaxFileBytes,
}

// Add the root directory to each chain layer. If this is not done, then the virtual paths won't
Expand Down Expand Up @@ -277,18 +297,18 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
if err != nil {
return fmt.Errorf("could not read tar: %w", err)
}
// Some tools prepend everything with "./", so if we don't Clean the
// name, we may have duplicate entries, which angers tar-split.
// Using path instead of filepath to keep `/` and deterministic behavior
// Some tools prepend everything with "./", so if we don't path.Clean the name, we may have
// duplicate entries, which angers tar-split. Using path instead of filepath to keep `/` and
// deterministic behavior.
cleanedFilePath := path.Clean(filepath.ToSlash(header.Name))

// Prevent "Zip Slip"
if strings.HasPrefix(cleanedFilePath, "../") {
continue
}

// Force PAX format to remove Name/Linkname length limit of 100 characters required by USTAR
// and to not depend on internal tar package guess which prefers USTAR over PAX.
// Force PAX format to remove Name/Linkname length limit of 100 characters required by USTAR and
// to not depend on internal tar package guess which prefers USTAR over PAX.
header.Format = tar.FormatPAX

// There is a difference between the filepath and path modules. The filepath module will handle
Expand All @@ -305,10 +325,10 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
continue
}

tombstone := strings.HasPrefix(basename, whiteout.WhiteoutPrefix)
isWhiteout := whiteout.IsWhiteout(basename)
// TODO: b/379094217 - Handle Opaque Whiteouts
if tombstone {
basename = basename[len(whiteout.WhiteoutPrefix):]
if isWhiteout {
basename = whiteout.Reverse(basename)
}

// If we're checking a directory, don't filepath.Join names.
Expand All @@ -320,19 +340,22 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
}

// realFilePath is where the file will be written to disk. filepath.Clean first to convert
// to OS specific file path.
// TODO: b/377553499 - Escape invalid characters on windows that's valid on linux
// realFilePath := filepath.Join(dirPath, filepath.Clean(cleanedFilePath))
// to OS specific file path.
realFilePath := filepath.Join(dirPath, filepath.FromSlash(cleanedFilePath))

// If the file is not required, then skip it.
if !img.config.Requirer.FileRequired(virtualPath, header.FileInfo()) {
continue
}

var newNode *fileNode
switch header.Typeflag {
case tar.TypeDir:
newNode, err = img.handleDir(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone)
newNode, err = img.handleDir(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
case tar.TypeReg:
newNode, err = img.handleFile(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone)
newNode, err = img.handleFile(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
case tar.TypeSymlink, tar.TypeLink:
newNode, err = img.handleSymlink(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone)
newNode, err = img.handleSymlink(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
default:
log.Warnf("unsupported file type: %v, path: %s", header.Typeflag, header.Name)
continue
Expand Down Expand Up @@ -406,8 +429,8 @@ func (img *Image) handleFile(realFilePath, virtualPath, originLayerID string, ta
}
defer f.Close()

numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.maxFileBytes))
if numBytes >= img.maxFileBytes || errors.Is(err, io.EOF) {
numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.config.MaxFileBytes))
if numBytes >= img.config.MaxFileBytes || errors.Is(err, io.EOF) {
return nil, ErrFileReadLimitExceeded
}

Expand Down
49 changes: 48 additions & 1 deletion artifact/image/layerscanning/image/image_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
v1 "github.com/google/go-containerregistry/pkg/v1"
"github.com/google/go-containerregistry/pkg/v1/types"
"github.com/google/osv-scalibr/artifact/image"
"github.com/google/osv-scalibr/artifact/image/require"
)

const testdataDir = "testdata"
Expand Down Expand Up @@ -132,6 +133,23 @@ func TestFromTarball(t *testing.T) {
wantErrDuringImageCreation error
wantErrWhileReadingFiles error
}{
{
name: "invalid config - non positive maxFileBytes",
tarPath: filepath.Join(testdataDir, "single-file.tar"),
config: &Config{
Requirer: &require.FileRequirerAll{},
MaxFileBytes: 0,
},
wantErrDuringImageCreation: ErrInvalidConfig,
},
{
name: "invalid config - missing requirer",
tarPath: filepath.Join(testdataDir, "single-file.tar"),
config: &Config{
MaxFileBytes: DefaultMaxFileBytes,
},
wantErrDuringImageCreation: ErrInvalidConfig,
},
{
name: "image with one file",
tarPath: filepath.Join(testdataDir, "single-file.tar"),
Expand Down Expand Up @@ -294,6 +312,7 @@ func TestFromTarball(t *testing.T) {
tarPath: filepath.Join(testdataDir, "single-file.tar"),
config: &Config{
MaxFileBytes: 1,
Requirer: &require.FileRequirerAll{},
},
wantErrDuringImageCreation: ErrFileReadLimitExceeded,
},
Expand Down Expand Up @@ -415,12 +434,39 @@ func TestFromTarball(t *testing.T) {
config: DefaultConfig(),
wantErrDuringImageCreation: ErrSymlinkPointsOutsideRoot,
},
{
name: "require single file from images",
tarPath: filepath.Join(testdataDir, "multiple-files.tar"),
config: &Config{
MaxFileBytes: DefaultMaxFileBytes,
// Only require foo.txt.
Requirer: require.NewFileRequirerPaths([]string{"/foo.txt"}),
},
wantChainLayerEntries: []chainLayerEntries{
{
filepathContentPairs: []filepathContentPair{
{
filepath: "foo.txt",
content: "foo\n",
},
},
},
{
// dir1/bar.txt and dir1/baz.txt are ignored in the second layer.
filepathContentPairs: []filepathContentPair{
{
filepath: "foo.txt",
content: "foo\n",
},
},
},
},
},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
gotImage, gotErr := FromTarball(tc.tarPath, tc.config)
defer gotImage.CleanUp()

if tc.wantErrDuringImageCreation != nil {
if errors.Is(gotErr, tc.wantErrDuringImageCreation) {
Expand All @@ -432,6 +478,7 @@ func TestFromTarball(t *testing.T) {
if gotErr != nil {
t.Fatalf("FromTarball(%v) returned unexpected error: %v", tc.tarPath, gotErr)
}
defer gotImage.CleanUp()

chainLayers, err := gotImage.ChainLayers()
if err != nil {
Expand Down
4 changes: 3 additions & 1 deletion artifact/image/require/require.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
// in during a container image extraction.
package require

import "io/fs"
import (
"io/fs"
)

// FileRequirer determines if a file is required to unpack.
type FileRequirer interface {
Expand Down
29 changes: 29 additions & 0 deletions artifact/image/whiteout/whiteout.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,32 @@ func Files(scalibrfs scalibrfs.FS) (map[string]struct{}, error) {
}
return whiteouts, nil
}

// IsWhiteout returns true if a path is a whiteout path, i.e. its basename
// starts with WhiteoutPrefix.
func IsWhiteout(p string) bool {
	// Split rather than Base: for "dir/" the file part is "", which correctly
	// reports false.
	_, name := filepath.Split(p)
	return strings.HasPrefix(name, WhiteoutPrefix)
}

// Path returns the whiteout version of a path.
func Path(p string) string {
dir, file := filepath.Split(p)
return filepath.Join(dir, fmt.Sprintf("%s%s", WhiteoutPrefix, file))
}

// Reverse returns the non whiteout version of a path: the same directory,
// with WhiteoutPrefix stripped from the basename if present.
func Reverse(p string) string {
	dir, file := filepath.Split(p)

	// strings.TrimPrefix is already a no-op when the prefix is absent, so the
	// previous HasPrefix guard was redundant.
	file = strings.TrimPrefix(file, WhiteoutPrefix)

	reverse := filepath.Join(dir, file)

	// filepath.Join cleans away the trailing separator when the file part is
	// empty (e.g. input "a/b/"); restore it so directory paths round-trip.
	if dir != "" && file == "" {
		reverse += "/"
	}

	return reverse
}

0 comments on commit 681317a

Please sign in to comment.