Skip to content

Commit

Permalink
Add basic symlink handling in the layer scanning implementation.
Browse files Browse the repository at this point in the history
A new field (`targetPath`) was added to the internal `fileNode` struct to store the symlink target path. Next steps include testing hardlinks and handling symlinks during layer tracing.

PiperOrigin-RevId: 715188482
  • Loading branch information
Mario Leyva authored and copybara-github committed Jan 14, 2025
1 parent ed18178 commit 046c3be
Show file tree
Hide file tree
Showing 15 changed files with 666 additions and 111 deletions.
4 changes: 3 additions & 1 deletion artifact/image/layerscanning/image/file_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"io/fs"
"os"
"path"
"path/filepath"
)

const (
Expand All @@ -36,6 +37,7 @@ type fileNode struct {
originLayerID string
isWhiteout bool
virtualPath string
targetPath string
mode fs.FileMode
file *os.File
}
Expand Down Expand Up @@ -94,7 +96,7 @@ func (f *fileNode) Close() error {
// RealFilePath returns the real file path of the fileNode. This is the concatenation of the
// root image extract directory, origin layer ID, and the virtual path.
func (f *fileNode) RealFilePath() string {
return path.Join(f.extractDir, f.originLayerID, f.virtualPath)
return filepath.Join(f.extractDir, f.originLayerID, filepath.FromSlash(f.virtualPath))
}

// ========================================================
Expand Down
9 changes: 5 additions & 4 deletions artifact/image/layerscanning/image/file_node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"io/fs"
"os"
"path"
"path/filepath"
"testing"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -435,22 +436,22 @@ func TestRealFilePath(t *testing.T) {
{
name: "root directory",
node: rootDirectory,
want: "/tmp/extract/layer1",
want: filepath.FromSlash("/tmp/extract/layer1"),
},
{
name: "root file",
node: rootFile,
want: "/tmp/extract/layer1/bar",
want: filepath.FromSlash("/tmp/extract/layer1/bar"),
},
{
name: "non-root file",
node: nonRootFile,
want: "/tmp/extract/layer1/dir1/foo",
want: filepath.FromSlash("/tmp/extract/layer1/dir1/foo"),
},
{
name: "non-root directory",
node: nonRootDirectory,
want: "/tmp/extract/layer1/dir1/dir2",
want: filepath.FromSlash("/tmp/extract/layer1/dir1/dir2"),
},
}

Expand Down
118 changes: 78 additions & 40 deletions artifact/image/layerscanning/image/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ import (
"github.com/google/go-containerregistry/pkg/v1/tarball"
scalibrImage "github.com/google/osv-scalibr/artifact/image"
"github.com/google/osv-scalibr/artifact/image/pathtree"
"github.com/google/osv-scalibr/artifact/image/symlink"
"github.com/google/osv-scalibr/artifact/image/whiteout"
"github.com/google/osv-scalibr/log"
)

const (
Expand All @@ -46,6 +48,8 @@ var (
// ErrFileReadLimitExceeded is returned when a file exceeds the read limit. This is intended to
// prevent zip bomb attacks, for example.
ErrFileReadLimitExceeded = errors.New("file exceeds read limit")
// ErrSymlinkPointsOutsideRoot is returned when a symlink points outside the root.
ErrSymlinkPointsOutsideRoot = errors.New("symlink points outside the root")
)

// ========================================================
Expand Down Expand Up @@ -278,7 +282,7 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
// Some tools prepend everything with "./", so if we don't Clean the
// name, we may have duplicate entries, which angers tar-split.
// Using path instead of filepath to keep `/` and deterministic behavior
cleanedFilePath := path.Clean(header.Name)
cleanedFilePath := path.Clean(filepath.ToSlash(header.Name))

// Prevent "Zip Slip"
if strings.HasPrefix(cleanedFilePath, "../") {
Expand Down Expand Up @@ -320,94 +324,128 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
// realFilePath is where the file will be written to disk. filepath.Clean first to convert
// to OS specific file path.
// TODO: b/377553499 - Escape invalid characters on windows that's valid on linux
realFilePath := filepath.Join(dirPath, filepath.Clean(cleanedFilePath))
// realFilePath := filepath.Join(dirPath, filepath.Clean(cleanedFilePath))
realFilePath := filepath.Join(dirPath, filepath.FromSlash(cleanedFilePath))

var fileMode fs.FileMode
// Write out the file/dir to disk.
var newNode *fileNode
switch header.Typeflag {
case tar.TypeDir:
fileMode, err = img.handleDir(realFilePath, tarReader, header)
if err != nil {
return fmt.Errorf("failed to handle directory: %w", err)
}

newNode, err = img.handleDir(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone)
case tar.TypeReg:
newNode, err = img.handleFile(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone)
case tar.TypeSymlink, tar.TypeLink:
newNode, err = img.handleSymlink(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone)
default:
// TODO: b/374769529 - Handle symlinks.
// Assume if it's not a directory, it's a normal file.
fileMode, err = img.handleFile(realFilePath, tarReader, header)
if err != nil {
return fmt.Errorf("failed to handle file: %w", err)
}
log.Warnf("unsupported file type: %v, path: %s", header.Typeflag, header.Name)
continue
}

if err != nil {
return fmt.Errorf("failed to handle tar entry with path %s: %w", virtualPath, err)
}

// In each outer loop, a layer is added to each relevant output chainLayer slice. Because the
// outer loop is looping backwards (latest layer first), we ignore any files that are already in
// each chainLayer, as they would have been overwritten.
fillChainLayersWithVirtualPath(img, chainLayersToFill, originLayerID, virtualPath, tombstone, fileMode)
fillChainLayersWithFileNode(chainLayersToFill, newNode)
}

return nil
}

// handleSymlink builds the fileNode that represents a link entry of a layer tarball. Nothing is
// written to disk: the link is described purely by a fileNode whose mode carries fs.ModeSymlink
// and whose targetPath records where the link points.
//
// The target is taken from header.Linkname and normalized to forward slashes. A relative target
// is resolved against the directory of virtualPath; an absolute target is stored unchanged.
//
// An error is returned when the header carries no target, and an error wrapping
// ErrSymlinkPointsOutsideRoot is returned when symlink.TargetOutsideRoot reports that the target
// escapes the root. realFilePath and tarReader are not used; they are accepted so that the
// signature lines up with handleDir and handleFile.
//
// NOTE(review): tar.TypeLink (hardlink) entries are routed here as well. Hardlink names in tar
// archives are conventionally relative to the archive root rather than to the link's directory,
// so the relative resolution below may not be right for them - confirm before relying on it.
func (img *Image) handleSymlink(realFilePath, virtualPath, originLayerID string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool) (*fileNode, error) {
	targetPath := filepath.ToSlash(header.Linkname)
	if targetPath == "" {
		return nil, errors.New("symlink header has no target path")
	}

	// The caller wraps and returns this error, so it is not additionally logged here.
	if symlink.TargetOutsideRoot(virtualPath, targetPath) {
		return nil, fmt.Errorf("%w: %q -> %q", ErrSymlinkPointsOutsideRoot, virtualPath, targetPath)
	}

	// Resolve a relative target against the directory containing the link. path.Join already
	// returns a cleaned path, so no separate path.Clean is needed.
	if !path.IsAbs(targetPath) {
		targetPath = path.Join(path.Dir(virtualPath), targetPath)
	}

	return &fileNode{
		extractDir:    img.ExtractDir,
		originLayerID: originLayerID,
		virtualPath:   virtualPath,
		targetPath:    targetPath,
		isWhiteout:    isWhiteout,
		mode:          fs.FileMode(header.Mode) | fs.ModeSymlink,
	}, nil
}

// handleDir creates the directory specified by path, if it doesn't exist.
func (img *Image) handleDir(path string, tarReader *tar.Reader, header *tar.Header) (fs.FileMode, error) {
if _, err := os.Stat(path); err != nil {
if err := os.MkdirAll(path, dirPermission); err != nil {
return 0, fmt.Errorf("failed to create directory with path %s: %w", path, err)
func (img *Image) handleDir(realFilePath, virtualPath, originLayerID string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool) (*fileNode, error) {
if _, err := os.Stat(realFilePath); err != nil {
if err := os.MkdirAll(realFilePath, dirPermission); err != nil {
return nil, fmt.Errorf("failed to create directory with realFilePath %s: %w", realFilePath, err)
}
}
return fs.FileMode(header.Mode) | fs.ModeDir, nil
return &fileNode{
extractDir: img.ExtractDir,
originLayerID: originLayerID,
virtualPath: virtualPath,
isWhiteout: isWhiteout,
mode: fs.FileMode(header.Mode) | fs.ModeDir,
}, nil
}

// handleFile creates the file specified by path, and then copies the contents of the tarReader into
// the file.
func (img *Image) handleFile(path string, tarReader *tar.Reader, header *tar.Header) (fs.FileMode, error) {
func (img *Image) handleFile(realFilePath, virtualPath, originLayerID string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool) (*fileNode, error) {
// Write all files as read/writable by the current user, inaccessible by anyone else
// Actual permission bits are stored in FileNode
f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, filePermission)
f, err := os.OpenFile(realFilePath, os.O_CREATE|os.O_RDWR, filePermission)

if err != nil {
return 0, err
return nil, err
}
defer f.Close()

numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.maxFileBytes))
if numBytes >= img.maxFileBytes || errors.Is(err, io.EOF) {
return 0, ErrFileReadLimitExceeded
return nil, ErrFileReadLimitExceeded
}

if err != nil {
return 0, fmt.Errorf("unable to copy file: %w", err)
return nil, fmt.Errorf("unable to copy file: %w", err)
}

return fs.FileMode(header.Mode), nil
return &fileNode{
extractDir: img.ExtractDir,
originLayerID: originLayerID,
virtualPath: virtualPath,
isWhiteout: isWhiteout,
mode: fs.FileMode(header.Mode),
}, nil
}

// fillChainLayersWithVirtualPath fills the chain layers with the virtual path.
func fillChainLayersWithVirtualPath(img *Image, chainLayers []*chainLayer, originLayerID, virtualPath string, isWhiteout bool, fileMode fs.FileMode) {
for _, chainLayer := range chainLayers {
// fillChainLayersWithFileNode fills the chain layers with a new fileNode.
func fillChainLayersWithFileNode(chainLayersToFill []*chainLayer, newNode *fileNode) {
virtualPath := newNode.virtualPath
for _, chainLayer := range chainLayersToFill {
if node := chainLayer.fileNodeTree.Get(virtualPath); node != nil {
// A newer version of the file already exists on a later chainLayer.
// Since we do not want to overwrite a later layer with information
// written in an earlier layer, skip this file.
continue
}

// check for a whited out parent directory
// Check for a whited out parent directory.
if inWhiteoutDir(chainLayer, virtualPath) {
// The entire directory has been deleted, so no need to save this file
// The entire directory has been deleted, so no need to save this file.
continue
}

// Add the file to the chain layer. If there is an error, then we fail open.
// TODO: b/379154069 - Add logging for fail open errors.
chainLayer.fileNodeTree.Insert(virtualPath, &fileNode{
extractDir: img.ExtractDir,
originLayerID: originLayerID,
virtualPath: virtualPath,
isWhiteout: isWhiteout,
mode: fileMode,
})
chainLayer.fileNodeTree.Insert(virtualPath, newNode)
}
}

Expand Down
Loading

0 comments on commit 046c3be

Please sign in to comment.