Skip to content

Commit

Permalink
feat: Java reachability: Analyze uber .jar files (#1494)
Browse files Browse the repository at this point in the history
Followup to #1480

This adds support for analyzing uber .jar files by:

- Unpacking the .jar
- Finding the main class from META-INF/MANIFEST.MF
- Building a map of class -> Maven jar by extracting the list of Maven
dependencies from META-INF/maven/**/pom.properties files (using
OSV-Scalibr), downloading the .jar files and unpacking them to discover
.class files.
- Enumerating class reachability from the main class.
- Determining the list of reachable Maven dependencies by using the
class -> Maven jar map we built.

Usage:

```
go run ./cmd/reachable -verbose /path/to/file.jar

...
2025/01/14 13:50:52 INFO Reachable dep=io.swagger.parser.v3:swagger-parser-v3
2025/01/14 13:50:52 INFO Reachable dep=com.dorkbox:Desktop
2025/01/14 13:50:52 INFO Reachable dep=ch.qos.logback:logback-core
2025/01/14 13:50:52 INFO Reachable dep=commons-io:commons-io
2025/01/14 13:50:52 INFO Not reachable dep=com.dorkbox:NetworkUtils
2025/01/14 13:50:52 INFO Not reachable dep=io.swagger.parser.v3:swagger-parser
2025/01/14 13:50:52 INFO Not reachable dep=io.swagger.parser.v3:swagger-parser-v2-converter
2025/01/14 13:50:52 INFO Not reachable dep=com.reprezen.kaizen:openapi-parser
...
```
  • Loading branch information
oliverchang authored Jan 15, 2025
1 parent 2e9d96a commit 0809439
Show file tree
Hide file tree
Showing 4 changed files with 383 additions and 55 deletions.
247 changes: 192 additions & 55 deletions experimental/javareach/cmd/reachable/main.go
Original file line number Diff line number Diff line change
@@ -1,61 +1,154 @@
package main

import (
"archive/zip"
"flag"
"fmt"
"io"
"log/slog"
"maps"
"os"
"path/filepath"
"slices"
"strings"

"github.com/google/osv-scalibr/extractor/filesystem/language/java/archive"
"github.com/google/osv-scanner/experimental/javareach"
)

// Usage:
//
// go run ./cmd/reachable -classpath=<classpath> path/to/root/class
// go run ./cmd/reachable path/to/file.jar
//
// Note that <classpath> currently only supports a single directory path containing .class files.
// This is unlike classpaths supported by Java runtimes (which support
// specifying multiple directories and .jar files).
//
// TODO: Support unpacking .jar files (uber jars that contain all dependencies)
// TODO: Support non-uber jars by transitively resolving pom.xml files and
// automatically downloading dependencies if the pom.xml exists in the .jar
// (e.g. META-INF/maven/pom.xml)
// TODO: Map classes back to Maven dependencies.
// TODO: Support non-uber jars by downloading dependencies on demand from registries. This requires
// a reliable index of class -> Maven jar mappings for the entire Maven universe.
func main() {
classPath := flag.String("classpath", "", "(Required) A single directory containing Java class files with a directory structure that mirrors the package hierarchy.")
classPath := flag.String("classpath", "", "A single directory containing Java class files with a directory structure that mirrors the package hierarchy.")
verbose := flag.Bool("verbose", false, "Enable debug logs.")
flag.Usage = func() {
fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s <arguments> <root class name> <root class name 2...>\n", os.Args[0])
flag.PrintDefaults()
}
flag.Parse()

if *classPath == "" {
flag.Usage()
os.Exit(1)
if *verbose {
slog.SetLogLoggerLevel(slog.LevelDebug)
}

for _, className := range flag.Args() {
cf, err := findClass(*classPath, className)
if err != nil {
slog.Error("Failed to find", "class", className, "error", err)
os.Exit(1)
for _, arg := range flag.Args() {
if strings.HasSuffix(arg, ".jar") {
if err := enumerateReachabilityForJar(arg); err != nil {
slog.Error("Failed to enumerate reachability for", "jar", arg, "error", err)
os.Exit(1)
}
} else {
if *classPath == "" {
flag.Usage()
os.Exit(1)
}

classes, err := EnumerateReachabilityFromClass(arg, *classPath)
if err != nil {
slog.Error("Failed to enumerate reachability for", "class", arg, "error", err)
os.Exit(1)
}

for _, class := range classes {
slog.Info("Reachable", "class", class)
}
}
}
}

// enumerateReachabilityForJar analyzes the jar at jarPath and logs which of
// its extracted dependencies are reachable from the jar's main class.
//
// It extracts the dependency list with javareach.ExtractDependencies, unpacks
// the jar into a temporary directory (removed on return), reads the main class
// from META-INF/MANIFEST.MF, enumerates the classes reachable from it, and maps
// each class to dependencies through the package finder. Reachable
// dependencies are logged in sorted order, followed by the extracted
// dependencies that were not reached.
//
// A class the finder cannot resolve is logged and skipped; every other failure
// is returned to the caller.
func enumerateReachabilityForJar(jarPath string) error {
	jarfile, err := os.Open(jarPath)
	if err != nil {
		return err
	}
	defer jarfile.Close()

	allDeps, err := javareach.ExtractDependencies(jarfile)
	if err != nil {
		return fmt.Errorf("extracting dependencies from %s: %w", jarPath, err)
	}
	for _, dep := range allDeps {
		slog.Debug("extracted dep",
			"group id", dep.Metadata.(*archive.Metadata).GroupID, "artifact id", dep.Name, "version", dep.Version)
	}

	classFinder, err := javareach.NewDefaultPackageFinder(allDeps)
	if err != nil {
		return err
	}

	tmpDir, err := os.MkdirTemp("", "")
	if err != nil {
		return err
	}
	defer os.RemoveAll(tmpDir)

	slog.Info("Unzipping", "jar", jarPath, "to", tmpDir)
	if err := unzipJar(jarPath, tmpDir); err != nil {
		return err
	}

	manifest, err := os.Open(filepath.Join(tmpDir, "META-INF/MANIFEST.MF"))
	if err != nil {
		return err
	}
	// Deferred calls run last-in-first-out, so the manifest is closed before
	// the temporary directory that contains it is removed.
	defer manifest.Close()

	mainClass, err := javareach.GetMainClass(manifest)
	if err != nil {
		return err
	}
	slog.Info("Found", "main class", mainClass)

	classes, err := EnumerateReachabilityFromClass(mainClass, tmpDir)
	if err != nil {
		return err
	}

	reachableDeps := map[string]struct{}{}
	for _, class := range classes {
		deps, err := classFinder.Find(class)
		if err != nil {
			slog.Error("Failed to find", "class", class, "error", err)
			continue
		}

		for _, dep := range deps {
			reachableDeps[dep] = struct{}{}
		}
	}

	// Map iteration order is random; sort the keys so repeated runs log the
	// reachable dependencies in the same order.
	for _, dep := range slices.Sorted(maps.Keys(reachableDeps)) {
		slog.Info("Reachable", "dep", dep)
	}

	for _, dep := range allDeps {
		name := fmt.Sprintf("%s:%s", dep.Metadata.(*archive.Metadata).GroupID, dep.Name)
		if _, ok := reachableDeps[name]; !ok {
			slog.Info("Not reachable", "dep", name)
		}
	}
	return nil
}

// EnumerateReachabilityFromClass looks up mainClass under classPath with
// findClass and returns the result of EnumerateReachability when that class
// file is used as the only root.
//
// An error from findClass is returned unchanged together with a nil slice.
func EnumerateReachabilityFromClass(mainClass string, classPath string) ([]string, error) {
	root, findErr := findClass(classPath, mainClass)
	if findErr != nil {
		return nil, findErr
	}

	roots := []*javareach.ClassFile{root}
	return EnumerateReachability(roots, classPath)
}

func findClass(classPath string, className string) (*javareach.ClassFile, error) {
// TODO: Handle directory traversal.
classFilepath := filepath.Join(classPath, className)
if !strings.HasPrefix(classFilepath, filepath.Clean(classPath)+string(os.PathSeparator)) {
return nil, fmt.Errorf("directory traversal: %s", classFilepath)
}

if !strings.HasSuffix(classFilepath, ".class") {
classFilepath += ".class"
}
Expand All @@ -67,19 +160,18 @@ func findClass(classPath string, className string) (*javareach.ClassFile, error)
}

// TODO:
// - Detect uses of reflection
// - Detect uses of reflection and dynamic class loading -> Consider all dependencies used.
// - See if we should do a finer grained analysis to only consider referenced
// classes where a method is called/referenced.
func EnumerateReachability(cf *javareach.ClassFile, classPath string) error {
func EnumerateReachability(roots []*javareach.ClassFile, classPath string) ([]string, error) {
seen := map[string]struct{}{}
if err := enumerateReachability(cf, classPath, seen); err != nil {
return err
for _, root := range roots {
if err := enumerateReachability(root, classPath, seen); err != nil {
return nil, err
}
}

for k, _ := range seen {
fmt.Println(k)
}
return nil
return slices.Collect(maps.Keys(seen)), nil
}

func enumerateReachability(cf *javareach.ClassFile, classPath string, seen map[string]struct{}) error {
Expand All @@ -99,46 +191,91 @@ func enumerateReachability(cf *javareach.ClassFile, classPath string, seen map[s
// Don't consider this class itself.
continue
}
if cp.Type() != javareach.ConstantKindClass {
continue
}

if cp.Type() == javareach.ConstantKindClass {
class, err := cf.ConstantPoolClass(i)
if err != nil {
return err
}
class, err := cf.ConstantPoolClass(i)
if err != nil {
return err
}

// Handle arrays.
if len(class) > 0 && class[0] == '[' {
// "[" can appear multiple times (nested arrays).
class = strings.TrimLeft(class, "[")

// Array of class type. Extract the class name.
if len(class) > 0 && class[0] == 'L' {
class = strings.TrimSuffix(class[1:], ";")
} else if slices.Contains(javareach.BinaryBaseTypes, class) {
// Base type (e.g. integer): just ignore this.
continue
} else {
// We don't know what the type is.
return fmt.Errorf("unknown class type %s", class)
}
}
// Handle arrays.
if len(class) > 0 && class[0] == '[' {
// "[" can appear multiple times (nested arrays).
class = strings.TrimLeft(class, "[")

if javareach.IsStdLib(class) {
// Array of class type. Extract the class name.
if len(class) > 0 && class[0] == 'L' {
class = strings.TrimSuffix(class[1:], ";")
} else if slices.Contains(javareach.BinaryBaseTypes, class) {
// Base type (e.g. integer): just ignore this.
continue
} else {
// We don't know what the type is.
return fmt.Errorf("unknown class type %s", class)
}
}

if javareach.IsStdLib(class) {
continue
}

slog.Debug("found", "dependency", class)
depcf, err := findClass(classPath, class)
slog.Debug("found", "dependency", class)
if _, ok := seen[class]; ok {
continue
}

depcf, err := findClass(classPath, class)
if err != nil {
// Dependencies can be optional, so this is not a fatal error.
slog.Error("failed to find class", "class", class, "from", thisClass, "cp idx", i, "error", err)
continue
}

if err := enumerateReachability(depcf, classPath, seen); err != nil {
return err
}
}

return nil
}

// unzipJar extracts every entry of the zip archive at jarPath into tmpDir.
//
// Directory entries are created with mode 0755. For file entries the parent
// directory is created first, because archives are not required to contain an
// explicit entry for every directory. An entry whose joined path does not lie
// under tmpDir aborts the extraction with a "directory traversal" error.
//
// NOTE(review): entry contents are copied without a size limit; add one if
// untrusted archives must be handled.
func unzipJar(jarPath string, tmpDir string) error {
	r, err := zip.OpenReader(jarPath)
	if err != nil {
		return err
	}
	defer r.Close()

	// The trailing separator keeps a sibling such as "<tmpDir>-other" from
	// passing the prefix check.
	root := filepath.Clean(tmpDir) + string(os.PathSeparator)
	for _, file := range r.File {
		path := filepath.Join(tmpDir, file.Name)
		if !strings.HasPrefix(path, root) {
			return fmt.Errorf("directory traversal: %s", path)
		}

		if file.FileInfo().IsDir() {
			if err := os.MkdirAll(path, 0755); err != nil {
				return err
			}
			continue
		}

		if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
			return err
		}
		if err := extractZipEntry(file, path); err != nil {
			return err
		}
	}

	return nil
}

// extractZipEntry writes the contents of one zip entry to a new file at path.
// It lives in its own function so the entry reader is closed after each entry
// rather than when the whole archive has been processed.
func extractZipEntry(file *zip.File, path string) error {
	source, err := file.Open()
	if err != nil {
		return err
	}
	defer source.Close()

	f, err := os.Create(path)
	if err != nil {
		return err
	}

	if _, err := io.Copy(f, source); err != nil {
		f.Close()
		return err
	}
	// Report the Close error: a failed flush means the file is incomplete.
	return f.Close()
}
11 changes: 11 additions & 0 deletions experimental/javareach/go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
module github.com/google/osv-scanner/experimental/javareach

go 1.23

require (
github.com/google/osv-scalibr v0.1.5
golang.org/x/sync v0.7.0
)

require (
github.com/gobwas/glob v0.2.3 // indirect
github.com/package-url/packageurl-go v0.1.2 // indirect
go.uber.org/multierr v1.11.0 // indirect
)
20 changes: 20 additions & 0 deletions experimental/javareach/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/osv-scalibr v0.1.5 h1:72HA+yhxk/fFUotxJgXIIEpDlYfuAP0BeM4m3PbGDWE=
github.com/google/osv-scalibr v0.1.5/go.mod h1:fvnB14pFjAupxDoCLUgdMg2rHu6v86BgKGQHzgTFrTg=
github.com/package-url/packageurl-go v0.1.2 h1:0H2DQt6DHd/NeRlVwW4EZ4oEI6Bn40XlNPRqegcxuo4=
github.com/package-url/packageurl-go v0.1.2/go.mod h1:uQd4a7Rh3ZsVg5j0lNyAfyxIeGde9yrlhjF78GzeW0c=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
Loading

0 comments on commit 0809439

Please sign in to comment.