diff --git a/extractor/internal/units/units.go b/extractor/internal/units/units.go new file mode 100644 index 00000000..6d687e7d --- /dev/null +++ b/extractor/internal/units/units.go @@ -0,0 +1,31 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package units provides int64 constants for binary (power-of-1024) byte-size units. +package units + +const ( + // KiB is a kibibyte (1024 bytes). + KiB = int64(1024) + // MiB is a mebibyte (1024 KiB). + MiB = 1024 * KiB + // GiB is a gibibyte (1024 MiB). + GiB = 1024 * MiB + // TiB is a tebibyte (1024 GiB). + TiB = 1024 * GiB + // PiB is a pebibyte (1024 TiB). + PiB = 1024 * TiB + // EiB is an exbibyte (1024 PiB). + EiB = 1024 * PiB +) diff --git a/extractor/language/java/archive/extractor.go b/extractor/language/java/archive/extractor.go index c187cbea..b067aa4b 100644 --- a/extractor/language/java/archive/extractor.go +++ b/extractor/language/java/archive/extractor.go @@ -30,6 +30,7 @@ import ( "go.uber.org/multierr" "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/internal/units" "github.com/google/osv-scalibr/log" "github.com/google/osv-scalibr/purl" ) @@ -43,7 +44,7 @@ const ( defaultMaxZipDepth = 16 // defaultMaxZipBytes in the maximum number of bytes recursively read from an archive file. // If this limit is reached, the default extractor is halted and results so far are returned. - defaultMaxZipBytes = 4 << 30 // 4GiB + defaultMaxZipBytes = 4 * units.GiB // defaultMinZipBytes is slightly larger than an empty zip file which is 22 bytes. 
// https://en.wikipedia.org/wiki/ZIP_(file_format)#:~:text=Viewed%20as%20an%20ASCII%20string,file%20are%20usually%20%22PK%22. defaultMinZipBytes = 30 @@ -60,7 +61,7 @@ type Config struct { MaxZipDepth int // MaxOpenedBytes is the maximum number of bytes recursively read from an archive file. // If this limit is reached, extraction is halted and results so far are returned. - MaxOpenedBytes int + MaxOpenedBytes int64 // MinZipBytes is use to ignore empty zip files during extraction. // Zip files smaller than minZipBytes are ignored. MinZipBytes int @@ -73,7 +74,7 @@ type Config struct { // Extractor extracts Java packages from archive files. type Extractor struct { maxZipDepth int - maxOpenedBytes int + maxOpenedBytes int64 minZipBytes int extractFromFilename bool hashJars bool @@ -128,12 +129,12 @@ func (e Extractor) Extract(ctx context.Context, input *extractor.ScanInput) ([]* // // It returns early with an error if max depth or max opened bytes is reached. // Extracted packages are returned even if an error has occurred. -func (e Extractor) extractWithMax(ctx context.Context, input *extractor.ScanInput, depth, openedBytes int) ([]*extractor.Inventory, error) { +func (e Extractor) extractWithMax(ctx context.Context, input *extractor.ScanInput, depth int, openedBytes int64) ([]*extractor.Inventory, error) { // Return early if any max/min thresholds are hit. 
if depth > e.maxZipDepth { return nil, fmt.Errorf("%s reached max zip depth %d at %q", e.Name(), depth, input.Path) } - if oBytes := openedBytes + int(input.Info.Size()); oBytes > e.maxOpenedBytes { + if oBytes := openedBytes + input.Info.Size(); oBytes > e.maxOpenedBytes { return nil, fmt.Errorf("%s reached max opened bytes of %d at %q", e.Name(), oBytes, input.Path) } if int(input.Info.Size()) < e.minZipBytes { @@ -151,7 +152,7 @@ func (e Extractor) extractWithMax(ctx context.Context, input *extractor.ScanInpu if err != nil { return nil, fmt.Errorf("%s failed to read file at %q: %w", e.Name(), input.Path, err) } - openedBytes += len(b) + openedBytes += int64(len(b)) // Check size again in case input.Info.Size() was not accurate. Return early if hit max. if openedBytes > e.maxOpenedBytes { return nil, fmt.Errorf("%s reached max opened bytes of %d at %q", e.Name(), openedBytes, input.Path) diff --git a/extractor/language/javascript/packagejson/extractor.go b/extractor/language/javascript/packagejson/extractor.go index fdece647..73d0305d 100644 --- a/extractor/language/javascript/packagejson/extractor.go +++ b/extractor/language/javascript/packagejson/extractor.go @@ -25,6 +25,7 @@ import ( "strings" "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/internal/units" "github.com/google/osv-scalibr/log" "github.com/google/osv-scalibr/purl" ) @@ -35,7 +36,7 @@ const ( // defaultMaxJSONSize is the maximum file size an extractor will unmarshal. // If Extract gets a bigger file, it will return an error. 
- defaultMaxJSONSize = int64(100) << 20 // 100MiB + defaultMaxJSONSize = 100 * units.MiB ) type packageJSON struct { diff --git a/extractor/language/python/wheelegg/extractor.go b/extractor/language/python/wheelegg/extractor.go index 7c6762c9..50cc1432 100644 --- a/extractor/language/python/wheelegg/extractor.go +++ b/extractor/language/python/wheelegg/extractor.go @@ -29,6 +29,7 @@ import ( "strings" "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/internal/units" "github.com/google/osv-scalibr/purl" ) @@ -38,7 +39,7 @@ const ( // defaultMaxFileSize is the maximum file size an extractor will unmarshal. // If Extract gets a bigger file, it will return an error. - defaultMaxFileSize = int64(100) << 20 // 100MiB + defaultMaxFileSize = 100 * units.MiB ) // Extractor extracts python packages from wheel/egg files. diff --git a/extractor/os/dpkg/extractor.go b/extractor/os/dpkg/extractor.go index 7e134a63..fdb4f796 100644 --- a/extractor/os/dpkg/extractor.go +++ b/extractor/os/dpkg/extractor.go @@ -27,6 +27,7 @@ import ( "strings" "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/internal/units" "github.com/google/osv-scalibr/extractor/os/osrelease" "github.com/google/osv-scalibr/log" "github.com/google/osv-scalibr/purl" @@ -38,7 +39,7 @@ const ( // defaultMaxFileSize is the maximum file size an extractor will unmarshal. // If Extract gets a bigger file, it will return an error. - defaultMaxFileSize = int64(100) << 20 // 100MiB + defaultMaxFileSize = 100 * units.MiB ) // Config is the configuration for the Extractor.