From 8788a35822afefa7d9f626a4d8877be7b60551d2 Mon Sep 17 00:00:00 2001 From: devampkid Date: Thu, 21 Aug 2025 21:15:54 +0400 Subject: [PATCH 1/9] secret extractor for hugginface.co api --- binary/proto/scan_result.proto | 5 + .../scan_result_go_proto/scan_result.pb.go | 116 +++++++-- binary/proto/secret.go | 21 ++ enricher/secrets/secrets.go | 2 + extractor/filesystem/secrets/secrets.go | 2 + veles/secrets/huggingfaceapikey/detector.go | 44 ++++ .../huggingfaceapikey/detector_test.go | 142 +++++++++++ .../huggingfaceapikey/huggingfaceapikey.go | 23 ++ veles/secrets/huggingfaceapikey/validator.go | 81 +++++++ .../huggingfaceapikey/validator_test.go | 224 ++++++++++++++++++ 10 files changed, 636 insertions(+), 24 deletions(-) create mode 100644 veles/secrets/huggingfaceapikey/detector.go create mode 100644 veles/secrets/huggingfaceapikey/detector_test.go create mode 100644 veles/secrets/huggingfaceapikey/huggingfaceapikey.go create mode 100644 veles/secrets/huggingfaceapikey/validator.go create mode 100644 veles/secrets/huggingfaceapikey/validator_test.go diff --git a/binary/proto/scan_result.proto b/binary/proto/scan_result.proto index b5b0789e4..a9c1e80d7 100644 --- a/binary/proto/scan_result.proto +++ b/binary/proto/scan_result.proto @@ -613,6 +613,7 @@ message Secret { message SecretData { oneof secret { GCPSAK gcpsak = 1; + HuggingfaceAPIKey hugginface = 2; } message GCPSAK { @@ -635,6 +636,10 @@ message SecretData { // risk that this might accidentally leak the key. string private_key = 12; } + + message HuggingfaceAPIKey { + string key = 1; + } } message SecretStatus { diff --git a/binary/proto/scan_result_go_proto/scan_result.pb.go b/binary/proto/scan_result_go_proto/scan_result.pb.go index 1c8a4ac6e..8b82dc4b6 100644 --- a/binary/proto/scan_result_go_proto/scan_result.pb.go +++ b/binary/proto/scan_result_go_proto/scan_result.pb.go @@ -15,7 +15,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.36.6 +// protoc-gen-go v1.36.7 // protoc v3.21.12 // source: proto/scan_result.proto @@ -4755,6 +4755,7 @@ type SecretData struct { // Types that are valid to be assigned to Secret: // // *SecretData_Gcpsak + // *SecretData_Hugginface Secret isSecretData_Secret `protobuf_oneof:"secret"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache @@ -4806,6 +4807,15 @@ func (x *SecretData) GetGcpsak() *SecretData_GCPSAK { return nil } +func (x *SecretData) GetHugginface() *SecretData_HuggingfaceAPIKey { + if x != nil { + if x, ok := x.Secret.(*SecretData_Hugginface); ok { + return x.Hugginface + } + } + return nil +} + type isSecretData_Secret interface { isSecretData_Secret() } @@ -4814,8 +4824,14 @@ type SecretData_Gcpsak struct { Gcpsak *SecretData_GCPSAK `protobuf:"bytes,1,opt,name=gcpsak,proto3,oneof"` } +type SecretData_Hugginface struct { + Hugginface *SecretData_HuggingfaceAPIKey `protobuf:"bytes,2,opt,name=hugginface,proto3,oneof"` +} + func (*SecretData_Gcpsak) isSecretData_Secret() {} +func (*SecretData_Hugginface) isSecretData_Secret() {} + type SecretStatus struct { state protoimpl.MessageState `protogen:"open.v1"` Status SecretStatus_SecretStatusEnum `protobuf:"varint,1,opt,name=status,proto3,enum=scalibr.SecretStatus_SecretStatusEnum" json:"status,omitempty"` @@ -5302,6 +5318,50 @@ func (x *SecretData_GCPSAK) GetPrivateKey() string { return "" } +type SecretData_HuggingfaceAPIKey struct { + state protoimpl.MessageState `protogen:"open.v1"` + Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *SecretData_HuggingfaceAPIKey) Reset() { + *x = SecretData_HuggingfaceAPIKey{} + mi := &file_proto_scan_result_proto_msgTypes[59] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *SecretData_HuggingfaceAPIKey) String() string { + return protoimpl.X.MessageStringOf(x) +} 
+ +func (*SecretData_HuggingfaceAPIKey) ProtoMessage() {} + +func (x *SecretData_HuggingfaceAPIKey) ProtoReflect() protoreflect.Message { + mi := &file_proto_scan_result_proto_msgTypes[59] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SecretData_HuggingfaceAPIKey.ProtoReflect.Descriptor instead. +func (*SecretData_HuggingfaceAPIKey) Descriptor() ([]byte, []int) { + return file_proto_scan_result_proto_rawDescGZIP(), []int{50, 1} +} + +func (x *SecretData_HuggingfaceAPIKey) GetKey() string { + if x != nil { + return x.Key + } + return "" +} + var File_proto_scan_result_proto protoreflect.FileDescriptor const file_proto_scan_result_proto_rawDesc = "" + @@ -5689,10 +5749,13 @@ const file_proto_scan_result_proto_rawDesc = "" + "\x06Secret\x12+\n" + "\x06secret\x18\x01 \x01(\v2\x13.scalibr.SecretDataR\x06secret\x12-\n" + "\x06status\x18\x02 \x01(\v2\x15.scalibr.SecretStatusR\x06status\x12/\n" + - "\tlocations\x18\x03 \x03(\v2\x11.scalibr.LocationR\tlocations\"\xff\x03\n" + + "\tlocations\x18\x03 \x03(\v2\x11.scalibr.LocationR\tlocations\"\xef\x04\n" + "\n" + "SecretData\x124\n" + - "\x06gcpsak\x18\x01 \x01(\v2\x1a.scalibr.SecretData.GCPSAKH\x00R\x06gcpsak\x1a\xb0\x03\n" + + "\x06gcpsak\x18\x01 \x01(\v2\x1a.scalibr.SecretData.GCPSAKH\x00R\x06gcpsak\x12G\n" + + "\n" + + "hugginface\x18\x02 \x01(\v2%.scalibr.SecretData.HuggingfaceAPIKeyH\x00R\n" + + "hugginface\x1a\xb0\x03\n" + "\x06GCPSAK\x12$\n" + "\x0eprivate_key_id\x18\x01 \x01(\tR\fprivateKeyId\x12!\n" + "\fclient_email\x18\x02 \x01(\tR\vclientEmail\x12\x1c\n" + @@ -5708,7 +5771,9 @@ const file_proto_scan_result_proto_rawDesc = "" + " \x01(\tR\x11clientX509CertUrl\x12'\n" + "\x0funiverse_domain\x18\v \x01(\tR\x0euniverseDomain\x12\x1f\n" + "\vprivate_key\x18\f \x01(\tR\n" + - "privateKeyB\b\n" + + "privateKey\x1a%\n" + + "\x11HuggingfaceAPIKey\x12\x10\n" + + 
"\x03key\x18\x01 \x01(\tR\x03keyB\b\n" + "\x06secret\"\xf8\x01\n" + "\fSecretStatus\x12>\n" + "\x06status\x18\x01 \x01(\x0e2&.scalibr.SecretStatus.SecretStatusEnumR\x06status\x12=\n" + @@ -5766,7 +5831,7 @@ func file_proto_scan_result_proto_rawDescGZIP() []byte { } var file_proto_scan_result_proto_enumTypes = make([]protoimpl.EnumInfo, 5) -var file_proto_scan_result_proto_msgTypes = make([]protoimpl.MessageInfo, 59) +var file_proto_scan_result_proto_msgTypes = make([]protoimpl.MessageInfo, 60) var file_proto_scan_result_proto_goTypes = []any{ (VexJustification)(0), // 0: scalibr.VexJustification (SeverityEnum)(0), // 1: scalibr.SeverityEnum @@ -5832,11 +5897,12 @@ var file_proto_scan_result_proto_goTypes = []any{ (*ContainerCommand)(nil), // 61: scalibr.ContainerCommand nil, // 62: scalibr.PodmanMetadata.ExposedPortsEntry (*SecretData_GCPSAK)(nil), // 63: scalibr.SecretData.GCPSAK - (*timestamppb.Timestamp)(nil), // 64: google.protobuf.Timestamp + (*SecretData_HuggingfaceAPIKey)(nil), // 64: scalibr.SecretData.HuggingfaceAPIKey + (*timestamppb.Timestamp)(nil), // 65: google.protobuf.Timestamp } var file_proto_scan_result_proto_depIdxs = []int32{ - 64, // 0: scalibr.ScanResult.start_time:type_name -> google.protobuf.Timestamp - 64, // 1: scalibr.ScanResult.end_time:type_name -> google.protobuf.Timestamp + 65, // 0: scalibr.ScanResult.start_time:type_name -> google.protobuf.Timestamp + 65, // 1: scalibr.ScanResult.end_time:type_name -> google.protobuf.Timestamp 7, // 2: scalibr.ScanResult.status:type_name -> scalibr.ScanStatus 8, // 3: scalibr.ScanResult.plugin_status:type_name -> scalibr.PluginStatus 9, // 4: scalibr.ScanResult.inventories_deprecated:type_name -> scalibr.Package @@ -5895,26 +5961,27 @@ var file_proto_scan_result_proto_depIdxs = []int32{ 15, // 57: scalibr.SPDXPackageMetadata.purl:type_name -> scalibr.Purl 15, // 58: scalibr.CDXPackageMetadata.purl:type_name -> scalibr.Purl 62, // 59: scalibr.PodmanMetadata.exposed_ports:type_name -> 
scalibr.PodmanMetadata.ExposedPortsEntry - 64, // 60: scalibr.PodmanMetadata.started_time:type_name -> google.protobuf.Timestamp - 64, // 61: scalibr.PodmanMetadata.finished_time:type_name -> google.protobuf.Timestamp + 65, // 60: scalibr.PodmanMetadata.started_time:type_name -> google.protobuf.Timestamp + 65, // 61: scalibr.PodmanMetadata.finished_time:type_name -> google.protobuf.Timestamp 53, // 62: scalibr.DockerContainersMetadata.ports:type_name -> scalibr.DockerPort 55, // 63: scalibr.Secret.secret:type_name -> scalibr.SecretData 56, // 64: scalibr.Secret.status:type_name -> scalibr.SecretStatus 57, // 65: scalibr.Secret.locations:type_name -> scalibr.Location 63, // 66: scalibr.SecretData.gcpsak:type_name -> scalibr.SecretData.GCPSAK - 4, // 67: scalibr.SecretStatus.status:type_name -> scalibr.SecretStatus.SecretStatusEnum - 64, // 68: scalibr.SecretStatus.last_updated:type_name -> google.protobuf.Timestamp - 58, // 69: scalibr.Location.filepath:type_name -> scalibr.Filepath - 59, // 70: scalibr.Location.filepath_with_layer_details:type_name -> scalibr.FilepathWithLayerDetails - 60, // 71: scalibr.Location.environment_variable:type_name -> scalibr.EnvironmentVariable - 61, // 72: scalibr.Location.container_command:type_name -> scalibr.ContainerCommand - 11, // 73: scalibr.FilepathWithLayerDetails.layer_details:type_name -> scalibr.LayerDetails - 51, // 74: scalibr.PodmanMetadata.ExposedPortsEntry.value:type_name -> scalibr.Protocol - 75, // [75:75] is the sub-list for method output_type - 75, // [75:75] is the sub-list for method input_type - 75, // [75:75] is the sub-list for extension type_name - 75, // [75:75] is the sub-list for extension extendee - 0, // [0:75] is the sub-list for field type_name + 64, // 67: scalibr.SecretData.hugginface:type_name -> scalibr.SecretData.HuggingfaceAPIKey + 4, // 68: scalibr.SecretStatus.status:type_name -> scalibr.SecretStatus.SecretStatusEnum + 65, // 69: scalibr.SecretStatus.last_updated:type_name -> 
google.protobuf.Timestamp + 58, // 70: scalibr.Location.filepath:type_name -> scalibr.Filepath + 59, // 71: scalibr.Location.filepath_with_layer_details:type_name -> scalibr.FilepathWithLayerDetails + 60, // 72: scalibr.Location.environment_variable:type_name -> scalibr.EnvironmentVariable + 61, // 73: scalibr.Location.container_command:type_name -> scalibr.ContainerCommand + 11, // 74: scalibr.FilepathWithLayerDetails.layer_details:type_name -> scalibr.LayerDetails + 51, // 75: scalibr.PodmanMetadata.ExposedPortsEntry.value:type_name -> scalibr.Protocol + 76, // [76:76] is the sub-list for method output_type + 76, // [76:76] is the sub-list for method input_type + 76, // [76:76] is the sub-list for extension type_name + 76, // [76:76] is the sub-list for extension extendee + 0, // [0:76] is the sub-list for field type_name } func init() { file_proto_scan_result_proto_init() } @@ -5961,6 +6028,7 @@ func file_proto_scan_result_proto_init() { } file_proto_scan_result_proto_msgTypes[50].OneofWrappers = []any{ (*SecretData_Gcpsak)(nil), + (*SecretData_Hugginface)(nil), } file_proto_scan_result_proto_msgTypes[52].OneofWrappers = []any{ (*Location_Filepath)(nil), @@ -5974,7 +6042,7 @@ func file_proto_scan_result_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_proto_scan_result_proto_rawDesc), len(file_proto_scan_result_proto_rawDesc)), NumEnums: 5, - NumMessages: 59, + NumMessages: 60, NumExtensions: 0, NumServices: 0, }, diff --git a/binary/proto/secret.go b/binary/proto/secret.go index c97dba9c1..de0a01ee2 100644 --- a/binary/proto/secret.go +++ b/binary/proto/secret.go @@ -22,6 +22,7 @@ import ( "github.com/google/osv-scalibr/inventory" "github.com/google/osv-scalibr/veles" velesgcpsak "github.com/google/osv-scalibr/veles/secrets/gcpsak" + "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" spb "github.com/google/osv-scalibr/binary/proto/scan_result_go_proto" 
"google.golang.org/protobuf/types/known/timestamppb" @@ -86,6 +87,8 @@ func velesSecretToProto(s veles.Secret) (*spb.SecretData, error) { switch t := s.(type) { case velesgcpsak.GCPSAK: return gcpsakToProto(t), nil + case huggingfaceapikey.HuggingfaceAPIKey: + return huggingfaceAPIKeyToProto(t), nil default: return nil, fmt.Errorf("%w: %T", ErrUnsupportedSecretType, s) } @@ -115,6 +118,16 @@ func gcpsakToProto(sak velesgcpsak.GCPSAK) *spb.SecretData { } } +func huggingfaceAPIKeyToProto(s huggingfaceapikey.HuggingfaceAPIKey) *spb.SecretData { + return &spb.SecretData{ + Secret: &spb.SecretData_Hugginface{ + Hugginface: &spb.SecretData_HuggingfaceAPIKey{ + Key: s.Key, + }, + }, + } +} + func validationResultToProto(r inventory.SecretValidationResult) (*spb.SecretStatus, error) { status, err := validationStatusToProto(r.Status) if err != nil { @@ -193,11 +206,19 @@ func velesSecretToStruct(s *spb.SecretData) (veles.Secret, error) { switch s.Secret.(type) { case *spb.SecretData_Gcpsak: return gcpsakToStruct(s.GetGcpsak()), nil + case *spb.SecretData_Hugginface: + return huggingfaceAPIKeyToStruct(s.GetHugginface()), nil default: return nil, fmt.Errorf("%w: %T", ErrUnsupportedSecretType, s.GetSecret()) } } +func huggingfaceAPIKeyToStruct(kPB *spb.SecretData_HuggingfaceAPIKey) huggingfaceapikey.HuggingfaceAPIKey { + return huggingfaceapikey.HuggingfaceAPIKey{ + Key: kPB.GetKey(), + } +} + func gcpsakToStruct(sakPB *spb.SecretData_GCPSAK) velesgcpsak.GCPSAK { sak := velesgcpsak.GCPSAK{ PrivateKeyID: sakPB.GetPrivateKeyId(), diff --git a/enricher/secrets/secrets.go b/enricher/secrets/secrets.go index 4e35350d2..dfac674b3 100644 --- a/enricher/secrets/secrets.go +++ b/enricher/secrets/secrets.go @@ -25,6 +25,7 @@ import ( "github.com/google/osv-scalibr/plugin" "github.com/google/osv-scalibr/veles" "github.com/google/osv-scalibr/veles/secrets/gcpsak" + "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" ) const ( @@ -45,6 +46,7 @@ type Enricher struct { func New() 
enricher.Enricher { engine := veles.NewValidationEngine( veles.WithValidator(gcpsak.NewValidator()), + veles.WithValidator(huggingfaceapikey.NewValidator()), ) return &Enricher{engine: engine} } diff --git a/extractor/filesystem/secrets/secrets.go b/extractor/filesystem/secrets/secrets.go index 09801c574..77d1d2e64 100644 --- a/extractor/filesystem/secrets/secrets.go +++ b/extractor/filesystem/secrets/secrets.go @@ -28,6 +28,7 @@ import ( "github.com/google/osv-scalibr/plugin" "github.com/google/osv-scalibr/veles" "github.com/google/osv-scalibr/veles/secrets/gcpsak" + "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" ) const ( @@ -61,6 +62,7 @@ func init() { //nolint:gochecknoinits var err error defaultEngine, err = veles.NewDetectionEngine([]veles.Detector{ gcpsak.NewDetector(), + huggingfaceapikey.NewDetector(), }) if err != nil { panic(fmt.Sprintf("Unable to initialize default Veles engine: %v", err)) diff --git a/veles/secrets/huggingfaceapikey/detector.go b/veles/secrets/huggingfaceapikey/detector.go new file mode 100644 index 000000000..0917b8354 --- /dev/null +++ b/veles/secrets/huggingfaceapikey/detector.go @@ -0,0 +1,44 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package huggingfaceapikey contains a Veles Secret type and a Detector for +// Huggingface API keys (prefix `hf_`). 
+package huggingfaceapikey + +import ( + "regexp" + + "github.com/google/osv-scalibr/veles" + "github.com/google/osv-scalibr/veles/secrets/common/simpletoken" +) + +// maxTokenLength is the maximum size of a Huggingface API key. +const maxTokenLength = 53 + +// keyRe is a regular expression that matches a Huggingface API key. +// Huggingface API keys have the form: `hf_` followed by 48 +// alphanumeric characters. +var keyRe = regexp.MustCompile(`hf_[A-Za-z]{34}`) + +// NewDetector returns a new simpletoken.Detector that matches +// Huggingface API keys. +func NewDetector() veles.Detector { + return simpletoken.Detector{ + MaxLen: maxTokenLength, + Re: keyRe, + FromMatch: func(b []byte) veles.Secret { + return HuggingfaceAPIKey{Key: string(b)} + }, + } +} diff --git a/veles/secrets/huggingfaceapikey/detector_test.go b/veles/secrets/huggingfaceapikey/detector_test.go new file mode 100644 index 000000000..85afcdaf8 --- /dev/null +++ b/veles/secrets/huggingfaceapikey/detector_test.go @@ -0,0 +1,142 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package huggingfaceapikey_test + +import ( + "fmt" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/google/osv-scalibr/veles" + "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" +) + +const testKey = `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` + +// TestDetector_truePositives tests for cases where we know the Detector +// will find a Huggingface API key/s. +func TestDetector_truePositives(t *testing.T) { + engine, err := veles.NewDetectionEngine([]veles.Detector{huggingfaceapikey.NewDetector()}) + if err != nil { + t.Fatal(err) + } + cases := []struct { + name string + input string + want []veles.Secret + }{{ + name: "simple matching string", + input: testKey, + want: []veles.Secret{ + huggingfaceapikey.HuggingfaceAPIKey{Key: testKey}, + }, + }, { + name: "match at end of string", + input: `HUGGINGFACE_API_KEY=` + testKey, + want: []veles.Secret{ + huggingfaceapikey.HuggingfaceAPIKey{Key: testKey}, + }, + }, { + name: "match in middle of string", + input: `HUGGINGFACE_API_KEY="` + testKey + `"`, + want: []veles.Secret{ + huggingfaceapikey.HuggingfaceAPIKey{Key: testKey}, + }, + }, { + name: "multiple matches", + input: testKey + testKey + testKey, + want: []veles.Secret{ + huggingfaceapikey.HuggingfaceAPIKey{Key: testKey}, + huggingfaceapikey.HuggingfaceAPIKey{Key: testKey}, + huggingfaceapikey.HuggingfaceAPIKey{Key: testKey}, + }, + }, { + name: "multiple distinct matches", + input: testKey + "\n" + testKey[:len(testKey)-1] + "a", + want: []veles.Secret{ + huggingfaceapikey.HuggingfaceAPIKey{Key: testKey}, + huggingfaceapikey.HuggingfaceAPIKey{Key: testKey[:len(testKey)-1] + "a"}, + }, + }, { + name: "larger input containing key", + input: fmt.Sprintf(` +:test_api_key: hf-test +:huggingface_api_key: %s + `, testKey), + want: []veles.Secret{ + huggingfaceapikey.HuggingfaceAPIKey{Key: testKey}, + }, + }, { + name: "potential match longer than max key length", + input: testKey + `extra`, + 
want: []veles.Secret{ + huggingfaceapikey.HuggingfaceAPIKey{Key: testKey}, + }, + }} + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, err := engine.Detect(t.Context(), strings.NewReader(tc.input)) + if err != nil { + t.Errorf("Detect() error: %v, want nil", err) + } + fmt.Printf("got = %+v\n", got) + if diff := cmp.Diff(tc.want, got, cmpopts.EquateEmpty()); diff != "" { + t.Errorf("Detect() diff (-want +got):\n%s", diff) + } + }) + } +} + +// TestDetector_trueNegatives tests for cases where we know the Detector +// will not find a Huggingface API key. +func TestDetector_trueNegatives(t *testing.T) { + engine, err := veles.NewDetectionEngine([]veles.Detector{huggingfaceapikey.NewDetector()}) + if err != nil { + t.Fatal(err) + } + cases := []struct { + name string + input string + want []veles.Secret + }{{ + name: "empty input", + input: "", + }, { + name: "short key should not match", + input: testKey[:len(testKey)-1], + }, { + name: "invalid character in key should not match", + input: `hf_@rh6GxZ4JnSMHi3YFrpVOWH2znfcmU1WCQ`, + }, { + name: "incorrect prefix should not match", + input: `hff_ArhUGxZeJnSMHixYFrpVOWHYznfcmUWWCQ`, + }, { + name: "prefix missing dash should not match", + input: `hfSvBATDnaPfgMWAtDQmmoIMAUmzdAAlexyr`, + }} + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, err := engine.Detect(t.Context(), strings.NewReader(tc.input)) + if err != nil { + t.Errorf("Detect() error: %v, want nil", err) + } + if diff := cmp.Diff(tc.want, got, cmpopts.EquateEmpty()); diff != "" { + t.Errorf("Detect() diff (-want +got):\n%s", diff) + } + }) + } +} diff --git a/veles/secrets/huggingfaceapikey/huggingfaceapikey.go b/veles/secrets/huggingfaceapikey/huggingfaceapikey.go new file mode 100644 index 000000000..eec9a9e71 --- /dev/null +++ b/veles/secrets/huggingfaceapikey/huggingfaceapikey.go @@ -0,0 +1,23 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may 
not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package huggingfaceapikey contains a Veles Secret type for Huggingface API keys. +package huggingfaceapikey + +// HuggingfaceAPIKey is a Veles Secret that holds relevant information for a +// Huggingface API key (prefix `hf_`). +// HuggingfaceAPIKey represents an API key used to authenticate requests +type HuggingfaceAPIKey struct { + Key string +} diff --git a/veles/secrets/huggingfaceapikey/validator.go b/veles/secrets/huggingfaceapikey/validator.go new file mode 100644 index 000000000..429c7e2ba --- /dev/null +++ b/veles/secrets/huggingfaceapikey/validator.go @@ -0,0 +1,81 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package huggingfaceapikey + +import ( + "context" + "fmt" + "net/http" + + "github.com/google/osv-scalibr/veles" +) + +// Validator validates Huggingface API keys via the Huggingface API endpoint. +type Validator struct { + httpC *http.Client +} + +// ValidatorOption configures a Validator when creating it via NewValidator. 
+type ValidatorOption func(*Validator) + +// WithClient configures the http.Client that the Validator uses. +// +// By default, it uses http.DefaultClient. +func WithClient(c *http.Client) ValidatorOption { + return func(v *Validator) { + v.httpC = c + } +} + +// NewValidator creates a new Validator with the given ValidatorOptions. +func NewValidator(opts ...ValidatorOption) *Validator { + v := &Validator{ + httpC: http.DefaultClient, + } + for _, opt := range opts { + opt(v) + } + return v +} + +// Validate checks whether the given HuggingfaceAPIKey is valid. +// +// It performs a GET request to the Huggingface chat completions endpoint +// using the API key in the Authorization header. If the request returns +// HTTP 200, the key is considered valid. If 401 Unauthorized, the key +// is invalid. Other errors return ValidationFailed. +func (v *Validator) Validate(ctx context.Context, key HuggingfaceAPIKey) (veles.ValidationStatus, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, + "https://huggingface.co/api/whoami-v2", nil) + if err != nil { + return veles.ValidationFailed, fmt.Errorf("unable to create HTTP request: %w", err) + } + req.Header.Set("Authorization", "Bearer "+key.Key) + + res, err := v.httpC.Do(req) + if err != nil { + return veles.ValidationFailed, fmt.Errorf("HTTP GET failed: %w", err) + } + defer res.Body.Close() + + switch res.StatusCode { + case http.StatusOK: + return veles.ValidationValid, nil + case http.StatusUnauthorized: + return veles.ValidationInvalid, nil + default: + return veles.ValidationFailed, fmt.Errorf("unexpected HTTP status: %d", res.StatusCode) + } +} diff --git a/veles/secrets/huggingfaceapikey/validator_test.go b/veles/secrets/huggingfaceapikey/validator_test.go new file mode 100644 index 000000000..28c4f1581 --- /dev/null +++ b/veles/secrets/huggingfaceapikey/validator_test.go @@ -0,0 +1,224 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you 
may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package huggingfaceapikey_test + +import ( + "context" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "testing" + "time" + + "github.com/google/osv-scalibr/veles" + "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" +) + +const validatorTestKey = "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" + +// mockTransport redirects requests to the test server +type mockTransport struct { + testServer *httptest.Server +} + +func (m *mockTransport) RoundTrip(req *http.Request) (*http.Response, error) { + // Replace the original URL with our test server URL + if req.URL.Host == "huggingface.co" { + testURL, _ := url.Parse(m.testServer.URL) + req.URL.Scheme = testURL.Scheme + req.URL.Host = testURL.Host + } + return http.DefaultTransport.RoundTrip(req) +} + +// mockHuggingfaceServer creates a mock Huggingface API server for testing +func mockHuggingfaceServer(t *testing.T, expectedKey string, statusCode int) *httptest.Server { + t.Helper() + + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Check if it's a GET request to the expected endpoint + if r.Method != http.MethodGet || r.URL.Path != "/api/whoami-v2" { + t.Errorf("unexpected request: %s %s, expected: GET /api/whoami-v2", r.Method, r.URL.Path) + http.Error(w, "not found", http.StatusNotFound) + return + } + + // Check Authorization header + authHeader := r.Header.Get("Authorization") + if !strings.HasSuffix(authHeader, expectedKey) { + t.Errorf("expected Authorization header to 
end with key %s, got: %s", expectedKey, authHeader) + } + + // Set response + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(statusCode) + })) +} + +func TestValidator(t *testing.T) { + cases := []struct { + name string + statusCode int + want veles.ValidationStatus + expectError bool + }{ + { + name: "valid_key", + statusCode: http.StatusOK, + want: veles.ValidationValid, + }, + { + name: "invalid_key_unauthorized", + statusCode: http.StatusUnauthorized, + want: veles.ValidationInvalid, + }, + { + name: "server_error", + statusCode: http.StatusInternalServerError, + want: veles.ValidationFailed, + expectError: true, + }, + { + name: "bad_gateway", + statusCode: http.StatusBadGateway, + want: veles.ValidationFailed, + expectError: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + // Create a mock server + server := mockHuggingfaceServer(t, validatorTestKey, tc.statusCode) + defer server.Close() + + // Create a client with custom transport + client := &http.Client{ + Transport: &mockTransport{testServer: server}, + } + + // Create a validator with a mock client + validator := huggingfaceapikey.NewValidator( + huggingfaceapikey.WithClient(client), + ) + + // Create a test key + key := huggingfaceapikey.HuggingfaceAPIKey{Key: validatorTestKey} + + // Test validation + got, err := validator.Validate(context.Background(), key) + + // Check error expectation + if tc.expectError { + if err == nil { + t.Errorf("Validate() expected error, got nil") + } + } else { + if err != nil { + t.Errorf("Validate() unexpected error: %v", err) + } + } + + // Check validation status + if got != tc.want { + t.Errorf("Validate() = %v, want %v", got, tc.want) + } + }) + } +} + +func TestValidator_ContextCancellation(t *testing.T) { + // Create a server that delays response + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(100 * time.Millisecond) + w.WriteHeader(http.StatusOK) + 
})) + defer server.Close() + + // Create a client with custom transport + client := &http.Client{ + Transport: &mockTransport{testServer: server}, + } + + validator := huggingfaceapikey.NewValidator( + huggingfaceapikey.WithClient(client), + ) + + key := huggingfaceapikey.HuggingfaceAPIKey{Key: validatorTestKey} + + // Create context with a short timeout + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) + defer cancel() + + // Test validation with cancelled context + got, err := validator.Validate(ctx, key) + + if err == nil { + t.Errorf("Validate() expected error due to context cancellation, got nil") + } + if got != veles.ValidationFailed { + t.Errorf("Validate() = %v, want %v", got, veles.ValidationFailed) + } +} + +func TestValidator_InvalidRequest(t *testing.T) { + // Create a mock server that returns 401 Unauthorized + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + })) + defer server.Close() + + // Create a client with custom transport + client := &http.Client{ + Transport: &mockTransport{testServer: server}, + } + + validator := huggingfaceapikey.NewValidator( + huggingfaceapikey.WithClient(client), + ) + + testCases := []struct { + name string + key string + expected veles.ValidationStatus + }{ + { + name: "empty_key", + key: "", + expected: veles.ValidationInvalid, + }, + { + name: "invalid_key_format", + key: "invalid-key-format", + expected: veles.ValidationInvalid, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + key := huggingfaceapikey.HuggingfaceAPIKey{Key: tc.key} + + got, err := validator.Validate(context.Background(), key) + + if err != nil { + t.Errorf("Validate() unexpected error for %s: %v", tc.name, err) + } + if got != tc.expected { + t.Errorf("Validate() = %v, want %v for %s", got, tc.expected, tc.name) + } + }) + } +} From cc212f308034391e5ae973377802742fe1a80506 Mon Sep 17 00:00:00 2001 
From: devampkid Date: Fri, 22 Aug 2025 01:29:45 +0400 Subject: [PATCH 2/9] minor updates --- veles/secrets/huggingfaceapikey/detector.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/veles/secrets/huggingfaceapikey/detector.go b/veles/secrets/huggingfaceapikey/detector.go index 0917b8354..b34ad1aa8 100644 --- a/veles/secrets/huggingfaceapikey/detector.go +++ b/veles/secrets/huggingfaceapikey/detector.go @@ -24,10 +24,10 @@ import ( ) // maxTokenLength is the maximum size of a Huggingface API key. -const maxTokenLength = 53 +const maxTokenLength = 37 // keyRe is a regular expression that matches a Huggingface API key. -// Huggingface API keys have the form: `hf_` followed by 48 +// Huggingface API keys have the form: `hf_` followed by 34 // alphanumeric characters. var keyRe = regexp.MustCompile(`hf_[A-Za-z]{34}`) From 60a817bec0c82132b57ae17b8c742e93871f0517 Mon Sep 17 00:00:00 2001 From: devampkid Date: Thu, 4 Sep 2025 21:43:59 +0400 Subject: [PATCH 3/9] added more precise status codes check for Validate function. --- veles/secrets/huggingfaceapikey/validator.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/veles/secrets/huggingfaceapikey/validator.go b/veles/secrets/huggingfaceapikey/validator.go index 429c7e2ba..0b01fab8e 100644 --- a/veles/secrets/huggingfaceapikey/validator.go +++ b/veles/secrets/huggingfaceapikey/validator.go @@ -75,6 +75,10 @@ func (v *Validator) Validate(ctx context.Context, key HuggingfaceAPIKey) (veles. 
return veles.ValidationValid, nil case http.StatusUnauthorized: return veles.ValidationInvalid, nil + case http.StatusTooManyRequests: + return veles.ValidationValid, nil + case http.StatusInternalServerError: + return veles.ValidationFailed, fmt.Errorf("unexpected server-side error: %d", res.StatusCode) default: return veles.ValidationFailed, fmt.Errorf("unexpected HTTP status: %d", res.StatusCode) } From 487097231948f8b79e3061397b8e76ee7b4ca9a3 Mon Sep 17 00:00:00 2001 From: devampkid Date: Fri, 5 Sep 2025 17:30:44 +0400 Subject: [PATCH 4/9] add huggingface enricher and make scan result more verbose, this is not finalized. --- binary/proto/scan_result.proto | 2 + .../scan_result_go_proto/scan_result.pb.go | 32 ++- binary/proto/secret.go | 8 +- enricher/enricherlist/list.go | 8 + .../huggingfacesecrets/huggingfacesecrets.go | 106 +++++++ .../huggingfacesecrets_test.go | 262 ++++++++++++++++++ enricher/secrets/secrets.go | 2 - .../huggingfaceapikey/huggingfaceapikey.go | 4 +- veles/secrets/huggingfaceapikey/validator.go | 3 +- .../huggingfaceapikey/validator_test.go | 6 +- 10 files changed, 417 insertions(+), 16 deletions(-) create mode 100644 enricher/huggingfacesecrets/huggingfacesecrets.go create mode 100644 enricher/huggingfacesecrets/huggingfacesecrets_test.go diff --git a/binary/proto/scan_result.proto b/binary/proto/scan_result.proto index a9c1e80d7..d014ec133 100644 --- a/binary/proto/scan_result.proto +++ b/binary/proto/scan_result.proto @@ -639,6 +639,8 @@ message SecretData { message HuggingfaceAPIKey { string key = 1; + string role = 2; + repeated string fine_grained_scope = 3; } } diff --git a/binary/proto/scan_result_go_proto/scan_result.pb.go b/binary/proto/scan_result_go_proto/scan_result.pb.go index 8b82dc4b6..808bbf8fc 100644 --- a/binary/proto/scan_result_go_proto/scan_result.pb.go +++ b/binary/proto/scan_result_go_proto/scan_result.pb.go @@ -5319,10 +5319,12 @@ func (x *SecretData_GCPSAK) GetPrivateKey() string { } type 
SecretData_HuggingfaceAPIKey struct { - state protoimpl.MessageState `protogen:"open.v1"` - Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + state protoimpl.MessageState `protogen:"open.v1"` + Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` + Role string `protobuf:"bytes,2,opt,name=role,proto3" json:"role,omitempty"` + FineGrainedScope []string `protobuf:"bytes,3,rep,name=fine_grained_scope,json=fineGrainedScope,proto3" json:"fine_grained_scope,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *SecretData_HuggingfaceAPIKey) Reset() { @@ -5362,6 +5364,20 @@ func (x *SecretData_HuggingfaceAPIKey) GetKey() string { return "" } +func (x *SecretData_HuggingfaceAPIKey) GetRole() string { + if x != nil { + return x.Role + } + return "" +} + +func (x *SecretData_HuggingfaceAPIKey) GetFineGrainedScope() []string { + if x != nil { + return x.FineGrainedScope + } + return nil +} + var File_proto_scan_result_proto protoreflect.FileDescriptor const file_proto_scan_result_proto_rawDesc = "" + @@ -5749,7 +5765,7 @@ const file_proto_scan_result_proto_rawDesc = "" + "\x06Secret\x12+\n" + "\x06secret\x18\x01 \x01(\v2\x13.scalibr.SecretDataR\x06secret\x12-\n" + "\x06status\x18\x02 \x01(\v2\x15.scalibr.SecretStatusR\x06status\x12/\n" + - "\tlocations\x18\x03 \x03(\v2\x11.scalibr.LocationR\tlocations\"\xef\x04\n" + + "\tlocations\x18\x03 \x03(\v2\x11.scalibr.LocationR\tlocations\"\xb1\x05\n" + "\n" + "SecretData\x124\n" + "\x06gcpsak\x18\x01 \x01(\v2\x1a.scalibr.SecretData.GCPSAKH\x00R\x06gcpsak\x12G\n" + @@ -5771,9 +5787,11 @@ const file_proto_scan_result_proto_rawDesc = "" + " \x01(\tR\x11clientX509CertUrl\x12'\n" + "\x0funiverse_domain\x18\v \x01(\tR\x0euniverseDomain\x12\x1f\n" + "\vprivate_key\x18\f \x01(\tR\n" + - "privateKey\x1a%\n" + + "privateKey\x1ag\n" + "\x11HuggingfaceAPIKey\x12\x10\n" + - "\x03key\x18\x01 
\x01(\tR\x03keyB\b\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12\x12\n" + + "\x04role\x18\x02 \x01(\tR\x04role\x12,\n" + + "\x12fine_grained_scope\x18\x03 \x03(\tR\x10fineGrainedScopeB\b\n" + "\x06secret\"\xf8\x01\n" + "\fSecretStatus\x12>\n" + "\x06status\x18\x01 \x01(\x0e2&.scalibr.SecretStatus.SecretStatusEnumR\x06status\x12=\n" + diff --git a/binary/proto/secret.go b/binary/proto/secret.go index de0a01ee2..ad437260b 100644 --- a/binary/proto/secret.go +++ b/binary/proto/secret.go @@ -122,7 +122,9 @@ func huggingfaceAPIKeyToProto(s huggingfaceapikey.HuggingfaceAPIKey) *spb.Secret return &spb.SecretData{ Secret: &spb.SecretData_Hugginface{ Hugginface: &spb.SecretData_HuggingfaceAPIKey{ - Key: s.Key, + Key: s.Key, + Role: s.Role, + FineGrainedScope: s.FineGrainedScope, }, }, } @@ -215,7 +217,9 @@ func velesSecretToStruct(s *spb.SecretData) (veles.Secret, error) { func huggingfaceAPIKeyToStruct(kPB *spb.SecretData_HuggingfaceAPIKey) huggingfaceapikey.HuggingfaceAPIKey { return huggingfaceapikey.HuggingfaceAPIKey{ - Key: kPB.GetKey(), + Key: kPB.GetKey(), + Role: kPB.GetRole(), + FineGrainedScope: kPB.GetFineGrainedScope(), } } diff --git a/enricher/enricherlist/list.go b/enricher/enricherlist/list.go index 9f9782fb7..83c91f14a 100644 --- a/enricher/enricherlist/list.go +++ b/enricher/enricherlist/list.go @@ -22,6 +22,7 @@ import ( "github.com/google/osv-scalibr/enricher" "github.com/google/osv-scalibr/enricher/baseimage" + "github.com/google/osv-scalibr/enricher/huggingfacesecrets" "github.com/google/osv-scalibr/enricher/license" "github.com/google/osv-scalibr/enricher/reachability/java" "github.com/google/osv-scalibr/enricher/secrets" @@ -62,6 +63,11 @@ var ( secrets.Name: {secrets.New}, } + // HuggingfaceSecrets enrichers. + HuggingfaceSecrets = InitMap{ + huggingfacesecrets.Name: {huggingfacesecrets.New}, + } + // Reachability enrichers. 
Reachability = InitMap{ java.Name: {java.NewDefault}, @@ -81,6 +87,7 @@ var ( VulnMatching, VEX, Secrets, + HuggingfaceSecrets, License, Reachability, TransitiveDependency, @@ -92,6 +99,7 @@ var ( "vulnmatch": vals(VulnMatching), "layerdetails": vals(LayerDetails), "secrets": vals(Secrets), + "huggingfacesecrets": vals(HuggingfaceSecrets), "reachability": vals(Reachability), "transitivedependency": vals(TransitiveDependency), "enrichers/default": vals(Default), diff --git a/enricher/huggingfacesecrets/huggingfacesecrets.go b/enricher/huggingfacesecrets/huggingfacesecrets.go new file mode 100644 index 000000000..671135fc9 --- /dev/null +++ b/enricher/huggingfacesecrets/huggingfacesecrets.go @@ -0,0 +1,106 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package huggingfacesecrets contains an Enricher that uses Veles Validators to validate +// Secrets found by the Veles Extractor. +package huggingfacesecrets + +import ( + "context" + "time" + + "github.com/google/osv-scalibr/enricher" + "github.com/google/osv-scalibr/inventory" + "github.com/google/osv-scalibr/plugin" + "github.com/google/osv-scalibr/veles" + "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" +) + +const ( + // Name is the unique name of this Enricher. + Name = "huggingfacesecrets/velesvalidate" + + version = 1 +) + +var _ enricher.Enricher = &Enricher{} + +// Enricher uses a Veles ValidationEngine to validate Secrets found by Veles. 
+type Enricher struct { + engine *veles.ValidationEngine +} + +// New creates a new Enricher using the default Veles Validators. +func New() enricher.Enricher { + engine := veles.NewValidationEngine( + veles.WithValidator(huggingfaceapikey.NewValidator()), + ) + return &Enricher{engine: engine} +} + +// AddValidator adds a Validator for a specific type of Secret to the underlying validation engine. +// +// Returns whether there was already a Validator in place that now got replaced. +func AddValidator[S veles.Secret](e *Enricher, v veles.Validator[S]) bool { + return veles.AddValidator(e.engine, v) +} + +// NewWithEngine creates a new Enricher with a specified Veles ValidationEngine. +func NewWithEngine(engine *veles.ValidationEngine) enricher.Enricher { + return &Enricher{engine: engine} +} + +// Name of the Enricher. +func (Enricher) Name() string { + return Name +} + +// Version of the Enricher. +func (Enricher) Version() int { + return version +} + +// Requirements of the Enricher. +// Needs network access so it can validate Secrets. +func (Enricher) Requirements() *plugin.Capabilities { + return &plugin.Capabilities{ + Network: plugin.NetworkOnline, + } +} + +// RequiredPlugins returns the plugins that are required to be enabled for this +// Enricher to run. While it works on the results of the filesystem/secrets +// Extractor, the Enricher itself can run independently. +func (Enricher) RequiredPlugins() []string { + return []string{} +} + +// Enrich validates all the Secrets from the Inventory using a Veles +// ValidationEngine. +// +// Each individual Secret maintains its own error in case the validation failed. 
+func (e *Enricher) Enrich(ctx context.Context, _ *enricher.ScanInput, inv *inventory.Inventory) error { + for _, s := range inv.Secrets { + if err := ctx.Err(); err != nil { + return err + } + status, err := e.engine.Validate(ctx, s.Secret) + s.Validation = inventory.SecretValidationResult{ + At: time.Now(), + Status: status, + Err: err, + } + } + return nil +} diff --git a/enricher/huggingfacesecrets/huggingfacesecrets_test.go b/enricher/huggingfacesecrets/huggingfacesecrets_test.go new file mode 100644 index 000000000..d63372cbe --- /dev/null +++ b/enricher/huggingfacesecrets/huggingfacesecrets_test.go @@ -0,0 +1,262 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package huggingfacesecrets_test + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + secrets "github.com/google/osv-scalibr/enricher/huggingfacesecrets" + "github.com/google/osv-scalibr/inventory" + "github.com/google/osv-scalibr/veles" + "github.com/google/osv-scalibr/veles/velestest" +) + +type testEnricherSubCase struct { + name string + input inventory.Inventory + want inventory.Inventory +} + +func TestEnricher(t *testing.T) { + errTest := errors.New("some validation error") + path := "/foo/bar/key.json" + cases := []struct { + name string + engine *veles.ValidationEngine + subs []testEnricherSubCase + }{ + { + name: "only strings supported", + engine: veles.NewValidationEngine(veles.WithValidator(velestest.NewFakeStringSecretValidator(veles.ValidationValid, nil))), + subs: []testEnricherSubCase{ + { + name: "supported", + input: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: velestest.NewFakeStringSecret("hf_e11YtkCn11KjOBBBBEaZT11vFjijio111"), + Location: path, + }, + }, + }, + want: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: velestest.NewFakeStringSecret("FOO"), + Location: path, + Validation: inventory.SecretValidationResult{ + Status: veles.ValidationValid, + }, + }, + }, + }, + }, + { + name: "unsupported", + input: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: velestest.NewFakeIntSecret(123), + Location: path, + }, + }, + }, + want: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: velestest.NewFakeIntSecret(123), + Location: path, + Validation: inventory.SecretValidationResult{ + Status: veles.ValidationUnsupported, + }, + }, + }, + }, + }, + }, + }, + { + name: "per secret errors", + engine: veles.NewValidationEngine( + veles.WithValidator(velestest.NewFakeStringSecretValidator(veles.ValidationValid, nil)), + veles.WithValidator(velestest.NewFakeIntSecretValidator(veles.ValidationFailed, 
errTest)), + ), + subs: []testEnricherSubCase{ + { + name: "single error", + input: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: velestest.NewFakeIntSecret(123), + Location: path, + }, + }, + }, + want: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: velestest.NewFakeIntSecret(123), + Location: path, + Validation: inventory.SecretValidationResult{ + Status: veles.ValidationFailed, + Err: errTest, + }, + }, + }, + }, + }, + { + name: "multiple errors", + input: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: velestest.NewFakeIntSecret(123), + Location: path, + }, + { + Secret: velestest.NewFakeIntSecret(456), + Location: path, + }, + }, + }, + want: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: velestest.NewFakeIntSecret(123), + Location: path, + Validation: inventory.SecretValidationResult{ + Status: veles.ValidationFailed, + Err: errTest, + }, + }, + { + Secret: velestest.NewFakeIntSecret(456), + Location: path, + Validation: inventory.SecretValidationResult{ + Status: veles.ValidationFailed, + Err: errTest, + }, + }, + }, + }, + }, + { + name: "mixed", + input: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: velestest.NewFakeIntSecret(123), + Location: path, + }, + { + Secret: velestest.NewFakeStringSecret("foo"), + Location: path, + }, + }, + }, + want: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: velestest.NewFakeIntSecret(123), + Location: path, + Validation: inventory.SecretValidationResult{ + Status: veles.ValidationFailed, + Err: errTest, + }, + }, + { + Secret: velestest.NewFakeStringSecret("foo"), + Location: path, + Validation: inventory.SecretValidationResult{ + Status: veles.ValidationValid, + }, + }, + }, + }, + }, + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + enricher := secrets.NewWithEngine(tc.engine) + for _, sc := range tc.subs { + t.Run(sc.name, func(t *testing.T) { + if err := 
enricher.Enrich(t.Context(), nil, &sc.input); err != nil { + t.Errorf("Enrich() error: %v, want nil", err) + } + got := &sc.input + want := &sc.want + // We can rely on the order of Secrets in the inventory here, since the enricher is not supposed to change it. + if diff := cmp.Diff(want, got, cmpopts.EquateErrors(), cmpopts.IgnoreTypes(time.Time{})); diff != "" { + t.Errorf("Enrich() got diff (-want +got):\n%s", diff) + } + }) + } + }) + } +} + +func TestEnricher_respectsContext(t *testing.T) { + enricher := secrets.NewWithEngine(veles.NewValidationEngine( + veles.WithValidator(velestest.NewFakeStringSecretValidator(veles.ValidationValid, nil)), + )) + inv := &inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: velestest.NewFakeStringSecret("foo"), + Location: "/foo/bar/baz.json", + }, + }, + } + ctx, cancel := context.WithCancel(t.Context()) + cancel() + if err := enricher.Enrich(ctx, nil, inv); !errors.Is(err, context.Canceled) { + t.Errorf("enricher.Enrich() error = nil, want context cancelled") + } +} + +func TestAddValidator(t *testing.T) { + secret := inventory.Secret{ + Secret: velestest.NewFakeStringSecret("foo"), + Location: "/foo/bar/baz.json", + } + inv := inventory.Inventory{Secrets: []*inventory.Secret{&secret}} + enricher := secrets.NewWithEngine(veles.NewValidationEngine()).(*secrets.Enricher) + + // Ensure that it's unsupported. + if err := enricher.Enrich(t.Context(), nil, &inv); err != nil { + t.Errorf("Enrich() error: %v, want nil", err) + } + if got, want := secret.Validation.Status, veles.ValidationUnsupported; got != want { + t.Errorf("Enrich() validation status = %q, want %q", got, want) + } + + // Add new validator and ensure that we now get the correct result. 
+ if present := secrets.AddValidator(enricher, velestest.NewFakeStringSecretValidator(veles.ValidationValid, nil)); present { + t.Errorf("AddValidator() = %t, want false", present) + } + if err := enricher.Enrich(t.Context(), nil, &inv); err != nil { + t.Errorf("Enrich() error: %v, want nil", err) + } + if got, want := secret.Validation.Status, veles.ValidationValid; got != want { + t.Errorf("Enrich() validation status = %q, want %q", got, want) + } +} diff --git a/enricher/secrets/secrets.go b/enricher/secrets/secrets.go index dfac674b3..4e35350d2 100644 --- a/enricher/secrets/secrets.go +++ b/enricher/secrets/secrets.go @@ -25,7 +25,6 @@ import ( "github.com/google/osv-scalibr/plugin" "github.com/google/osv-scalibr/veles" "github.com/google/osv-scalibr/veles/secrets/gcpsak" - "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" ) const ( @@ -46,7 +45,6 @@ type Enricher struct { func New() enricher.Enricher { engine := veles.NewValidationEngine( veles.WithValidator(gcpsak.NewValidator()), - veles.WithValidator(huggingfaceapikey.NewValidator()), ) return &Enricher{engine: engine} } diff --git a/veles/secrets/huggingfaceapikey/huggingfaceapikey.go b/veles/secrets/huggingfaceapikey/huggingfaceapikey.go index eec9a9e71..5230fc5e5 100644 --- a/veles/secrets/huggingfaceapikey/huggingfaceapikey.go +++ b/veles/secrets/huggingfaceapikey/huggingfaceapikey.go @@ -19,5 +19,7 @@ package huggingfaceapikey // Huggingface API key (prefix `hf_`). 
// HuggingfaceAPIKey represents an API key used to authenticate requests type HuggingfaceAPIKey struct { - Key string + Key string + Role string + FineGrainedScope []string } diff --git a/veles/secrets/huggingfaceapikey/validator.go b/veles/secrets/huggingfaceapikey/validator.go index 0b01fab8e..742c1420a 100644 --- a/veles/secrets/huggingfaceapikey/validator.go +++ b/veles/secrets/huggingfaceapikey/validator.go @@ -56,9 +56,10 @@ func NewValidator(opts ...ValidatorOption) *Validator { // using the API key in the Authorization header. If the request returns // HTTP 200, the key is considered valid. If 401 Unauthorized, the key // is invalid. Other errors return ValidationFailed. -func (v *Validator) Validate(ctx context.Context, key HuggingfaceAPIKey) (veles.ValidationStatus, error) { +func (v *Validator) Validate(ctx context.Context, key *HuggingfaceAPIKey) (veles.ValidationStatus, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://huggingface.co/api/whoami-v2", nil) + key.FineGrainedScope = []string{"TestScope1", "TestScope2"} if err != nil { return veles.ValidationFailed, fmt.Errorf("unable to create HTTP request: %w", err) } diff --git a/veles/secrets/huggingfaceapikey/validator_test.go b/veles/secrets/huggingfaceapikey/validator_test.go index 28c4f1581..bb2607165 100644 --- a/veles/secrets/huggingfaceapikey/validator_test.go +++ b/veles/secrets/huggingfaceapikey/validator_test.go @@ -119,7 +119,7 @@ func TestValidator(t *testing.T) { key := huggingfaceapikey.HuggingfaceAPIKey{Key: validatorTestKey} // Test validation - got, err := validator.Validate(context.Background(), key) + got, err := validator.Validate(context.Background(), &key) // Check error expectation if tc.expectError { @@ -164,7 +164,7 @@ func TestValidator_ContextCancellation(t *testing.T) { defer cancel() // Test validation with cancelled context - got, err := validator.Validate(ctx, key) + got, err := validator.Validate(ctx, &key) if err == nil { t.Errorf("Validate() 
expected error due to context cancellation, got nil") @@ -211,7 +211,7 @@ func TestValidator_InvalidRequest(t *testing.T) { t.Run(tc.name, func(t *testing.T) { key := huggingfaceapikey.HuggingfaceAPIKey{Key: tc.key} - got, err := validator.Validate(context.Background(), key) + got, err := validator.Validate(context.Background(), &key) if err != nil { t.Errorf("Validate() unexpected error for %s: %v", tc.name, err) From 7d95088b4d2a23b372c440220ae8f560eb8d9b8c Mon Sep 17 00:00:00 2001 From: devampkid Date: Mon, 15 Sep 2025 01:37:07 +0400 Subject: [PATCH 5/9] new enricher for adding scope and roles of a huggingface api key --- enricher/enricherlist/list.go | 8 - .../huggingfacesecrets/huggingfacesecrets.go | 91 +++++-- .../huggingfacesecrets_test.go | 236 ++++-------------- 3 files changed, 120 insertions(+), 215 deletions(-) diff --git a/enricher/enricherlist/list.go b/enricher/enricherlist/list.go index 83c91f14a..9f9782fb7 100644 --- a/enricher/enricherlist/list.go +++ b/enricher/enricherlist/list.go @@ -22,7 +22,6 @@ import ( "github.com/google/osv-scalibr/enricher" "github.com/google/osv-scalibr/enricher/baseimage" - "github.com/google/osv-scalibr/enricher/huggingfacesecrets" "github.com/google/osv-scalibr/enricher/license" "github.com/google/osv-scalibr/enricher/reachability/java" "github.com/google/osv-scalibr/enricher/secrets" @@ -63,11 +62,6 @@ var ( secrets.Name: {secrets.New}, } - // HuggingfaceSecrets enrichers. - HuggingfaceSecrets = InitMap{ - huggingfacesecrets.Name: {huggingfacesecrets.New}, - } - // Reachability enrichers. 
Reachability = InitMap{ java.Name: {java.NewDefault}, @@ -87,7 +81,6 @@ var ( VulnMatching, VEX, Secrets, - HuggingfaceSecrets, License, Reachability, TransitiveDependency, @@ -99,7 +92,6 @@ var ( "vulnmatch": vals(VulnMatching), "layerdetails": vals(LayerDetails), "secrets": vals(Secrets), - "huggingfacesecrets": vals(HuggingfaceSecrets), "reachability": vals(Reachability), "transitivedependency": vals(TransitiveDependency), "enrichers/default": vals(Default), diff --git a/enricher/huggingfacesecrets/huggingfacesecrets.go b/enricher/huggingfacesecrets/huggingfacesecrets.go index 671135fc9..d6350c65f 100644 --- a/enricher/huggingfacesecrets/huggingfacesecrets.go +++ b/enricher/huggingfacesecrets/huggingfacesecrets.go @@ -18,7 +18,10 @@ package huggingfacesecrets import ( "context" - "time" + "encoding/json" + "fmt" + "io" + "net/http" "github.com/google/osv-scalibr/enricher" "github.com/google/osv-scalibr/inventory" @@ -31,22 +34,34 @@ const ( // Name is the unique name of this Enricher. Name = "huggingfacesecrets/velesvalidate" - version = 1 + version = 1 + defaultBaseURL = "https://huggingface.co" ) var _ enricher.Enricher = &Enricher{} // Enricher uses a Veles ValidationEngine to validate Secrets found by Veles. type Enricher struct { - engine *veles.ValidationEngine + engine *veles.ValidationEngine + baseURL string + httpClient *http.Client } // New creates a new Enricher using the default Veles Validators. func New() enricher.Enricher { - engine := veles.NewValidationEngine( - veles.WithValidator(huggingfaceapikey.NewValidator()), - ) - return &Enricher{engine: engine} + return &Enricher{ + baseURL: defaultBaseURL, + httpClient: http.DefaultClient, + } +} + +// NewWithBaseURL creates a new Enricher that uses the provided base URL for the Hugging Face API. +// Useful for tests with an httptest.Server. 
+func NewWithBaseURL(baseURL string) enricher.Enricher { + return &Enricher{ + baseURL: baseURL, + httpClient: http.DefaultClient, + } } // AddValidator adds a Validator for a specific type of Secret to the underlying validation engine. @@ -56,11 +71,6 @@ func AddValidator[S veles.Secret](e *Enricher, v veles.Validator[S]) bool { return veles.AddValidator(e.engine, v) } -// NewWithEngine creates a new Enricher with a specified Veles ValidationEngine. -func NewWithEngine(engine *veles.ValidationEngine) enricher.Enricher { - return &Enricher{engine: engine} -} - // Name of the Enricher. func (Enricher) Name() string { return Name @@ -86,20 +96,65 @@ func (Enricher) RequiredPlugins() []string { return []string{} } +// huggingfaceResponse represents the minimal structure needed from the Hugging Face API response +type huggingfaceResponse struct { + Auth struct { + AccessToken struct { + Role string `json:"role"` + FineGrained struct { + Scoped []struct { + Permissions []string `json:"permissions"` + } `json:"scoped"` + } `json:"fineGrained"` + } `json:"accessToken"` + } `json:"auth"` +} + // Enrich validates all the Secrets from the Inventory using a Veles // ValidationEngine. -// // Each individual Secret maintains its own error in case the validation failed. 
func (e *Enricher) Enrich(ctx context.Context, _ *enricher.ScanInput, inv *inventory.Inventory) error { for _, s := range inv.Secrets { if err := ctx.Err(); err != nil { return err } - status, err := e.engine.Validate(ctx, s.Secret) - s.Validation = inventory.SecretValidationResult{ - At: time.Now(), - Status: status, - Err: err, + if huggingSecret, ok := s.Secret.(huggingfaceapikey.HuggingfaceAPIKey); ok { + url := e.baseURL + "/api/whoami-v2" + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return fmt.Errorf("creating HTTP GET request failed: %w", err) + } + req.Header.Set("Authorization", "Bearer "+huggingSecret.Key) + res, err := e.httpClient.Do(req) + if err != nil { + return fmt.Errorf("HTTP GET failed: %w", err) + } + defer res.Body.Close() + + if res.StatusCode == http.StatusOK { + body, err := io.ReadAll(res.Body) + if err != nil { + return fmt.Errorf("reading response body failed: %w", err) + } + + var apiResponse huggingfaceResponse + if err := json.Unmarshal(body, &apiResponse); err != nil { + return fmt.Errorf("parsing JSON response failed: %w", err) + } + + // Extract all permissions from scoped entities + var permissions []string + for _, scopedItem := range apiResponse.Auth.AccessToken.FineGrained.Scoped { + permissions = append(permissions, scopedItem.Permissions...) 
+ } + + // Update the secret with the actual values from the response + s.Secret = huggingfaceapikey.HuggingfaceAPIKey{ + Key: huggingSecret.Key, + Role: apiResponse.Auth.AccessToken.Role, + FineGrainedScope: permissions, + } + } } } return nil diff --git a/enricher/huggingfacesecrets/huggingfacesecrets_test.go b/enricher/huggingfacesecrets/huggingfacesecrets_test.go index d63372cbe..e64a37b13 100644 --- a/enricher/huggingfacesecrets/huggingfacesecrets_test.go +++ b/enricher/huggingfacesecrets/huggingfacesecrets_test.go @@ -15,178 +15,57 @@ package huggingfacesecrets_test import ( - "context" - "errors" + "encoding/json" + "net/http" + "net/http/httptest" "testing" "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - secrets "github.com/google/osv-scalibr/enricher/huggingfacesecrets" + "github.com/google/osv-scalibr/enricher/huggingfacesecrets" "github.com/google/osv-scalibr/inventory" - "github.com/google/osv-scalibr/veles" - "github.com/google/osv-scalibr/veles/velestest" + "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" ) type testEnricherSubCase struct { - name string - input inventory.Inventory - want inventory.Inventory + name string + input inventory.Inventory + Role string + FineGrainedScope []string + want inventory.Inventory } func TestEnricher(t *testing.T) { - errTest := errors.New("some validation error") path := "/foo/bar/key.json" cases := []struct { - name string - engine *veles.ValidationEngine - subs []testEnricherSubCase + name string + subs []testEnricherSubCase }{ { - name: "only strings supported", - engine: veles.NewValidationEngine(veles.WithValidator(velestest.NewFakeStringSecretValidator(veles.ValidationValid, nil))), + name: "Append Role and Fine Grained Scopes", subs: []testEnricherSubCase{ { name: "supported", input: inventory.Inventory{ Secrets: []*inventory.Secret{ { - Secret: velestest.NewFakeStringSecret("hf_e11YtkCn11KjOBBBBEaZT11vFjijio111"), + Secret: 
huggingfaceapikey.HuggingfaceAPIKey{Key: "foo"}, Location: path, }, }, }, + Role: "read", + FineGrainedScope: []string{"inference.endpoints.infer.write", "repo.content.read"}, want: inventory.Inventory{ Secrets: []*inventory.Secret{ { - Secret: velestest.NewFakeStringSecret("FOO"), - Location: path, - Validation: inventory.SecretValidationResult{ - Status: veles.ValidationValid, - }, - }, - }, - }, - }, - { - name: "unsupported", - input: inventory.Inventory{ - Secrets: []*inventory.Secret{ - { - Secret: velestest.NewFakeIntSecret(123), - Location: path, - }, - }, - }, - want: inventory.Inventory{ - Secrets: []*inventory.Secret{ - { - Secret: velestest.NewFakeIntSecret(123), - Location: path, - Validation: inventory.SecretValidationResult{ - Status: veles.ValidationUnsupported, - }, - }, - }, - }, - }, - }, - }, - { - name: "per secret errors", - engine: veles.NewValidationEngine( - veles.WithValidator(velestest.NewFakeStringSecretValidator(veles.ValidationValid, nil)), - veles.WithValidator(velestest.NewFakeIntSecretValidator(veles.ValidationFailed, errTest)), - ), - subs: []testEnricherSubCase{ - { - name: "single error", - input: inventory.Inventory{ - Secrets: []*inventory.Secret{ - { - Secret: velestest.NewFakeIntSecret(123), - Location: path, - }, - }, - }, - want: inventory.Inventory{ - Secrets: []*inventory.Secret{ - { - Secret: velestest.NewFakeIntSecret(123), - Location: path, - Validation: inventory.SecretValidationResult{ - Status: veles.ValidationFailed, - Err: errTest, - }, - }, - }, - }, - }, - { - name: "multiple errors", - input: inventory.Inventory{ - Secrets: []*inventory.Secret{ - { - Secret: velestest.NewFakeIntSecret(123), - Location: path, - }, - { - Secret: velestest.NewFakeIntSecret(456), - Location: path, - }, - }, - }, - want: inventory.Inventory{ - Secrets: []*inventory.Secret{ - { - Secret: velestest.NewFakeIntSecret(123), - Location: path, - Validation: inventory.SecretValidationResult{ - Status: veles.ValidationFailed, - Err: errTest, 
+ Secret: huggingfaceapikey.HuggingfaceAPIKey{ + Key: "foo", + Role: "read", + FineGrainedScope: []string{"inference.endpoints.infer.write", "repo.content.read"}, }, - }, - { - Secret: velestest.NewFakeIntSecret(456), Location: path, - Validation: inventory.SecretValidationResult{ - Status: veles.ValidationFailed, - Err: errTest, - }, - }, - }, - }, - }, - { - name: "mixed", - input: inventory.Inventory{ - Secrets: []*inventory.Secret{ - { - Secret: velestest.NewFakeIntSecret(123), - Location: path, - }, - { - Secret: velestest.NewFakeStringSecret("foo"), - Location: path, - }, - }, - }, - want: inventory.Inventory{ - Secrets: []*inventory.Secret{ - { - Secret: velestest.NewFakeIntSecret(123), - Location: path, - Validation: inventory.SecretValidationResult{ - Status: veles.ValidationFailed, - Err: errTest, - }, - }, - { - Secret: velestest.NewFakeStringSecret("foo"), - Location: path, - Validation: inventory.SecretValidationResult{ - Status: veles.ValidationValid, - }, }, }, }, @@ -196,9 +75,35 @@ func TestEnricher(t *testing.T) { } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - enricher := secrets.NewWithEngine(tc.engine) for _, sc := range tc.subs { + sc := sc // capture range variable for subtest closures t.Run(sc.name, func(t *testing.T) { + // Mock Hugging Face API server responding with the desired Role and FineGrainedScope + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/api/whoami-v2" { + http.NotFound(w, r) + return + } + w.Header().Set("Content-Type", "application/json") + resp := map[string]any{ + "auth": map[string]any{ + "accessToken": map[string]any{ + "role": sc.Role, + "fineGrained": map[string]any{ + "scoped": []map[string]any{ + {"permissions": sc.FineGrainedScope}, + }, + }, + }, + }, + } + _ = json.NewEncoder(w).Encode(resp) + })) + defer ts.Close() + + // Use enricher configured against the mock server + enricher := huggingfacesecrets.NewWithBaseURL(ts.URL) + if 
err := enricher.Enrich(t.Context(), nil, &sc.input); err != nil { t.Errorf("Enrich() error: %v, want nil", err) } @@ -213,50 +118,3 @@ func TestEnricher(t *testing.T) { }) } } - -func TestEnricher_respectsContext(t *testing.T) { - enricher := secrets.NewWithEngine(veles.NewValidationEngine( - veles.WithValidator(velestest.NewFakeStringSecretValidator(veles.ValidationValid, nil)), - )) - inv := &inventory.Inventory{ - Secrets: []*inventory.Secret{ - { - Secret: velestest.NewFakeStringSecret("foo"), - Location: "/foo/bar/baz.json", - }, - }, - } - ctx, cancel := context.WithCancel(t.Context()) - cancel() - if err := enricher.Enrich(ctx, nil, inv); !errors.Is(err, context.Canceled) { - t.Errorf("enricher.Enrich() error = nil, want context cancelled") - } -} - -func TestAddValidator(t *testing.T) { - secret := inventory.Secret{ - Secret: velestest.NewFakeStringSecret("foo"), - Location: "/foo/bar/baz.json", - } - inv := inventory.Inventory{Secrets: []*inventory.Secret{&secret}} - enricher := secrets.NewWithEngine(veles.NewValidationEngine()).(*secrets.Enricher) - - // Ensure that it's unsupported. - if err := enricher.Enrich(t.Context(), nil, &inv); err != nil { - t.Errorf("Enrich() error: %v, want nil", err) - } - if got, want := secret.Validation.Status, veles.ValidationUnsupported; got != want { - t.Errorf("Enrich() validation status = %q, want %q", got, want) - } - - // Add new validator and ensure that we now get the correct result. 
- if present := secrets.AddValidator(enricher, velestest.NewFakeStringSecretValidator(veles.ValidationValid, nil)); present { - t.Errorf("AddValidator() = %t, want false", present) - } - if err := enricher.Enrich(t.Context(), nil, &inv); err != nil { - t.Errorf("Enrich() error: %v, want nil", err) - } - if got, want := secret.Validation.Status, veles.ValidationValid; got != want { - t.Errorf("Enrich() validation status = %q, want %q", got, want) - } -} From 9e5c9f83956fdb59187e6b5bb52e5de536984019 Mon Sep 17 00:00:00 2001 From: devampkid Date: Mon, 15 Sep 2025 01:50:48 +0400 Subject: [PATCH 6/9] HuggingfaceSecrets to HuggingfaceMeta --- enricher/enricherlist/list.go | 7 +++++++ .../huggingfacemeta.go} | 6 +++--- .../huggingfacemeta_test.go} | 6 +++--- veles/secrets/huggingfaceapikey/validator.go | 1 - 4 files changed, 13 insertions(+), 7 deletions(-) rename enricher/{huggingfacesecrets/huggingfacesecrets.go => huggingfacemeta/huggingfacemeta.go} (96%) rename enricher/{huggingfacesecrets/huggingfacesecrets_test.go => huggingfacemeta/huggingfacemeta_test.go} (95%) diff --git a/enricher/enricherlist/list.go b/enricher/enricherlist/list.go index 9f9782fb7..3ce83ec17 100644 --- a/enricher/enricherlist/list.go +++ b/enricher/enricherlist/list.go @@ -22,6 +22,7 @@ import ( "github.com/google/osv-scalibr/enricher" "github.com/google/osv-scalibr/enricher/baseimage" + "github.com/google/osv-scalibr/enricher/huggingfacemeta" "github.com/google/osv-scalibr/enricher/license" "github.com/google/osv-scalibr/enricher/reachability/java" "github.com/google/osv-scalibr/enricher/secrets" @@ -62,6 +63,11 @@ var ( secrets.Name: {secrets.New}, } + // HuggingfaceMeta enricher. + HuggingfaceMeta = InitMap{ + huggingfacemeta.Name: {huggingfacemeta.New}, + } + // Reachability enrichers. 
Reachability = InitMap{ java.Name: {java.NewDefault}, @@ -81,6 +87,7 @@ var ( VulnMatching, VEX, Secrets, + HuggingfaceMeta, License, Reachability, TransitiveDependency, diff --git a/enricher/huggingfacesecrets/huggingfacesecrets.go b/enricher/huggingfacemeta/huggingfacemeta.go similarity index 96% rename from enricher/huggingfacesecrets/huggingfacesecrets.go rename to enricher/huggingfacemeta/huggingfacemeta.go index d6350c65f..fe1543894 100644 --- a/enricher/huggingfacesecrets/huggingfacesecrets.go +++ b/enricher/huggingfacemeta/huggingfacemeta.go @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package huggingfacesecrets contains an Enricher that uses Veles Validators to validate +// Package huggingfacemeta contains an Enricher that uses Veles Validators to validate // Secrets found by the Veles Extractor. -package huggingfacesecrets +package huggingfacemeta import ( "context" @@ -32,7 +32,7 @@ import ( const ( // Name is the unique name of this Enricher. - Name = "huggingfacesecrets/velesvalidate" + Name = "huggingfacemeta/velesvalidate" version = 1 defaultBaseURL = "https://huggingface.co" diff --git a/enricher/huggingfacesecrets/huggingfacesecrets_test.go b/enricher/huggingfacemeta/huggingfacemeta_test.go similarity index 95% rename from enricher/huggingfacesecrets/huggingfacesecrets_test.go rename to enricher/huggingfacemeta/huggingfacemeta_test.go index e64a37b13..ac73fae6d 100644 --- a/enricher/huggingfacesecrets/huggingfacesecrets_test.go +++ b/enricher/huggingfacemeta/huggingfacemeta_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package huggingfacesecrets_test +package huggingfacemeta_test import ( "encoding/json" @@ -23,7 +23,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - "github.com/google/osv-scalibr/enricher/huggingfacesecrets" + "github.com/google/osv-scalibr/enricher/huggingfacemeta" "github.com/google/osv-scalibr/inventory" "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" ) @@ -102,7 +102,7 @@ func TestEnricher(t *testing.T) { defer ts.Close() // Use enricher configured against the mock server - enricher := huggingfacesecrets.NewWithBaseURL(ts.URL) + enricher := huggingfacemeta.NewWithBaseURL(ts.URL) if err := enricher.Enrich(t.Context(), nil, &sc.input); err != nil { t.Errorf("Enrich() error: %v, want nil", err) diff --git a/veles/secrets/huggingfaceapikey/validator.go b/veles/secrets/huggingfaceapikey/validator.go index 742c1420a..293e6d92f 100644 --- a/veles/secrets/huggingfaceapikey/validator.go +++ b/veles/secrets/huggingfaceapikey/validator.go @@ -59,7 +59,6 @@ func NewValidator(opts ...ValidatorOption) *Validator { func (v *Validator) Validate(ctx context.Context, key *HuggingfaceAPIKey) (veles.ValidationStatus, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://huggingface.co/api/whoami-v2", nil) - key.FineGrainedScope = []string{"TestScope1", "TestScope2"} if err != nil { return veles.ValidationFailed, fmt.Errorf("unable to create HTTP request: %w", err) } From 9127b7538a2f57eebb232cd767f56e990447478c Mon Sep 17 00:00:00 2001 From: devampkid Date: Mon, 15 Sep 2025 01:56:43 +0400 Subject: [PATCH 7/9] complete revert of validator changes --- enricher/huggingfacemeta/huggingfacemeta_test.go | 1 - enricher/secrets/secrets.go | 2 ++ veles/secrets/huggingfaceapikey/validator.go | 2 +- veles/secrets/huggingfaceapikey/validator_test.go | 6 +++--- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/enricher/huggingfacemeta/huggingfacemeta_test.go 
b/enricher/huggingfacemeta/huggingfacemeta_test.go index ac73fae6d..585ca349a 100644 --- a/enricher/huggingfacemeta/huggingfacemeta_test.go +++ b/enricher/huggingfacemeta/huggingfacemeta_test.go @@ -76,7 +76,6 @@ func TestEnricher(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { for _, sc := range tc.subs { - sc := sc // capture range variable for subtest closures t.Run(sc.name, func(t *testing.T) { // Mock Hugging Face API server responding with the desired Role and FineGrainedScope ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { diff --git a/enricher/secrets/secrets.go b/enricher/secrets/secrets.go index 4e35350d2..dfac674b3 100644 --- a/enricher/secrets/secrets.go +++ b/enricher/secrets/secrets.go @@ -25,6 +25,7 @@ import ( "github.com/google/osv-scalibr/plugin" "github.com/google/osv-scalibr/veles" "github.com/google/osv-scalibr/veles/secrets/gcpsak" + "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" ) const ( @@ -45,6 +46,7 @@ type Enricher struct { func New() enricher.Enricher { engine := veles.NewValidationEngine( veles.WithValidator(gcpsak.NewValidator()), + veles.WithValidator(huggingfaceapikey.NewValidator()), ) return &Enricher{engine: engine} } diff --git a/veles/secrets/huggingfaceapikey/validator.go b/veles/secrets/huggingfaceapikey/validator.go index 293e6d92f..0b01fab8e 100644 --- a/veles/secrets/huggingfaceapikey/validator.go +++ b/veles/secrets/huggingfaceapikey/validator.go @@ -56,7 +56,7 @@ func NewValidator(opts ...ValidatorOption) *Validator { // using the API key in the Authorization header. If the request returns // HTTP 200, the key is considered valid. If 401 Unauthorized, the key // is invalid. Other errors return ValidationFailed. 
-func (v *Validator) Validate(ctx context.Context, key *HuggingfaceAPIKey) (veles.ValidationStatus, error) { +func (v *Validator) Validate(ctx context.Context, key HuggingfaceAPIKey) (veles.ValidationStatus, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://huggingface.co/api/whoami-v2", nil) if err != nil { diff --git a/veles/secrets/huggingfaceapikey/validator_test.go b/veles/secrets/huggingfaceapikey/validator_test.go index bb2607165..28c4f1581 100644 --- a/veles/secrets/huggingfaceapikey/validator_test.go +++ b/veles/secrets/huggingfaceapikey/validator_test.go @@ -119,7 +119,7 @@ func TestValidator(t *testing.T) { key := huggingfaceapikey.HuggingfaceAPIKey{Key: validatorTestKey} // Test validation - got, err := validator.Validate(context.Background(), &key) + got, err := validator.Validate(context.Background(), key) // Check error expectation if tc.expectError { @@ -164,7 +164,7 @@ func TestValidator_ContextCancellation(t *testing.T) { defer cancel() // Test validation with cancelled context - got, err := validator.Validate(ctx, &key) + got, err := validator.Validate(ctx, key) if err == nil { t.Errorf("Validate() expected error due to context cancellation, got nil") @@ -211,7 +211,7 @@ func TestValidator_InvalidRequest(t *testing.T) { t.Run(tc.name, func(t *testing.T) { key := huggingfaceapikey.HuggingfaceAPIKey{Key: tc.key} - got, err := validator.Validate(context.Background(), &key) + got, err := validator.Validate(context.Background(), key) if err != nil { t.Errorf("Validate() unexpected error for %s: %v", tc.name, err) From 64ca7c8b71fffc0abd8c3cd3d76f7646b4354438 Mon Sep 17 00:00:00 2001 From: devampkid Date: Mon, 15 Sep 2025 19:29:01 +0400 Subject: [PATCH 8/9] huggingfacemeta tests now support more cases, fix some minor issues --- enricher/huggingfacemeta/huggingfacemeta.go | 15 +-- .../huggingfacemeta/huggingfacemeta_test.go | 123 ++++++++++++++---- .../huggingfaceapikey/huggingfaceapikey.go | 1 - 
.../huggingfaceapikey/validator_test.go | 9 +- 4 files changed, 102 insertions(+), 46 deletions(-) diff --git a/enricher/huggingfacemeta/huggingfacemeta.go b/enricher/huggingfacemeta/huggingfacemeta.go index fe1543894..8d7e72e84 100644 --- a/enricher/huggingfacemeta/huggingfacemeta.go +++ b/enricher/huggingfacemeta/huggingfacemeta.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package huggingfacemeta contains an Enricher that uses Veles Validators to validate -// Secrets found by the Veles Extractor. +// Package huggingfacemeta contains an Enricher that adds additional metadata +// to each Huggingface key based on the API response. package huggingfacemeta import ( @@ -26,7 +26,6 @@ import ( "github.com/google/osv-scalibr/enricher" "github.com/google/osv-scalibr/inventory" "github.com/google/osv-scalibr/plugin" - "github.com/google/osv-scalibr/veles" "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" ) @@ -42,7 +41,6 @@ var _ enricher.Enricher = &Enricher{} // Enricher uses a Veles ValidationEngine to validate Secrets found by Veles. type Enricher struct { - engine *veles.ValidationEngine baseURL string httpClient *http.Client } @@ -56,7 +54,7 @@ func New() enricher.Enricher { } // NewWithBaseURL creates a new Enricher that uses the provided base URL for the Hugging Face API. -// Useful for tests with an httptest.Server. +// Useful for tests with a httptest.Server. func NewWithBaseURL(baseURL string) enricher.Enricher { return &Enricher{ baseURL: baseURL, @@ -64,13 +62,6 @@ func NewWithBaseURL(baseURL string) enricher.Enricher { } } -// AddValidator adds a Validator for a specific type of Secret to the underlying validation engine. -// -// Returns whether there was already a Validator in place that now got replaced. -func AddValidator[S veles.Secret](e *Enricher, v veles.Validator[S]) bool { - return veles.AddValidator(e.engine, v) -} - // Name of the Enricher.
func (Enricher) Name() string { return Name diff --git a/enricher/huggingfacemeta/huggingfacemeta_test.go b/enricher/huggingfacemeta/huggingfacemeta_test.go index 585ca349a..a5bc5ce04 100644 --- a/enricher/huggingfacemeta/huggingfacemeta_test.go +++ b/enricher/huggingfacemeta/huggingfacemeta_test.go @@ -28,25 +28,40 @@ import ( "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" ) -type testEnricherSubCase struct { - name string - input inventory.Inventory - Role string - FineGrainedScope []string - want inventory.Inventory -} - func TestEnricher(t *testing.T) { + type testEnricherSubCase struct { + name string + respBody any + statusCode int + input inventory.Inventory + want inventory.Inventory + expectError bool + } + validRespBody := func(role string, fineGrainedScope []string) map[string]any { + return map[string]any{ + "auth": map[string]any{ + "accessToken": map[string]any{ + "role": role, + "fineGrained": map[string]any{ + "scoped": []map[string]any{ + {"permissions": fineGrainedScope}, + }, + }, + }, + }, + } + } path := "/foo/bar/key.json" cases := []struct { name string subs []testEnricherSubCase }{ { - name: "Append Role and Fine Grained Scopes", + name: "Append role and Fine Grained Scopes", subs: []testEnricherSubCase{ { - name: "supported", + name: "supported", + statusCode: http.StatusOK, input: inventory.Inventory{ Secrets: []*inventory.Secret{ { @@ -55,8 +70,8 @@ func TestEnricher(t *testing.T) { }, }, }, - Role: "read", - FineGrainedScope: []string{"inference.endpoints.infer.write", "repo.content.read"}, + respBody: validRespBody("read", + []string{"inference.endpoints.infer.write", "repo.content.read"}), want: inventory.Inventory{ Secrets: []*inventory.Secret{ { @@ -70,6 +85,52 @@ func TestEnricher(t *testing.T) { }, }, }, + { + name: "no json response", + statusCode: http.StatusOK, + expectError: true, + input: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: huggingfaceapikey.HuggingfaceAPIKey{Key: "foo2"}, + 
Location: path, + }, + }, + }, + respBody: "response body is not json", + want: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: huggingfaceapikey.HuggingfaceAPIKey{ + Key: "foo2", + }, + Location: path, + }, + }, + }, + }, + { + name: "non-200 status code", + statusCode: http.StatusUnauthorized, // 401 Unauthorized + input: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: huggingfaceapikey.HuggingfaceAPIKey{Key: "foo3"}, + Location: path, + }, + }, + }, + want: inventory.Inventory{ + Secrets: []*inventory.Secret{ + { + Secret: huggingfaceapikey.HuggingfaceAPIKey{ + Key: "foo3", + }, + Location: path, + }, + }, + }, + }, }, }, } @@ -77,26 +138,27 @@ func TestEnricher(t *testing.T) { t.Run(tc.name, func(t *testing.T) { for _, sc := range tc.subs { t.Run(sc.name, func(t *testing.T) { - // Mock Hugging Face API server responding with the desired Role and FineGrainedScope + // Mock Hugging Face API server responding with the desired role and fineGrainedScope ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path != "/api/whoami-v2" { http.NotFound(w, r) return } + // Return the status code defined in the test case + if sc.statusCode != 0 && sc.statusCode != http.StatusOK { + w.WriteHeader(sc.statusCode) + return + } + w.Header().Set("Content-Type", "application/json") - resp := map[string]any{ - "auth": map[string]any{ - "accessToken": map[string]any{ - "role": sc.Role, - "fineGrained": map[string]any{ - "scoped": []map[string]any{ - {"permissions": sc.FineGrainedScope}, - }, - }, - }, - }, + if _, ok := sc.respBody.(string); ok { + _, err := w.Write([]byte(sc.respBody.(string))) + if err != nil { + return + } + return } - _ = json.NewEncoder(w).Encode(resp) + _ = json.NewEncoder(w).Encode(sc.respBody) })) defer ts.Close() @@ -104,7 +166,16 @@ func TestEnricher(t *testing.T) { enricher := huggingfacemeta.NewWithBaseURL(ts.URL) if err := enricher.Enrich(t.Context(), nil, &sc.input); err 
!= nil { - t.Errorf("Enrich() error: %v, want nil", err) + // Check error expectation + if sc.expectError { + if err == nil { + t.Errorf("Validate() expected error, got nil") + } + } else { + if err != nil { + t.Errorf("Validate() unexpected error: %v", err) + } + } } got := &sc.input want := &sc.want diff --git a/veles/secrets/huggingfaceapikey/huggingfaceapikey.go b/veles/secrets/huggingfaceapikey/huggingfaceapikey.go index 5230fc5e5..aa89dd1c7 100644 --- a/veles/secrets/huggingfaceapikey/huggingfaceapikey.go +++ b/veles/secrets/huggingfaceapikey/huggingfaceapikey.go @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package huggingfaceapikey contains a Veles Secret type for Huggingface API keys. package huggingfaceapikey // HuggingfaceAPIKey is a Veles Secret that holds relevant information for a diff --git a/veles/secrets/huggingfaceapikey/validator_test.go b/veles/secrets/huggingfaceapikey/validator_test.go index 28c4f1581..b2f483b9f 100644 --- a/veles/secrets/huggingfaceapikey/validator_test.go +++ b/veles/secrets/huggingfaceapikey/validator_test.go @@ -21,7 +21,6 @@ import ( "net/url" "strings" "testing" - "time" "github.com/google/osv-scalibr/veles" "github.com/google/osv-scalibr/veles/secrets/huggingfaceapikey" @@ -143,11 +142,9 @@ func TestValidator(t *testing.T) { func TestValidator_ContextCancellation(t *testing.T) { // Create a server that delays response server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - time.Sleep(100 * time.Millisecond) w.WriteHeader(http.StatusOK) })) defer server.Close() - // Create a client with custom transport client := &http.Client{ Transport: &mockTransport{testServer: server}, @@ -156,13 +153,11 @@ func TestValidator_ContextCancellation(t *testing.T) { validator := huggingfaceapikey.NewValidator( huggingfaceapikey.WithClient(client), ) + ctx, cancel := context.WithCancel(t.Context()) + cancel() key := 
huggingfaceapikey.HuggingfaceAPIKey{Key: validatorTestKey} - // Create context with a short timeout - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) - defer cancel() - // Test validation with cancelled context got, err := validator.Validate(ctx, key) From 11af5eee9a22bf66ff704cffaea394e0c2d28886 Mon Sep 17 00:00:00 2001 From: devampkid Date: Fri, 19 Sep 2025 13:05:23 +0400 Subject: [PATCH 9/9] update keyRe description --- veles/secrets/huggingfaceapikey/detector.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/veles/secrets/huggingfaceapikey/detector.go b/veles/secrets/huggingfaceapikey/detector.go index b34ad1aa8..6f6680e70 100644 --- a/veles/secrets/huggingfaceapikey/detector.go +++ b/veles/secrets/huggingfaceapikey/detector.go @@ -28,7 +28,7 @@ const maxTokenLength = 37 // keyRe is a regular expression that matches a Huggingface API key. // Huggingface API keys have the form: `hf_` followed by 34 -// alphanumeric characters. +// alphabetic characters. var keyRe = regexp.MustCompile(`hf_[A-Za-z]{34}`) // NewDetector returns a new simpletoken.Detector that matches