diff --git a/docs/sources/reference/components/loki/loki.process.md b/docs/sources/reference/components/loki/loki.process.md index 0078935f80..d2e1e78972 100644 --- a/docs/sources/reference/components/loki/loki.process.md +++ b/docs/sources/reference/components/loki/loki.process.md @@ -76,6 +76,7 @@ You can use the following blocks with `loki.process`: | [`stage.tenant`][stage.tenant] | Configures a `tenant` processing stage. | no | | [`stage.timestamp`][stage.timestamp] | Configures a `timestamp` processing stage. | no | | [`stage.truncate`][stage.truncate] | Configures a `truncate` processing stage. | no | +| [`stage.useragent`][stage.useragent] | Configures a `useragent` processing stage. | no | | [`stage.windowsevent`][stage.windowsevent] | Configures a `windowsevent` processing stage. | no | You can provide any number of these stage blocks nested inside `loki.process`. These blocks run in order of appearance in the configuration file. @@ -109,6 +110,7 @@ You can provide any number of these stage blocks nested inside `loki.process`. T [stage.tenant]: #stagetenant [stage.truncate]: #stagetruncate [stage.timestamp]: #stagetimestamp +[stage.useragent]: #stageuseragent [stage.windowsevent]: #stagewindowsevent ### `stage.cri` @@ -1950,6 +1952,80 @@ You can use this entry to add a label in `stage.labels` or structured metadata i truncated: label,line ``` +### `stage.useragent` + +The `stage.useragent` inner block configures a processing stage that parses user-agent strings and extracts browser, operating system, and device information using the uap-core library. + +The following arguments are supported: + +| Name | Type | Description | Default | Required | +| ------------ | -------- | ------------------------------------------------------------------------------------------------------- | ------- | -------- | +| `source` | `string` | Name from extracted data to parse as user-agent. If empty, uses the log message. 
| `""` | no | +| `regex_file` | `string` | Path to a custom YAML file containing regular expression patterns. If empty, uses default expressions. | `""` | no | + +The `source` field defines the source of data to parse as a user-agent string. +When `source` is omitted, the stage parses the log line itself. Setting `source` to an empty string is rejected as a configuration error. +When set, `source` names a previously extracted value to parse. + +The `regex_file` field allows you to specify a custom YAML file containing regular expression patterns for user-agent parsing. +If not provided, the stage uses the default patterns from the uap-core library. + +The stage extracts the following fields into the shared map: + +- `useragent_browser`: The browser name. For example, "Chrome", "Firefox", "Safari". +- `useragent_browser_version`: The browser version. For example, "91.0.4472". +- `useragent_os`: The operating system name. For example, "Windows", "Mac OS X", "iOS". +- `useragent_os_version`: The operating system version. For example, "10.15.7". +- `useragent_device`: The device family. For example, "iPhone", "iPad". +- `useragent_device_brand`: The device brand. For example, "Apple", "Samsung". +- `useragent_device_model`: The device model. For example, "iPhone", "Galaxy S21".
+ +#### Example + +```alloy +stage.useragent {} +``` + +Given the following log line: + +```text +Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1 +``` + +The stage extracts the following key-value pairs: + +```text +useragent_browser: Mobile Safari +useragent_browser_version: 14.1.1 +useragent_os: iOS +useragent_os_version: 14.6 +useragent_device: iPhone +useragent_device_brand: Apple +useragent_device_model: iPhone +``` + +#### Example with source field + +```alloy +stage.json { + expressions = { "user_agent" = "user_agent" } +} + +stage.useragent { + source = "user_agent" +} + +stage.labels { + values = { + browser = "useragent_browser", + os = "useragent_os", + device = "useragent_device", + } +} +``` + +This pipeline first extracts the user-agent string from a JSON field, parses it to extract browser and device information, and then adds the extracted values as labels. + ### `stage.windowsevent` The `windowsevent` stage extracts data from the message string in the Windows Event Log. 
diff --git a/go.mod b/go.mod index 2b334e3fc8..c50480df60 100644 --- a/go.mod +++ b/go.mod @@ -877,7 +877,7 @@ require ( github.com/twmb/franz-go/pkg/sasl/kerberos v1.1.0 // indirect github.com/twmb/franz-go/plugin/kzap v1.1.2 // indirect github.com/twmb/murmur3 v1.1.8 // indirect - github.com/ua-parser/uap-go v0.0.0-20240611065828-3a4781585db6 // indirect + github.com/ua-parser/uap-go v0.0.0-20240611065828-3a4781585db6 github.com/uber/jaeger-lib v2.4.1+incompatible // indirect github.com/valyala/fastjson v1.6.4 // indirect github.com/vertica/vertica-sql-go v1.3.3 // indirect diff --git a/internal/component/loki/process/stages/pipeline.go b/internal/component/loki/process/stages/pipeline.go index 6b0b026fea..a4de149cc4 100644 --- a/internal/component/loki/process/stages/pipeline.go +++ b/internal/component/loki/process/stages/pipeline.go @@ -46,6 +46,7 @@ type StageConfig struct { TenantConfig *TenantConfig `alloy:"tenant,block,optional"` TruncateConfig *TruncateConfig `alloy:"truncate,block,optional"` TimestampConfig *TimestampConfig `alloy:"timestamp,block,optional"` + UserAgentConfig *UserAgentConfig `alloy:"useragent,block,optional"` WindowsEventConfig *WindowsEventConfig `alloy:"windowsevent,block,optional"` } diff --git a/internal/component/loki/process/stages/stage.go b/internal/component/loki/process/stages/stage.go index 392e6bf5fd..9c88138282 100644 --- a/internal/component/loki/process/stages/stage.go +++ b/internal/component/loki/process/stages/stage.go @@ -47,6 +47,7 @@ const ( StageTypeTenant = "tenant" StageTypeTimestamp = "timestamp" StageTypeTruncate = "truncate" + StageTypeUserAgent = "useragent" StageTypeWindowsEvent = "windowsevent" ) @@ -274,6 +275,11 @@ func New(logger log.Logger, jobName *string, cfg StageConfig, registerer prometh if err != nil { return nil, err } + case cfg.UserAgentConfig != nil: + s, err = newUserAgentStage(logger, *cfg.UserAgentConfig) + if err != nil { + return nil, err + } default: panic(fmt.Sprintf("unreachable; 
should have decoded into one of the StageConfig fields: %+v", cfg)) }
diff --git a/internal/component/loki/process/stages/useragent.go b/internal/component/loki/process/stages/useragent.go
new file mode 100644
index 0000000000..64f72994b3
--- /dev/null
+++ b/internal/component/loki/process/stages/useragent.go
@@ -0,0 +1,158 @@
+package stages
+
+import (
+	"errors"
+	"fmt"
+	"reflect"
+	"time"
+
+	"github.com/go-kit/log"
+	"github.com/grafana/alloy/internal/runtime/logging/level"
+	"github.com/prometheus/common/model"
+	"github.com/ua-parser/uap-go/uaparser"
+)
+
+// Config Errors.
+var (
+	ErrEmptyUserAgentStageSource = errors.New("empty source")
+)
+
+// UserAgentConfig configures a processing stage that uses uap-core to
+// parse user-agent strings and extract browser, OS, and device information.
+type UserAgentConfig struct {
+	Source    *string `alloy:"source,attr,optional"`
+	RegexFile string  `alloy:"regex_file,attr,optional"`
+}
+
+// validateUserAgentConfig rejects a source that is set but empty. An unset
+// source is valid: the stage then parses the log line itself.
+func validateUserAgentConfig(c UserAgentConfig) error {
+	if c.Source != nil && *c.Source == "" {
+		return ErrEmptyUserAgentStageSource
+	}
+	return nil
+}
+
+// userAgentStage parses user-agent strings and extracts browser/OS/device info
+type userAgentStage struct {
+	config *UserAgentConfig
+	parser *uaparser.Parser
+	logger log.Logger
+}
+
+// newUserAgentStage validates config and builds the stage. Patterns are loaded
+// from config.RegexFile when it is set, otherwise from the parser's saved defaults.
+func newUserAgentStage(logger log.Logger, config UserAgentConfig) (Stage, error) {
+	if err := validateUserAgentConfig(config); err != nil {
+		return nil, err
+	}
+
+	var parser *uaparser.Parser
+	if config.RegexFile != "" {
+		var err error
+		parser, err = uaparser.New(config.RegexFile)
+		if err != nil {
+			return nil, fmt.Errorf("failed to load regex file %s: %w", config.RegexFile, err)
+		}
+	} else {
+		parser = uaparser.NewFromSaved()
+	}
+
+	return toStage(&userAgentStage{
+		config: &config,
+		parser: parser,
+		logger: log.With(logger, "component", "stage", "type", "useragent"),
+	}), nil
+}
+
+// joinVersion renders major[.minor[.patch]], stopping at the first empty
+// component so that partially populated versions do not end in stray dots
+// (for example "10" rather than "10..").
+func joinVersion(major, minor, patch string) string {
+	switch {
+	case minor == "":
+		return major
+	case patch == "":
+		return major + "." + minor
+	default:
+		return major + "." + minor + "." + patch
+	}
+}
+
+// Process implements Stage. It parses the configured source value, or the log
+// line when no source is set, and writes the non-empty results to extracted.
+func (u *userAgentStage) Process(labels model.LabelSet, extracted map[string]interface{}, t *time.Time, entry *string) {
+	// If a source key is provided, the user_agent stage should process it
+	// from the extracted map, otherwise should fall back to the entry
+	input := entry
+
+	if u.config.Source != nil {
+		raw, ok := extracted[*u.config.Source]
+		if !ok {
+			if Debug {
+				level.Debug(u.logger).Log("msg", "source does not exist in the set of extracted values", "source", *u.config.Source)
+			}
+			return
+		}
+
+		value, err := getString(raw)
+		if err != nil {
+			if Debug {
+				level.Debug(u.logger).Log("msg", "failed to convert source value to string", "source", *u.config.Source, "err", err, "type", reflect.TypeOf(raw))
+			}
+			return
+		}
+
+		input = &value
+	}
+
+	if input == nil {
+		if Debug {
+			level.Debug(u.logger).Log("msg", "cannot parse a nil entry")
+		}
+		return
+	}
+
+	// Parse the user-agent string
+	client := u.parser.Parse(*input)
+
+	// Extract browser information
+	if client.UserAgent.Family != "" {
+		extracted["useragent_browser"] = client.UserAgent.Family
+	}
+
+	if client.UserAgent.Major != "" {
+		extracted["useragent_browser_version"] = joinVersion(client.UserAgent.Major, client.UserAgent.Minor, client.UserAgent.Patch)
+	}
+
+	// Extract OS information
+	if client.Os.Family != "" {
+		extracted["useragent_os"] = client.Os.Family
+	}
+
+	if client.Os.Major != "" {
+		extracted["useragent_os_version"] = joinVersion(client.Os.Major, client.Os.Minor, client.Os.Patch)
+	}
+
+	// Extract device information
+	if client.Device.Family != "" && client.Device.Family != "Other" {
+		extracted["useragent_device"] = client.Device.Family
+	}
+
+	if client.Device.Brand != "" {
+		extracted["useragent_device_brand"] = client.Device.Brand
+	}
+
+	if client.Device.Model != "" {
+		extracted["useragent_device_model"] = client.Device.Model
+	}
+
+	if Debug {
+		level.Debug(u.logger).Log("msg", "extracted user-agent data debug", "extracted data", fmt.Sprintf("%v", extracted))
+	}
+}
+
+// Name implements Stage
+func (u *userAgentStage) Name() string {
+	return StageTypeUserAgent
+}
diff --git a/internal/component/loki/process/stages/useragent_test.go b/internal/component/loki/process/stages/useragent_test.go
new file mode 100644
index 0000000000..b56f2df9b3
--- /dev/null
+++ b/internal/component/loki/process/stages/useragent_test.go
@@ -0,0 +1,178 @@
+package stages
+
+import (
+	"testing"
+	"time"
+
+	"github.com/go-kit/log"
+	"github.com/prometheus/common/model"
+	"github.com/stretchr/testify/require"
+)
+
+func TestUserAgentConfig_Validation(t *testing.T) {
+	tests := []struct {
+		name        string
+		config      UserAgentConfig
+		expectedErr error
+	}{
+		{
+			name:   "valid config with source",
+			config: UserAgentConfig{Source: getStringPointer("user_agent")},
+		},
+		{
+			name:   "valid config without source",
+			config: UserAgentConfig{},
+		},
+		{
+			name:   "valid config with regex file",
+			config: UserAgentConfig{RegexFile: "/path/to/regexes.yaml"},
+		},
+		{
+			name:        "invalid config with empty source",
+			config:      UserAgentConfig{Source: getStringPointer("")},
+			expectedErr: ErrEmptyUserAgentStageSource,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			err := validateUserAgentConfig(test.config)
+			if test.expectedErr == nil {
+				require.NoError(t, err)
+			} else {
+				require.Equal(t, test.expectedErr, err)
+			}
+		})
+	}
+}
+
+func TestUserAgentStage_Process(t *testing.T) {
+	tests := []struct {
+		name     string
+		config   UserAgentConfig
+		input    string
+		expected map[string]interface{}
+	}{
+		{
+			name:   "Chrome browser parsing",
+			config: UserAgentConfig{},
+			input:  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+			expected: map[string]interface{}{
+				"useragent_browser":         "Chrome",
+				"useragent_browser_version": "91.0.4472",
+				"useragent_os":              "Windows",
+				"useragent_os_version":      "10",
+			},
+		},
+		{
+			name:   "Safari browser parsing",
+			config: UserAgentConfig{},
+			input:  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
+			expected: map[string]interface{}{
+				"useragent_browser":         "Safari",
+				"useragent_browser_version": "14.1.1",
+				"useragent_os":              "Mac OS X",
+				"useragent_os_version":      "10.15.7",
+			},
+		},
+		{
+			name:   "Firefox browser parsing",
+			config: UserAgentConfig{},
+			input:  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
+			expected: map[string]interface{}{
+				"useragent_browser":         "Firefox",
+				"useragent_browser_version": "89.0",
+				"useragent_os":              "Windows",
+				"useragent_os_version":      "10",
+			},
+		},
+		{
+			name:   "Mobile Safari parsing",
+			config: UserAgentConfig{},
+			input:  "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1",
+			expected: map[string]interface{}{
+				"useragent_browser":         "Mobile Safari",
+				"useragent_browser_version": "14.1.1",
+				"useragent_os":              "iOS",
+				"useragent_os_version":      "14.6",
+				"useragent_device":          "iPhone",
+				"useragent_device_brand":    "Apple",
+				"useragent_device_model":    "iPhone",
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			stage, err := newUserAgentStage(log.NewNopLogger(), test.config)
+			require.NoError(t, err)
+
+			labels := model.LabelSet{}
+			extracted := make(map[string]interface{})
+			ts := time.Now()
+			entry := test.input
+
+			stage.(*stageProcessor).Process(labels, extracted, &ts, &entry)
+
+			// Check that expected fields are present (allowing for version flexibility)
+			for key, expectedValue := range test.expected {
+				require.Contains(t, extracted, key)
+				if key == "useragent_browser" || key == "useragent_os" || key == "useragent_device" || key == "useragent_device_brand" || key == "useragent_device_model" {
+					require.Equal(t, expectedValue, extracted[key])
+				}
+				// For version fields, just check they contain the major version
+				if key == "useragent_browser_version" || key == "useragent_os_version" {
+					version := extracted[key].(string)
+					require.Contains(t, version, expectedValue.(string)[:2])
+					require.NotEqual(t, byte('.'), version[len(version)-1], "version must not end with a dot")
+				}
+			}
+		})
+	}
+}
+
+func TestUserAgentStage_ProcessWithSource(t *testing.T) {
+	source := "user_agent_field"
+	config := UserAgentConfig{Source: &source}
+	stage, err := newUserAgentStage(log.NewNopLogger(), config)
+	require.NoError(t, err)
+
+	labels := model.LabelSet{}
+	extracted := map[string]interface{}{
+		"user_agent_field": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+	}
+	ts := time.Now()
+	entry := "some other log line"
+
+	stage.(*stageProcessor).Process(labels, extracted, &ts, &entry)
+
+	require.Contains(t, extracted, "useragent_browser")
+	require.Equal(t, "Chrome", extracted["useragent_browser"])
+}
+
+func TestUserAgentStage_Name(t *testing.T) {
+	config := UserAgentConfig{}
+	stage, err := newUserAgentStage(log.NewNopLogger(), config)
+	require.NoError(t, err)
+
+	require.Equal(t, StageTypeUserAgent, stage.(*stageProcessor).Name())
+}
+
+func TestUserAgentStage_NewWithInvalidRegexFile(t *testing.T) {
+	config := UserAgentConfig{RegexFile: "/nonexistent/path/regexes.yaml"}
+	_, err := newUserAgentStage(log.NewNopLogger(), config)
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "failed to load regex file")
+}
+
+func TestJoinVersion(t *testing.T) {
+	require.Equal(t, "91.0.4472", joinVersion("91", "0", "4472"))
+	require.Equal(t, "89.0", joinVersion("89", "0", ""))
+	require.Equal(t, "10", joinVersion("10", "", ""))
+	// A patch without a minor cannot be rendered unambiguously, so it is dropped.
+	require.Equal(t, "10", joinVersion("10", "", "3"))
+}
+
+func getStringPointer(s string) *string {
+	return &s
+}