diff --git a/assets/src/generated/graphql.ts b/assets/src/generated/graphql.ts index 68be4f1f7c..2c72193f23 100644 --- a/assets/src/generated/graphql.ts +++ b/assets/src/generated/graphql.ts @@ -3792,6 +3792,8 @@ export type DeploymentSettings = { logging?: Maybe; /** the way we can connect to your loki instance */ lokiConnection?: Maybe; + /** settings for OpenTelemetry metrics export */ + metrics?: Maybe; /** the root repo you used to run `plural up` */ mgmtRepo?: Maybe; name: Scalars['String']['output']; @@ -3826,6 +3828,8 @@ export type DeploymentSettingsAttributes = { logging?: InputMaybe; /** connection details for a loki instance to use */ lokiConnection?: InputMaybe; + /** settings for OpenTelemetry metrics export */ + metrics?: InputMaybe; mgmtRepo?: InputMaybe; /** connection details for a prometheus instance to use */ prometheusConnection?: InputMaybe; @@ -5727,6 +5731,27 @@ export type MetricResult = { value?: Maybe; }; +/** Settings for OpenTelemetry metrics export */ +export type MetricsSettings = { + __typename?: 'MetricsSettings'; + /** cron expression for export schedule */ + crontab?: Maybe; + /** whether metrics export is enabled */ + enabled?: Maybe; + /** the OpenTelemetry collector endpoint */ + endpoint?: Maybe; +}; + +/** Settings for OpenTelemetry metrics export */ +export type MetricsSettingsAttributes = { + /** cron expression for how often to export metrics (e.g. '*\/5 * * * *') */ + crontab?: InputMaybe; + /** whether to enable metrics export */ + enabled?: InputMaybe; + /** the OpenTelemetry collector endpoint to send metrics to */ + endpoint?: InputMaybe; +}; + /** A monitor defines a recurring check over observability data that can raise alerts */ export type Monitor = { __typename?: 'Monitor'; diff --git a/go/client/models_gen.go b/go/client/models_gen.go index deaa7bfc26..934f1c8c3f 100644 --- a/go/client/models_gen.go +++ b/go/client/models_gen.go @@ -3087,6 +3087,8 @@ type DeploymentSettings struct { Cost *CostSettings `json:"cost,omitempty"` // settings for connections to log aggregation datastores Logging *LoggingSettings `json:"logging,omitempty"` + // settings for OpenTelemetry metrics export + Metrics *MetricsSettings `json:"metrics,omitempty"` // the root repo you used to run `plural up` MgmtRepo *string `json:"mgmtRepo,omitempty"` // whether the console has been onboarded and getting started pages need to be shown @@ -3131,7 +3133,9 @@ type DeploymentSettingsAttributes struct { // configuration for LLM provider clients Ai *AiSettingsAttributes `json:"ai,omitempty"` // settings for cost management functionality - Cost *CostSettingsAttributes `json:"cost,omitempty"` + Cost *CostSettingsAttributes `json:"cost,omitempty"` + // settings for OpenTelemetry metrics export + Metrics *MetricsSettingsAttributes `json:"metrics,omitempty"` ReadBindings []*PolicyBindingAttributes `json:"readBindings,omitempty"` WriteBindings []*PolicyBindingAttributes `json:"writeBindings,omitempty"` GitBindings []*PolicyBindingAttributes `json:"gitBindings,omitempty"` @@ -4686,6 +4690,26 @@ type MetricResult struct { Value *string `json:"value,omitempty"` } +// Settings for OpenTelemetry metrics export +type MetricsSettings struct { + // whether metrics export is enabled + Enabled *bool `json:"enabled,omitempty"` + // the OpenTelemetry collector endpoint + Endpoint *string `json:"endpoint,omitempty"` + // cron expression for export schedule + Crontab *string `json:"crontab,omitempty"` +} + +// Settings for OpenTelemetry metrics export +type MetricsSettingsAttributes struct { + // whether to enable metrics export + Enabled *bool `json:"enabled,omitempty"` + // the OpenTelemetry collector endpoint to send metrics to + Endpoint *string `json:"endpoint,omitempty"` + // cron expression for how often to export metrics (e.g. '*/5 * * * *') + Crontab *string `json:"crontab,omitempty"` +} + // A monitor defines a recurring check over observability data that can raise alerts type Monitor struct { // Stable identifier for this monitor diff --git a/go/controller/api/v1alpha1/deploymentsettings_types.go b/go/controller/api/v1alpha1/deploymentsettings_types.go index 7e49f78682..849bd64224 100644 --- a/go/controller/api/v1alpha1/deploymentsettings_types.go +++ b/go/controller/api/v1alpha1/deploymentsettings_types.go @@ -161,6 +161,11 @@ type DeploymentSettingsSpec struct { // +kubebuilder:validation:Optional Cost *CostSettings `json:"cost,omitempty"` + // Metrics settings for OpenTelemetry metrics export + // + // +kubebuilder:validation:Optional + Metrics *MetricsSettings `json:"metrics,omitempty"` + // DeploymentRepositoryRef is a pointer to the deployment GIT repository to use // // +kubebuilder:validation:Optional @@ -393,6 +398,38 @@ func (cost *CostSettings) Attributes() *console.CostSettingsAttributes { } } +// MetricsSettings holds configuration for OpenTelemetry metrics export. +type MetricsSettings struct { + // Enabled defines whether to enable the metrics export or not. + // + // +kubebuilder:default=false + // +kubebuilder:validation:Optional + Enabled *bool `json:"enabled,omitempty"` + + // Endpoint is the OpenTelemetry collector endpoint to send metrics to. + // + // +kubebuilder:validation:Optional + Endpoint *string `json:"endpoint,omitempty"` + + // Crontab is the cron expression for how often to export metrics. + // Example: "*/5 * * * *" for every 5 minutes. + // + // +kubebuilder:validation:Optional + Crontab *string `json:"crontab,omitempty"` +} + +func (m *MetricsSettings) Attributes() *console.MetricsSettingsAttributes { + if m == nil { + return nil + } + + return &console.MetricsSettingsAttributes{ + Enabled: m.Enabled, + Endpoint: m.Endpoint, + Crontab: m.Crontab, + } +} + // AISettings holds the configuration for LLM provider clients. type AISettings struct { // Enabled defines whether to enable the AI integration or not. diff --git a/go/controller/docs/api.md b/go/controller/docs/api.md index ade96fd5dd..2668c6b924 100644 --- a/go/controller/docs/api.md +++ b/go/controller/docs/api.md @@ -1388,6 +1388,7 @@ _Appears in:_ | `ai` _[AISettings](#aisettings)_ | AI settings specifies a configuration for LLM provider clients | | Optional: \{\}
| | `logging` _[LoggingSettings](#loggingsettings)_ | Logging settings for connections to log aggregation datastores | | Optional: \{\}
| | `cost` _[CostSettings](#costsettings)_ | Cost settings for managing Plural's cost management features | | Optional: \{\}
| +| `metrics` _[MetricsSettings](#metricssettings)_ | Metrics settings for OpenTelemetry metrics export | | Optional: \{\}
| | `deploymentRepositoryRef` _[NamespacedName](#namespacedname)_ | DeploymentRepositoryRef is a pointer to the deployment GIT repository to use | | Optional: \{\}
| | `scaffoldsRepositoryRef` _[NamespacedName](#namespacedname)_ | ScaffoldsRepositoryRef is a pointer to the Scaffolds GIT repository to use | | Optional: \{\}
| | `reconciliation` _[Reconciliation](#reconciliation)_ | Reconciliation settings for this resource.
Controls drift detection and reconciliation intervals. | | Optional: \{\}
| @@ -2306,6 +2307,24 @@ _Appears in:_ | `namespace` _string_ | Namespace specifies an optional namespace for categorizing or scoping related resources.
If empty then the ClusterSync's namespace will be used. | | Optional: \{\}
| +#### MetricsSettings + + + +MetricsSettings holds configuration for OpenTelemetry metrics export. + + + +_Appears in:_ +- [DeploymentSettingsSpec](#deploymentsettingsspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `enabled` _boolean_ | Enabled defines whether to enable the metrics export or not. | false | Optional: \{\}
| +| `endpoint` _string_ | Endpoint is the OpenTelemetry collector endpoint to send metrics to. | | Optional: \{\}
| +| `crontab` _string_ | Crontab is the cron expression for how often to export metrics.
Example: "*/5 * * * *" for every 5 minutes. | | Optional: \{\}
| + + #### NamespaceCredentials diff --git a/go/controller/internal/controller/deploymentsettings_controller.go b/go/controller/internal/controller/deploymentsettings_controller.go index 3d26ef6785..76e3cf7e41 100644 --- a/go/controller/internal/controller/deploymentsettings_controller.go +++ b/go/controller/internal/controller/deploymentsettings_controller.go @@ -142,6 +142,7 @@ func (r *DeploymentSettingsReconciler) genDeploymentSettingsAttr(ctx context.Con attr := &console.DeploymentSettingsAttributes{ MgmtRepo: settings.Spec.ManagementRepo, Cost: settings.Spec.Cost.Attributes(), + Metrics: settings.Spec.Metrics.Attributes(), } if settings.Spec.AgentHelmValues != nil { diff --git a/lib/console/application.ex b/lib/console/application.ex index de5c9da7ae..03ce5431cf 100644 --- a/lib/console/application.ex +++ b/lib/console/application.ex @@ -54,6 +54,7 @@ defmodule Console.Application do {Absinthe.Subscription, ConsoleWeb.Endpoint}, Console.Cached.Supervisor, Console.Plural.Pinger, + Console.Otel.MetricsExporter, Console.AI.GothManager, Console.PromEx, Console.AI.Graph.Indexer.Supervisor, diff --git a/lib/console/graphql/deployments/settings.ex b/lib/console/graphql/deployments/settings.ex index 5a15b5af9c..9fa201a767 100644 --- a/lib/console/graphql/deployments/settings.ex +++ b/lib/console/graphql/deployments/settings.ex @@ -31,6 +31,7 @@ defmodule Console.GraphQl.Deployments.Settings do field :ai, :ai_settings_attributes, description: "configuration for LLM provider clients" field :cost, :cost_settings_attributes, description: "settings for cost management functionality" + field :metrics, :metrics_settings_attributes, description: "settings for OpenTelemetry metrics export" field :read_bindings, list_of(:policy_binding_attributes) field :write_bindings, list_of(:policy_binding_attributes) @@ -38,6 +39,13 @@ defmodule Console.GraphQl.Deployments.Settings do field :create_bindings, list_of(:policy_binding_attributes) end + @desc "Settings for OpenTelemetry metrics export" + input_object :metrics_settings_attributes do + field :enabled, :boolean, description: "whether to enable metrics export" + field :endpoint, :string, description: "the OpenTelemetry collector endpoint to send metrics to" + field :crontab, :string, description: "cron expression for how often to export metrics (e.g. '*/5 * * * *')" + end + input_object :http_connection_attributes do field :host, non_null(:string) field :user, :string, description: "user to connect w/ for basic auth" @@ -265,6 +273,7 @@ defmodule Console.GraphQl.Deployments.Settings do field :ai, :ai_settings, description: "settings for LLM provider clients" field :cost, :cost_settings, description: "settings for cost management" field :logging, :logging_settings, description: "settings for connections to log aggregation datastores" + field :metrics, :metrics_settings, description: "settings for OpenTelemetry metrics export" field :mgmt_repo, :string, description: "the root repo you used to run `plural up`" field :onboarded, :boolean, description: "whether the console has been onboarded and getting started pages need to be shown" @@ -316,6 +325,13 @@ defmodule Console.GraphQl.Deployments.Settings do field :recommendation_cushion, :integer, description: "the percentage cushion above baseline usage to give when generation recommendations, default 20%" end + @desc "Settings for OpenTelemetry metrics export" + object :metrics_settings do + field :enabled, :boolean, description: "whether metrics export is enabled" + field :endpoint, :string, description: "the OpenTelemetry collector endpoint" + field :crontab, :string, description: "cron expression for export schedule" + end + @desc "Settings for configuring access to common LLM providers" object :ai_settings do field :enabled, :boolean diff --git a/lib/console/otel/exporter.ex b/lib/console/otel/exporter.ex new file mode 100644 index 0000000000..793578a963 --- /dev/null +++ b/lib/console/otel/exporter.ex @@ -0,0 +1,95 @@ +defmodule Console.Otel.Exporter do + @moduledoc """ + Simple OTLP JSON exporter for metrics over HTTP. + Sends metrics to an OpenTelemetry Collector endpoint. + """ + require Logger + + @default_timeout :timer.seconds(30) + + @doc """ + Exports a list of metrics to the configured OTLP endpoint. + Returns :ok on success, {:error, reason} on failure. + """ + @spec export(binary, [map]) :: :ok | {:error, term} + def export(endpoint, metrics) when is_binary(endpoint) and is_list(metrics) do + url = build_url(endpoint) + payload = build_payload(metrics) + + case Req.post(url, json: payload, receive_timeout: @default_timeout) do + {:ok, %Req.Response{status: status}} when status in 200..299 -> + Logger.info("Successfully exported #{length(metrics)} metrics to #{endpoint}") + :ok + + {:ok, %Req.Response{status: status, body: body}} -> + Logger.error("Failed to export metrics to #{endpoint}: HTTP #{status} - #{inspect(body)}") + {:error, {:http_error, status, body}} + + {:error, reason} -> + Logger.error("Failed to export metrics to #{endpoint}: #{inspect(reason)}") + {:error, reason} + end + end + + defp build_url(endpoint) do + endpoint + |> String.trim_trailing("/") + |> Kernel.<>("/v1/metrics") + end + + defp build_payload(metrics) do + %{ + "resourceMetrics" => [ + %{ + "resource" => %{ + "attributes" => [ + %{"key" => "service.name", "value" => %{"stringValue" => "plural-console"}}, + %{"key" => "service.version", "value" => %{"stringValue" => Console.conf(:version)}} + ] + }, + "scopeMetrics" => [ + %{ + "scope" => %{"name" => "plural.metrics", "version" => "1.0.0"}, + "metrics" => Enum.map(metrics, &format_metric/1) + } + ] + } + ] + } + end + + defp format_metric(%{name: name, value: value, attributes: attrs} = metric) do + timestamp = Map.get(metric, :timestamp, DateTime.utc_now()) + + %{ + "name" => name, + "gauge" => %{ + "dataPoints" => [ + %{ + "asInt" => value, + "timeUnixNano" => DateTime.to_unix(timestamp, :nanosecond), + "attributes" => format_attributes(attrs) + } + ] + } + } + end + + defp format_attributes(attrs) when is_map(attrs) do + attrs + |> Enum.reject(fn {_k, v} -> is_nil(v) end) + |> Enum.map(fn {key, value} -> + %{ + "key" => to_string(key), + "value" => format_attribute_value(value) + } + end) + end + + defp format_attribute_value(value) when is_binary(value), do: %{"stringValue" => value} + defp format_attribute_value(value) when is_integer(value), do: %{"intValue" => value} + defp format_attribute_value(value) when is_float(value), do: %{"doubleValue" => value} + defp format_attribute_value(value) when is_boolean(value), do: %{"boolValue" => value} + defp format_attribute_value(value) when is_atom(value), do: %{"stringValue" => to_string(value)} + defp format_attribute_value(value), do: %{"stringValue" => inspect(value)} +end diff --git a/lib/console/otel/metrics_builder.ex b/lib/console/otel/metrics_builder.ex new file mode 100644 index 0000000000..6429307b62 --- /dev/null +++ b/lib/console/otel/metrics_builder.ex @@ -0,0 +1,149 @@ +defmodule Console.Otel.MetricsBuilder do + @moduledoc """ + Builds OpenTelemetry metrics from database entities. + Pure functions for transforming clusters and services into metric data structures. + """ + alias Console.Repo + alias Console.Schema.{Cluster, Service} + + @doc """ + Returns a stream of service health metrics. + Must be called within a Repo.transaction for Repo.stream to work. + """ + @spec service_metrics_stream(DateTime.t()) :: Enumerable.t() + def service_metrics_stream(timestamp \\ DateTime.utc_now()) do + Service + |> Service.ordered(asc: :id) + |> Service.preloaded([cluster: :project]) + |> Repo.stream(method: :keyset) + |> Console.throttle(count: 500, pause: 50) + |> Stream.map(&build_service_metric(&1, timestamp)) + end + + @doc """ + Returns a stream of cluster health and upgradeability metrics. + Must be called within a Repo.transaction for Repo.stream to work. + """ + @spec cluster_metrics_stream(DateTime.t()) :: Enumerable.t() + def cluster_metrics_stream(timestamp \\ DateTime.utc_now()) do + Cluster + |> Cluster.ordered(asc: :id) + |> Cluster.preloaded([:project, :upgrade_insights]) + |> Repo.stream(method: :keyset) + |> Console.throttle(count: 500, pause: 50) + |> Stream.flat_map(&build_cluster_metrics(&1, timestamp)) + end + + @doc """ + Builds a single service health metric from a Service struct. + """ + @spec build_service_metric(Service.t(), DateTime.t()) :: map() + def build_service_metric(%Service{} = service, timestamp) do + cluster = service.cluster + project = cluster && cluster.project + + %{ + name: "plural.service.health", + value: service_status_to_value(service.status), + timestamp: timestamp, + attributes: %{ + service_id: service.id, + service_name: service.name, + namespace: service.namespace, + cluster_id: cluster && cluster.id, + cluster_name: cluster && cluster.name, + cluster_handle: cluster && cluster.handle, + project_id: project && project.id, + project_name: project && project.name, + git_ref: get_in_safe(service, [:git, :ref]), + git_folder: get_in_safe(service, [:git, :folder]), + helm_chart: get_in_safe(service, [:helm, :chart]), + helm_version: get_in_safe(service, [:helm, :version]), + status: to_string(service.status) + } + } + end + + @doc """ + Builds cluster health and upgradeability metrics from a Cluster struct. + Returns a list containing one health metric and zero or more upgrade metrics. + """ + @spec build_cluster_metrics(Cluster.t(), DateTime.t()) :: [map()] + def build_cluster_metrics(%Cluster{} = cluster, timestamp) do + project = cluster.project + + base_attrs = %{ + cluster_id: cluster.id, + cluster_name: cluster.name, + cluster_handle: cluster.handle, + project_id: project && project.id, + project_name: project && project.name, + distro: to_string(cluster.distro), + version: cluster.version, + current_version: cluster.current_version + } + + health_metric = %{ + name: "plural.cluster.health", + value: cluster_health_to_value(cluster), + timestamp: timestamp, + attributes: Map.put(base_attrs, :healthy, Cluster.healthy?(cluster)) + } + + upgrade_metrics = + (cluster.upgrade_insights || []) + |> Enum.map(fn insight -> + %{ + name: "plural.cluster.upgradeability", + value: upgrade_status_to_value(insight.status), + timestamp: timestamp, + attributes: + base_attrs + |> Map.put(:target_version, insight.version) + |> Map.put(:insight_name, insight.name) + |> Map.put(:status, to_string(insight.status)) + } + end) + + [health_metric | upgrade_metrics] + end + + @doc """ + Converts a service status atom to a numeric value for the metric. + """ + @spec service_status_to_value(atom()) :: integer() + def service_status_to_value(:healthy), do: 2 + def service_status_to_value(:synced), do: 1 + def service_status_to_value(:stale), do: 0 + def service_status_to_value(:failed), do: -1 + def service_status_to_value(:paused), do: -2 + def service_status_to_value(_), do: 0 + + @doc """ + Converts a cluster's health state to a numeric value. + """ + @spec cluster_health_to_value(Cluster.t()) :: integer() + def cluster_health_to_value(%Cluster{} = cluster) do + if Cluster.healthy?(cluster), do: 1, else: 0 + end + + @doc """ + Converts an upgrade insight status to a numeric value. + """ + @spec upgrade_status_to_value(atom()) :: integer() + def upgrade_status_to_value(:passing), do: 1 + def upgrade_status_to_value(:warning), do: 0 + def upgrade_status_to_value(:unknown), do: -1 + def upgrade_status_to_value(:failed), do: -2 + def upgrade_status_to_value(_), do: -1 + + defp get_in_safe(struct, keys) do + Enum.reduce_while(keys, struct, fn key, acc -> + case acc do + nil -> {:halt, nil} + %{} = map -> {:cont, Map.get(map, key)} + _ -> {:halt, nil} + end + end) + end +end diff --git a/lib/console/otel/metrics_exporter.ex b/lib/console/otel/metrics_exporter.ex new file mode 100644 index 0000000000..2354754f84 --- /dev/null +++ b/lib/console/otel/metrics_exporter.ex @@ -0,0 +1,125 @@ +defmodule Console.Otel.MetricsExporter do + @moduledoc """ + GenServer for exporting cluster and service metrics to an OpenTelemetry collector. + Manages its own scheduling based on the crontab configured in DeploymentSettings. + Only runs on the leader node determined by Console.ClusterRing. + """ + use GenServer + alias Console.Repo + alias Console.Deployments.Settings + alias Console.Otel.{Exporter, MetricsBuilder} + require Logger + + defmodule State do + defstruct [:last_run_at, :timer_ref] + end + + @check_interval :timer.seconds(30) + @chunk_size 100 + + def start_link(opts \\ :ok) do + GenServer.start_link(__MODULE__, opts, name: __MODULE__) + end + + def init(_) do + if Console.conf(:initialize) do + send(self(), :check) + :timer.send_interval(@check_interval, :check) + end + {:ok, %State{}} + end + + def handle_info(:check, state) do + case {leader?(), get_config()} do + {true, {:ok, config}} -> + {:noreply, maybe_schedule(state, config)} + + _ -> + {:noreply, cancel_timer(state)} + end + end + + def handle_info(:export, state) do + case get_config() do + {:ok, %{endpoint: endpoint}} -> + do_export(endpoint) + {:noreply, %{state | last_run_at: DateTime.utc_now(), timer_ref: nil}} + + _ -> + {:noreply, %{state | timer_ref: nil}} + end + end + + def handle_info(_, state), do: {:noreply, state} + + defp get_config do + case Settings.fetch() do + %{metrics: %{enabled: true, endpoint: endpoint, crontab: crontab}} + when is_binary(endpoint) and is_binary(crontab) -> + {:ok, %{endpoint: endpoint, crontab: crontab}} + + _ -> + :disabled + end + end + + defp maybe_schedule(%State{timer_ref: ref} = state, %{crontab: _}) when not is_nil(ref) do + state + end + + defp maybe_schedule(%State{last_run_at: last} = state, %{crontab: crontab}) do + case next_run_time(crontab, last) do + {:ok, next_at} -> + delay = max(0, DateTime.diff(next_at, DateTime.utc_now(), :millisecond)) + Logger.info("Scheduling metrics export in #{delay}ms (at #{next_at})") + ref = Process.send_after(self(), :export, delay) + %{state | timer_ref: ref} + + :error -> state + end + end + + defp next_run_time(crontab, last_run_at) do + base_time = + (last_run_at || DateTime.utc_now()) + |> DateTime.to_naive() + |> NaiveDateTime.add(60, :second) + + with {:ok, expr} <- Crontab.CronExpression.Parser.parse(crontab), + {:ok, next} <- Crontab.Scheduler.get_next_run_date(expr, base_time) do + {:ok, DateTime.from_naive!(next, "Etc/UTC")} + else + {:error, reason} -> + Logger.warning("Invalid crontab expression for metrics export: #{inspect(reason)}") + :error + end + end + + defp cancel_timer(%State{timer_ref: nil} = state), do: state + defp cancel_timer(%State{timer_ref: ref} = state) do + Process.cancel_timer(ref) + %{state | timer_ref: nil} + end + + defp do_export(endpoint) do + timestamp = DateTime.utc_now() + + [] + |> Stream.concat(MetricsBuilder.service_metrics_stream(timestamp)) + |> Stream.concat(MetricsBuilder.cluster_metrics_stream(timestamp)) + |> Stream.chunk_every(@chunk_size) + |> Enum.reduce(0, fn chunk, count -> + case Exporter.export(endpoint, chunk) do + :ok -> + count + length(chunk) + + {:error, reason} -> + Logger.error("Failed to export chunk: #{inspect(reason)}") + count + end + end) + |> then(&Logger.info("Exported #{&1} metrics to #{endpoint}")) + end + + defp leader?(), do: Console.ClusterRing.node(:otel_metrics) == node() +end diff --git a/lib/console/schema/deployment_settings.ex b/lib/console/schema/deployment_settings.ex index 22129faaf0..f2c4746ab8 100644 --- a/lib/console/schema/deployment_settings.ex +++ b/lib/console/schema/deployment_settings.ex @@ -171,6 +171,12 @@ defmodule Console.Schema.DeploymentSettings do field :recommendation_cushion, :integer end + embeds_one :metrics, Metrics, on_replace: :update do + field :enabled, :boolean + field :endpoint, :string + field :crontab, :string + end + embeds_one :ai, AI, on_replace: :update do field :enabled, :boolean, default: false field :provider, AIProvider, default: :openai @@ -325,6 +331,7 @@ defmodule Console.Schema.DeploymentSettings do |> cast_embed(:smtp, with: &smtp_changeset/2) |> cast_embed(:stacks, with: &stacks_changeset/2) |> cast_embed(:cost, with: &cost_changeset/2) + |> cast_embed(:metrics, with: &metrics_changeset/2) |> cast_embed(:logging, with: &logging_changeset/2) |> change_markers(agent_helm_values: :helm_changed, agent_version: :version_changed) |> put_new_change(:write_policy_id, &Ecto.UUID.generate/0) @@ -435,6 +442,31 @@ defmodule Console.Schema.DeploymentSettings do |> cast(attrs, ~w(enabled recommendation_threshold recommendation_cushion)a) end + defp metrics_changeset(model, attrs) do + model + |> cast(attrs, ~w(enabled endpoint crontab)a) + |> validate_required_if_enabled() + |> validate_crontab() + end + + defp validate_required_if_enabled(changeset) do + case get_field(changeset, :enabled) do + true -> validate_required(changeset, [:endpoint, :crontab]) + _ -> changeset + end + end + + defp validate_crontab(changeset) do + case get_field(changeset, :crontab) do + nil -> changeset + crontab -> + case Crontab.CronExpression.Parser.parse(crontab) do + {:ok, _} -> changeset + {:error, _} -> add_error(changeset, :crontab, "is not a valid cron expression") + end + end + end + defp vector_store_changeset(model, attrs) do model |> cast(attrs, ~w(enabled store version)a) diff --git a/priv/repo/migrations/20250429000000_add_metrics_export_settings.exs b/priv/repo/migrations/20250429000000_add_metrics_export_settings.exs new file mode 100644 index 0000000000..b58727a176 --- /dev/null +++ b/priv/repo/migrations/20250429000000_add_metrics_export_settings.exs @@ -0,0 +1,9 @@ +defmodule Console.Repo.Migrations.AddMetricsExportSettings do + use Ecto.Migration + + def change do + alter table(:deployment_settings) do + add :metrics, :map + end + end +end diff --git a/schema/schema.graphql b/schema/schema.graphql index 9e257376c3..f3bf5fc7c7 100644 --- a/schema/schema.graphql +++ b/schema/schema.graphql @@ -4091,6 +4091,9 @@ input DeploymentSettingsAttributes { "settings for cost management functionality" cost: CostSettingsAttributes + "settings for OpenTelemetry metrics export" + metrics: MetricsSettingsAttributes + readBindings: [PolicyBindingAttributes] writeBindings: [PolicyBindingAttributes] @@ -4100,6 +4103,18 @@ input DeploymentSettingsAttributes { createBindings: [PolicyBindingAttributes] } +"Settings for OpenTelemetry metrics export" +input MetricsSettingsAttributes { + "whether to enable metrics export" + enabled: Boolean + + "the OpenTelemetry collector endpoint to send metrics to" + endpoint: String + + "cron expression for how often to export metrics (e.g. '*\/5 * * * *')" + crontab: String +} + input HttpConnectionAttributes { host: String! @@ -4461,6 +4476,9 @@ type DeploymentSettings { "settings for connections to log aggregation datastores" logging: LoggingSettings + "settings for OpenTelemetry metrics export" + metrics: MetricsSettings + "the root repo you used to run `plural up`" mgmtRepo: String @@ -4532,6 +4550,18 @@ type CostSettings { recommendationCushion: Int } +"Settings for OpenTelemetry metrics export" +type MetricsSettings { + "whether metrics export is enabled" + enabled: Boolean + + "the OpenTelemetry collector endpoint" + endpoint: String + + "cron expression for export schedule" + crontab: String +} + "Settings for configuring access to common LLM providers" type AiSettings { enabled: Boolean diff --git a/test/console/otel/exporter_test.exs b/test/console/otel/exporter_test.exs new file mode 100644 index 0000000000..b69c2b2671 --- /dev/null +++ b/test/console/otel/exporter_test.exs @@ -0,0 +1,220 @@ +defmodule Console.Otel.ExporterTest do + use Console.DataCase, async: false + use Mimic + alias Console.Otel.Exporter + + setup :set_mimic_global + + describe "export/2" do + test "it sends correctly formatted OTLP JSON to the endpoint" do + timestamp = ~U[2024-01-15 10:00:00Z] + + metrics = [ + %{ + name: "plural.service.health", + value: 2, + timestamp: timestamp, + attributes: %{ + cluster: "prod", + name: "api", + namespace: "apps" + } + } + ] + + expect(Req, :post, fn url, opts -> + assert url == "https://otel-collector.example.com/v1/metrics" + + payload = opts[:json] + assert payload["resourceMetrics"] + + [resource_metric] = payload["resourceMetrics"] + + resource_attrs = resource_metric["resource"]["attributes"] + assert Enum.any?(resource_attrs, &(&1["key"] == "service.name" && &1["value"]["stringValue"] == "plural-console")) + + [scope_metric] = resource_metric["scopeMetrics"] + assert scope_metric["scope"]["name"] == "plural.metrics" + + [metric] = scope_metric["metrics"] + assert metric["name"] == "plural.service.health" + assert metric["gauge"]["dataPoints"] + + [data_point] = metric["gauge"]["dataPoints"] + assert data_point["asInt"] == 2 + assert data_point["timeUnixNano"] == DateTime.to_unix(timestamp, :nanosecond) + + attrs = Map.new(data_point["attributes"], fn a -> {a["key"], a["value"]} end) + assert attrs["cluster"]["stringValue"] == "prod" + assert attrs["name"]["stringValue"] == "api" + assert attrs["namespace"]["stringValue"] == "apps" + + {:ok, %Req.Response{status: 200, body: ""}} + end) + + assert :ok = Exporter.export("https://otel-collector.example.com", metrics) + end + + test "it appends /v1/metrics to the endpoint" do + expect(Req, :post, fn url, _opts -> + assert url == "https://otel.example.com/v1/metrics" + {:ok, %Req.Response{status: 200, body: ""}} + end) + + Exporter.export("https://otel.example.com", [%{name: "test", value: 1, attributes: %{}}]) + end + + test "it handles trailing slash in endpoint" do + expect(Req, :post, fn url, _opts -> + assert url == "https://otel.example.com/v1/metrics" + {:ok, %Req.Response{status: 200, body: ""}} + end) + + Exporter.export("https://otel.example.com/", [%{name: "test", value: 1, attributes: %{}}]) + end + + test "it filters out nil attribute values" do + metrics = [ + %{ + name: "test.metric", + value: 1, + attributes: %{ + present: "value", + missing: nil, + also_present: "another" + } + } + ] + + expect(Req, :post, fn _url, opts -> + [resource_metric] = opts[:json]["resourceMetrics"] + [scope_metric] = resource_metric["scopeMetrics"] + [metric] = scope_metric["metrics"] + [data_point] = metric["gauge"]["dataPoints"] + + attr_keys = Enum.map(data_point["attributes"], & &1["key"]) + + assert "present" in attr_keys + assert "also_present" in attr_keys + refute "missing" in attr_keys + + {:ok, %Req.Response{status: 200, body: ""}} + end) + + Exporter.export("https://otel.example.com", metrics) + end + + test "it handles different attribute types" do + metrics = [ + %{ + name: "test.metric", + value: 1, + attributes: %{ + string_attr: "hello", + int_attr: 42, + float_attr: 3.14, + bool_attr: true, + atom_attr: :some_atom + } + } + ] + + expect(Req, :post, fn _url, opts -> + [resource_metric] = opts[:json]["resourceMetrics"] + [scope_metric] = resource_metric["scopeMetrics"] + [metric] = scope_metric["metrics"] + [data_point] = metric["gauge"]["dataPoints"] + + attrs = Map.new(data_point["attributes"], fn a -> {a["key"], a["value"]} end) + + assert attrs["string_attr"] == %{"stringValue" => "hello"} + assert attrs["int_attr"] == %{"intValue" => 42} + assert attrs["float_attr"] == %{"doubleValue" => 3.14} + assert attrs["bool_attr"] == %{"boolValue" => true} + assert attrs["atom_attr"] == %{"stringValue" => "some_atom"} + + {:ok, %Req.Response{status: 200, body: ""}} + end) + + Exporter.export("https://otel.example.com", metrics) + end + + @tag :capture_log + test "it returns error on HTTP failure" do + expect(Req, :post, fn _url, _opts -> + {:ok, %Req.Response{status: 503, body: "service unavailable"}} + end) + + assert {:error, {:http_error, 503, "service unavailable"}} = + Exporter.export("https://otel.example.com", [%{name: "test", value: 1, attributes: %{}}]) + end + + @tag :capture_log + test "it returns error on network failure" do + expect(Req, :post, fn _url, _opts -> + {:error, %Mint.TransportError{reason: :timeout}} + end) + + assert {:error, %Mint.TransportError{reason: :timeout}} = + Exporter.export("https://otel.example.com", [%{name: "test", value: 1, attributes: %{}}]) + end + + test "it uses default timestamp when not provided" do + metrics = [%{name: "test", value: 1, attributes: %{}}] + + expect(Req, :post, fn _url, opts -> + [resource_metric] = opts[:json]["resourceMetrics"] + [scope_metric] = resource_metric["scopeMetrics"] + [metric] = scope_metric["metrics"] + [data_point] = metric["gauge"]["dataPoints"] + + assert is_integer(data_point["timeUnixNano"]) + assert data_point["timeUnixNano"] > 0 + + {:ok, %Req.Response{status: 200, body: ""}} + end) + + Exporter.export("https://otel.example.com", metrics) + end + + test "it handles multiple metrics in a single export" do + metrics = [ + %{name: "plural.service.health", value: 2, attributes: %{service: "api"}}, + %{name: "plural.service.health", value: -1, attributes: %{service: "web"}}, + %{name: "plural.cluster.health", value: 1, attributes: %{cluster: "prod"}} + ] + + expect(Req, :post, fn _url, opts -> + [resource_metric] = opts[:json]["resourceMetrics"] + [scope_metric] = resource_metric["scopeMetrics"] + exported_metrics = scope_metric["metrics"] + + assert length(exported_metrics) == 3 + + service_metrics = Enum.filter(exported_metrics, &(&1["name"] == "plural.service.health")) + cluster_metrics = Enum.filter(exported_metrics, &(&1["name"] == "plural.cluster.health")) + + assert length(service_metrics) == 2 + assert length(cluster_metrics) == 1 + + {:ok, %Req.Response{status: 200, body: ""}} + end) + + assert :ok = Exporter.export("https://otel.example.com", metrics) + end + + test "it handles empty metrics list" do + expect(Req, :post, fn _url, opts -> + [resource_metric] = opts[:json]["resourceMetrics"] + [scope_metric] = resource_metric["scopeMetrics"] + exported_metrics = scope_metric["metrics"] + + assert exported_metrics == [] + + {:ok, %Req.Response{status: 200, body: ""}} + end) + + assert :ok = Exporter.export("https://otel.example.com", []) + end + end +end diff --git a/test/console/otel/metrics_builder_test.exs b/test/console/otel/metrics_builder_test.exs new file mode 100644 index 0000000000..23302335a1 --- /dev/null +++ b/test/console/otel/metrics_builder_test.exs @@ -0,0 +1,192 @@ +defmodule Console.Otel.MetricsBuilderTest do + use Console.DataCase, async: true + alias Console.Otel.MetricsBuilder + + describe "build_service_metric/2" do + test "builds metric with all attributes when service has full data" do + project = insert(:project, name: "test-project") + cluster = insert(:cluster, name: "test-cluster", handle: "test-handle", project: project) + service = insert(:service, + cluster: cluster, + name: "api-gateway", + namespace: "apps", + status: :healthy, + git: %{ref: "main", folder: "charts/api"}, + helm: %{chart: "api-gateway", version: "1.2.3"} + ) + + service = Repo.preload(service, cluster: :project) + timestamp = DateTime.utc_now() + + metric = MetricsBuilder.build_service_metric(service, timestamp) + + assert metric.name == "plural.service.health" + assert metric.value == 2 + assert metric.timestamp == timestamp + assert metric.attributes.service_id == service.id + assert metric.attributes.service_name == "api-gateway" + assert metric.attributes.namespace == "apps" + assert metric.attributes.cluster_id == cluster.id + assert metric.attributes.cluster_name == "test-cluster" + assert metric.attributes.cluster_handle == "test-handle" + assert metric.attributes.project_id == project.id + assert metric.attributes.project_name == "test-project" + assert metric.attributes.git_ref == "main" + assert metric.attributes.git_folder == "charts/api" + assert metric.attributes.helm_chart == "api-gateway" + assert metric.attributes.helm_version == "1.2.3" + assert metric.attributes.status == "healthy" + end + + test "handles services without git/helm gracefully" do + cluster = insert(:cluster) + service = insert(:service, cluster: cluster, status: :stale, git: nil, helm: nil) + service = Repo.preload(service, cluster: :project) + + metric = MetricsBuilder.build_service_metric(service, DateTime.utc_now()) + + assert metric.value == 0 + assert metric.attributes.git_ref == nil + assert metric.attributes.git_folder == nil + assert metric.attributes.helm_chart == nil + assert metric.attributes.helm_version == nil + end + end + + describe "build_cluster_metrics/2" do + test "builds health metric with correct attributes" do + project = insert(:project, name: "prod-project") + cluster = insert(:cluster, + name: "prod-cluster", + handle: "prod", + project: project, + distro: :eks, + version: "1.28", + current_version: "1.28.5", + pinged_at: Timex.now() + ) + cluster = Repo.preload(cluster, [:project, :upgrade_insights]) + + timestamp = DateTime.utc_now() + [health_metric | _] = MetricsBuilder.build_cluster_metrics(cluster, timestamp) + + assert health_metric.name == "plural.cluster.health" + assert health_metric.value == 1 + assert health_metric.timestamp == timestamp + assert health_metric.attributes.cluster_id == cluster.id + assert health_metric.attributes.cluster_name == "prod-cluster" + assert health_metric.attributes.cluster_handle == "prod" + assert health_metric.attributes.project_name == "prod-project" + assert health_metric.attributes.distro == "eks" + assert health_metric.attributes.version == "1.28" + assert health_metric.attributes.current_version == "1.28.5" + assert health_metric.attributes.healthy == true + end + + test "builds upgradeability metrics from upgrade insights" do + cluster = insert(:cluster, pinged_at: Timex.now()) + insert(:upgrade_insight, cluster: cluster, status: :passing, version: "1.29", name: "k8s-upgrade") + insert(:upgrade_insight, cluster: cluster, status: :failed, version: "1.30", name: "k8s-upgrade") + cluster = Repo.preload(cluster, [:project, :upgrade_insights], force: true) + + timestamp = DateTime.utc_now() + metrics = MetricsBuilder.build_cluster_metrics(cluster, timestamp) + + assert length(metrics) == 3 + + upgrade_metrics = Enum.filter(metrics, &(&1.name == "plural.cluster.upgradeability")) + assert length(upgrade_metrics) == 2 + + passing_metric = Enum.find(upgrade_metrics, &(&1.attributes.target_version == "1.29")) + assert passing_metric.value == 1 + assert passing_metric.attributes.status == "passing" + assert passing_metric.attributes.insight_name == "k8s-upgrade" + + failed_metric = Enum.find(upgrade_metrics, &(&1.attributes.target_version == "1.30")) + assert failed_metric.value == -2 + assert failed_metric.attributes.status == "failed" + end + + test "marks unhealthy clusters correctly" do + cluster = insert(:cluster, pinged_at: Timex.now() |> Timex.shift(hours: -1)) + cluster = Repo.preload(cluster, [:project, :upgrade_insights]) + + [health_metric | _] = MetricsBuilder.build_cluster_metrics(cluster, DateTime.utc_now()) + + assert health_metric.value == 0 + assert health_metric.attributes.healthy == false + end + + test "handles clusters without projects" do + cluster = insert(:cluster, project: nil, pinged_at: Timex.now()) + cluster = Repo.preload(cluster, [:project, :upgrade_insights]) + + [health_metric | _] = MetricsBuilder.build_cluster_metrics(cluster, DateTime.utc_now()) + + assert health_metric.attributes.project_id == nil + assert health_metric.attributes.project_name == nil + end + end + + describe "service_status_to_value/1" do + test "maps all status values correctly" do + assert MetricsBuilder.service_status_to_value(:healthy) == 2 + assert MetricsBuilder.service_status_to_value(:synced) == 1 + assert MetricsBuilder.service_status_to_value(:stale) == 0 + assert MetricsBuilder.service_status_to_value(:failed) == -1 + assert MetricsBuilder.service_status_to_value(:paused) == -2 + assert MetricsBuilder.service_status_to_value(:unknown) == 0 + end + end + + describe "upgrade_status_to_value/1" do + test "maps all status values correctly" do + assert MetricsBuilder.upgrade_status_to_value(:passing) == 1 + assert MetricsBuilder.upgrade_status_to_value(:warning) == 0 + assert MetricsBuilder.upgrade_status_to_value(:unknown) == -1 + assert MetricsBuilder.upgrade_status_to_value(:failed) == -2 + assert MetricsBuilder.upgrade_status_to_value(:other) == -1 + end + end + + describe "cluster_health_to_value/1" do + test "returns 1 for healthy cluster" do + cluster = insert(:cluster, pinged_at: Timex.now()) + assert MetricsBuilder.cluster_health_to_value(cluster) == 1 + end + + test "returns 0 for unhealthy cluster" do + cluster = insert(:cluster, pinged_at: Timex.now() |> Timex.shift(hours: -1)) + assert MetricsBuilder.cluster_health_to_value(cluster) == 0 + end + end + + describe "service_metrics_stream/1" do + test "streams metrics for all services" do + project = insert(:project) + cluster = insert(:cluster, project: project, pinged_at: Timex.now()) + insert(:service, cluster: cluster, status: :healthy) + insert(:service, cluster: cluster, status: :failed) + + Repo.transaction(fn -> + metrics = MetricsBuilder.service_metrics_stream() |> Enum.to_list() + assert length(metrics) == 2 + assert Enum.all?(metrics, &(&1.name == "plural.service.health")) + end) + end + end + + describe "cluster_metrics_stream/1" do + test "streams metrics for all clusters" do + project = insert(:project) + insert(:cluster, project: project, pinged_at: Timex.now()) + insert(:cluster, project: project, pinged_at: Timex.now()) + + Repo.transaction(fn -> + metrics = MetricsBuilder.cluster_metrics_stream() |> Enum.to_list() + health_metrics = Enum.filter(metrics, &(&1.name == "plural.cluster.health")) + assert length(health_metrics) == 2 + end) + end + end +end diff --git a/test/console/otel/metrics_exporter_test.exs b/test/console/otel/metrics_exporter_test.exs new file mode 100644 index 0000000000..eacf5345af --- /dev/null +++ b/test/console/otel/metrics_exporter_test.exs @@ -0,0 +1,277 @@ +defmodule Console.Otel.MetricsExporterTest do + use Console.DataCase, async: false + use Mimic + alias Console.Otel.MetricsExporter + + setup :set_mimic_global + + defp stub_and_collect_metrics do + test_pid = self() + + stub(Req, :post, fn url, opts -> + assert url == "https://otel.example.com/v1/metrics" + [resource_metric] = opts[:json]["resourceMetrics"] + [scope_metric] = resource_metric["scopeMetrics"] + metrics = scope_metric["metrics"] + send(test_pid, {:metrics_chunk, metrics}) + {:ok, %Req.Response{status: 200, body: ""}} + end) + end + + defp run_export do + {:ok, pid} = GenServer.start_link(MetricsExporter, :ok) + send(pid, :export) + :timer.sleep(100) + GenServer.stop(pid) + end + + defp collect_all_metrics(acc \\ []) do + receive do + {:metrics_chunk, metrics} -> collect_all_metrics(acc ++ metrics) + after + 0 -> acc + end + end + + describe "export via GenServer message" do + test "exports service and cluster metrics when enabled" do + project = insert(:project) + cluster = insert(:cluster, project: project, pinged_at: Timex.now()) + insert(:service, cluster: cluster, status: :healthy, namespace: "apps", name: "api") + + deployment_settings(metrics: %{ + enabled: true, + endpoint: "https://otel.example.com", + crontab: "*/5 * * * *" + }) + + stub_and_collect_metrics() + run_export() + metrics = collect_all_metrics() + + assert Enum.any?(metrics, &(&1["name"] == "plural.service.health")) + assert Enum.any?(metrics, &(&1["name"] == "plural.cluster.health")) + end + + test "does nothing when metrics export is disabled" do + insert(:cluster) + insert(:service) + + deployment_settings(metrics: %{enabled: false}) + + reject(&Req.post/2) + run_export() + end + + test "does nothing when metrics settings are not configured" do + insert(:cluster) + insert(:service) + + deployment_settings() + + reject(&Req.post/2) + run_export() + end + + test "exports metrics in chunks" do + project = insert(:project) + cluster = insert(:cluster, project: project, pinged_at: Timex.now()) + + for _ <- 1..5 do + insert(:service, cluster: cluster, status: :healthy) + end + + deployment_settings(metrics: %{ + enabled: true, + endpoint: "https://otel.example.com", + crontab: "*/5 * * * *" + }) + + stub_and_collect_metrics() + run_export() + metrics = collect_all_metrics() + + service_metrics = Enum.filter(metrics, &(&1["name"] == "plural.service.health")) + assert length(service_metrics) == 5 + end + + @tag :capture_log + test "continues exporting if a chunk fails" do + project = insert(:project) + cluster = insert(:cluster, project: project, pinged_at: Timex.now()) + insert(:service, cluster: cluster, status: :healthy) + + deployment_settings(metrics: %{ + enabled: true, + endpoint: "https://otel.example.com", + crontab: "*/5 * * * *" + }) + + call_count = :counters.new(1, []) + + stub(Req, :post, fn _url, _opts -> + count = :counters.get(call_count, 1) + :counters.add(call_count, 1, 1) + + if count == 0 do + {:error, :connection_refused} + else + {:ok, %Req.Response{status: 200, body: ""}} + end + end) + + run_export() + end + end + + describe "leader election" do + test "schedules export when node is leader and config is valid" do + project = insert(:project) + cluster = insert(:cluster, project: project, pinged_at: Timex.now()) + insert(:service, cluster: cluster, status: :healthy) + + deployment_settings(metrics: %{ + enabled: true, + endpoint: "https://otel.example.com", + crontab: "* * * * *" + }) + + expect(Console.ClusterRing, :node, fn :otel_metrics -> node() end) + + {:ok, pid} = GenServer.start_link(MetricsExporter, :ok) + send(pid, :check) + :timer.sleep(100) + + state = :sys.get_state(pid) + assert state.timer_ref != nil + + GenServer.stop(pid) + end + + test "does not schedule when node is not leader" do + insert(:cluster) + insert(:service) + + deployment_settings(metrics: %{ + enabled: true, + endpoint: "https://otel.example.com", + crontab: "* * * * *" + }) + + expect(Console.ClusterRing, :node, fn :otel_metrics -> :other_node end) + + {:ok, pid} = GenServer.start_link(MetricsExporter, :ok) + send(pid, :check) + :timer.sleep(100) + + state = :sys.get_state(pid) + assert state.timer_ref == nil + + GenServer.stop(pid) + end + + test "cancels timer when leadership is lost" do + deployment_settings(metrics: %{ + enabled: true, + endpoint: "https://otel.example.com", + crontab: "* * * * *" + }) + + expect(Console.ClusterRing, :node, fn :otel_metrics -> node() end) + expect(Console.ClusterRing, :node, fn :otel_metrics -> :other_node end) + + {:ok, pid} = GenServer.start_link(MetricsExporter, :ok) + + send(pid, :check) + :timer.sleep(50) + state1 = :sys.get_state(pid) + assert state1.timer_ref != nil + + send(pid, :check) + :timer.sleep(50) + state2 = :sys.get_state(pid) + assert state2.timer_ref == nil + + GenServer.stop(pid) + end + + @tag :capture_log + test "does not schedule when crontab is invalid" do + insert(:cluster) + insert(:service) + + deployment_settings(metrics: %{ + enabled: true, + endpoint: "https://otel.example.com", + crontab: "invalid cron expression" + }) + + expect(Console.ClusterRing, :node, fn :otel_metrics -> node() end) + + {:ok, pid} = GenServer.start_link(MetricsExporter, :ok) + send(pid, :check) + :timer.sleep(100) + + state = :sys.get_state(pid) + assert state.timer_ref == nil + + GenServer.stop(pid) + end + end + + describe "scheduling" do + test "updates last_run_at after export" do + project = insert(:project) + cluster = insert(:cluster, project: project, pinged_at: Timex.now()) + insert(:service, cluster: cluster, status: :healthy) + + deployment_settings(metrics: %{ + enabled: true, + endpoint: "https://otel.example.com", + crontab: "*/5 * * * *" + }) + + stub(Req, :post, fn _url, _opts -> + {:ok, %Req.Response{status: 200, body: ""}} + end) + + {:ok, pid} = GenServer.start_link(MetricsExporter, :ok) + + state_before = :sys.get_state(pid) + assert state_before.last_run_at == nil + + send(pid, :export) + :timer.sleep(100) + + state_after = :sys.get_state(pid) + assert state_after.last_run_at != nil + + GenServer.stop(pid) + end + + test "clears timer_ref after export completes" do + project = insert(:project) + cluster = insert(:cluster, project: project, pinged_at: Timex.now()) + insert(:service, cluster: cluster, status: :healthy) + + deployment_settings(metrics: %{ + enabled: true, + endpoint: "https://otel.example.com", + crontab: "*/5 * * * *" + }) + + stub(Req, :post, fn _url, _opts -> + {:ok, %Req.Response{status: 200, body: ""}} + end) + + {:ok, pid} = GenServer.start_link(MetricsExporter, :ok) + send(pid, :export) + :timer.sleep(100) + + state = :sys.get_state(pid) + assert state.timer_ref == nil + + GenServer.stop(pid) + end + end +end diff --git a/test/support/factory.ex b/test/support/factory.ex index 13100a02ed..b9b451e06f 100644 --- a/test/support/factory.ex +++ b/test/support/factory.ex @@ -261,6 +261,16 @@ defmodule Console.Factory do } end + def upgrade_insight_factory do + %Schema.UpgradeInsight{ + name: sequence(:upgrade_insight, &"insight-#{&1}"), + version: "1.29", + status: :passing, + description: "Test upgrade insight", + cluster: build(:cluster) + } + end + def api_deprecation_factory do %Schema.ApiDeprecation{ blocking: false, diff --git a/test/test_helper.exs b/test/test_helper.exs index 640ddaad93..d83d276969 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -61,6 +61,7 @@ Mimic.copy(CloudQuery.Client) Mimic.copy(Toolquery.ToolQuery.Stub) Mimic.copy(Console.Cache) Mimic.copy(ReqLLM) +Mimic.copy(Console.ClusterRing) ExUnit.start() Ecto.Adapters.SQL.Sandbox.mode(Console.Repo, :manual)