From 5fd0fa87891de7b6058ef40c641762b1f702756a Mon Sep 17 00:00:00 2001 From: Gabriel McGoldrick Date: Tue, 22 Apr 2025 15:33:35 +0100 Subject: [PATCH] standalone logging docs - first pass at base 6.0 docs --- _attributes/common-attributes.adoc | 7 + _topic_maps/_topic_map.yml | 31 +- about/about-logging.adoc | 172 ++++++- about/cluster-logging-support.adoc | 49 ++ about/log6x-about.adoc | 65 --- about/logging-visualization.adoc | 9 + configuring/_attributes | 1 + configuring/configuring-log-forwarding.adoc | 117 +++++ .../configuring-lokistack-storage.adoc | 44 ++ configuring/images | 1 + configuring/modules | 1 + configuring/snippets | 1 + installing/_attributes | 1 + installing/images | 1 + installing/installing-logging.adoc | 53 ++ installing/modules | 1 + installing/snippets | 1 + modules/audit-filtering.adoc | 115 +++++ ...r-logging-maintenance-support-list-6x.adoc | 15 + .../cluster-logging-must-gather-about.adoc | 13 + ...luster-logging-must-gather-collecting.adoc | 39 ++ modules/content-filter-drop-records.adoc | 104 ++++ modules/content-filter-prune-records.adoc | 55 ++ modules/enabling-loki-alerts.adoc | 101 ++++ ...abling-multi-line-exception-detection.adoc | 60 +++ modules/identity-federation.adoc | 59 +++ ...nput-spec-filter-audit-infrastructure.adoc | 54 ++ .../input-spec-filter-labels-expressions.adoc | 54 ++ ...input-spec-filter-namespace-container.adoc | 49 ++ modules/installing-logging-operator-cli.adoc | 163 ++++++ ...stalling-logging-operator-web-console.adoc | 163 ++++++ modules/installing-loki-operator-cli.adoc | 203 ++++++++ .../installing-loki-operator-web-console.adoc | 133 +++++ modules/log6x-quickstart-opentelemetry.adoc | 158 ------ modules/log6x-quickstart-viaq.adoc | 146 ------ modules/logging-oc-explain.adoc | 75 +++ modules/loki-memberlist-ip.adoc | 29 ++ modules/loki-pod-placement.adoc | 195 +++++++ modules/loki-rate-limit-errors.adoc | 73 +++ modules/loki-rbac-rules-permissions.adoc | 63 +++ modules/loki-reliability-hardening.adoc | 35 ++ modules/loki-restart-hardening.adoc | 5 + modules/loki-retention.adoc | 109 ++++ modules/loki-zone-aware-replication.adoc | 29 ++ modules/loki-zone-fail-recovery.adoc | 87 ++++ modules/setting-up-log-collection.adoc | 205 ++++++++ modules/unmanaged-operators.adoc | 53 ++ snippets/about-pod-affinity.adoc | 6 + snippets/logging-api-support-states-snip.adoc | 33 ++ snippets/logging-compatibility-snip.adoc | 6 + snippets/logging-loki-statement-snip.adoc | 8 + snippets/logging-retention-period-snip.adoc | 6 + snippets/logging-supported-config-snip.adoc | 10 + snippets/loki-statement-snip.adoc | 8 + snippets/technology-preview.adoc | 2 +- upgrading/_attributes | 1 + upgrading/images | 1 + upgrading/modules | 1 + upgrading/snippets | 1 + upgrading/upgrading-to-logging-60.adoc | 477 ++++++++++++++++++ 60 files changed, 3381 insertions(+), 376 deletions(-) create mode 100644 about/cluster-logging-support.adoc delete mode 100644 about/log6x-about.adoc create mode 100644 about/logging-visualization.adoc create mode 120000 configuring/_attributes create mode 100644 configuring/configuring-log-forwarding.adoc create mode 100644 configuring/configuring-lokistack-storage.adoc create mode 120000 configuring/images create mode 120000 configuring/modules create mode 120000 configuring/snippets create mode 120000 installing/_attributes create mode 120000 installing/images create mode 100644 installing/installing-logging.adoc create mode 120000 installing/modules create mode 120000 installing/snippets create mode 100644 
modules/audit-filtering.adoc create mode 100644 modules/cluster-logging-maintenance-support-list-6x.adoc create mode 100644 modules/cluster-logging-must-gather-about.adoc create mode 100644 modules/cluster-logging-must-gather-collecting.adoc create mode 100644 modules/content-filter-drop-records.adoc create mode 100644 modules/content-filter-prune-records.adoc create mode 100644 modules/enabling-loki-alerts.adoc create mode 100644 modules/enabling-multi-line-exception-detection.adoc create mode 100644 modules/identity-federation.adoc create mode 100644 modules/input-spec-filter-audit-infrastructure.adoc create mode 100644 modules/input-spec-filter-labels-expressions.adoc create mode 100644 modules/input-spec-filter-namespace-container.adoc create mode 100644 modules/installing-logging-operator-cli.adoc create mode 100644 modules/installing-logging-operator-web-console.adoc create mode 100644 modules/installing-loki-operator-cli.adoc create mode 100644 modules/installing-loki-operator-web-console.adoc delete mode 100644 modules/log6x-quickstart-opentelemetry.adoc delete mode 100644 modules/log6x-quickstart-viaq.adoc create mode 100644 modules/logging-oc-explain.adoc create mode 100644 modules/loki-memberlist-ip.adoc create mode 100644 modules/loki-pod-placement.adoc create mode 100644 modules/loki-rate-limit-errors.adoc create mode 100644 modules/loki-rbac-rules-permissions.adoc create mode 100644 modules/loki-reliability-hardening.adoc create mode 100644 modules/loki-restart-hardening.adoc create mode 100644 modules/loki-retention.adoc create mode 100644 modules/loki-zone-aware-replication.adoc create mode 100644 modules/loki-zone-fail-recovery.adoc create mode 100644 modules/setting-up-log-collection.adoc create mode 100644 modules/unmanaged-operators.adoc create mode 100644 snippets/about-pod-affinity.adoc create mode 100644 snippets/logging-api-support-states-snip.adoc create mode 100644 snippets/logging-compatibility-snip.adoc create mode 100644 snippets/logging-loki-statement-snip.adoc create mode 100644 snippets/logging-retention-period-snip.adoc create mode 100644 snippets/logging-supported-config-snip.adoc create mode 100644 snippets/loki-statement-snip.adoc create mode 120000 upgrading/_attributes create mode 120000 upgrading/images create mode 120000 upgrading/modules create mode 120000 upgrading/snippets create mode 100644 upgrading/upgrading-to-logging-60.adoc diff --git a/_attributes/common-attributes.adoc b/_attributes/common-attributes.adoc index 4d418755381f..9d0dda5fe55e 100644 --- a/_attributes/common-attributes.adoc +++ b/_attributes/common-attributes.adoc @@ -378,3 +378,10 @@ endif::openshift-origin[] :LoggingProductName: OpenShift Logging :LoggingProductShortName: Logging + + +:ocp-product-title: OpenShift Container Platform +:ocp-product-version: 4.13 +:dedicated-product-title: OpenShift Dedicated +:aro-product-title: Azure Red Hat OpenShift +:rosa-product-title: Red Hat OpenShift Service on AWS \ No newline at end of file diff --git a/_topic_maps/_topic_map.yml b/_topic_maps/_topic_map.yml index 19ffd49e4f85..64252b637078 100644 --- a/_topic_maps/_topic_map.yml +++ b/_topic_maps/_topic_map.yml @@ -3,7 +3,32 @@ Name: About OpenShift Logging Dir: about Distros: openshift-logging Topics: -- Name: OpenShift Logging overview +- Name: Logging overview File: about-logging -- Name: Logging - File: log6x-about +- Name: Cluster logging support + File: cluster-logging-support +- Name: Visualization for logging + File: logging-visualization +--- +Name: Installing +Dir: 
installing +Distros: openshift-logging +Topics: +- Name: Installing logging + File: installing-logging +--- +Name: Configuring +Dir: configuring +Distros: openshift-logging +Topics: +- Name: Configuring log forwarding + File: configuring-log-forwarding +- Name: Configuring LokiStack storage + File: configuring-lokistack-storage +--- +Name: Upgrading +Dir: upgrading +Distros: openshift-logging +Topics: +- Name: Upgrading to Logging 6.0 + File: upgrading-to-logging-60 diff --git a/about/about-logging.adoc b/about/about-logging.adoc index 2e9b990f8010..cde05839e732 100644 --- a/about/about-logging.adoc +++ b/about/about-logging.adoc @@ -1,9 +1,175 @@ -:_content-type: ASSEMBLY +:_mod-docs-content-type: ASSEMBLY include::_attributes/common-attributes.adoc[] [id="about-logging"] -= {LoggingProductName} overview += {product-title} overview :context: about-logging toc::[] -Welcome to logging \ No newline at end of file +The `ClusterLogForwarder` custom resource (CR) is the central configuration point for log collection and forwarding. + +== Inputs and Outputs + +Inputs specify the sources of logs to be forwarded. Logging provides the following built-in input types that select logs from different parts of your cluster: + +* `application` +* `receiver` +* `infrastructure` +* `audit` + +You can also define custom inputs based on namespaces or pod labels to fine-tune log selection. + +Outputs define the destinations where logs are sent. Each output type has its own set of configuration options, allowing you to customize the behavior and authentication settings. + + +== Receiver Input Type +The receiver input type enables the Logging system to accept logs from external sources. It supports two formats for receiving logs: `http` and `syslog`. + +The `ReceiverSpec` field defines the configuration for a receiver input. + +== Pipelines and Filters + +Pipelines determine the flow of logs from inputs to outputs. A pipeline consists of one or more input refs, output refs, and optional filter refs. You can use filters to transform or drop log messages within a pipeline. The order of filters matters, as they are applied sequentially, and earlier filters can prevent log messages from reaching later stages. + +== Operator Behavior + +The Cluster Logging Operator manages the deployment and configuration of the collector based on the `managementState` field: + +- When set to `Managed` (default), the Operator actively manages the logging resources to match the configuration defined in the spec. +- When set to `Unmanaged`, the Operator does not take any action, allowing you to manually manage the logging components. + +== Validation +Logging includes extensive validation rules and default values to ensure a smooth and error-free configuration experience. The `ClusterLogForwarder` resource enforces validation checks on required fields, dependencies between fields, and the format of input values. Default values are provided for certain fields, reducing the need for explicit configuration in common scenarios. + +== Quick Start + +.Prerequisites +* You have access to an {ocp-product-title} cluster with `cluster-admin` permissions. +* You installed the {oc-first}. +* You have access to a supported object store. For example, AWS S3, Google Cloud Storage, {azure-short}, Swift, Minio, or {rh-storage}. + +.Procedure + +. Install the `{clo}`, `{loki-op}`, and `{coo-first}` from OperatorHub. + +. 
Create a secret to access an existing object storage bucket: ++ +.Example command for AWS +[source,terminal,subs="+quotes"] +---- +$ oc create secret generic logging-loki-s3 \ + --from-literal=bucketnames="" \ + --from-literal=endpoint="" \ + --from-literal=access_key_id="" \ + --from-literal=access_key_secret="" \ + --from-literal=region="" \ + -n openshift-logging +---- + +. Create a `LokiStack` custom resource (CR) in the `openshift-logging` namespace: ++ +[source,yaml] +---- +apiVersion: loki.grafana.com/v1 +kind: LokiStack +metadata: + name: logging-loki + namespace: openshift-logging +spec: + managementState: Managed + size: 1x.extra-small + storage: + schemas: + - effectiveDate: '2022-06-01' + version: v13 + secret: + name: logging-loki-s3 + type: s3 + storageClassName: gp3-csi + tenants: + mode: openshift-logging +---- + +. Create a service account for the collector: ++ +[source,shell] +---- +$ oc create sa collector -n openshift-logging +---- + +. Bind the `ClusterRole` to the service account: ++ +[source,shell] +---- +$ oc adm policy add-cluster-role-to-user logging-collector-logs-writer -z collector -n openshift-logging +---- + +. Create a `UIPlugin` to enable the Log section in the Observe tab: ++ +[source,yaml] +---- +apiVersion: observability.openshift.io/v1alpha1 +kind: UIPlugin +metadata: + name: logging +spec: + type: Logging + logging: + lokiStack: + name: logging-loki +---- + +. Add additional roles to the collector service account: ++ +[source,shell] +---- +$ oc adm policy add-cluster-role-to-user collect-application-logs -z collector -n openshift-logging +---- ++ +[source,terminal] +---- +$ oc adm policy add-cluster-role-to-user collect-audit-logs -z collector -n openshift-logging +---- ++ +[source,terminal] +---- +$ oc adm policy add-cluster-role-to-user collect-infrastructure-logs -z collector -n openshift-logging +---- + +. Create a `ClusterLogForwarder` CR to configure log forwarding: ++ +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +metadata: + name: collector + namespace: openshift-logging +spec: + serviceAccount: + name: collector + outputs: + - name: default-lokistack + type: lokiStack + lokiStack: + target: + name: logging-loki + namespace: openshift-logging + authentication: + token: + from: serviceAccount + tls: + ca: + key: service-ca.crt + configMapName: openshift-service-ca.crt + pipelines: + - name: default-logstore + inputRefs: + - application + - infrastructure + outputRefs: + - default-lokistack +---- + +.Verification +* Verify that logs are visible in the *Log* section of the *Observe* tab in the {ocp-product-title} web console. diff --git a/about/cluster-logging-support.adoc b/about/cluster-logging-support.adoc new file mode 100644 index 000000000000..444829f08cee --- /dev/null +++ b/about/cluster-logging-support.adoc @@ -0,0 +1,49 @@ +:_mod-docs-content-type: ASSEMBLY +[id="cluster-logging-support"] += Cluster logging support +include::_attributes/common-attributes.adoc[] +:context: cluster-logging-support + +toc::[] + +include::snippets/logging-supported-config-snip.adoc[] +include::snippets/logging-compatibility-snip.adoc[] +include::snippets/logging-loki-statement-snip.adoc[] + +{logging-uc} {for} is an opinionated collector and normalizer of application, infrastructure, and audit logs. It is intended to be used for forwarding logs to various supported systems. 
+
+{logging-uc} is not:
+
+* A high-scale log collection system
+* Security Information and Event Management (SIEM) compliant
+* A "bring your own" (BYO) log collector configuration
+* Historical or long-term log retention or storage
+* A guaranteed log sink
+* Secure storage - audit logs are not stored by default
+
+[id="cluster-logging-support-CRDs_{context}"]
+== Supported API custom resource definitions
+
+The following table describes the supported {logging-uc} APIs.
+
+include::snippets/logging-api-support-states-snip.adoc[]
+
+include::modules/cluster-logging-maintenance-support-list-6x.adoc[leveloffset=+1]
+include::modules/unmanaged-operators.adoc[leveloffset=+1]
+
+//[id="support-exception-for-coo-logging-ui-plugin_{context}"]
+//== Support exception for the Logging UI Plugin
+//
+//Until the approaching General Availability (GA) release of the Cluster Observability Operator (COO), which is currently in link:https://access.redhat.com/support/offerings/techpreview/[Technology Preview] (TP), Red{nbsp}Hat provides support to customers who are using Logging 6.0 or later with the COO for its Logging UI Plugin on {ocp-product-title} 4.14 or later. This support exception is temporary as the COO includes several independent features, some of which are still TP features, but the Logging UI Plugin is ready for GA.
+//
+
+[id="cluster-logging-support-must-gather_{context}"]
+== Collecting {logging} data for Red Hat Support
+
+When opening a support case, it is helpful to provide debugging information about your cluster to Red{nbsp}Hat Support.
+
+You can use the link:https://docs.openshift.com/container-platform/latest/support/gathering-cluster-data.html#gathering-cluster-data[must-gather tool] to collect diagnostic information for project-level resources, cluster-level resources, and each of the {logging} components.
+For prompt support, supply diagnostic information for both {ocp-product-title} and {logging}.
+
+include::modules/cluster-logging-must-gather-about.adoc[leveloffset=+2]
+include::modules/cluster-logging-must-gather-collecting.adoc[leveloffset=+2]
diff --git a/about/log6x-about.adoc b/about/log6x-about.adoc
deleted file mode 100644
index badd87ab2326..000000000000
--- a/about/log6x-about.adoc
+++ /dev/null
@@ -1,65 +0,0 @@
-:_mod-docs-content-type: ASSEMBLY
-include::_attributes/common-attributes.adoc[]
-[id="log6x-about-6-1"]
-= Logging 6.1
-:context: logging-6x-6.1
-
-toc::[]
-
-The `ClusterLogForwarder` custom resource (CR) is the central configuration point for log collection and forwarding.
-
-[id="inputs-and-outputs_6-1_{context}"]
-== Inputs and outputs
-
-Inputs specify the sources of logs to be forwarded. Logging provides the following built-in input types that select logs from different parts of your cluster:
-
-* `application`
-* `receiver`
-* `infrastructure`
-* `audit`
-
-You can also define custom inputs based on namespaces or pod labels to fine-tune log selection.
-
-Outputs define the destinations where logs are sent. Each output type has its own set of configuration options, allowing you to customize the behavior and authentication settings.
-
-[id="receiver-input-type_6-1_{context}"]
-== Receiver input type
-The receiver input type enables the Logging system to accept logs from external sources. It supports two formats for receiving logs: `http` and `syslog`.
-
-The `ReceiverSpec` field defines the configuration for a receiver input.
-
-[id="pipelines-and-filters_6-1_{context}"]
-== Pipelines and filters
-
-Pipelines determine the flow of logs from inputs to outputs. A pipeline consists of one or more input refs, output refs, and optional filter refs. You can use filters to transform or drop log messages within a pipeline. The order of filters matters, as they are applied sequentially, and earlier filters can prevent log messages from reaching later stages.
-
-[id="operator-behavior_6-1_{context}"]
-== Operator behavior
-
-The Cluster Logging Operator manages the deployment and configuration of the collector based on the `managementState` field of the `ClusterLogForwarder` resource:
-
-- When set to `Managed` (default), the Operator actively manages the logging resources to match the configuration defined in the spec.
-- When set to `Unmanaged`, the Operator does not take any action, allowing you to manually manage the logging components.
-
-[id="validation_6-1_{context}"]
-== Validation
-Logging includes extensive validation rules and default values to ensure a smooth and error-free configuration experience. The `ClusterLogForwarder` resource enforces validation checks on required fields, dependencies between fields, and the format of input values. Default values are provided for certain fields, reducing the need for explicit configuration in common scenarios.
-
-[id="quick-start_6-1_{context}"]
-== Quick start
-
-OpenShift Logging supports two data models:
-
-* ViaQ (General Availability)
-* OpenTelemetry (Technology Preview)
-
-You can select either of these data models based on your requirement by configuring the `lokiStack.dataModel` field in the `ClusterLogForwarder`. ViaQ is the default data model when forwarding logs to LokiStack.
-
-[NOTE]
-====
-In future releases of OpenShift Logging, the default data model will change from ViaQ to OpenTelemetry.
-====
-
-include::modules/log6x-quickstart-viaq.adoc[leveloffset=+2]
-
-include::modules/log6x-quickstart-opentelemetry.adoc[leveloffset=+2]
\ No newline at end of file
diff --git a/about/logging-visualization.adoc b/about/logging-visualization.adoc
new file mode 100644
index 000000000000..f2d167034be9
--- /dev/null
+++ b/about/logging-visualization.adoc
@@ -0,0 +1,9 @@
+:_mod-docs-content-type: ASSEMBLY
+[id="logging-visualization"]
+= Visualization for logging
+include::_attributes/common-attributes.adoc[]
+:context: logging-visualization
+
+toc::[]
+
+Visualization for logging is provided by deploying the link:https://docs.openshift.com/container-platform/latest/observability/cluster_observability_operator/ui_plugins/logging-ui-plugin.html#logging-ui-plugin[Logging UI Plugin] of the link:https://docs.openshift.com/container-platform/latest/observability/cluster_observability_operator/cluster-observability-operator-overview.html#cluster-observability-operator-overview[Cluster Observability Operator], which requires Operator installation.
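+
+For example, after the Cluster Observability Operator is installed, a `UIPlugin` custom resource similar to the following sketch enables the *Log* section in the *Observe* tab of the web console. This resource assumes a `LokiStack` instance named `logging-loki`, matching the quick start example in this documentation:
+
+[source,yaml]
+----
+apiVersion: observability.openshift.io/v1alpha1
+kind: UIPlugin
+metadata:
+  name: logging
+spec:
+  type: Logging
+  logging:
+    lokiStack:
+      name: logging-loki
+----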
diff --git a/configuring/_attributes b/configuring/_attributes new file mode 120000 index 000000000000..f27fd275ea6b --- /dev/null +++ b/configuring/_attributes @@ -0,0 +1 @@ +../_attributes/ \ No newline at end of file diff --git a/configuring/configuring-log-forwarding.adoc b/configuring/configuring-log-forwarding.adoc new file mode 100644 index 000000000000..25b959d43131 --- /dev/null +++ b/configuring/configuring-log-forwarding.adoc @@ -0,0 +1,117 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="configuring-log-forwarding"] += Configuring log forwarding +:context: configuring-log-forwarding + +toc::[] + +The `ClusterLogForwarder` (CLF) allows users to configure forwarding of logs to various destinations. It provides a flexible way to select log messages from different sources, send them through a pipeline that can transform or filter them, and forward them to one or more outputs. + +.Key Functions of the ClusterLogForwarder +* Selects log messages using inputs +* Forwards logs to external destinations using outputs +* Filters, transforms, and drops log messages using filters +* Defines log forwarding pipelines connecting inputs, filters and outputs + +// need to verify if this is relevant still. +//include::modules/log6x-config-roles.adoc[leveloffset=+1] + +include::modules/setting-up-log-collection.adoc[leveloffset=+1] + +// OBSDOCS-1104 +== Modifying log level in collector + +To modify the log level in the collector, you can set the `observability.openshift.io/log-level` annotation to `trace`, `debug`, `info`, `warn`, `error`, and `off`. + +.Example log level annotation +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +metadata: + name: collector + annotations: + observability.openshift.io/log-level: debug +# ... +---- + +== Managing the Operator + +The `ClusterLogForwarder` resource has a `managementState` field that controls whether the operator actively manages its resources or leaves them Unmanaged: + +Managed:: (default) The operator will drive the logging resources to match the desired state in the CLF spec. + +Unmanaged:: The operator will not take any action related to the logging components. + +This allows administrators to temporarily pause log forwarding by setting `managementState` to `Unmanaged`. + +== Structure of the ClusterLogForwarder + +The CLF has a `spec` section that contains the following key components: + +Inputs:: Select log messages to be forwarded. Built-in input types `application`, `infrastructure` and `audit` forward logs from different parts of the cluster. You can also define custom inputs. + +Outputs:: Define destinations to forward logs to. Each output has a unique name and type-specific configuration. + +Pipelines:: Define the path logs take from inputs, through filters, to outputs. Pipelines have a unique name and consist of a list of input, output and filter names. + +Filters:: Transform or drop log messages in the pipeline. Users can define filters that match certain log fields and drop or modify the messages. Filters are applied in the order specified in the pipeline. + +=== Inputs + +Inputs are configured in an array under `spec.inputs`. There are three built-in input types: + +application:: Selects logs from all application containers, excluding those in infrastructure namespaces. 
+ +infrastructure:: Selects logs from nodes and from infrastructure components running in the following namespaces: +** `default` +** `kube` +** `openshift` +** Containing the `kube-` or `openshift-` prefix + +audit:: Selects logs from the OpenShift API server audit logs, Kubernetes API server audit logs, ovn audit logs, and node audit logs from auditd. + +Users can define custom inputs of type `application` that select logs from specific namespaces or using pod labels. + +=== Outputs + +Outputs are configured in an array under `spec.outputs`. Each output must have a unique name and a type. Supported types are: + +azureMonitor:: Forwards logs to Azure Monitor. +cloudwatch:: Forwards logs to AWS CloudWatch. +//elasticsearch:: Forwards logs to an external Elasticsearch instance. +googleCloudLogging:: Forwards logs to Google Cloud Logging. +http:: Forwards logs to a generic HTTP endpoint. +kafka:: Forwards logs to a Kafka broker. +loki:: Forwards logs to a Loki logging backend. +lokistack:: Forwards logs to the logging supported combination of Loki and web proxy with {ocp-product-title} authentication integration. LokiStack's proxy uses {ocp-product-title} authentication to enforce multi-tenancy +otlp:: Forwards logs using the OpenTelemetry Protocol. +splunk:: Forwards logs to Splunk. +syslog:: Forwards logs to an external syslog server. + +Each output type has its own configuration fields. + +=== Pipelines + +Pipelines are configured in an array under `spec.pipelines`. Each pipeline must have a unique name and consists of: + +inputRefs:: Names of inputs whose logs should be forwarded to this pipeline. +outputRefs:: Names of outputs to send logs to. +filterRefs:: (optional) Names of filters to apply. + +The order of filterRefs matters, as they are applied sequentially. Earlier filters can drop messages that will not be processed by later filters. + +=== Filters + +Filters are configured in an array under `spec.filters`. They can match incoming log messages based on the value of structured fields and modify or drop them. + +Administrators can configure the following types of filters: + +include::modules/enabling-multi-line-exception-detection.adoc[leveloffset=+2] +include::modules/content-filter-drop-records.adoc[leveloffset=+2] +include::modules/audit-filtering.adoc[leveloffset=+2] +include::modules/input-spec-filter-labels-expressions.adoc[leveloffset=+2] +include::modules/content-filter-prune-records.adoc[leveloffset=+2] +include::modules/input-spec-filter-audit-infrastructure.adoc[leveloffset=+1] +include::modules/input-spec-filter-namespace-container.adoc[leveloffset=+1] diff --git a/configuring/configuring-lokistack-storage.adoc b/configuring/configuring-lokistack-storage.adoc new file mode 100644 index 000000000000..b694a720a09c --- /dev/null +++ b/configuring/configuring-lokistack-storage.adoc @@ -0,0 +1,44 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="configuring-lokistack-storage"] += Configuring LokiStack storage +:context: configuring-lokistack-storage + +toc::[] + +You can configure a `LokiStack` CR to store application, audit, and infrastructure-related logs. + +include::snippets/loki-statement-snip.adoc[leveloffset=+1] + +[id="prerequisites_{context}"] +== Prerequisites + +* You have installed the {loki-op} by using the CLI or web console. +* You have a `serviceAccount` in the same namespace in which you create the `ClusterLogForwarder`. 
+* The `serviceAccount` is assigned `collect-audit-logs`, `collect-application-logs`, and `collect-infrastructure-logs` cluster roles. + +[id="setup_{context}"] +== Core Setup and Configuration +*Role-based access controls, basic monitoring, and pod placement to deploy Loki.* + +include::modules/loki-rbac-rules-permissions.adoc[leveloffset=+2] +include::modules/enabling-loki-alerts.adoc[leveloffset=+2] +include::modules/loki-memberlist-ip.adoc[leveloffset=+2] +include::modules/loki-retention.adoc[leveloffset=+2] +include::modules/loki-pod-placement.adoc[leveloffset=+2] + +[id="performance_{context}"] +== Enhanced Reliability and Performance +*Configurations to ensure Loki’s reliability and efficiency in production.* + +include::modules/identity-federation.adoc[leveloffset=+2] +include::modules/loki-reliability-hardening.adoc[leveloffset=+2] +include::modules/loki-restart-hardening.adoc[leveloffset=+2] + +[id="advanced_{context}"] +== Advanced Deployment and Scalability +*Specialized configurations for high availability, scalability, and error handling.* + +include::modules/loki-zone-aware-replication.adoc[leveloffset=+2] +include::modules/loki-zone-fail-recovery.adoc[leveloffset=+2] +include::modules/loki-rate-limit-errors.adoc[leveloffset=+2] diff --git a/configuring/images b/configuring/images new file mode 120000 index 000000000000..e4c5bd02a10a --- /dev/null +++ b/configuring/images @@ -0,0 +1 @@ +../images/ \ No newline at end of file diff --git a/configuring/modules b/configuring/modules new file mode 120000 index 000000000000..43aab75b53c9 --- /dev/null +++ b/configuring/modules @@ -0,0 +1 @@ +../modules/ \ No newline at end of file diff --git a/configuring/snippets b/configuring/snippets new file mode 120000 index 000000000000..9d58b92e5058 --- /dev/null +++ b/configuring/snippets @@ -0,0 +1 @@ +../snippets/ \ No newline at end of file diff --git a/installing/_attributes b/installing/_attributes new file mode 120000 index 000000000000..f27fd275ea6b --- /dev/null +++ b/installing/_attributes @@ -0,0 +1 @@ +../_attributes/ \ No newline at end of file diff --git a/installing/images b/installing/images new file mode 120000 index 000000000000..e4c5bd02a10a --- /dev/null +++ b/installing/images @@ -0,0 +1 @@ +../images/ \ No newline at end of file diff --git a/installing/installing-logging.adoc b/installing/installing-logging.adoc new file mode 100644 index 000000000000..23f9c9221e00 --- /dev/null +++ b/installing/installing-logging.adoc @@ -0,0 +1,53 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="installing-logging"] += Installing Logging +:context: installing-logging + +toc::[] + +{ocp-product-title} Operators use custom resources (CRs) to manage applications and their components. You provide high-level configuration and settings through the CR. The Operator translates high-level directives into low-level actions, based on best practices embedded within the logic of the Operator. A custom resource definition (CRD) defines a CR and lists all the configurations available to users of the Operator. Installing an Operator creates the CRDs to generate CRs. + + +To get started with {logging}, you must install the following Operators: + +* {loki-op} to manage your log store. +* {clo} to manage log collection and forwarding. +* {coo-first} to manage visualization. + +You can use either the {ocp-product-title} web console or the {ocp-product-title} CLI to install or configure {logging}. 
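+
+After you complete the installation procedures in the following sections, one way to confirm the result is to list the installed `ClusterServiceVersion` objects. For example, the following command assumes that the {clo} was installed in the `openshift-logging` namespace, as described in this documentation:
+
+[source,terminal]
+----
+$ oc get csv -n openshift-logging
+----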
+ +[IMPORTANT] +==== +You must configure the {clo} after the {loki-op}. +==== + + + +[id="prerequisites_cluster-logging-deploying_{context}"] +== Prerequisites +* If you are using OKD, you have downloaded the {cluster-manager-url-pull} as shown in "Obtaining the installation program" in the installation documentation for your platform. ++ +If you have the pull secret, add the `redhat-operators` catalog to the `OperatorHub` custom resource (CR) as shown in "Configuring {ocp-product-title} to use Red{nbsp}Hat Operators". + + +[id="installing-loki-and-logging-cli_{context}"] +== Installation by using the CLI + +The following sections describe installing the {loki-op} and the {clo} by using the CLI. + +include::modules/installing-loki-operator-cli.adoc[leveloffset=+2] +include::modules/installing-logging-operator-cli.adoc[leveloffset=+2] + +[id="installing-loki-and-logging-gui_{context}"] +== Installation by using the web console + +The following sections describe installing the {loki-op} and the {clo} by using the web console. + +include::modules/installing-loki-operator-web-console.adoc[leveloffset=+2] +include::modules/installing-logging-operator-web-console.adoc[leveloffset=+2] + +[role="_additional-resources"] +.Additional resources + +* link:https://docs.openshift.com/container-platform/latest/networking/ovn_kubernetes_network_provider/about-ovn-kubernetes.html[About the OVN-Kubernetes network policy] diff --git a/installing/modules b/installing/modules new file mode 120000 index 000000000000..43aab75b53c9 --- /dev/null +++ b/installing/modules @@ -0,0 +1 @@ +../modules/ \ No newline at end of file diff --git a/installing/snippets b/installing/snippets new file mode 120000 index 000000000000..9d58b92e5058 --- /dev/null +++ b/installing/snippets @@ -0,0 +1 @@ +../snippets/ \ No newline at end of file diff --git a/modules/audit-filtering.adoc b/modules/audit-filtering.adoc new file mode 100644 index 000000000000..dcb4b9a77dc8 --- /dev/null +++ b/modules/audit-filtering.adoc @@ -0,0 +1,115 @@ +:_mod-docs-content-type: CONCEPT +[id="audit-filtering_{context}"] += Overview of API audit filter + +OpenShift API servers generate audit events for each API call, detailing the request, response, and the identity of the requester, leading to large volumes of data. The API Audit filter uses rules to enable the exclusion of non-essential events and the reduction of event size, facilitating a more manageable audit trail. Rules are checked in order, and checking stops at the first match. The amount of data that is included in an event is determined by the value of the `level` field: + +* `None`: The event is dropped. +* `Metadata`: Audit metadata is included, request and response bodies are removed. +* `Request`: Audit metadata and the request body are included, the response body is removed. +* `RequestResponse`: All data is included: metadata, request body and response body. The response body can be very large. For example, `oc get pods -A` generates a response body containing the YAML description of every pod in the cluster. + +The `ClusterLogForwarder` custom resource (CR) uses the same format as the standard link:https://kubernetes.io/docs/tasks/debug/debug-cluster/audit/#audit-policy[Kubernetes audit policy], while providing the following additional functions: + +Wildcards:: Names of users, groups, namespaces, and resources can have a leading or trailing `\*` asterisk character. For example, the namespace `openshift-\*` matches `openshift-apiserver` or `openshift-authentication`. 
Resource `\*/status` matches `Pod/status` or `Deployment/status`. + +Default Rules:: Events that do not match any rule in the policy are filtered as follows: +* Read-only system events such as `get`, `list`, and `watch` are dropped. +* Service account write events that occur within the same namespace as the service account are dropped. +* All other events are forwarded, subject to any configured rate limits. + +To disable these defaults, either end your rules list with a rule that has only a `level` field or add an empty rule. + +Omit Response Codes:: A list of integer status codes to omit. You can drop events based on the HTTP status code in the response by using the `OmitResponseCodes` field, which lists HTTP status codes for which no events are created. The default value is `[404, 409, 422, 429]`. If the value is an empty list, `[]`, then no status codes are omitted. + +The `ClusterLogForwarder` CR audit policy acts in addition to the {ocp-product-title} audit policy. The `ClusterLogForwarder` CR audit filter changes what the log collector forwards and provides the ability to filter by verb, user, group, namespace, or resource. You can create multiple filters to send different summaries of the same audit stream to different places. For example, you can send a detailed stream to the local cluster log store and a less detailed stream to a remote site. + +[NOTE] +==== +You must have a cluster role `collect-audit-logs` to collect the audit logs. The following example provided is intended to illustrate the range of rules possible in an audit policy and is not a recommended configuration. +==== + +.Example audit policy +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +metadata: + name: + namespace: +spec: + serviceAccount: + name: + pipelines: + - name: my-pipeline + inputRefs: audit # <1> + filterRefs: my-policy # <2> + filters: + - name: my-policy + type: kubeAPIAudit + kubeAPIAudit: + # Don't generate audit events for all requests in RequestReceived stage. + omitStages: + - "RequestReceived" + + rules: + # Log pod changes at RequestResponse level + - level: RequestResponse + resources: + - group: "" + resources: ["pods"] + + # Log "pods/log", "pods/status" at Metadata level + - level: Metadata + resources: + - group: "" + resources: ["pods/log", "pods/status"] + + # Don't log requests to a configmap called "controller-leader" + - level: None + resources: + - group: "" + resources: ["configmaps"] + resourceNames: ["controller-leader"] + + # Don't log watch requests by the "system:kube-proxy" on endpoints or services + - level: None + users: ["system:kube-proxy"] + verbs: ["watch"] + resources: + - group: "" # core API group + resources: ["endpoints", "services"] + + # Don't log authenticated requests to certain non-resource URL paths. + - level: None + userGroups: ["system:authenticated"] + nonResourceURLs: + - "/api*" # Wildcard matching. + - "/version" + + # Log the request body of configmap changes in kube-system. + - level: Request + resources: + - group: "" # core API group + resources: ["configmaps"] + # This rule only applies to resources in the "kube-system" namespace. + # The empty string "" can be used to select non-namespaced resources. + namespaces: ["kube-system"] + + # Log configmap and secret changes in all other namespaces at the Metadata level. + - level: Metadata + resources: + - group: "" # core API group + resources: ["secrets", "configmaps"] + + # Log all other resources in core and extensions at the Request level. 
+ - level: Request + resources: + - group: "" # core API group + - group: "extensions" # Version of group should NOT be included. + + # A catch-all rule to log all other requests at the Metadata level. + - level: Metadata +---- +<1> The log types that are collected. The value for this field can be `audit` for audit logs, `application` for application logs, `infrastructure` for infrastructure logs, or a named input that has been defined for your application. +<2> The name of your audit policy. diff --git a/modules/cluster-logging-maintenance-support-list-6x.adoc b/modules/cluster-logging-maintenance-support-list-6x.adoc new file mode 100644 index 000000000000..5e68fe88b194 --- /dev/null +++ b/modules/cluster-logging-maintenance-support-list-6x.adoc @@ -0,0 +1,15 @@ +:_mod-docs-content-type: REFERENCE +[id="cluster-logging-maintenance-support-list_{context}"] += Unsupported configurations + +You must set the Red{nbsp}Hat OpenShift Logging Operator to the `Unmanaged` state to modify the following components: + +* The collector configuration file + +* The collector daemonset + +Explicitly unsupported cases include: + +* *Configuring the logging collector using environment variables*. You cannot use environment variables to modify the log collector. + +* *Configuring how the log collector normalizes logs*. You cannot modify default log normalization. diff --git a/modules/cluster-logging-must-gather-about.adoc b/modules/cluster-logging-must-gather-about.adoc new file mode 100644 index 000000000000..ce0c605b83bd --- /dev/null +++ b/modules/cluster-logging-must-gather-about.adoc @@ -0,0 +1,13 @@ +:_mod-docs-content-type: CONCEPT +[id="about-must-gather_{context}"] += About the must-gather tool + +The `oc adm must-gather` CLI command collects the information from your cluster that is most likely needed for debugging issues. + +For your {logging}, `must-gather` collects the following information: + +* Project-level resources, including pods, configuration maps, service accounts, roles, role bindings, and events at the project level +* Cluster-level resources, including nodes, roles, and role bindings at the cluster level +* OpenShift Logging resources in the `openshift-logging` and `openshift-operators-redhat` namespaces, including health status for the log collector, the log store, and the log visualizer + +When you run `oc adm must-gather`, a new pod is created on the cluster. The data is collected on that pod and saved in a new directory that starts with `must-gather.local`. This directory is created in the current working directory. diff --git a/modules/cluster-logging-must-gather-collecting.adoc b/modules/cluster-logging-must-gather-collecting.adoc new file mode 100644 index 000000000000..8dd90b62d445 --- /dev/null +++ b/modules/cluster-logging-must-gather-collecting.adoc @@ -0,0 +1,39 @@ +:_mod-docs-content-type: PROCEDURE +[id="cluster-logging-must-gather-collecting_{context}"] += Collecting {logging} data + +You can use the `oc adm must-gather` CLI command to collect information about {logging}. + +.Procedure + +To collect {logging} information with `must-gather`: + +. Navigate to the directory where you want to store the `must-gather` information. + +. 
Run the `oc adm must-gather` command against the {logging} image: ++ +If you are using OKD: ++ +[source,terminal] +---- +$ oc adm must-gather --image=quay.io/openshift/origin-cluster-logging-operator +---- ++ +Otherwise: ++ +[source,terminal] +---- +$ oc adm must-gather --image=$(oc -n openshift-logging get deployment.apps/cluster-logging-operator -o jsonpath='{.spec.template.spec.containers[?(@.name == "cluster-logging-operator")].image}') +---- ++ +The `must-gather` tool creates a new directory that starts with `must-gather.local` within the current directory. For example: +`must-gather.local.4157245944708210408`. + +. Create a compressed file from the `must-gather` directory that was just created. For example, on a computer that uses a Linux operating system, run the following command: ++ +[source,terminal] +---- +$ tar -cvaf must-gather.tar.gz must-gather.local.4157245944708210408 +---- + +. Attach the compressed file to your support case on the link:https://access.redhat.com/[Red Hat Customer Portal]. diff --git a/modules/content-filter-drop-records.adoc b/modules/content-filter-drop-records.adoc new file mode 100644 index 000000000000..bce7bc1a53dd --- /dev/null +++ b/modules/content-filter-drop-records.adoc @@ -0,0 +1,104 @@ +:_mod-docs-content-type: PROCEDURE +[id="content-filter-drop-records_{context}"] += Configuring content filters to drop unwanted log records + +When the `drop` filter is configured, the log collector evaluates log streams according to the filters before forwarding. The collector drops unwanted log records that match the specified configuration. + +.Procedure + +. Add a configuration for a filter to the `filters` spec in the `ClusterLogForwarder` CR. ++ +The following example shows how to configure the `ClusterLogForwarder` CR to drop log records based on regular expressions: ++ +.Example `ClusterLogForwarder` CR +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +metadata: +# ... +spec: + serviceAccount: + name: + filters: + - name: + type: drop # <1> + drop: # <2> + - test: # <3> + - field: .kubernetes.labels."foo-bar/baz" # <4> + matches: .+ # <5> + - field: .kubernetes.pod_name + notMatches: "my-pod" # <6> + pipelines: + - name: # <7> + filterRefs: [""] +# ... +---- +<1> Specifies the type of filter. The `drop` filter drops log records that match the filter configuration. +<2> Specifies configuration options for applying the `drop` filter. +<3> Specifies the configuration for tests that are used to evaluate whether a log record is dropped. +** If all the conditions specified for a test are true, the test passes and the log record is dropped. +** When multiple tests are specified for the `drop` filter configuration, if any of the tests pass, the record is dropped. +** If there is an error evaluating a condition, for example, the field is missing from the log record being evaluated, that condition evaluates to false. +<4> Specifies a dot-delimited field path, which is a path to a field in the log record. The path can contain alpha-numeric characters and underscores (`a-zA-Z0-9_`), for example, `.kubernetes.namespace_name`. If segments contain characters outside of this range, the segment must be in quotes, for example, `.kubernetes.labels."foo.bar-bar/baz"`. You can include multiple field paths in a single `test` configuration, but they must all evaluate to true for the test to pass and the `drop` filter to be applied. +<5> Specifies a regular expression. If log records match this regular expression, they are dropped. 
You can set either the `matches` or `notMatches` condition for a single `field` path, but not both. +<6> Specifies a regular expression. If log records do not match this regular expression, they are dropped. You can set either the `matches` or `notMatches` condition for a single `field` path, but not both. +<7> Specifies the pipeline that the `drop` filter is applied to. + +. Apply the `ClusterLogForwarder` CR by running the following command: ++ +[source,terminal] +---- +$ oc apply -f .yaml +---- + +.Additional examples + +The following additional example shows how you can configure the `drop` filter to only keep higher priority log records: + +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +metadata: +# ... +spec: + serviceAccount: + name: + filters: + - name: important + type: drop + drop: + - test: + - field: .message + notMatches: "(?i)critical|error" + - field: .level + matches: "info|warning" +# ... +---- + +In addition to including multiple field paths in a single `test` configuration, you can also include additional tests that are treated as _OR_ checks. In the following example, records are dropped if either `test` configuration evaluates to true. However, for the second `test` configuration, both field specs must be true for it to be evaluated to true: + +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +metadata: +# ... +spec: + serviceAccount: + name: + filters: + - name: important + type: drop + drop: + - test: + - field: .kubernetes.namespace_name + matches: "^open" + - test: + - field: .log_type + matches: "application" + - field: .kubernetes.pod_name + notMatches: "my-pod" +# ... +---- diff --git a/modules/content-filter-prune-records.adoc b/modules/content-filter-prune-records.adoc new file mode 100644 index 000000000000..9d84c6f402cb --- /dev/null +++ b/modules/content-filter-prune-records.adoc @@ -0,0 +1,55 @@ +:_mod-docs-content-type: PROCEDURE +[id="content-filter-prune-records_{context}"] += Configuring content filters to prune log records + +When the `prune` filter is configured, the log collector evaluates log streams according to the filters before forwarding. The collector prunes log records by removing low value fields such as pod annotations. + +.Procedure + +. Add a configuration for a filter to the `prune` spec in the `ClusterLogForwarder` CR. ++ +The following example shows how to configure the `ClusterLogForwarder` CR to prune log records based on field paths: ++ +[IMPORTANT] +==== +If both are specified, records are pruned based on the `notIn` array first, which takes precedence over the `in` array. After records have been pruned by using the `notIn` array, they are then pruned by using the `in` array. +==== ++ +.Example `ClusterLogForwarder` CR +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +metadata: +# ... +spec: + serviceAccount: + name: + filters: + - name: + type: prune # <1> + prune: # <2> + in: [.kubernetes.annotations, .kubernetes.namespace_id] # <3> + notIn: [.kubernetes,.log_type,.message,."@timestamp"] # <4> + pipelines: + - name: # <5> + filterRefs: [""] +# ... +---- +<1> Specify the type of filter. The `prune` filter prunes log records by configured fields. +<2> Specify configuration options for applying the `prune` filter. The `in` and `notIn` fields are specified as arrays of dot-delimited field paths, which are paths to fields in log records. 
These paths can contain alpha-numeric characters and underscores (`a-zA-Z0-9_`), for example, `.kubernetes.namespace_name`. If segments contain characters outside of this range, the segment must be in quotes, for example, `.kubernetes.labels."foo.bar-bar/baz"`.
+<3> Optional: Any fields that are specified in this array are removed from the log record.
+<4> Optional: Any fields that are not specified in this array are removed from the log record.
+<5> Specify the pipeline that the `prune` filter is applied to.
++
+[NOTE]
+====
+The filter exempts the `.log_type`, `.log_source`, and `.message` fields.
+====
+
+. Apply the `ClusterLogForwarder` CR by running the following command:
++
+[source,terminal]
+----
+$ oc apply -f .yaml
+----
diff --git a/modules/enabling-loki-alerts.adoc b/modules/enabling-loki-alerts.adoc
new file mode 100644
index 000000000000..ba51e0332891
--- /dev/null
+++ b/modules/enabling-loki-alerts.adoc
@@ -0,0 +1,101 @@
+:_mod-docs-content-type: PROCEDURE
+[id="logging-enabling-loki-alerts_{context}"]
+= Creating a log-based alerting rule with Loki
+
+The `AlertingRule` CR contains a set of specifications and webhook validation definitions to declare groups of alerting rules for a single `LokiStack` instance. In addition, the webhook validation definition provides support for rule validation conditions:
+
+* If an `AlertingRule` CR includes an invalid `interval` period, it is an invalid alerting rule.
+* If an `AlertingRule` CR includes an invalid `for` period, it is an invalid alerting rule.
+* If an `AlertingRule` CR includes an invalid LogQL `expr`, it is an invalid alerting rule.
+* If an `AlertingRule` CR includes two groups with the same name, it is an invalid alerting rule.
+* If none of the above applies, an alerting rule is considered valid.
+
+.AlertingRule definitions
+[options="header"]
+|===
+| Tenant type | Valid namespaces for `AlertingRule` CRs
+| application a| ``
+| audit a| `openshift-logging`
+| infrastructure a| `openshift-\*`, `kube-\*`, `default`
+|===
+
+.Procedure
+
+. Create an `AlertingRule` custom resource (CR):
++
+
+.Example infrastructure `AlertingRule` CR
+[source,yaml]
+----
+  apiVersion: loki.grafana.com/v1
+  kind: AlertingRule
+  metadata:
+    name: loki-operator-alerts
+    namespace: openshift-operators-redhat <1>
+    labels: <2>
+      openshift.io/: "true"
+  spec:
+    tenantID: "infrastructure" <3>
+    groups:
+      - name: LokiOperatorHighReconciliationError
+        rules:
+          - alert: HighPercentageError
+            expr: | <4>
+              sum(rate({kubernetes_namespace_name="openshift-operators-redhat", kubernetes_pod_name=~"loki-operator-controller-manager.*"} |= "error" [1m])) by (job)
+                /
+              sum(rate({kubernetes_namespace_name="openshift-operators-redhat", kubernetes_pod_name=~"loki-operator-controller-manager.*"}[1m])) by (job)
+                > 0.01
+            for: 10s
+            labels:
+              severity: critical <5>
+            annotations:
+              summary: High Loki Operator Reconciliation Errors <6>
+              description: High Loki Operator Reconciliation Errors <7>
+----
+<1> The namespace where this `AlertingRule` CR is created must have a label matching the LokiStack `spec.rules.namespaceSelector` definition.
+<2> The `labels` block must match the LokiStack `spec.rules.selector` definition.
+<3> `AlertingRule` CRs for `infrastructure` tenants are only supported in the `openshift-\*`, `kube-\*`, or `default` namespaces.
+<4> The value for `kubernetes_namespace_name:` must match the value for `metadata.namespace`.
+<5> The value of this mandatory field must be `critical`, `warning`, or `info`.
+<6> This field is mandatory.
+<7> This field is mandatory. + ++ +.Example application `AlertingRule` CR +[source,yaml] +---- + apiVersion: loki.grafana.com/v1 + kind: AlertingRule + metadata: + name: app-user-workload + namespace: app-ns <1> + labels: <2> + openshift.io/: "true" + spec: + tenantID: "application" + groups: + - name: AppUserWorkloadHighError + rules: + - alert: + expr: | <3> + sum(rate({kubernetes_namespace_name="app-ns", kubernetes_pod_name=~"podName.*"} |= "error" [1m])) by (job) + for: 10s + labels: + severity: critical <4> + annotations: + summary: <5> + description: <6> +---- +<1> The namespace where this `AlertingRule` CR is created must have a label matching the LokiStack `spec.rules.namespaceSelector` definition. +<2> The `labels` block must match the LokiStack `spec.rules.selector` definition. +<3> Value for `kubernetes_namespace_name:` must match the value for `metadata.namespace`. +<4> The value of this mandatory field must be `critical`, `warning`, or `info`. +<5> The value of this mandatory field is a summary of the rule. +<6> The value of this mandatory field is a detailed description of the rule. + +. Apply the `AlertingRule` CR: ++ +[source,terminal] +---- +$ oc apply -f .yaml +---- diff --git a/modules/enabling-multi-line-exception-detection.adoc b/modules/enabling-multi-line-exception-detection.adoc new file mode 100644 index 000000000000..8c8d2184846c --- /dev/null +++ b/modules/enabling-multi-line-exception-detection.adoc @@ -0,0 +1,60 @@ +:_mod-docs-content-type: PROCEDURE +[id="enabling-multi-line-exception-detection_{context}"] += Enabling multi-line exception detection + +Enables multi-line error detection of container logs. + +[WARNING] +==== +Enabling this feature could have performance implications and may require additional computing resources or alternate logging solutions. +==== + +Log parsers often incorrectly identify separate lines of the same exception as separate exceptions. This leads to extra log entries and an incomplete or inaccurate view of the traced information. + +.Example java exception +[source,java] +---- +java.lang.NullPointerException: Cannot invoke "String.toString()" because "" is null + at testjava.Main.handle(Main.java:47) + at testjava.Main.printMe(Main.java:19) + at testjava.Main.main(Main.java:10) +---- + +* To enable logging to detect multi-line exceptions and reassemble them into a single log entry, ensure that the `ClusterLogForwarder` Custom Resource (CR) contains a `detectMultilineErrors` field under the `.spec.filters`. + +.Example ClusterLogForwarder CR +[source,yaml] +---- +apiVersion: "observability.openshift.io/v1" +kind: ClusterLogForwarder +metadata: + name: + namespace: +spec: + serviceAccount: + name: + filters: + - name: + type: detectMultilineException + pipelines: + - inputRefs: + - + name: + filterRefs: + - + outputRefs: + - +---- + +== Details +When log messages appear as a consecutive sequence forming an exception stack trace, they are combined into a single, unified log record. The first log message's content is replaced with the concatenated content of all the message fields in the sequence. 
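+
+For example, with this filter enabled, the Java exception shown earlier arrives as one record instead of four. The following sketch is illustrative only; the exact field names and layout of the forwarded record depend on the configured output:
+
+[source,yaml]
+----
+# Illustrative reassembled record: the message field carries the whole
+# stack trace that was previously split across four separate records.
+message: |-
+  java.lang.NullPointerException: Cannot invoke "String.toString()" because "" is null
+      at testjava.Main.handle(Main.java:47)
+      at testjava.Main.printMe(Main.java:19)
+      at testjava.Main.main(Main.java:10)
+----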
+ +The collector supports the following languages: + +* Java +* JS +* Ruby +* Python +* Golang +* PHP +* Dart diff --git a/modules/identity-federation.adoc b/modules/identity-federation.adoc new file mode 100644 index 000000000000..b908dff04573 --- /dev/null +++ b/modules/identity-federation.adoc @@ -0,0 +1,59 @@ +:_mod-docs-content-type: PROCEDURE +[id="identity-federation_{context}"] += Enabling authentication to cloud-based log stores using short-lived tokens + +Workload identity federation enables authentication to cloud-based log stores using short-lived tokens. + +.Procedure + +* Use one of the following options to enable authentication: + +** If you use the {ocp-product-title} web console to install the {loki-op}, clusters that use short-lived tokens are automatically detected. You are prompted to create roles and supply the data required for the {loki-op} to create a `CredentialsRequest` object, which populates a secret. + +** If you use the {oc-first} to install the {loki-op}, you must manually create a `Subscription` object using the appropriate template for your storage provider, as shown in the following examples. This authentication strategy is only supported for the storage providers indicated. ++ +.Example Azure sample subscription +[source,yaml] +---- +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + name: loki-operator + namespace: openshift-operators-redhat +spec: + channel: "stable-6.0" + installPlanApproval: Manual + name: loki-operator + source: redhat-operators + sourceNamespace: openshift-marketplace + config: + env: + - name: CLIENTID + value: + - name: TENANTID + value: + - name: SUBSCRIPTIONID + value: + - name: REGION + value: +---- ++ +.Example AWS sample subscription +[source,yaml] +---- +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + name: loki-operator + namespace: openshift-operators-redhat +spec: + channel: "stable-6.0" + installPlanApproval: Manual + name: loki-operator + source: redhat-operators + sourceNamespace: openshift-marketplace + config: + env: + - name: ROLEARN + value: +---- diff --git a/modules/input-spec-filter-audit-infrastructure.adoc b/modules/input-spec-filter-audit-infrastructure.adoc new file mode 100644 index 000000000000..982eaaecdb78 --- /dev/null +++ b/modules/input-spec-filter-audit-infrastructure.adoc @@ -0,0 +1,54 @@ +:_mod-docs-content-type: PROCEDURE +[id="input-spec-filter-audit-infrastructure_{context}"] += Filtering the audit and infrastructure log inputs by source + +You can define the list of `audit` and `infrastructure` sources to collect the logs by using the `input` selector. + +.Procedure + +. Add a configuration to define the `audit` and `infrastructure` sources in the `ClusterLogForwarder` CR. + ++ +The following example shows how to configure the `ClusterLogForwarder` CR to define `audit` and `infrastructure` sources: ++ +.Example `ClusterLogForwarder` CR ++ +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +# ... +spec: + serviceAccount: + name: + inputs: + - name: mylogs1 + type: infrastructure + infrastructure: + sources: # <1> + - node + - name: mylogs2 + type: audit + audit: + sources: # <2> + - kubeAPI + - openshiftAPI + - ovn +# ... +---- +<1> Specifies the list of infrastructure sources to collect. The valid sources include: +** `node`: Journal log from the node +** `container`: Logs from the workloads deployed in the namespaces +<2> Specifies the list of audit sources to collect. 
The valid sources include: +** `kubeAPI`: Logs from the Kubernetes API servers +** `openshiftAPI`: Logs from the OpenShift API servers +** `auditd`: Logs from a node auditd service +** `ovn`: Logs from an open virtual network service + +. Apply the `ClusterLogForwarder` CR by running the following command: + ++ +[source,terminal] +---- +$ oc apply -f .yaml +---- diff --git a/modules/input-spec-filter-labels-expressions.adoc b/modules/input-spec-filter-labels-expressions.adoc new file mode 100644 index 000000000000..af536d3cf401 --- /dev/null +++ b/modules/input-spec-filter-labels-expressions.adoc @@ -0,0 +1,54 @@ +// Module included in the following assemblies: +// +// * observability/logging/logging-6.0/log6x-clf.adoc + +:_mod-docs-content-type: PROCEDURE +[id="input-spec-filter-labels-expressions_{context}"] += Filtering application logs at input by including the label expressions or a matching label key and values + +You can include the application logs based on the label expressions or a matching label key and its values by using the `input` selector. + +.Procedure + +. Add a configuration for a filter to the `input` spec in the `ClusterLogForwarder` CR. ++ +The following example shows how to configure the `ClusterLogForwarder` CR to include logs based on label expressions or matched label key/values: ++ +.Example `ClusterLogForwarder` CR +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +# ... +spec: + serviceAccount: + name: + inputs: + - name: mylogs + application: + selector: + matchExpressions: + - key: env # <1> + operator: In # <2> + values: ["prod", "qa"] # <3> + - key: zone + operator: NotIn + values: ["east", "west"] + matchLabels: # <4> + app: one + name: app1 + type: application +# ... +---- +<1> Specifies the label key to match. +<2> Specifies the operator. Valid values include: `In`, `NotIn`, `Exists`, and `DoesNotExist`. +<3> Specifies an array of string values. If the `operator` value is either `Exists` or `DoesNotExist`, the value array must be empty. +<4> Specifies an exact key or value mapping. + +. Apply the `ClusterLogForwarder` CR by running the following command: + ++ +[source,terminal] +---- +$ oc apply -f .yaml +---- diff --git a/modules/input-spec-filter-namespace-container.adoc b/modules/input-spec-filter-namespace-container.adoc new file mode 100644 index 000000000000..71a0d28e3594 --- /dev/null +++ b/modules/input-spec-filter-namespace-container.adoc @@ -0,0 +1,49 @@ +:_mod-docs-content-type: PROCEDURE +[id="input-spec-filter-namespace-container_{context}"] += Filtering application logs at input by including or excluding the namespace or container name + +You can include or exclude the application logs based on the namespace and container name by using the `input` selector. + +.Procedure + +. Add a configuration to include or exclude the namespace and container names in the `ClusterLogForwarder` CR. ++ +The following example shows how to configure the `ClusterLogForwarder` CR to include or exclude namespaces and container names: ++ +.Example `ClusterLogForwarder` CR +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +# ... +spec: + serviceAccount: + name: + inputs: + - name: mylogs + application: + includes: + - namespace: "my-project" # <1> + container: "my-container" # <2> + excludes: + - container: "other-container*" # <3> + namespace: "other-namespace" # <4> + type: application +# ... +---- +<1> Specifies that the logs are only collected from these namespaces. 
+<2> Specifies that the logs are only collected from these containers.
+<3> Specifies the pattern of containers to ignore when collecting the logs.
+<4> Specifies the namespaces to ignore when collecting the logs.
++
+[NOTE]
+====
+The `excludes` field takes precedence over the `includes` field.
+====
++
+. Apply the `ClusterLogForwarder` CR by running the following command:
++
+[source,terminal]
+----
+$ oc apply -f .yaml
+----
diff --git a/modules/installing-logging-operator-cli.adoc b/modules/installing-logging-operator-cli.adoc
new file mode 100644
index 000000000000..9a596f4f99f3
--- /dev/null
+++ b/modules/installing-logging-operator-cli.adoc
@@ -0,0 +1,163 @@
+// Module is included in the following assemblies:
+//
+//
+:_mod-docs-content-type: PROCEDURE
+[id="installing-logging-operator-cli_{context}"]
+= Installing {clo} by using the CLI
+
+Install {clo} on your {ocp-product-title} cluster to collect and forward logs to a log store by using the {oc-first}.
+
+.Prerequisites
+
+* You have administrator permissions.
+* You installed the {oc-first}.
+* You installed and configured {loki-op}.
+* You have created the `openshift-logging` namespace.
+
+.Procedure
+
+. Create an `OperatorGroup` object:
++
+.Example `OperatorGroup` object
+[source,yaml]
+----
+apiVersion: operators.coreos.com/v1
+kind: OperatorGroup
+metadata:
+  name: cluster-logging
+  namespace: openshift-logging # <1>
+spec:
+  upgradeStrategy: Default
+----
+<1> You must specify `openshift-logging` as the namespace.
+
+. Apply the `OperatorGroup` object by running the following command:
++
+[source,terminal]
+----
+$ oc apply -f .yaml
+----
+
+. Create a `Subscription` object for {clo}:
++
+.Example `Subscription` object
+[source,yaml]
+----
+apiVersion: operators.coreos.com/v1alpha1
+kind: Subscription
+metadata:
+  name: cluster-logging
+  namespace: openshift-logging # <1>
+spec:
+  channel: stable-6. # <2>
+  installPlanApproval: Automatic # <3>
+  name: cluster-logging
+  source: redhat-operators # <4>
+  sourceNamespace: openshift-marketplace
+----
+<1> You must specify `openshift-logging` as the namespace.
+<2> Specify `stable-6.` as the channel.
+<3> If the approval strategy in the subscription is set to `Automatic`, the update process initiates as soon as a new operator version is available in the selected channel. If the approval strategy is set to `Manual`, you must manually approve pending updates.
+<4> Specify `redhat-operators` as the value. If your {ocp-product-title} cluster is installed on a restricted network, also known as a disconnected cluster, specify the name of the `CatalogSource` object that you created when you configured Operator Lifecycle Manager (OLM).
+
+. Apply the `Subscription` object by running the following command:
++
+[source,terminal]
+----
+$ oc apply -f .yaml
+----
+
+. Create a service account to be used by the log collector:
++
+[source,terminal]
+----
+$ oc create sa logging-collector -n openshift-logging
+----
+
+. Assign the necessary permissions to the service account so that the collector can collect and forward logs. In this example, the collector is granted permissions to collect both infrastructure and application logs.
++
+[source,terminal]
+----
+$ oc adm policy add-cluster-role-to-user logging-collector-logs-writer -z logging-collector -n openshift-logging
+$ oc adm policy add-cluster-role-to-user collect-application-logs -z logging-collector -n openshift-logging
+$ oc adm policy add-cluster-role-to-user collect-infrastructure-logs -z logging-collector -n openshift-logging
+----
+
+. Create a `ClusterLogForwarder` CR:
++
+.Example `ClusterLogForwarder` CR
+[source,yaml]
+----
+apiVersion: observability.openshift.io/v1
+kind: ClusterLogForwarder
+metadata:
+  name: instance
+  namespace: openshift-logging # <1>
+spec:
+  serviceAccount:
+    name: logging-collector # <2>
+  outputs:
+  - name: lokistack-out
+    type: lokiStack # <3>
+    lokiStack:
+      target: # <4>
+        name: logging-loki
+        namespace: openshift-logging
+      authentication:
+        token:
+          from: serviceAccount
+      tls:
+        ca:
+          key: service-ca.crt
+          configMapName: openshift-service-ca.crt
+  pipelines:
+  - name: infra-app-logs
+    inputRefs: # <5>
+    - application
+    - infrastructure
+    outputRefs:
+    - lokistack-out
+----
+<1> You must specify the `openshift-logging` namespace.
+<2> Specify the name of the service account created earlier.
+<3> Select the `lokiStack` output type to send logs to the `LokiStack` instance.
+<4> Point the `ClusterLogForwarder` to the `LokiStack` instance created earlier.
+<5> Select the log types that you want to send to the `LokiStack` instance.
+
+. Apply the `ClusterLogForwarder` CR by running the following command:
++
+[source,terminal]
+----
+$ oc apply -f .yaml
+----
+
+.Verification
+
+. Verify the installation by running the following command:
++
+[source,terminal]
+----
+$ oc get pods -n openshift-logging
+----
++
+.Example output
+[source,terminal]
+----
+$ oc get pods -n openshift-logging
+NAME                                           READY   STATUS    RESTARTS   AGE
+cluster-logging-operator-fb7f7cf69-8jsbq       1/1     Running   0          98m
+instance-222js                                 2/2     Running   0          18m
+instance-g9ddv                                 2/2     Running   0          18m
+instance-hfqq8                                 2/2     Running   0          18m
+instance-sphwg                                 2/2     Running   0          18m
+instance-vv7zn                                 2/2     Running   0          18m
+instance-wk5zz                                 2/2     Running   0          18m
+logging-loki-compactor-0                       1/1     Running   0          42m
+logging-loki-distributor-7d7688bcb9-dvcj8      1/1     Running   0          42m
+logging-loki-gateway-5f6c75f879-bl7k9          2/2     Running   0          42m
+logging-loki-gateway-5f6c75f879-xhq98          2/2     Running   0          42m
+logging-loki-index-gateway-0                   1/1     Running   0          42m
+logging-loki-ingester-0                        1/1     Running   0          42m
+logging-loki-querier-6b7b56bccc-2v9q4          1/1     Running   0          42m
+logging-loki-query-frontend-84fb57c578-gq2f7   1/1     Running   0          42m
+----
diff --git a/modules/installing-logging-operator-web-console.adoc b/modules/installing-logging-operator-web-console.adoc
new file mode 100644
index 000000000000..724a97d72832
--- /dev/null
+++ b/modules/installing-logging-operator-web-console.adoc
@@ -0,0 +1,163 @@
+:_mod-docs-content-type: PROCEDURE
+[id="installing-logging-operator-web-console_{context}"]
+= Installing {clo} by using the web console
+
+Install {clo} on your {ocp-product-title} cluster to collect and forward logs to a log store from the OperatorHub by using the {ocp-product-title} web console.
+
+.Prerequisites
+
+* You have administrator permissions.
+* You have access to the {ocp-product-title} web console.
+* You installed and configured {loki-op}.
+
+.Procedure
+
+. In the {ocp-product-title} web console *Administrator* perspective, go to *Operators* -> *OperatorHub*.
+
+. Type {clo} in the *Filter by keyword* field. Click *{clo}* in the list of available Operators, and then click *Install*.
+
+. Select *stable-x.y* as the *Update channel*. The latest version is already selected in the *Version* field.
++
+The {clo} must be deployed to the {logging} namespace `openshift-logging`, so the *Installation mode* and *Installed Namespace* are already selected. If this namespace does not already exist, it is created for you.
+
+. Select *Enable Operator-recommended cluster monitoring on this namespace.*
++
+This option sets the `openshift.io/cluster-monitoring: "true"` label in the `Namespace` object. You must select this option to ensure that cluster monitoring scrapes the `openshift-logging` namespace.
+
+. For *Update approval*, select *Automatic*, then click *Install*.
++
+If the approval strategy in the subscription is set to *Automatic*, the update process initiates as soon as a new operator version is available in the selected channel. If the approval strategy is set to *Manual*, you must manually approve pending updates.
++
+[NOTE]
+====
+An Operator might display a `Failed` status before the installation completes. If the operator installation completes with an `InstallSucceeded` message, refresh the page.
+====
+
+. While the operator installs, create the service account that the log collector uses to collect the logs.
+
+.. Click the *+* in the top right of the screen to access the *Import YAML* page.
+
+.. Enter the YAML definition for the service account.
++
+.Example `ServiceAccount` object
+[source,yaml]
+----
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: logging-collector # <1>
+  namespace: openshift-logging # <2>
+----
+<1> Note down the name used for the service account `logging-collector` to use it later when creating the `ClusterLogForwarder` resource.
+<2> Set the namespace to `openshift-logging` because that is the namespace for deploying the `ClusterLogForwarder` resource.
+
+.. Click the *Create* button.
+
+. Create the `ClusterRoleBinding` objects to grant the log collector the necessary permissions to access the logs that you want to collect and to write to the log store, for example, infrastructure and application logs.
+
+.. Click the *+* in the top right of the screen to access the *Import YAML* page.
+
+.. Enter the YAML definition for the `ClusterRoleBinding` resources.
++
+.Example `ClusterRoleBinding` resources
+[source,yaml]
+----
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: logging-collector:write-logs
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: logging-collector-logs-writer # <1>
+subjects:
+- kind: ServiceAccount
+  name: logging-collector
+  namespace: openshift-logging
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: logging-collector:collect-application
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: collect-application-logs # <2>
+subjects:
+- kind: ServiceAccount
+  name: logging-collector
+  namespace: openshift-logging
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: logging-collector:collect-infrastructure
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: collect-infrastructure-logs # <3>
+subjects:
+- kind: ServiceAccount
+  name: logging-collector
+  namespace: openshift-logging
+----
+<1> The cluster role to allow the log collector to write logs to LokiStack.
+<2> The cluster role to allow the log collector to collect logs from applications.
+<3> The cluster role to allow the log collector to collect logs from infrastructure.
+
+.. Click the *Create* button.
+
+. After granting the necessary permissions to the service account, go to the *Operators* -> *Installed Operators* page. Select the {clo}, and under the *Provided APIs*, find the *ClusterLogForwarder* resource and click *Create Instance*.
+
+. Select *YAML view*, and then use the following template to create a `ClusterLogForwarder` CR:
++
+.Example `ClusterLogForwarder` CR
+[source,yaml]
+----
+apiVersion: observability.openshift.io/v1
+kind: ClusterLogForwarder
+metadata:
+  name: instance
+  namespace: openshift-logging # <1>
+spec:
+  serviceAccount:
+    name: logging-collector # <2>
+  outputs:
+  - name: lokistack-out
+    type: lokiStack # <3>
+    lokiStack:
+      target: # <4>
+        name: logging-loki
+        namespace: openshift-logging
+      authentication:
+        token:
+          from: serviceAccount
+      tls:
+        ca:
+          key: service-ca.crt
+          configMapName: openshift-service-ca.crt
+  pipelines:
+  - name: infra-app-logs
+    inputRefs: # <5>
+    - application
+    - infrastructure
+    outputRefs:
+    - lokistack-out
+----
+<1> You must specify `openshift-logging` as the namespace.
+<2> Specify the name of the service account created earlier.
+<3> Select the `lokiStack` output type to send logs to the `LokiStack` instance.
+<4> Point the `ClusterLogForwarder` to the `LokiStack` instance created earlier.
+<5> Select the log types that you want to send to the `LokiStack` instance.
+
+. Click *Create*.
+
+.Verification
+. In the *ClusterLogForwarder* tab, verify that you see your `ClusterLogForwarder` instance.
+
+. In the *Status* column, verify that you see the messages:
+
+* `Condition: observability.openshift.io/Authorized`
+* `observability.openshift.io/Valid, Ready`
diff --git a/modules/installing-loki-operator-cli.adoc b/modules/installing-loki-operator-cli.adoc
new file mode 100644
index 000000000000..75c4077a7add
--- /dev/null
+++ b/modules/installing-loki-operator-cli.adoc
@@ -0,0 +1,203 @@
+:_mod-docs-content-type: PROCEDURE
+[id="install-loki-operator-cli_{context}"]
+= Installing the {loki-op} by using the CLI
+
+Install {loki-op} on your {ocp-product-title} cluster to manage the log store `Loki` by using the {ocp-product-title} command-line interface (CLI). You can deploy and configure the `Loki` log store by reconciling the `LokiStack` resource with the {loki-op}.
+
+.Prerequisites
+
+* You have administrator permissions.
+* You installed the {oc-first}.
+* You have access to a supported object store. For example: AWS S3, Google Cloud Storage, Azure, Swift, Minio, or {rh-storage}.
+
+.Procedure
+
+. Create a `Namespace` object for {loki-op}:
++
+.Example `Namespace` object
+[source,yaml]
+----
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: openshift-operators-redhat # <1>
+  labels:
+    openshift.io/cluster-monitoring: "true" # <2>
+----
+<1> You must specify `openshift-operators-redhat` as the namespace. To enable monitoring for the operator, configure Cluster Monitoring Operator to scrape metrics from the `openshift-operators-redhat` namespace and not the `openshift-operators` namespace. The `openshift-operators` namespace might contain community operators, which are untrusted and could publish a metric with the same name as an {ocp-product-title} metric, causing conflicts.
+<2> A string value that specifies the label, as shown, to ensure that cluster monitoring scrapes the `openshift-operators-redhat` namespace.
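++
+[NOTE]
+====
+As an alternative sketch, you can create and label the namespace imperatively instead of applying a YAML manifest:
+
+[source,terminal]
+----
+$ oc create namespace openshift-operators-redhat
+$ oc label namespace openshift-operators-redhat openshift.io/cluster-monitoring="true"
+----
+
+If you take this route, you can skip the next step, which applies the `Namespace` object from a file.
+====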
+
+. Apply the `Namespace` object by running the following command:
++
+[source,terminal]
+----
+$ oc apply -f .yaml
+----
+
+. Create an `OperatorGroup` object.
++
+.Example `OperatorGroup` object
+[source,yaml]
+----
+apiVersion: operators.coreos.com/v1
+kind: OperatorGroup
+metadata:
+  name: loki-operator
+  namespace: openshift-operators-redhat # <1>
+spec:
+  upgradeStrategy: Default
+----
+<1> You must specify `openshift-operators-redhat` as the namespace.
+
+. Apply the `OperatorGroup` object by running the following command:
++
+[source,terminal]
+----
+$ oc apply -f .yaml
+----
+
+. Create a `Subscription` object for {loki-op}:
++
+.Example `Subscription` object
+[source,yaml]
+----
+apiVersion: operators.coreos.com/v1alpha1
+kind: Subscription
+metadata:
+  name: loki-operator
+  namespace: openshift-operators-redhat # <1>
+spec:
+  channel: stable-6. # <2>
+  installPlanApproval: Automatic # <3>
+  name: loki-operator
+  source: redhat-operators # <4>
+  sourceNamespace: openshift-marketplace
+----
+<1> You must specify `openshift-operators-redhat` as the namespace.
+<2> Specify `stable-6.` as the channel.
+<3> If the approval strategy in the subscription is set to `Automatic`, the update process initiates as soon as a new operator version is available in the selected channel. If the approval strategy is set to `Manual`, you must manually approve pending updates.
+<4> Specify `redhat-operators` as the value. If your {ocp-product-title} cluster is installed on a restricted network, also known as a disconnected cluster, specify the name of the `CatalogSource` object that you created when you configured Operator Lifecycle Manager (OLM).
+
+. Apply the `Subscription` object by running the following command:
++
+[source,terminal]
+----
+$ oc apply -f .yaml
+----
+
+. Create a `Namespace` object to deploy the LokiStack:
++
+.Example `Namespace` object
+[source,yaml]
+----
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: openshift-logging # <1>
+  labels:
+    openshift.io/cluster-monitoring: "true" # <2>
+----
+<1> The `openshift-logging` namespace is dedicated for all {logging} workloads.
+<2> A string value that specifies the label, as shown, to ensure that cluster monitoring scrapes the `openshift-logging` namespace.
+
+. Apply the `Namespace` object by running the following command:
++
+[source,terminal]
+----
+$ oc apply -f .yaml
+----
+
+. Create a secret with the credentials to access the object storage. For example, create a secret to access {aws-first} S3.
++
+.Example `Secret` object
+[source,yaml]
+----
+apiVersion: v1
+kind: Secret
+metadata:
+  name: logging-loki-s3 # <1>
+  namespace: openshift-logging
+stringData: # <2>
+  access_key_id:
+  access_key_secret:
+  bucketnames: s3-bucket-name
+  endpoint: https://s3.eu-central-1.amazonaws.com
+  region: eu-central-1
+----
+<1> Use the name `logging-loki-s3` to match the name used in LokiStack.
+<2> For the contents of the secret, see the Loki object storage section.
++
+--
+include::snippets/logging-retention-period-snip.adoc[leveloffset=+1]
+--
+
+. Apply the `Secret` object by running the following command:
++
+[source,terminal]
+----
+$ oc apply -f .yaml
+----
+
+. Create a `LokiStack` CR:
++
+.Example `LokiStack` CR
+[source,yaml]
+----
+apiVersion: loki.grafana.com/v1
+kind: LokiStack
+metadata:
+  name: logging-loki # <1>
+  namespace: openshift-logging # <2>
+spec:
+  size: 1x.small # <3>
+  storage:
+    schemas:
+    - version: v13
+      effectiveDate: "--" # <4>
" # <4> + secret: + name: logging-loki-s3 # <5> + type: s3 # <6> + storageClassName: # <7> + tenants: + mode: openshift-logging # <8> +---- +<1> Use the name `logging-loki`. +<2> You must specify `openshift-logging` as the namespace. +<3> Specify the deployment size. Supported size options for production instances of Loki are `1x.extra-small`, `1x.small`, or `1x.medium`. Additionally, `1x.pico` is supported starting with {logging} 6.1. +<4> For new installations this date should be set to the equivalent of "yesterday", as this will be the date from when the schema takes effect. +<5> Specify the name of your log store secret. +<6> Specify the corresponding storage type. +<7> Specify the name of a storage class for temporary storage. For best performance, specify a storage class that allocates block storage. You can list the available storage classes for your cluster by using the `oc get storageclasses` command. +<8> The `openshift-logging` mode is the default tenancy mode where a tenant is created for log types, such as audit, infrastructure, and application. This enables access control for individual users and user groups to different log streams. + + +. Apply the `LokiStack` CR object by running the following command: ++ +[source,terminal] +---- +$ oc apply -f .yaml +---- + +.Verification + +* Verify the installation by running the following command: ++ +[source,terminal] +---- +$ oc get pods -n openshift-logging +---- ++ +.Example output +[source,terminal] +---- +$ oc get pods -n openshift-logging +NAME READY STATUS RESTARTS AGE +logging-loki-compactor-0 1/1 Running 0 42m +logging-loki-distributor-7d7688bcb9-dvcj8 1/1 Running 0 42m +logging-loki-gateway-5f6c75f879-bl7k9 2/2 Running 0 42m +logging-loki-gateway-5f6c75f879-xhq98 2/2 Running 0 42m +logging-loki-index-gateway-0 1/1 Running 0 42m +logging-loki-ingester-0 1/1 Running 0 42m +logging-loki-querier-6b7b56bccc-2v9q4 1/1 Running 0 42m +logging-loki-query-frontend-84fb57c578-gq2f7 1/1 Running 0 42m +---- diff --git a/modules/installing-loki-operator-web-console.adoc b/modules/installing-loki-operator-web-console.adoc new file mode 100644 index 000000000000..2ef17cb148e6 --- /dev/null +++ b/modules/installing-loki-operator-web-console.adoc @@ -0,0 +1,133 @@ +:_mod-docs-content-type: PROCEDURE +[id="installing-loki-operator-web-console_{context}"] += Installing {logging-uc} by using the web console + +Install {loki-op} on your {ocp-product-title} cluster to manage the log store `Loki` from the OperatorHub by using the {ocp-product-title} web console. You can deploy and configure the `Loki` log store by reconciling the resource LokiStack with the {loki-op}. + +.Prerequisites + +* You have administrator permissions. +* You have access to the {ocp-product-title} web console. +* You have access to a supported object store (AWS S3, Google Cloud Storage, Azure, Swift, Minio, {rh-storage}). + +.Procedure + +. In the {ocp-product-title} web console *Administrator* perspective, go to *Operators* -> *OperatorHub*. + +. Type {loki-op} in the *Filter by keyword* field. Click *{loki-op}* in the list of available Operators, and then click *Install*. ++ +[IMPORTANT] +==== +The Community {loki-op} is not supported by Red{nbsp}Hat. +==== + +. Select *stable-x.y* as the *Update channel*. ++ +The {loki-op} must be deployed to the global Operator group namespace `openshift-operators-redhat`, so the *Installation mode* and *Installed Namespace* are already selected. If this namespace does not already exist, it will be created for you. + +. 
+
+. Select *Enable Operator-recommended cluster monitoring on this namespace.*
++
+This option sets the `openshift.io/cluster-monitoring: "true"` label in the `Namespace` object. You must select this option to ensure that cluster monitoring scrapes the `openshift-operators-redhat` namespace.
+
+. For *Update approval*, select *Automatic*, then click *Install*.
++
+If the approval strategy in the subscription is set to *Automatic*, the update process initiates as soon as a new Operator version is available in the selected channel. If the approval strategy is set to *Manual*, you must manually approve pending updates.
++
+[NOTE]
+====
+An Operator might display a `Failed` status before the installation completes. If the Operator install completes with an `InstallSucceeded` message, refresh the page.
+====
+
+. While the Operator installs, create the namespace to which the log store will be deployed.
+
+.. Click *+* in the top right of the screen to access the *Import YAML* page.
+
+.. Add the YAML definition for the `openshift-logging` namespace:
++
+.Example `Namespace` object
+[source,yaml]
+----
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: openshift-logging # <1>
+  labels:
+    openshift.io/cluster-monitoring: "true" # <2>
+----
+<1> The `openshift-logging` namespace is dedicated for all {logging} workloads.
+<2> A string value that specifies the label, as shown, to ensure that cluster monitoring scrapes the `openshift-logging` namespace.
+
+.. Click *Create*.
+
+. Create a secret with the credentials to access the object storage.
+
+.. Click *+* in the top right of the screen to access the *Import YAML* page.
+
+.. Add the YAML definition for the secret. For example, create a secret to access Amazon Web Services (AWS) S3:
++
+.Example `Secret` object
+[source,yaml]
+----
+apiVersion: v1
+kind: Secret
+metadata:
+  name: logging-loki-s3 <1>
+  namespace: openshift-logging <2>
+stringData: <3>
+  access_key_id:
+  access_key_secret:
+  bucketnames: s3-bucket-name
+  endpoint: https://s3.eu-central-1.amazonaws.com
+  region: eu-central-1
+----
+<1> Note down the name used for the secret `logging-loki-s3` to use it later when creating the `LokiStack` resource.
+<2> Set the namespace to `openshift-logging` because that is the namespace used to deploy `LokiStack`.
+<3> For the contents of the secret, see the Loki object storage section.
++
+--
+include::snippets/logging-retention-period-snip.adoc[leveloffset=+1]
+--
+
+.. Click *Create*.
+
+. Navigate to the *Installed Operators* page. Select the {loki-op}, and under the *Provided APIs*, find the *LokiStack* resource and click *Create Instance*.
+
+. Select *YAML view*, and then use the following template to create a `LokiStack` CR:
++
+--
+.Example `LokiStack` CR
+[source,yaml]
+----
+apiVersion: loki.grafana.com/v1
+kind: LokiStack
+metadata:
+  name: logging-loki # <1>
+  namespace: openshift-logging # <2>
+spec:
+  size: 1x.small # <3>
+  storage:
+    schemas:
+    - version: v13
+      effectiveDate: "--"
" + secret: + name: logging-loki-s3 # <4> + type: s3 # <5> + storageClassName: # <6> + tenants: + mode: openshift-logging # <7> +---- +<1> Use the name `logging-loki`. +<2> You must specify `openshift-logging` as the namespace. +<3> Specify the deployment size. Supported size options for production instances of Loki are `1x.extra-small`, `1x.small`, or `1x.medium`. Additionally, 1x.pico is supported starting with {logging} 6.1. +<4> Specify the name of your log store secret. +<5> Specify the corresponding storage type. +<6> Specify the name of a storage class for temporary storage. For best performance, specify a storage class that allocates block storage. You can list the available storage classes for your cluster by using the `oc get storageclasses` command. +<7> The `openshift-logging` mode is the default tenancy mode where a tenant is created for log types, such as audit, infrastructure, and application. This enables access control for individual users and user groups to different log streams. +-- + +. Click *Create*. + +.Verification + +. In the *LokiStack* tab veriy that you see your `LokiStack` instance. +. In the *Status* column, verify that you see the message `Condition: Ready` with a green checkmark. diff --git a/modules/log6x-quickstart-opentelemetry.adoc b/modules/log6x-quickstart-opentelemetry.adoc deleted file mode 100644 index e30926549ccd..000000000000 --- a/modules/log6x-quickstart-opentelemetry.adoc +++ /dev/null @@ -1,158 +0,0 @@ -// Module included in the following assemblies: -// -// * observability/logging/logging-6.0/log6x-about.adoc - -:_mod-docs-content-type: PROCEDURE -[id="quick-start-opentelemetry_{context}"] -= Quick start with OpenTelemetry - -:FeatureName: The OpenTelemetry Protocol (OTLP) output log forwarder -include::snippets/technology-preview.adoc[] - -To configure OTLP ingestion and enable the OpenTelemetry data model, follow these steps: - -.Prerequisites -* Cluster administrator permissions - -.Procedure - -. Install the {clo}, {loki-op}, and {coo-first} from OperatorHub. - -. Create a `LokiStack` custom resource (CR) in the `openshift-logging` namespace: -+ -[source,yaml] ----- -apiVersion: loki.grafana.com/v1 -kind: LokiStack -metadata: - name: logging-loki - namespace: openshift-logging -spec: - managementState: Managed - size: 1x.extra-small - storage: - schemas: - - effectiveDate: '2024-10-01' - version: v13 - secret: - name: logging-loki-s3 - type: s3 - storageClassName: gp3-csi - tenants: - mode: openshift-logging ----- -+ -[NOTE] -==== -Ensure that the `logging-loki-s3` secret is created beforehand. The contents of this secret vary depending on the object storage in use. For more information, see "Secrets and TLS Configuration". -==== - -. Create a service account for the collector: -+ -[source,terminal] ----- -$ oc create sa collector -n openshift-logging ----- - -. Allow the collector's service account to write data to the `LokiStack` CR: -+ -[source,terminal] ----- -$ oc adm policy add-cluster-role-to-user logging-collector-logs-writer -z collector ----- -+ -[NOTE] -==== -The `ClusterRole` resource is created automatically during the Cluster Logging Operator installation and does not need to be created manually. -==== - -. 
Allow the collector's service account to collect logs: -+ -[source,terminal] ----- -$ oc project openshift-logging ----- -+ -[source,terminal] ----- -$ oc adm policy add-cluster-role-to-user collect-application-logs -z collector ----- -+ -[source,terminal] ----- -$ oc adm policy add-cluster-role-to-user collect-audit-logs -z collector ----- -+ -[source,terminal] ----- -$ oc adm policy add-cluster-role-to-user collect-infrastructure-logs -z collector ----- -+ -[NOTE] -==== -The example binds the collector to all three roles (application, infrastructure, and audit). By default, only application and infrastructure logs are collected. To collect audit logs, update your `ClusterLogForwarder` configuration to include them. Assign roles based on the specific log types required for your environment. -==== - -. Create a `UIPlugin` CR to enable the *Log* section in the *Observe* tab: -+ -[source,yaml] ----- -apiVersion: observability.openshift.io/v1alpha1 -kind: UIPlugin -metadata: - name: logging -spec: - type: Logging - logging: - lokiStack: - name: logging-loki ----- - -. Create a `ClusterLogForwarder` CR to configure log forwarding: -+ -[source,yaml] ----- -apiVersion: observability.openshift.io/v1 -kind: ClusterLogForwarder -metadata: - name: collector - namespace: openshift-logging - annotations: - observability.openshift.io/tech-preview-otlp-output: "enabled" # <1> -spec: - serviceAccount: - name: collector - outputs: - - name: loki-otlp - type: lokiStack # <2> - lokiStack: - target: - name: logging-loki - namespace: openshift-logging - dataModel: Otel # <3> - authentication: - token: - from: serviceAccount - tls: - ca: - key: service-ca.crt - configMapName: openshift-service-ca.crt - pipelines: - - name: my-pipeline - inputRefs: - - application - - infrastructure - outputRefs: - - loki-otlp ----- -<1> Use the annotation to enable the `Otel` data model, which is a Technology Preview feature. -<2> Define the output type as `lokiStack`. -<3> Specifies the OpenTelemetry data model. -+ -[NOTE] -==== -You cannot use `lokiStack.labelKeys` when `dataModel` is `Otel`. To achieve similar functionality when `dataModel` is `Otel`, refer to "Configuring LokiStack for OTLP data ingestion". -==== - -.Verification -* Verify that OTLP is functioning correctly by going to *Observe* -> *OpenShift Logging* -> *LokiStack* -> *Writes* in the OpenShift web console, and checking *Distributor - Structured Metadata*. \ No newline at end of file diff --git a/modules/log6x-quickstart-viaq.adoc b/modules/log6x-quickstart-viaq.adoc deleted file mode 100644 index 4af406466e54..000000000000 --- a/modules/log6x-quickstart-viaq.adoc +++ /dev/null @@ -1,146 +0,0 @@ -// Module included in the following assemblies: -// -// * observability/logging/logging-6.0/log6x-about.adoc - -:_mod-docs-content-type: PROCEDURE -[id="quick-start-viaq_{context}"] -= Quick start with ViaQ - -To use the default ViaQ data model, follow these steps: - -.Prerequisites -* You have access to an {product-title} cluster with `cluster-admin` permissions. -* You installed the {oc-first}. -* You have access to a supported object store. For example, AWS S3, Google Cloud Storage, {azure-short}, Swift, Minio, or {rh-storage}. - -.Procedure - -. Install the `{clo}`, `{loki-op}`, and `{coo-first}` from OperatorHub. - -. 
Create a `LokiStack` custom resource (CR) in the `openshift-logging` namespace: -+ -[source,yaml] ----- -apiVersion: loki.grafana.com/v1 -kind: LokiStack -metadata: - name: logging-loki - namespace: openshift-logging -spec: - managementState: Managed - size: 1x.extra-small - storage: - schemas: - - effectiveDate: '2024-10-01' - version: v13 - secret: - name: logging-loki-s3 - type: s3 - storageClassName: gp3-csi - tenants: - mode: openshift-logging ----- -+ -[NOTE] -==== -Ensure that the `logging-loki-s3` secret is created beforehand. The contents of this secret vary depending on the object storage in use. For more information, see Secrets and TLS Configuration. -==== - -. Create a service account for the collector: -+ -[source,terminal] ----- -$ oc create sa collector -n openshift-logging ----- - -. Allow the collector's service account to write data to the `LokiStack` CR: -+ -[source,terminal] ----- -$ oc adm policy add-cluster-role-to-user logging-collector-logs-writer -z collector -n openshift-logging ----- -+ -[NOTE] -==== -The `ClusterRole` resource is created automatically during the Cluster Logging Operator installation and does not need to be created manually. -==== - -. To collect logs, use the service account of the collector by running the following commands: -+ -[source,terminal] ----- -$ oc adm policy add-cluster-role-to-user collect-application-logs -z collector -n openshift-logging ----- -+ -[source,terminal] ----- -$ oc adm policy add-cluster-role-to-user collect-audit-logs -z collector -n openshift-logging ----- -+ -[source,terminal] ----- -$ oc adm policy add-cluster-role-to-user collect-infrastructure-logs -z collector -n openshift-logging ----- -+ -[NOTE] -==== -The example binds the collector to all three roles (application, infrastructure, and audit), but by default, only application and infrastructure logs are collected. To collect audit logs, update your `ClusterLogForwarder` configuration to include them. Assign roles based on the specific log types required for your environment. -==== - -. Create a `UIPlugin` CR to enable the *Log* section in the *Observe* tab: -+ -[source,yaml] ----- -apiVersion: observability.openshift.io/v1alpha1 -kind: UIPlugin -metadata: - name: logging -spec: - type: Logging - logging: - lokiStack: - name: logging-loki ----- - -. Create a `ClusterLogForwarder` CR to configure log forwarding: -+ -[source,yaml] ----- -apiVersion: observability.openshift.io/v1 -kind: ClusterLogForwarder -metadata: - name: collector - namespace: openshift-logging -spec: - serviceAccount: - name: collector - outputs: - - name: default-lokistack - type: lokiStack - lokiStack: - authentication: - token: - from: serviceAccount - target: - name: logging-loki - namespace: openshift-logging - tls: - ca: - key: service-ca.crt - configMapName: openshift-service-ca.crt - pipelines: - - name: default-logstore - inputRefs: - - application - - infrastructure - outputRefs: - - default-lokistack ----- -+ -[NOTE] -==== -The `dataModel` field is optional and left unset (`dataModel: ""`) by default. This allows the Cluster Logging Operator (CLO) to automatically select a data model. Currently, the CLO defaults to the ViaQ model when the field is unset, but this will change in future releases. Specifying `dataModel: ViaQ` ensures the configuration remains compatible if the default changes. -==== - -.Verification -* Verify that logs are visible in the *Log* section of the *Observe* tab in the {product-title} web console. 
\ No newline at end of file
diff --git a/modules/logging-oc-explain.adoc b/modules/logging-oc-explain.adoc
new file mode 100644
index 000000000000..264c2d4214ed
--- /dev/null
+++ b/modules/logging-oc-explain.adoc
@@ -0,0 +1,75 @@
+// Module included in the following assemblies:
+//
+:_mod-docs-content-type: CONCEPT
+[id="logging-oc-explain_{context}"]
+
+= Using the `oc explain` command
+
+The `oc explain` command is an essential tool in the OpenShift CLI `oc` that provides detailed descriptions of the fields within Custom Resources (CRs). This command is invaluable for administrators and developers who are configuring or troubleshooting resources in an OpenShift cluster.
+
+== Resource descriptions
+`oc explain` offers in-depth explanations of all fields associated with a specific object. This includes standard resources like pods and services, as well as more complex entities like statefulsets and custom resources defined by Operators.
+
+To view the documentation for the `outputs` field of the `ClusterLogForwarder` custom resource, you can use:
+
+[source,terminal]
+----
+$ oc explain clusterlogforwarders.observability.openshift.io.spec.outputs
+----
+
+[NOTE]
+====
+You can use the short form `obsclf` in place of `clusterlogforwarder`.
+====
+
+This command displays detailed information about the field, including its type, default value, and any associated sub-fields.
+
+== Hierarchical structure
+The command displays the structure of resource fields in a hierarchical format, clarifying the relationships between different configuration options.
+
+For instance, you can drill down into the `storage` configuration for a `LokiStack` custom resource as follows:
+
+[source,terminal]
+----
+$ oc explain lokistacks.loki.grafana.com
+$ oc explain lokistacks.loki.grafana.com.spec
+$ oc explain lokistacks.loki.grafana.com.spec.storage
+$ oc explain lokistacks.loki.grafana.com.spec.storage.schemas
+----
+
+Each command reveals a deeper level of the resource specification, making the structure clear.
+
+== Type information
+`oc explain` also indicates the type of each field (such as string, integer, or boolean), allowing you to verify that resource definitions use the correct data types.
+
+For example:
+
+[source,terminal]
+----
+$ oc explain lokistacks.loki.grafana.com.spec.size
+----
+
+The output shows the data type expected for the `size` field.
+
+== Default values
+When applicable, the command shows the default values for fields, providing insight into which values are used if none are explicitly specified.
+
+Again using `lokistacks.loki.grafana.com` as an example:
+
+[source,terminal]
+----
+$ oc explain lokistacks.spec.template.distributor.replicas
+----
+
+.Example output
+[source,terminal]
+----
+GROUP:      loki.grafana.com
+KIND:       LokiStack
+VERSION:    v1
+
+FIELD: replicas
+
+DESCRIPTION:
+    Replicas defines the number of replica pods of the component.
+----
diff --git a/modules/loki-memberlist-ip.adoc b/modules/loki-memberlist-ip.adoc
new file mode 100644
index 000000000000..a6ec2050498b
--- /dev/null
+++ b/modules/loki-memberlist-ip.adoc
@@ -0,0 +1,29 @@
+:_mod-docs-content-type: CONCEPT
+[id="loki-memberlist-ip_{context}"]
+= Configuring Loki to tolerate memberlist creation failure
+
+In an {ocp-product-title} cluster, administrators generally use a non-private IP network range. As a result, the LokiStack memberlist configuration fails because, by default, it only uses private IP networks.
+ +As an administrator, you can select the pod network for the memberlist configuration. You can modify the `LokiStack` custom resource (CR) to use the `podIP` address in the `hashRing` spec. To configure the `LokiStack` CR, use the following command: + +[source,terminal] +---- +$ oc patch LokiStack logging-loki -n openshift-logging --type=merge -p '{"spec": {"hashRing":{"memberlist":{"instanceAddrType":"podIP"},"type":"memberlist"}}}' +---- + +.Example LokiStack to include `podIP` +[source,yaml] +---- +apiVersion: loki.grafana.com/v1 +kind: LokiStack +metadata: + name: logging-loki + namespace: openshift-logging +spec: +# ... + hashRing: + type: memberlist + memberlist: + instanceAddrType: podIP +# ... +---- diff --git a/modules/loki-pod-placement.adoc b/modules/loki-pod-placement.adoc new file mode 100644 index 000000000000..4e4597d6ea0b --- /dev/null +++ b/modules/loki-pod-placement.adoc @@ -0,0 +1,195 @@ +:_mod-docs-content-type: CONCEPT +[id="loki-pod-placement_{context}"] += Loki pod placement + +You can control which nodes the Loki pods run on, and prevent other workloads from using those nodes, by using tolerations or node selectors on the pods. + +You can apply tolerations to the log store pods with the LokiStack custom resource (CR) and apply taints to a node with the node specification. A taint on a node is a `key:value` pair that instructs the node to repel all pods that do not allow the taint. Using a specific `key:value` pair that is not on other pods ensures that only the log store pods can run on that node. + +.Example LokiStack with node selectors +[source,yaml] +---- +apiVersion: loki.grafana.com/v1 +kind: LokiStack +metadata: + name: logging-loki + namespace: openshift-logging +spec: +# ... + template: + compactor: # <1> + nodeSelector: + node-role.kubernetes.io/infra: "" # <2> + distributor: + nodeSelector: + node-role.kubernetes.io/infra: "" + gateway: + nodeSelector: + node-role.kubernetes.io/infra: "" + indexGateway: + nodeSelector: + node-role.kubernetes.io/infra: "" + ingester: + nodeSelector: + node-role.kubernetes.io/infra: "" + querier: + nodeSelector: + node-role.kubernetes.io/infra: "" + queryFrontend: + nodeSelector: + node-role.kubernetes.io/infra: "" + ruler: + nodeSelector: + node-role.kubernetes.io/infra: "" +# ... +---- +<1> Specifies the component pod type that applies to the node selector. +<2> Specifies the pods that are moved to nodes containing the defined label. + + +.Example LokiStack CR with node selectors and tolerations +[source,yaml] +---- +apiVersion: loki.grafana.com/v1 +kind: LokiStack +metadata: + name: logging-loki + namespace: openshift-logging +spec: +# ... 
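+  # Illustrative note (not part of the original example): these tolerations
+  # assume an administrator has already tainted the infra nodes, for example
+  # with commands such as the following, where <node-name> is a placeholder:
+  #   oc adm taint nodes <node-name> node-role.kubernetes.io/infra=reserved:NoSchedule
+  #   oc adm taint nodes <node-name> node-role.kubernetes.io/infra=reserved:NoExecute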
+ template: + compactor: + nodeSelector: + node-role.kubernetes.io/infra: "" + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/infra + value: reserved + - effect: NoExecute + key: node-role.kubernetes.io/infra + value: reserved + distributor: + nodeSelector: + node-role.kubernetes.io/infra: "" + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/infra + value: reserved + - effect: NoExecute + key: node-role.kubernetes.io/infra + value: reserved + indexGateway: + nodeSelector: + node-role.kubernetes.io/infra: "" + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/infra + value: reserved + - effect: NoExecute + key: node-role.kubernetes.io/infra + value: reserved + ingester: + nodeSelector: + node-role.kubernetes.io/infra: "" + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/infra + value: reserved + - effect: NoExecute + key: node-role.kubernetes.io/infra + value: reserved + querier: + nodeSelector: + node-role.kubernetes.io/infra: "" + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/infra + value: reserved + - effect: NoExecute + key: node-role.kubernetes.io/infra + value: reserved + queryFrontend: + nodeSelector: + node-role.kubernetes.io/infra: "" + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/infra + value: reserved + - effect: NoExecute + key: node-role.kubernetes.io/infra + value: reserved + ruler: + nodeSelector: + node-role.kubernetes.io/infra: "" + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/infra + value: reserved + - effect: NoExecute + key: node-role.kubernetes.io/infra + value: reserved + gateway: + nodeSelector: + node-role.kubernetes.io/infra: "" + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/infra + value: reserved + - effect: NoExecute + key: node-role.kubernetes.io/infra + value: reserved +# ... +---- + +To configure the `nodeSelector` and `tolerations` fields of the LokiStack (CR), you can use the [command]`oc explain` command to view the description and fields for a particular resource: + +[source,terminal] +---- +$ oc explain lokistack.spec.template +---- + +.Example output +[source,text] +---- +KIND: LokiStack +VERSION: loki.grafana.com/v1 + +RESOURCE: template + +DESCRIPTION: + Template defines the resource/limits/tolerations/nodeselectors per + component + +FIELDS: + compactor + Compactor defines the compaction component spec. + + distributor + Distributor defines the distributor component spec. +... +---- + +For more detailed information, you can add a specific field: + +[source,terminal] +---- +$ oc explain lokistack.spec.template.compactor +---- + +.Example output +[source,text] +---- +KIND: LokiStack +VERSION: loki.grafana.com/v1 + +RESOURCE: compactor + +DESCRIPTION: + Compactor defines the compaction component spec. + +FIELDS: + nodeSelector + NodeSelector defines the labels required by a node to schedule the + component onto it. +... +---- diff --git a/modules/loki-rate-limit-errors.adoc b/modules/loki-rate-limit-errors.adoc new file mode 100644 index 000000000000..31bbfd2c5d69 --- /dev/null +++ b/modules/loki-rate-limit-errors.adoc @@ -0,0 +1,73 @@ +:_mod-docs-content-type: PROCEDURE +[id="loki-rate-limit-errors_{context}"] += Troubleshooting Loki rate limit errors + +If the Log Forwarder API forwards a large block of messages that exceeds the rate limit to Loki, Loki generates rate limit (`429`) errors. + +These errors can occur during normal operation. 
For example, when adding the {logging} to a cluster that already has some logs, rate limit errors might occur while the {logging} tries to ingest all of the existing log entries. In this case, if the rate of addition of new logs is less than the total rate limit, the historical data is eventually ingested, and the rate limit errors are resolved without requiring user intervention. + +In cases where the rate limit errors continue to occur, you can fix the issue by modifying the `LokiStack` custom resource (CR). + +[IMPORTANT] +==== +The `LokiStack` CR is not available on Grafana-hosted Loki. This topic does not apply to Grafana-hosted Loki servers. +==== + +.Conditions + +* The Log Forwarder API is configured to forward logs to Loki. + +* Your system sends a block of messages that is larger than 2 MB to Loki. For example: ++ +[source,text] +---- +"values":[["1630410392689800468","{\"kind\":\"Event\",\"apiVersion\":\ +....... +...... +...... +...... +\"received_at\":\"2021-08-31T11:46:32.800278+00:00\",\"version\":\"1.7.4 1.6.0\"}},\"@timestamp\":\"2021-08-31T11:46:32.799692+00:00\",\"viaq_index_name\":\"audit-write\",\"viaq_msg_id\":\"MzFjYjJkZjItNjY0MC00YWU4LWIwMTEtNGNmM2E5ZmViMGU4\",\"log_type\":\"audit\"}"]]}]} +---- + +* After you enter `oc logs -n openshift-logging -l component=collector`, the collector logs in your cluster show a line containing one of the following error messages: ++ +[source,text] +---- +429 Too Many Requests Ingestion rate limit exceeded +---- ++ +.Example Vector error message +[source,text] +---- +2023-08-25T16:08:49.301780Z WARN sink{component_kind="sink" component_id=default_loki_infra component_type=loki component_name=default_loki_infra}: vector::sinks::util::retries: Retrying after error. error=Server responded with an error: 429 Too Many Requests internal_log_rate_limit=true +---- ++ +The error is also visible on the receiving end. For example, in the LokiStack ingester pod: ++ +.Example Loki ingester error message +[source,text] +---- +level=warn ts=2023-08-30T14:57:34.155592243Z caller=grpc_logging.go:43 duration=1.434942ms method=/logproto.Pusher/Push err="rpc error: code = Code(429) desc = entry with timestamp 2023-08-30 14:57:32.012778399 +0000 UTC ignored, reason: 'Per stream rate limit exceeded (limit: 3MB/sec) while attempting to ingest for stream +---- + +.Procedure + +* Update the `ingestionBurstSize` and `ingestionRate` fields in the `LokiStack` CR: ++ +[source,yaml] +---- +apiVersion: loki.grafana.com/v1 +kind: LokiStack +metadata: + name: logging-loki + namespace: openshift-logging +spec: + limits: + global: + ingestion: + ingestionBurstSize: 16 # <1> + ingestionRate: 8 # <2> +# ... +---- +<1> The `ingestionBurstSize` field defines the maximum local rate-limited sample size per distributor replica in MB. This value is a hard limit. Set this value to at least the maximum logs size expected in a single push request. Single requests that are larger than the `ingestionBurstSize` value are not permitted. +<2> The `ingestionRate` field is a soft limit on the maximum amount of ingested samples per second in MB. Rate limit errors occur if the rate of logs exceeds the limit, but the collector retries sending the logs. As long as the total average is lower than the limit, the system recovers and errors are resolved without user intervention. 
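+
+As an alternative to editing the CR in an editor, the same change can be applied as a merge patch. The following is a minimal sketch that assumes the `LokiStack` instance is named `logging-loki` in the `openshift-logging` namespace, as in the example above:
+
+[source,terminal]
+----
+$ oc patch LokiStack logging-loki -n openshift-logging --type=merge \
+  -p '{"spec":{"limits":{"global":{"ingestion":{"ingestionBurstSize":16,"ingestionRate":8}}}}}'
+----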
diff --git a/modules/loki-rbac-rules-permissions.adoc b/modules/loki-rbac-rules-permissions.adoc new file mode 100644 index 000000000000..3c869a649f3c --- /dev/null +++ b/modules/loki-rbac-rules-permissions.adoc @@ -0,0 +1,63 @@ +:_mod-docs-content-type: REFERENCE +[id="loki-rbac-rules-permissions_{context}"] += Authorizing LokiStack rules RBAC permissions + +Administrators can allow users to create and manage their own alerting and recording rules by binding cluster roles to usernames. +Cluster roles are defined as `ClusterRole` objects that contain necessary role-based access control (RBAC) permissions for users. + +The following cluster roles for alerting and recording rules are available for LokiStack: + +[options="header"] +|=== +|Rule name |Description + +|`alertingrules.loki.grafana.com-v1-admin` +|Users with this role have administrative-level access to manage alerting rules. This cluster role grants permissions to create, read, update, delete, list, and watch `AlertingRule` resources within the `loki.grafana.com/v1` API group. + +|`alertingrules.loki.grafana.com-v1-crdview` +|Users with this role can view the definitions of Custom Resource Definitions (CRDs) related to `AlertingRule` resources within the `loki.grafana.com/v1` API group, but do not have permissions for modifying or managing these resources. + +|`alertingrules.loki.grafana.com-v1-edit` +|Users with this role have permission to create, update, and delete `AlertingRule` resources. + +|`alertingrules.loki.grafana.com-v1-view` +|Users with this role can read `AlertingRule` resources within the `loki.grafana.com/v1` API group. They can inspect configurations, labels, and annotations for existing alerting rules but cannot make any modifications to them. + +|`recordingrules.loki.grafana.com-v1-admin` +|Users with this role have administrative-level access to manage recording rules. This cluster role grants permissions to create, read, update, delete, list, and watch `RecordingRule` resources within the `loki.grafana.com/v1` API group. + +|`recordingrules.loki.grafana.com-v1-crdview` +|Users with this role can view the definitions of Custom Resource Definitions (CRDs) related to `RecordingRule` resources within the `loki.grafana.com/v1` API group, but do not have permissions for modifying or managing these resources. + +|`recordingrules.loki.grafana.com-v1-edit` +|Users with this role have permission to create, update, and delete `RecordingRule` resources. + +|`recordingrules.loki.grafana.com-v1-view` +|Users with this role can read `RecordingRule` resources within the `loki.grafana.com/v1` API group. They can inspect configurations, labels, and annotations for existing alerting rules but cannot make any modifications to them. + +|=== + +[id="loki-rbac-rules-permissions-examples_{context}"] +== Examples + +To apply cluster roles for a user, you must bind an existing cluster role to a specific username. + +Cluster roles can be cluster or namespace scoped, depending on which type of role binding you use. +When a `RoleBinding` object is used, as when using the `oc adm policy add-role-to-user` command, the cluster role only applies to the specified namespace. +When a `ClusterRoleBinding` object is used, as when using the `oc adm policy add-cluster-role-to-user` command, the cluster role applies to all namespaces in the cluster. 
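+
+For illustration, the namespace-scoped command shown below corresponds to a `RoleBinding` object similar to the following sketch; the binding name `alertingrules-admin`, the username `user1`, and the namespace `app-ns` are hypothetical values:
+
+[source,yaml]
+----
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: alertingrules-admin
+  namespace: app-ns # the permissions apply only within this namespace
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: alertingrules.loki.grafana.com-v1-admin
+subjects:
+- apiGroup: rbac.authorization.k8s.io
+  kind: User
+  name: user1
+----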
+ +The following example command gives the specified user create, read, update and delete (CRUD) permissions for alerting rules in a specific namespace in the cluster: + +.Example cluster role binding command for alerting rule CRUD permissions in a specific namespace +[source,terminal] +---- +$ oc adm policy add-role-to-user alertingrules.loki.grafana.com-v1-admin -n +---- + +The following command gives the specified user administrator permissions for alerting rules in all namespaces: + +.Example cluster role binding command for administrator permissions +[source,terminal] +---- +$ oc adm policy add-cluster-role-to-user alertingrules.loki.grafana.com-v1-admin +---- diff --git a/modules/loki-reliability-hardening.adoc b/modules/loki-reliability-hardening.adoc new file mode 100644 index 000000000000..bd54c1d24c18 --- /dev/null +++ b/modules/loki-reliability-hardening.adoc @@ -0,0 +1,35 @@ +:_mod-docs-content-type: CONCEPT +[id="loki-reliability-hardening_{context}"] += Configuring Loki to tolerate node failure + +The {loki-op} supports setting pod anti-affinity rules to request that pods of the same component are scheduled on different available nodes in the cluster. + +include::snippets/about-pod-affinity.adoc[] + +The Operator sets default, preferred `podAntiAffinity` rules for all Loki components, which includes the `compactor`, `distributor`, `gateway`, `indexGateway`, `ingester`, `querier`, `queryFrontend`, and `ruler` components. + +You can override the preferred `podAntiAffinity` settings for Loki components by configuring required settings in the `requiredDuringSchedulingIgnoredDuringExecution` field: + +.Example user settings for the ingester component +[source,yaml] +---- +apiVersion: loki.grafana.com/v1 +kind: LokiStack +metadata: + name: logging-loki + namespace: openshift-logging +spec: +# ... + template: + ingester: + podAntiAffinity: + # ... + requiredDuringSchedulingIgnoredDuringExecution: <1> + - labelSelector: + matchLabels: <2> + app.kubernetes.io/component: ingester + topologyKey: kubernetes.io/hostname +# ... +---- +<1> The stanza to define a required rule. +<2> The key-value pair (label) that must be matched to apply the rule. diff --git a/modules/loki-restart-hardening.adoc b/modules/loki-restart-hardening.adoc new file mode 100644 index 000000000000..abcc60aeb0ae --- /dev/null +++ b/modules/loki-restart-hardening.adoc @@ -0,0 +1,5 @@ +:_mod-docs-content-type: CONCEPT +[id="loki-restart-hardening_{context}"] += LokiStack behavior during cluster restarts + +When an {ocp-product-title} cluster is restarted, LokiStack ingestion and the query path continue to operate within the available CPU and memory resources available for the node. This means that there is no downtime for the LokiStack during {ocp-product-title} cluster updates. This behavior is achieved by using `PodDisruptionBudget` resources. The {loki-op} provisions `PodDisruptionBudget` resources for Loki, which determine the minimum number of pods that must be available per component to ensure normal operations under certain conditions. diff --git a/modules/loki-retention.adoc b/modules/loki-retention.adoc new file mode 100644 index 000000000000..63795e8677fa --- /dev/null +++ b/modules/loki-retention.adoc @@ -0,0 +1,109 @@ +:_mod-docs-content-type: PROCEDURE +[id="loki-retention_{context}"] += Enabling stream-based retention with Loki + +You can configure retention policies based on log streams. Rules for these may be set globally, per-tenant, or both. 
If you configure both, tenant rules apply before global rules. + +include::snippets/logging-retention-period-snip.adoc[] + +[NOTE] +==== +Schema v13 is recommended. +==== + +.Procedure + +. Create a `LokiStack` CR: ++ +** Enable stream-based retention globally as shown in the following example: ++ +.Example global stream-based retention for AWS +[source,yaml] +---- +apiVersion: loki.grafana.com/v1 +kind: LokiStack +metadata: + name: logging-loki + namespace: openshift-logging +spec: + limits: + global: <1> + retention: <2> + days: 20 + streams: + - days: 4 + priority: 1 + selector: '{kubernetes_namespace_name=~"test.+"}' <3> + - days: 1 + priority: 1 + selector: '{log_type="infrastructure"}' + managementState: Managed + replicationFactor: 1 + size: 1x.small + storage: + schemas: + - effectiveDate: "2020-10-11" + version: v13 + secret: + name: logging-loki-s3 + type: aws + storageClassName: gp3-csi + tenants: + mode: openshift-logging +---- +<1> Sets retention policy for all log streams. *Note: This field does not impact the retention period for stored logs in object storage.* +<2> Retention is enabled in the cluster when this block is added to the CR. +<3> Contains the link:https://grafana.com/docs/loki/latest/logql/query_examples/#query-examples[LogQL query] used to define the log stream.spec: + limits: + +** Enable stream-based retention per-tenant basis as shown in the following example: ++ +.Example per-tenant stream-based retention for AWS +[source,yaml] +---- +apiVersion: loki.grafana.com/v1 +kind: LokiStack +metadata: + name: logging-loki + namespace: openshift-logging +spec: + limits: + global: + retention: + days: 20 + tenants: <1> + application: + retention: + days: 1 + streams: + - days: 4 + selector: '{kubernetes_namespace_name=~"test.+"}' <2> + infrastructure: + retention: + days: 5 + streams: + - days: 1 + selector: '{kubernetes_namespace_name=~"openshift-cluster.+"}' + managementState: Managed + replicationFactor: 1 + size: 1x.small + storage: + schemas: + - effectiveDate: "2020-10-11" + version: v13 + secret: + name: logging-loki-s3 + type: aws + storageClassName: gp3-csi + tenants: + mode: openshift-logging +---- +<1> Sets retention policy by tenant. Valid tenant types are `application`, `audit`, and `infrastructure`. +<2> Contains the link:https://grafana.com/docs/loki/latest/logql/query_examples/#query-examples[LogQL query] used to define the log stream. + +. Apply the `LokiStack` CR: ++ +[source,terminal] +---- +$ oc apply -f .yaml +---- diff --git a/modules/loki-zone-aware-replication.adoc b/modules/loki-zone-aware-replication.adoc new file mode 100644 index 000000000000..96d60984e538 --- /dev/null +++ b/modules/loki-zone-aware-replication.adoc @@ -0,0 +1,29 @@ +:_mod-docs-content-type: CONCEPT +[id="loki-zone-aware-replication_{context}"] += Zone aware data replication + +The {loki-op} offers support for zone-aware data replication through pod topology spread constraints. Enabling this feature enhances reliability and safeguards against log loss in the event of a single zone failure. When configuring the deployment size as `1x.extra-small`, `1x.small`, or `1x.medium`, the `replication.factor` field is automatically set to 2. + +To ensure proper replication, you need to have at least as many availability zones as the replication factor specifies. While it is possible to have more availability zones than the replication factor, having fewer zones can lead to write failures. Each zone should host an equal number of instances for optimal operation. 
+ +.Example LokiStack CR with zone replication enabled +[source,yaml] +---- +apiVersion: loki.grafana.com/v1 +kind: LokiStack +metadata: + name: logging-loki + namespace: openshift-logging +spec: + replicationFactor: 2 # <1> + replication: + factor: 2 # <2> + zones: + - maxSkew: 1 # <3> + topologyKey: topology.kubernetes.io/zone # <4> +---- +<1> Deprecated field, values entered are overwritten by `replication.factor`. +<2> This value is automatically set when deployment size is selected at setup. +<3> The maximum difference in number of pods between any two topology domains. The default is 1, and you cannot specify a value of 0. +<4> Defines zones in the form of a topology key that corresponds to a node label. + diff --git a/modules/loki-zone-fail-recovery.adoc b/modules/loki-zone-fail-recovery.adoc new file mode 100644 index 000000000000..7d1cae30655f --- /dev/null +++ b/modules/loki-zone-fail-recovery.adoc @@ -0,0 +1,87 @@ +// Module included in the following assemblies: +// +// * logging/cluster-logging-loki.adoc +// * observability/logging/logging-6.2/log6x-loki-6.2.adoc + +:_mod-docs-content-type: PROCEDURE +[id="loki-zone-fail-recovery_{context}"] += Recovering Loki pods from failed zones + +In {ocp-product-title} a zone failure happens when specific availability zone resources become inaccessible. Availability zones are isolated areas within a cloud provider's data center, aimed at enhancing redundancy and fault tolerance. If your {ocp-product-title} cluster is not configured to handle this, a zone failure can lead to service or data loss. + +Loki pods are part of a link:https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/[StatefulSet], and they come with Persistent Volume Claims (PVCs) provisioned by a `StorageClass` object. Each Loki pod and its PVCs reside in the same zone. When a zone failure occurs in a cluster, the StatefulSet controller automatically attempts to recover the affected pods in the failed zone. + +[WARNING] +==== +The following procedure will delete the PVCs in the failed zone, and all data contained therein. To avoid complete data loss the replication factor field of the `LokiStack` CR should always be set to a value greater than 1 to ensure that Loki is replicating. +==== + +.Prerequisites +* Verify your `LokiStack` CR has a replication factor greater than 1. +* Zone failure detected by the control plane, and nodes in the failed zone are marked by cloud provider integration. + +The StatefulSet controller automatically attempts to reschedule pods in a failed zone. Because the associated PVCs are also in the failed zone, automatic rescheduling to a different zone does not work. You must manually delete the PVCs in the failed zone to allow successful re-creation of the stateful Loki Pod and its provisioned PVC in the new zone. + + +.Procedure +. List the pods in `Pending` status by running the following command: ++ +[source,terminal] +---- +$ oc get pods --field-selector status.phase==Pending -n openshift-logging +---- ++ +.Example `oc get pods` output +[source,terminal] +---- +NAME READY STATUS RESTARTS AGE # <1> +logging-loki-index-gateway-1 0/1 Pending 0 17m +logging-loki-ingester-1 0/1 Pending 0 16m +logging-loki-ruler-1 0/1 Pending 0 16m +---- +<1> These pods are in `Pending` status because their corresponding PVCs are in the failed zone. + +. 
List the PVCs in `Pending` status by running the following command: ++ +[source,terminal] +---- +$ oc get pvc -o=json -n openshift-logging | jq '.items[] | select(.status.phase == "Pending") | .metadata.name' -r +---- ++ +.Example `oc get pvc` output +[source,terminal] +---- +storage-logging-loki-index-gateway-1 +storage-logging-loki-ingester-1 +wal-logging-loki-ingester-1 +storage-logging-loki-ruler-1 +wal-logging-loki-ruler-1 +---- + +. Delete the PVC(s) for a pod by running the following command: ++ +[source,terminal] +---- +$ oc delete pvc -n openshift-logging +---- ++ +. Delete the pod(s) by running the following command: ++ +[source,terminal] +---- +$ oc delete pod -n openshift-logging +---- ++ +Once these objects have been successfully deleted, they should automatically be rescheduled in an available zone. + +[id="logging-loki-zone-fail-term-state_{context}"] +== Troubleshooting PVC in a terminating state + +The PVCs might hang in the terminating state without being deleted, if PVC metadata finalizers are set to `kubernetes.io/pv-protection`. Removing the finalizers should allow the PVCs to delete successfully. + +* Remove the finalizer for each PVC by running the command below, then retry deletion. ++ +[source,terminal] +---- +$ oc patch pvc -p '{"metadata":{"finalizers":null}}' -n openshift-logging +---- diff --git a/modules/setting-up-log-collection.adoc b/modules/setting-up-log-collection.adoc new file mode 100644 index 000000000000..baae93bc504a --- /dev/null +++ b/modules/setting-up-log-collection.adoc @@ -0,0 +1,205 @@ +// Module included in the following assemblies: +// +// observability/logging/logging-6.0/log6x-clf.adoc + +:_mod-docs-content-type: PROCEDURE +[id="setting-up-log-collection_{context}"] += Setting up log collection + +This release of Cluster Logging requires administrators to explicitly grant log collection permissions to the service account associated with *ClusterLogForwarder*. This was not required in previous releases for the legacy logging scenario consisting of a *ClusterLogging* and, optionally, a *ClusterLogForwarder.logging.openshift.io* resource. + +The {clo} provides `collect-audit-logs`, `collect-application-logs`, and `collect-infrastructure-logs` cluster roles, which enable the collector to collect audit logs, application logs, and infrastructure logs respectively. + +Setup log collection by binding the required cluster roles to your service account. + +== Legacy service accounts +To use the existing legacy service account `logcollector`, create the following *ClusterRoleBinding*: + +[source,terminal] +---- +$ oc adm policy add-cluster-role-to-user collect-application-logs system:serviceaccount:openshift-logging:logcollector +---- + +[source,terminal] +---- +$ oc adm policy add-cluster-role-to-user collect-infrastructure-logs system:serviceaccount:openshift-logging:logcollector +---- + +Additionally, create the following *ClusterRoleBinding* if collecting audit logs: + +[source,terminal] +---- +$ oc adm policy add-cluster-role-to-user collect-audit-logs system:serviceaccount:openshift-logging:logcollector +---- + + +== Creating service accounts +.Prerequisites + +* The {clo} is installed in the `openshift-logging` namespace. +* You have administrator permissions. + +.Procedure + +. Create a service account for the collector. If you want to write logs to storage that requires a token for authentication, you must include a token in the service account. + +. 
Bind the appropriate cluster roles to the service account: ++ +.Example binding command +[source,terminal] +---- +$ oc adm policy add-cluster-role-to-user system:serviceaccount:: +---- + +=== Cluster Role Binding for your Service Account +The role_binding.yaml file binds the ClusterLogging operator's ClusterRole to a specific ServiceAccount, allowing it to manage Kubernetes resources cluster-wide. + +[source,yaml] +---- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: manager-rolebinding +roleRef: <1> + apiGroup: rbac.authorization.k8s.io <2> + kind: ClusterRole <3> + name: cluster-logging-operator <4> +subjects: <5> + - kind: ServiceAccount <6> + name: cluster-logging-operator <7> + namespace: openshift-logging <8> +---- +<1> roleRef: References the ClusterRole to which the binding applies. +<2> apiGroup: Indicates the RBAC API group, specifying that the ClusterRole is part of Kubernetes' RBAC system. +<3> kind: Specifies that the referenced role is a ClusterRole, which applies cluster-wide. +<4> name: The name of the ClusterRole being bound to the ServiceAccount, here cluster-logging-operator. +<5> subjects: Defines the entities (users or service accounts) that are being granted the permissions from the ClusterRole. +<6> kind: Specifies that the subject is a ServiceAccount. +<7> Name: The name of the ServiceAccount being granted the permissions. +<8> namespace: Indicates the namespace where the ServiceAccount is located. + +=== Writing application logs +The write-application-logs-clusterrole.yaml file defines a ClusterRole that grants permissions to write application logs to the Loki logging application. + +[source,yaml] +---- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-logging-write-application-logs +rules: <1> + - apiGroups: <2> + - loki.grafana.com <3> + resources: <4> + - application <5> + resourceNames: <6> + - logs <7> + verbs: <8> + - create <9> +---- +<1> rules: Specifies the permissions granted by this ClusterRole. +<2> apiGroups: Refers to the API group loki.grafana.com, which relates to the Loki logging system. +<3> loki.grafana.com: The API group for managing Loki-related resources. +<4> resources: The resource type that the ClusterRole grants permission to interact with. +<5> application: Refers to the application resources within the Loki logging system. +<6> resourceNames: Specifies the names of resources that this role can manage. +<7> logs: Refers to the log resources that can be created. +<8> verbs: The actions allowed on the resources. +<9> create: Grants permission to create new logs in the Loki system. + + +=== Writing audit logs +The write-audit-logs-clusterrole.yaml file defines a ClusterRole that grants permissions to create audit logs in the Loki logging system. +[source,yaml] +---- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-logging-write-audit-logs +rules: <1> + - apiGroups: <2> + - loki.grafana.com <3> + resources: <4> + - audit <5> + resourceNames: <6> + - logs <7> + verbs: <8> + - create <9> +---- +<1> rules: Defines the permissions granted by this ClusterRole. +<2> apiGroups: Specifies the API group loki.grafana.com. +<3> loki.grafana.com: The API group responsible for Loki logging resources. +<4> resources: Refers to the resource type this role manages, in this case, audit. +<5> audit: Specifies that the role manages audit logs within Loki. +<6> resourceNames: Defines the specific resources that the role can access. 
+<7> logs: Refers to the logs that can be managed under this role. +<8> verbs: The actions allowed on the resources. +<9> create: Grants permission to create new audit logs. + +=== Writing infrastructure logs +The write-infrastructure-logs-clusterrole.yaml file defines a ClusterRole that grants permission to create infrastructure logs in the Loki logging system. + +.Sample YAML +[source,yaml] +---- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-logging-write-infrastructure-logs +rules: <1> + - apiGroups: <2> + - loki.grafana.com <3> + resources: <4> + - infrastructure <5> + resourceNames: <6> + - logs <7> + verbs: <8> + - create <9> +---- +<1> rules: Specifies the permissions this ClusterRole grants. +<2> apiGroups: Specifies the API group for Loki-related resources. +<3> loki.grafana.com: The API group managing the Loki logging system. +<4> resources: Defines the resource type that this role can interact with. +<5> infrastructure: Refers to infrastructure-related resources that this role manages. +<6> resourceNames: Specifies the names of resources this role can manage. +<7> logs: Refers to the log resources related to infrastructure. +<8> verbs: The actions permitted by this role. +<9> create: Grants permission to create infrastructure logs in the Loki system. + +=== ClusterLogForwarder editor role +The clusterlogforwarder-editor-role.yaml file defines a ClusterRole that allows users to manage ClusterLogForwarders in OpenShift. + + +[source,yaml] +---- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: clusterlogforwarder-editor-role +rules: <1> + - apiGroups: <2> + - observability.openshift.io <3> + resources: <4> + - clusterlogforwarders <5> + verbs: <6> + - create <7> + - delete <8> + - get <9> + - list <10> + - patch <11> + - update <12> + - watch <13> +---- +<1> rules: Specifies the permissions this ClusterRole grants. +<2> apiGroups: Refers to the OpenShift-specific API group +<3> obervability.openshift.io: The API group for managing observability resources, like logging. +<4> resources: Specifies the resources this role can manage. +<5> clusterlogforwarders: Refers to the log forwarding resources in OpenShift. +<6> verbs: Specifies the actions allowed on the ClusterLogForwarders. +<7> create: Grants permission to create new ClusterLogForwarders. +<8> delete: Grants permission to delete existing ClusterLogForwarders. +<9> get: Grants permission to retrieve information about specific ClusterLogForwarders. +<10> list: Allows listing all ClusterLogForwarders. +<11> patch: Grants permission to partially modify ClusterLogForwarders. +<12> update: Grants permission to update existing ClusterLogForwarders. +<13> watch: Grants permission to monitor changes to ClusterLogForwarders. diff --git a/modules/unmanaged-operators.adoc b/modules/unmanaged-operators.adoc new file mode 100644 index 000000000000..01b046fba5bd --- /dev/null +++ b/modules/unmanaged-operators.adoc @@ -0,0 +1,53 @@ +:_mod-docs-content-type: CONCEPT +[id="unmanaged-operators_{context}"] += Support policy for unmanaged Operators + +The _management state_ of an Operator determines whether an Operator is actively +managing the resources for its related component in the cluster as designed. If +an Operator is set to an _unmanaged_ state, it does not respond to changes in +configuration nor does it receive updates. 
+ +While this can be helpful in non-production clusters or during debugging, +Operators in an unmanaged state are unsupported and the cluster administrator +assumes full control of the individual component configurations and upgrades. + +An Operator can be set to an unmanaged state using the following methods: + +* **Individual Operator configuration** ++ +Individual Operators have a `managementState` parameter in their configuration. +This can be accessed in different ways, depending on the Operator. For example, +the Red Hat OpenShift Logging Operator accomplishes this by modifying a custom resource +(CR) that it manages, while the Cluster Samples Operator uses a cluster-wide +configuration resource. ++ +Changing the `managementState` parameter to `Unmanaged` means that the Operator +is not actively managing its resources and will take no action related to the +related component. Some Operators might not support this management state as it +might damage the cluster and require manual recovery. ++ +[WARNING] +==== +Changing individual Operators to the `Unmanaged` state renders that particular +component and functionality unsupported. Reported issues must be reproduced in +`Managed` state for support to proceed. +==== + +* **Cluster Version Operator (CVO) overrides** ++ +The `spec.overrides` parameter can be added to the CVO's configuration to allow +administrators to provide a list of overrides to the CVO's behavior for a +component. Setting the `spec.overrides[].unmanaged` parameter to `true` for a +component blocks cluster upgrades and alerts the administrator after a CVO +override has been set: ++ +[source,terminal] +---- +Disabling ownership via cluster version overrides prevents upgrades. Please remove overrides before continuing. +---- ++ +[WARNING] +==== +Setting a CVO override puts the entire cluster in an unsupported state. Reported +issues must be reproduced after removing any overrides for support to proceed. +==== diff --git a/snippets/about-pod-affinity.adoc b/snippets/about-pod-affinity.adoc new file mode 100644 index 000000000000..3a6fbe32849c --- /dev/null +++ b/snippets/about-pod-affinity.adoc @@ -0,0 +1,6 @@ +:_mod-docs-content-type: SNIPPET + +Affinity is a property of pods that controls the nodes on which they prefer to be scheduled. Anti-affinity is a property of pods +that prevents a pod from being scheduled on a node. + +In {ocp-product-title}, _pod affinity_ and _pod anti-affinity_ allow you to constrain which nodes your pod is eligible to be scheduled on based on the key-value labels on other pods. 
diff --git a/snippets/logging-api-support-states-snip.adoc b/snippets/logging-api-support-states-snip.adoc new file mode 100644 index 000000000000..d770eee82bf3 --- /dev/null +++ b/snippets/logging-api-support-states-snip.adoc @@ -0,0 +1,33 @@ +:_mod-docs-content-type: SNIPPET + +.Logging API support states +[cols="3",options="header"] +|=== +|CustomResourceDefinition (CRD) +|ApiVersion +|Support state + +|LokiStack +|lokistack.loki.grafana.com/v1 +|Supported from 5.5 + +|RulerConfig +|rulerconfig.loki.grafana/v1 +|Supported from 5.7 + +|AlertingRule +|alertingrule.loki.grafana/v1 +|Supported from 5.7 + +|RecordingRule +|recordingrule.loki.grafana/v1 +|Supported from 5.7 + +|LogFileMetricExporter +|LogFileMetricExporter.logging.openshift.io/v1alpha1 +|Supported from 5.8 + +|ClusterLogForwarder +|clusterlogforwarder.observability.openshift.io/v1 +|Supported from 6.0 +|=== diff --git a/snippets/logging-compatibility-snip.adoc b/snippets/logging-compatibility-snip.adoc new file mode 100644 index 000000000000..9c3c0870fa02 --- /dev/null +++ b/snippets/logging-compatibility-snip.adoc @@ -0,0 +1,6 @@ +:_mod-docs-content-type: SNIPPET + +[NOTE] +==== +Logging is provided as an installable component, with a distinct release cycle from the core {ocp-product-title}. The link:https://access.redhat.com/support/policy/updates/openshift_operators#platform-agnostic[Red Hat OpenShift Container Platform Life Cycle Policy] outlines release compatibility. +==== diff --git a/snippets/logging-loki-statement-snip.adoc b/snippets/logging-loki-statement-snip.adoc new file mode 100644 index 000000000000..c0a1221f1e17 --- /dev/null +++ b/snippets/logging-loki-statement-snip.adoc @@ -0,0 +1,8 @@ +:_mod-docs-content-type: SNIPPET + +Loki is a horizontally scalable, highly available, multi-tenant log aggregation system offered as a GA log store for {logging} {for} that can be visualized with the OpenShift {ObservabilityShortName} UI. The Loki configuration provided by OpenShift {logging-uc} is a short-term log store designed to enable users to perform fast troubleshooting with the collected logs. For that purpose, the {logging} {for} configuration of Loki has short-term storage, and is optimized for very recent queries. + +[IMPORTANT] +==== +For long-term storage or queries over a long time period, users should look to log stores external to their cluster. Loki sizing is only tested and supported for short term storage, for a maximum of 30 days. +==== diff --git a/snippets/logging-retention-period-snip.adoc b/snippets/logging-retention-period-snip.adoc new file mode 100644 index 000000000000..4ecb56c768c2 --- /dev/null +++ b/snippets/logging-retention-period-snip.adoc @@ -0,0 +1,6 @@ +:_mod-docs-content-type: SNIPPET + +[IMPORTANT] +==== +If there is no retention period defined on the s3 bucket or in the LokiStack custom resource (CR), then the logs are not pruned and they stay in the s3 bucket forever, which might fill up the s3 storage. +==== diff --git a/snippets/logging-supported-config-snip.adoc b/snippets/logging-supported-config-snip.adoc new file mode 100644 index 000000000000..730bd440687f --- /dev/null +++ b/snippets/logging-supported-config-snip.adoc @@ -0,0 +1,10 @@ +:_mod-docs-content-type: SNIPPET + +Only the configuration options described in this documentation are supported for {logging}. + +Do not use any other configuration options, as they are unsupported. 
Configuration paradigms might change across {ocp-product-title} releases, and such cases can only be handled gracefully if all configuration possibilities are controlled. If you use configurations other than those described in this documentation, your changes will be overwritten, because Operators are designed to reconcile any differences. + +[NOTE] +==== +If you must perform configurations not described in the {ocp-product-title} documentation, you must set your Red Hat OpenShift Logging Operator to `Unmanaged`. An unmanaged {logging} instance is not supported and does not receive updates until you return its status to `Managed`. +==== diff --git a/snippets/loki-statement-snip.adoc b/snippets/loki-statement-snip.adoc new file mode 100644 index 000000000000..c0a1221f1e17 --- /dev/null +++ b/snippets/loki-statement-snip.adoc @@ -0,0 +1,8 @@ +:_mod-docs-content-type: SNIPPET + +Loki is a horizontally scalable, highly available, multi-tenant log aggregation system offered as a GA log store for {logging} {for} that can be visualized with the OpenShift {ObservabilityShortName} UI. The Loki configuration provided by OpenShift {logging-uc} is a short-term log store designed to enable users to perform fast troubleshooting with the collected logs. For that purpose, the {logging} {for} configuration of Loki has short-term storage, and is optimized for very recent queries. + +[IMPORTANT] +==== +For long-term storage or queries over a long time period, users should look to log stores external to their cluster. Loki sizing is only tested and supported for short term storage, for a maximum of 30 days. +==== diff --git a/snippets/technology-preview.adoc b/snippets/technology-preview.adoc index 532ef5a07c0c..9c1acea0a89b 100644 --- a/snippets/technology-preview.adoc +++ b/snippets/technology-preview.adoc @@ -9,4 +9,4 @@ For more information about the support scope of Red Hat Technology Preview features, see link:https://access.redhat.com/support/offerings/techpreview/[Technology Preview Features Support Scope]. 
==== // Undefine {FeatureName} attribute, so that any mistakes are easily spotted -:!FeatureName: \ No newline at end of file +:!FeatureName: diff --git a/upgrading/_attributes b/upgrading/_attributes new file mode 120000 index 000000000000..f27fd275ea6b --- /dev/null +++ b/upgrading/_attributes @@ -0,0 +1 @@ +../_attributes/ \ No newline at end of file diff --git a/upgrading/images b/upgrading/images new file mode 120000 index 000000000000..e4c5bd02a10a --- /dev/null +++ b/upgrading/images @@ -0,0 +1 @@ +../images/ \ No newline at end of file diff --git a/upgrading/modules b/upgrading/modules new file mode 120000 index 000000000000..43aab75b53c9 --- /dev/null +++ b/upgrading/modules @@ -0,0 +1 @@ +../modules/ \ No newline at end of file diff --git a/upgrading/snippets b/upgrading/snippets new file mode 120000 index 000000000000..9d58b92e5058 --- /dev/null +++ b/upgrading/snippets @@ -0,0 +1 @@ +../snippets/ \ No newline at end of file diff --git a/upgrading/upgrading-to-logging-60.adoc b/upgrading/upgrading-to-logging-60.adoc new file mode 100644 index 000000000000..557434fceeda --- /dev/null +++ b/upgrading/upgrading-to-logging-60.adoc @@ -0,0 +1,477 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="upgrading-to-logging-60"] += Upgrading to Logging 6.0 +:context: upgrading-to-logging-60 + +toc::[] + +Logging v6.0 is a significant upgrade from previous releases, achieving several longstanding goals of Cluster Logging: + +* Introduction of distinct operators to manage logging components (e.g., collectors, storage, visualization). +* Removal of support for managed log storage and visualization based on Elastic products (i.e., Elasticsearch, Kibana). +* Deprecation of the Fluentd log collector implementation. +* Removal of support for `ClusterLogging.logging.openshift.io` and `ClusterLogForwarder.logging.openshift.io` resources. + +[NOTE] +==== +The *cluster-logging-operator* does not provide an automated upgrade process. +==== + +Given the various configurations for log collection, forwarding, and storage, no automated upgrade is provided by the *cluster-logging-operator*. This documentation assists administrators in converting existing `ClusterLogging.logging.openshift.io` and `ClusterLogForwarder.logging.openshift.io` specifications to the new API. Examples of migrated `ClusterLogForwarder.observability.openshift.io` resources for common use cases are included. + +include::modules/logging-oc-explain.adoc[leveloffset=+1] + +== Log Storage + +The only managed log storage solution available in this release is a Lokistack, managed by the *loki-operator*. This solution, previously available as the preferred alternative to the managed Elasticsearch offering, remains unchanged in its deployment process. + +[IMPORTANT] +==== +To continue using an existing Red Hat managed Elasticsearch or Kibana deployment provided by the *elasticsearch-operator*, remove the owner references from the `Elasticsearch` resource named `elasticsearch`, and the `Kibana` resource named `kibana` in the `openshift-logging` namespace before removing the `ClusterLogging` resource named `instance` in the same namespace. +==== + + +. Temporarily set *ClusterLogging* to state `Unmanaged` ++ +[source,terminal] +---- +$ oc -n openshift-logging patch clusterlogging/instance -p '{"spec":{"managementState": "Unmanaged"}}' --type=merge +---- + +. 
Remove *ClusterLogging* `ownerReferences` from the *Elasticsearch* resource ++ +The following command ensures that *ClusterLogging* no longer owns the *Elasticsearch* resource. Updates to the *ClusterLogging* resource's `logStore` field will no longer affect the *Elasticsearch* resource. ++ +[source,terminal] +---- +$ oc -n openshift-logging patch elasticsearch/elasticsearch -p '{"metadata":{"ownerReferences": []}}' --type=merge +---- + +. Remove *ClusterLogging* `ownerReferences` from the *Kibana* resource ++ +The following command ensures that *ClusterLogging* no longer owns the *Kibana* resource. Updates to the *ClusterLogging* resource's `visualization` field will no longer affect the *Kibana* resource. ++ +[source,terminal] +---- +$ oc -n openshift-logging patch kibana/kibana -p '{"metadata":{"ownerReferences": []}}' --type=merge +---- + +. Set *ClusterLogging* to state `Managed` ++ +[source,terminal] +---- +$ oc -n openshift-logging patch clusterlogging/instance -p '{"spec":{"managementState": "Managed"}}' --type=merge +---- + +== Log Visualization +[subs="+quotes"] +The OpenShift console UI plugin for log visualization has been moved to the *cluster-observability-operator* from the *cluster-logging-operator*. +// Pending support statement. + + + +== Log Collection and Forwarding +// Can't link to github, need to figure a workaround. + +Log collection and forwarding configurations are now specified under the new link:https://github.com/openshift/cluster-logging-operator/blob/master/docs/reference/operator/api_observability_v1.adoc[API], part of the `observability.openshift.io` API group. The following sections highlight the differences from the old API resources. + +[NOTE] +==== +Vector is the only supported collector implementation. +==== + +== Management, Resource Allocation, and Workload Scheduling + +Configuration for management state (e.g., Managed, Unmanaged), resource requests and limits, tolerations, and node selection is now part of the new *ClusterLogForwarder* API. + +.Previous Configuration +[source,yaml] +---- +apiVersion: "logging.openshift.io/v1" +kind: "ClusterLogging" +spec: + managementState: "Managed" + collection: + resources: + limits: {} + requests: {} + nodeSelector: {} + tolerations: {} +---- + +.Current Configuration +[source,yaml] +---- +apiVersion: "observability.openshift.io/v1" +kind: ClusterLogForwarder +spec: + managementState: Managed + collector: + resources: + limits: {} + requests: {} + nodeSelector: {} + tolerations: {} +---- + +== Input Specifications + +The input specification is an optional part of the *ClusterLogForwarder* specification. Administrators can continue to use the predefined values of *application*, *infrastructure*, and *audit* to collect these sources. + +=== Application Inputs + +Namespace and container inclusions and exclusions have been consolidated into a single field. 
+ +.5.9 Application Input with Namespace and Container Includes and Excludes +[source,yaml] +---- +apiVersion: "logging.openshift.io/v1" +kind: ClusterLogForwarder +spec: + inputs: + - name: application-logs + type: application + application: + namespaces: + - foo + - bar + includes: + - namespace: my-important + container: main + excludes: + - container: too-verbose +---- + +.6.0 Application Input with Namespace and Container Includes and Excludes +[source,yaml] +---- +apiVersion: "observability.openshift.io/v1" +kind: ClusterLogForwarder +spec: + inputs: + - name: application-logs + type: application + application: + includes: + - namespace: foo + - namespace: bar + - namespace: my-important + container: main + excludes: + - container: too-verbose +---- + +[NOTE] +==== +*application*, *infrastructure*, and *audit* are reserved words and cannot be used as names when defining an input. +==== + +=== Input Receivers + +Changes to input receivers include: + +* Explicit configuration of the type at the receiver level. +* Port settings moved to the receiver level. + +.5.9 Input Receivers +[source,yaml] +---- +apiVersion: "logging.openshift.io/v1" +kind: ClusterLogForwarder +spec: + inputs: + - name: an-http + receiver: + http: + port: 8443 + format: kubeAPIAudit + - name: a-syslog + receiver: + type: syslog + syslog: + port: 9442 +---- + +.6.0 Input Receivers +[source,yaml] +---- +apiVersion: "observability.openshift.io/v1" +kind: ClusterLogForwarder +spec: + inputs: + - name: an-http + type: receiver + receiver: + type: http + port: 8443 + http: + format: kubeAPIAudit + - name: a-syslog + type: receiver + receiver: + type: syslog + port: 9442 +---- + +== Output Specifications + +High-level changes to output specifications include: + +* URL settings moved to each output type specification. +* Tuning parameters moved to each output type specification. +* Separation of TLS configuration from authentication. +* Explicit configuration of keys and secret/configmap for TLS and authentication. + +== Secrets and TLS Configuration + +Secrets and TLS configurations are now separated into authentication and TLS configuration for each output. They must be explicitly defined in the specification rather than relying on administrators to define secrets with recognized keys. Upgrading TLS and authorization configurations requires administrators to understand previously recognized keys to continue using existing secrets. Examples in the following sections provide details on how to configure *ClusterLogForwarder* secrets to forward to existing Red Hat managed log storage solutions. 
+ +== Red Hat Managed Elasticsearch + +.v5.9 Forwarding to Red Hat Managed Elasticsearch +[source,yaml] +---- +apiVersion: logging.openshift.io/v1 +kind: ClusterLogging +metadata: + name: instance + namespace: openshift-logging +spec: + logStore: + type: elasticsearch +---- + +.v6.0 Forwarding to Red Hat Managed Elasticsearch +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +metadata: + name: instance + namespace: openshift-logging +spec: + serviceAccount: + name: + managementState: Managed + outputs: + - name: audit-elasticsearch + type: elasticsearch + elasticsearch: + url: https://elasticsearch:9200 + version: 6 + index: audit-write + tls: + ca: + key: ca-bundle.crt + secretName: collector + certificate: + key: tls.crt + secretName: collector + key: + key: tls.key + secretName: collector + - name: app-elasticsearch + type: elasticsearch + elasticsearch: + url: https://elasticsearch:9200 + version: 6 + index: app-write + tls: + ca: + key: ca-bundle.crt + secretName: collector + certificate: + key: tls.crt + secretName: collector + key: + key: tls.key + secretName: collector + - name: infra-elasticsearch + type: elasticsearch + elasticsearch: + url: https://elasticsearch:9200 + version: 6 + index: infra-write + tls: + ca: + key: ca-bundle.crt + secretName: collector + certificate: + key: tls.crt + secretName: collector + key: + key: tls.key + secretName: collector + pipelines: + - name: app + inputRefs: + - application + outputRefs: + - app-elasticsearch + - name: audit + inputRefs: + - audit + outputRefs: + - audit-elasticsearch + - name: infra + inputRefs: + - infrastructure + outputRefs: + - infra-elasticsearch +---- + +== Red Hat Managed LokiStack + +.v5.9 Forwarding to Red Hat Managed LokiStack +[source,yaml] +---- +apiVersion: logging.openshift.io/v1 +kind: ClusterLogging +metadata: + name: instance + namespace: openshift-logging +spec: + logStore: + type: lokistack + lokistack: + name: logging-loki +---- + +.v6.0 Forwarding to Red Hat Managed LokiStack +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +metadata: + name: instance + namespace: openshift-logging +spec: + serviceAccount: + name: + outputs: + - name: default-lokistack + type: lokiStack + lokiStack: + target: + name: logging-loki + namespace: openshift-logging + authentication: + token: + from: serviceAccount + tls: + ca: + key: service-ca.crt + configMapName: openshift-service-ca.crt + pipelines: + - outputRefs: + - default-lokistack + - inputRefs: + - application + - infrastructure +---- + +== Filters and Pipeline Configuration + +Pipeline configurations now define only the routing of input sources to their output destinations, with any required transformations configured separately as filters. All attributes of pipelines from previous releases have been converted to filters in this release. Individual filters are defined in the `filters` specification and referenced by a pipeline. 
+ +.5.9 Filters +[source,yaml] +---- +apiVersion: logging.openshift.io/v1 +kind: ClusterLogForwarder +spec: + pipelines: + - name: application-logs + parse: json + labels: + foo: bar + detectMultilineErrors: true +---- + +.6.0 Filter Configuration +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +spec: + filters: + - name: detectexception + type: detectMultilineException + - name: parse-json + type: parse + - name: labels + type: openshiftLabels + openshiftLabels: + foo: bar + pipelines: + - name: application-logs + filterRefs: + - detectexception + - labels + - parse-json +---- + +== Validation and Status + +Most validations are enforced when a resource is created or updated, providing immediate feedback. This is a departure from previous releases, where validation occurred post-creation and required inspecting the resource status. Some validation still occurs post-creation for cases where it is not possible to validate at creation or update time. + +Instances of the `ClusterLogForwarder.observability.openshift.io` must satisfy the following conditions before the operator will deploy the log collector: Authorized, Valid, Ready. An example of these conditions is: + +.6.0 Status Conditions +[source,yaml] +---- +apiVersion: observability.openshift.io/v1 +kind: ClusterLogForwarder +status: + conditions: + - lastTransitionTime: "2024-09-13T03:28:44Z" + message: 'permitted to collect log types: [application]' + reason: ClusterRolesExist + status: "True" + type: observability.openshift.io/Authorized + - lastTransitionTime: "2024-09-13T12:16:45Z" + message: "" + reason: ValidationSuccess + status: "True" + type: observability.openshift.io/Valid + - lastTransitionTime: "2024-09-13T12:16:45Z" + message: "" + reason: ReconciliationComplete + status: "True" + type: Ready + filterConditions: + - lastTransitionTime: "2024-09-13T13:02:59Z" + message: filter "detectexception" is valid + reason: ValidationSuccess + status: "True" + type: observability.openshift.io/ValidFilter-detectexception + - lastTransitionTime: "2024-09-13T13:02:59Z" + message: filter "parse-json" is valid + reason: ValidationSuccess + status: "True" + type: observability.openshift.io/ValidFilter-parse-json + inputConditions: + - lastTransitionTime: "2024-09-13T12:23:03Z" + message: input "application1" is valid + reason: ValidationSuccess + status: "True" + type: observability.openshift.io/ValidInput-application1 + outputConditions: + - lastTransitionTime: "2024-09-13T13:02:59Z" + message: output "default-lokistack-application1" is valid + reason: ValidationSuccess + status: "True" + type: observability.openshift.io/ValidOutput-default-lokistack-application1 + pipelineConditions: + - lastTransitionTime: "2024-09-13T03:28:44Z" + message: pipeline "default-before" is valid + reason: ValidationSuccess + status: "True" + type: observability.openshift.io/ValidPipeline-default-before +---- + +[NOTE] +==== +Conditions that are satisfied and applicable have a "status" value of "True". Conditions with a status other than "True" provide a reason and a message explaining the issue. +====