Merge pull request openshift#2640 from ewolinetz/logging_deployer_tasks
Logging deployer tasks
sdodson authored Jan 18, 2017
2 parents a2d9da8 + 598b265 commit 7b512bf
Showing 60 changed files with 3,499 additions and 0 deletions.
35 changes: 35 additions & 0 deletions playbooks/byo/openshift-cluster/openshift-logging.yml
@@ -0,0 +1,35 @@
---
#
# This playbook is a preview of upcoming changes for installing
# Hosted logging on. See inventory/byo/hosts.*.example for the
# currently supported method.
#
- include: ../../common/openshift-cluster/verify_ansible_version.yml

- name: Create initial host groups for localhost
  hosts: localhost
  connection: local
  become: no
  gather_facts: no
  tags:
  - always
  tasks:
  - include_vars: ../../byo/openshift-cluster/cluster_hosts.yml
  - add_host:
      name: "{{ item }}"
      groups: l_oo_all_hosts
    with_items: "{{ g_all_hosts | default([]) }}"

- name: Create initial host groups for all hosts
  hosts: l_oo_all_hosts
  gather_facts: no
  tags:
  - always
  tasks:
  - include_vars: ../../byo/openshift-cluster/cluster_hosts.yml

- include: ../../common/openshift-cluster/openshift_logging.yml
  vars:
    openshift_cluster_id: "{{ cluster_id | default('default') }}"
    openshift_debug_level: "{{ debug_level | default(2) }}"
    openshift_deployment_type: "{{ deployment_type }}"
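For reference, a hedged sketch of invoking this preview playbook against a BYO inventory; the inventory path and the extra variable shown below are illustrative assumptions, not part of this commit:

```sh
# Illustrative invocation only; adjust the inventory path to your environment.
ansible-playbook -i /path/to/hosts \
  playbooks/byo/openshift-cluster/openshift-logging.yml \
  -e openshift_logging_install_logging=true
```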
5 changes: 5 additions & 0 deletions playbooks/common/openshift-cluster/openshift_logging.yml
@@ -0,0 +1,5 @@
---
- name: OpenShift Aggregated Logging
  hosts: oo_first_master
  roles:
  - openshift_logging
88 changes: 88 additions & 0 deletions roles/openshift_logging/README.md
@@ -0,0 +1,88 @@
## openshift_logging Role

### Please note this role is still a work in progress

This role is used for installing the Aggregated Logging stack. It should be run against
a single host; it will create any missing certificates and API objects that the current
[logging deployer](https://github.com/openshift/origin-aggregated-logging/tree/master/deployer) creates.

As part of the installation, it is recommended that you add the Fluentd node selector label
to the list of persisted [node labels](https://docs.openshift.org/latest/install_config/install/advanced_install.html#configuring-node-host-labels).
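As a rough illustration, a BYO inventory could persist the Fluentd label alongside any other node labels; the hostname and the extra `region` label below are assumptions:

```
[nodes]
node1.example.com openshift_node_labels="{'region': 'primary', 'logging-infra-fluentd': 'true'}"
```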

### Required vars:

- `openshift_logging_install_logging`: When `True` the `openshift_logging` role will install Aggregated Logging.
- `openshift_logging_upgrade_logging`: When `True` the `openshift_logging` role will upgrade Aggregated Logging.

When both `openshift_logging_install_logging` and `openshift_logging_upgrade_logging` are `False` the `openshift_logging` role will uninstall Aggregated Logging.
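For example, a minimal sketch of the vars for a fresh install (an upgrade run would instead set `openshift_logging_upgrade_logging` to `True`):

```yaml
openshift_logging_install_logging: True
openshift_logging_upgrade_logging: False
```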

### Optional vars:

- `openshift_logging_image_prefix`: The prefix for the logging images to use. Defaults to 'docker.io/openshift/origin-'.
- `openshift_logging_image_version`: The image version for the logging images to use. Defaults to 'latest'.
- `openshift_logging_use_ops`: If 'True', set up a second ES and Kibana cluster for infrastructure logs. Defaults to 'False'.
- `master_url`: The URL for the Kubernetes master; this does not need to be public-facing but should be accessible from within the cluster. Defaults to 'https://kubernetes.default.svc.cluster.local'.
- `public_master_url`: The public-facing URL for the Kubernetes master; this is used for authentication redirection. Defaults to 'https://localhost:8443'.
- `openshift_logging_namespace`: The namespace that Aggregated Logging will be installed in. Defaults to 'logging'.
- `openshift_logging_curator_default_days`: The default minimum age (in days) Curator uses for deleting log records. Defaults to '30'.
- `openshift_logging_curator_run_hour`: The hour of the day at which Curator will run. Defaults to '0'.
- `openshift_logging_curator_run_minute`: The minute of the hour at which Curator will run. Defaults to '0'.
- `openshift_logging_curator_run_timezone`: The timezone Curator uses to determine its run time. Defaults to 'UTC'.
- `openshift_logging_curator_script_log_level`: The script log level for Curator. Defaults to 'INFO'.
- `openshift_logging_curator_log_level`: The log level for the Curator process. Defaults to 'ERROR'.
- `openshift_logging_curator_cpu_limit`: The amount of CPU to allocate to Curator. Defaults to '100m'.
- `openshift_logging_curator_memory_limit`: The amount of memory to allocate to Curator. Unset if not specified.

- `openshift_logging_kibana_hostname`: The Kibana hostname. Defaults to 'kibana.example.com'.
- `openshift_logging_kibana_cpu_limit`: The amount of CPU to allocate to Kibana. Unset if not specified.
- `openshift_logging_kibana_memory_limit`: The amount of memory to allocate to Kibana. Unset if not specified.
- `openshift_logging_kibana_proxy_debug`: When 'True', set the Kibana proxy log level to DEBUG. Defaults to 'false'.
- `openshift_logging_kibana_proxy_cpu_limit`: The amount of CPU to allocate to the Kibana proxy. Unset if not specified.
- `openshift_logging_kibana_proxy_memory_limit`: The amount of memory to allocate to the Kibana proxy. Unset if not specified.
- `openshift_logging_kibana_replica_count`: The number of replicas Kibana should be scaled up to. Defaults to 1.

- `openshift_logging_fluentd_nodeselector`: The node selector that the Fluentd daemonset uses to determine where to deploy to. Defaults to '"logging-infra-fluentd": "true"'.
- `openshift_logging_fluentd_cpu_limit`: The CPU limit for Fluentd pods. Defaults to '100m'.
- `openshift_logging_fluentd_memory_limit`: The memory limit for Fluentd pods. Defaults to '512Mi'.
- `openshift_logging_fluentd_es_copy`: Whether or not to use the ES_COPY feature for Fluentd (DEPRECATED). Defaults to 'False'.
- `openshift_logging_fluentd_use_journal`: Whether or not Fluentd should read log entries from Journal. Defaults to 'False'. NOTE: Fluentd will attempt to detect whether or not Docker is using the journald log driver and may overwrite this value.
- `openshift_logging_fluentd_journal_read_from_head`: Whether or not Fluentd will try to read from the head of the journal when first starting up; using this may cause a delay in ES receiving current log records. Defaults to 'False'.
- `openshift_logging_fluentd_hosts`: List of nodes that should be labeled for Fluentd to be deployed to. Defaults to ['--all'].
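For example, a sketch that keeps the default node selector but labels only two specific nodes (the hostnames are assumptions):

```yaml
openshift_logging_fluentd_nodeselector: {'logging-infra-fluentd': 'true'}
openshift_logging_fluentd_hosts: ['node1.example.com', 'node2.example.com']
```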

- `openshift_logging_es_host`: The name of the ES service Fluentd should send logs to. Defaults to 'logging-es'.
- `openshift_logging_es_port`: The port for the ES service Fluentd should send its logs to. Defaults to '9200'.
- `openshift_logging_es_ca`: The location of the CA certificate Fluentd uses to communicate with `openshift_logging_es_host`. Defaults to '/etc/fluent/keys/ca'.
- `openshift_logging_es_client_cert`: The location of the client certificate Fluentd uses for `openshift_logging_es_host`. Defaults to '/etc/fluent/keys/cert'.
- `openshift_logging_es_client_key`: The location of the client key Fluentd uses for `openshift_logging_es_host`. Defaults to '/etc/fluent/keys/key'.

- `openshift_logging_es_cluster_size`: The number of ES cluster members. Defaults to '1'.
- `openshift_logging_es_cpu_limit`: The CPU limit for the ES cluster. Unset if not specified.
- `openshift_logging_es_memory_limit`: The amount of RAM that should be assigned to ES. Defaults to '1024Mi'.
- `openshift_logging_es_pv_selector`: A key/value map added to a PVC in order to select specific PVs. Defaults to 'None'.
- `openshift_logging_es_pvc_dynamic`: Whether or not to add the dynamic PVC annotation for any generated PVCs. Defaults to 'False'.
- `openshift_logging_es_pvc_size`: The requested size for the ES PVCs; when not provided, the role will not generate any PVCs. Defaults to '""'.
- `openshift_logging_es_pvc_prefix`: The prefix for the generated PVCs. Defaults to 'logging-es'.
- `openshift_logging_es_recover_after_time`: The amount of time ES will wait before it tries to recover. Defaults to '5m'.
- `openshift_logging_es_storage_group`: The storage group used for ES. Defaults to '65534'.
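For example, a sketch of an ES storage configuration requesting dynamically provisioned PVCs (the cluster size and PVC size are illustrative; leaving `openshift_logging_es_pvc_size` empty means no PVCs are generated):

```yaml
openshift_logging_es_cluster_size: 3
openshift_logging_es_pvc_size: 10Gi
openshift_logging_es_pvc_dynamic: True
openshift_logging_es_pvc_prefix: logging-es
```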

When `openshift_logging_use_ops` is `True`, there are some additional vars. These work the
same as their non-ops counterparts above, but apply to the ops cluster instance:
- `openshift_logging_es_ops_host`: logging-es-ops
- `openshift_logging_es_ops_port`: 9200
- `openshift_logging_es_ops_ca`: /etc/fluent/keys/ca
- `openshift_logging_es_ops_client_cert`: /etc/fluent/keys/cert
- `openshift_logging_es_ops_client_key`: /etc/fluent/keys/key
- `openshift_logging_es_ops_cluster_size`: 1
- `openshift_logging_es_ops_cpu_limit`: The CPU limit for the ops ES cluster. Unset if not specified.
- `openshift_logging_es_ops_memory_limit`: 1024Mi
- `openshift_logging_es_ops_pvc_dynamic`: False
- `openshift_logging_es_ops_pvc_size`: ""
- `openshift_logging_es_ops_pvc_prefix`: logging-es-ops
- `openshift_logging_es_ops_recover_after_time`: 5m
- `openshift_logging_es_ops_storage_group`: 65534
- `openshift_logging_kibana_ops_hostname`: The Operations Kibana hostname. Defaults to 'kibana-ops.example.com'.
- `openshift_logging_kibana_ops_cpu_limit`: The amount of CPU to allocate to ops Kibana. Unset if not specified.
- `openshift_logging_kibana_ops_memory_limit`: The amount of memory to allocate to ops Kibana. Unset if not specified.
- `openshift_logging_kibana_ops_proxy_cpu_limit`: The amount of CPU to allocate to the ops Kibana proxy. Unset if not specified.
- `openshift_logging_kibana_ops_proxy_memory_limit`: The amount of memory to allocate to the ops Kibana proxy. Unset if not specified.
- `openshift_logging_kibana_ops_replica_count`: The number of replicas ops Kibana should be scaled up to. Defaults to 1.
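Putting the ops vars together, a hedged example of an ops-enabled configuration (the hostnames and memory limit are assumptions):

```yaml
openshift_logging_use_ops: True
openshift_logging_kibana_hostname: kibana.example.com
openshift_logging_kibana_ops_hostname: kibana-ops.example.com
openshift_logging_es_ops_cluster_size: 1
openshift_logging_es_ops_memory_limit: 2Gi
```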
85 changes: 85 additions & 0 deletions roles/openshift_logging/defaults/main.yml
@@ -0,0 +1,85 @@
---
openshift_logging_image_prefix: docker.io/openshift/origin-
openshift_logging_image_version: latest
openshift_logging_use_ops: False
master_url: "https://kubernetes.default.svc.{{ openshift.common.dns_domain }}"
public_master_url: "https://{{openshift.common.public_hostname}}:8443"
openshift_logging_namespace: logging
openshift_logging_install_logging: True

openshift_logging_curator_default_days: 30
openshift_logging_curator_run_hour: 0
openshift_logging_curator_run_minute: 0
openshift_logging_curator_run_timezone: UTC
openshift_logging_curator_script_log_level: INFO
openshift_logging_curator_log_level: ERROR
openshift_logging_curator_cpu_limit: 100m
openshift_logging_curator_memory_limit: null

openshift_logging_curator_ops_cpu_limit: 100m
openshift_logging_curator_ops_memory_limit: null

openshift_logging_kibana_hostname: "kibana.{{openshift.common.dns_domain}}"
openshift_logging_kibana_cpu_limit: null
openshift_logging_kibana_memory_limit: null
openshift_logging_kibana_proxy_debug: false
openshift_logging_kibana_proxy_cpu_limit: null
openshift_logging_kibana_proxy_memory_limit: null
openshift_logging_kibana_replica_count: 1

openshift_logging_kibana_ops_hostname: "kibana-ops.{{openshift.common.dns_domain}}"
openshift_logging_kibana_ops_cpu_limit: null
openshift_logging_kibana_ops_memory_limit: null
openshift_logging_kibana_ops_proxy_debug: false
openshift_logging_kibana_ops_proxy_cpu_limit: null
openshift_logging_kibana_ops_proxy_memory_limit: null
openshift_logging_kibana_ops_replica_count: 1

openshift_logging_fluentd_nodeselector: {'logging-infra-fluentd': 'true'}
openshift_logging_fluentd_cpu_limit: 100m
openshift_logging_fluentd_memory_limit: 512Mi
openshift_logging_fluentd_es_copy: false
openshift_logging_fluentd_use_journal: false
openshift_logging_fluentd_journal_read_from_head: false
openshift_logging_fluentd_hosts: ['--all']

openshift_logging_es_host: logging-es
openshift_logging_es_port: 9200
openshift_logging_es_ca: /etc/fluent/keys/ca
openshift_logging_es_client_cert: /etc/fluent/keys/cert
openshift_logging_es_client_key: /etc/fluent/keys/key
openshift_logging_es_cluster_size: 1
openshift_logging_es_cpu_limit: null
openshift_logging_es_memory_limit: 1024Mi
openshift_logging_es_pv_selector: null
openshift_logging_es_pvc_dynamic: False
openshift_logging_es_pvc_size: ""
openshift_logging_es_pvc_prefix: logging-es
openshift_logging_es_recover_after_time: 5m
openshift_logging_es_storage_group: 65534

# allow cluster-admin or cluster-reader to view operations index
openshift_logging_es_ops_allow_cluster_reader: False

openshift_logging_es_ops_host: logging-es-ops
openshift_logging_es_ops_port: 9200
openshift_logging_es_ops_ca: /etc/fluent/keys/ca
openshift_logging_es_ops_client_cert: /etc/fluent/keys/cert
openshift_logging_es_ops_client_key: /etc/fluent/keys/key
openshift_logging_es_ops_cluster_size: 1
openshift_logging_es_ops_cpu_limit: null
openshift_logging_es_ops_memory_limit: 1024Mi
openshift_logging_es_ops_pv_selector: null
openshift_logging_es_ops_pvc_dynamic: False
openshift_logging_es_ops_pvc_size: ""
openshift_logging_es_ops_pvc_prefix: logging-es-ops
openshift_logging_es_ops_recover_after_time: 5m
openshift_logging_es_ops_storage_group: 65534

# The following can be uncommented to provide values for configmaps -- take care when providing file contents, as bad content may cause your cluster to not operate correctly
#es_logging_contents:
#es_config_contents:
#curator_config_contents:
#fluentd_config_contents:
#fluentd_throttle_contents:
#fluentd_secureforward_contents:
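A hypothetical way to populate one of these configmap values from a local file, using a standard Ansible file lookup (the file name is an assumption):

```yaml
curator_config_contents: "{{ lookup('file', 'custom-curator.yml') }}"
```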
18 changes: 18 additions & 0 deletions roles/openshift_logging/files/curator.yml
@@ -0,0 +1,18 @@
# Logging example curator config file

# uncomment and use this to override the defaults from env vars
#.defaults:
#  delete:
#    days: 30
#  runhour: 0
#  runminute: 0

# to keep ops logs for a different duration:
#.operations:
#  delete:
#    weeks: 8

# example for a normal project
#myapp:
#  delete:
#    weeks: 1
72 changes: 72 additions & 0 deletions roles/openshift_logging/files/elasticsearch-logging.yml
@@ -0,0 +1,72 @@
# you can override this by setting a system property, for example -Des.logger.level=DEBUG
es.logger.level: INFO
rootLogger: ${es.logger.level}, console, file
logger:
  # log action execution errors for easier debugging
  action: WARN
  # reduce the logging for aws, too much is logged under the default INFO
  com.amazonaws: WARN
  io.fabric8.elasticsearch: ${PLUGIN_LOGLEVEL}
  io.fabric8.kubernetes: ${PLUGIN_LOGLEVEL}

  # gateway
  #gateway: DEBUG
  #index.gateway: DEBUG

  # peer shard recovery
  #indices.recovery: DEBUG

  # discovery
  #discovery: TRACE

  index.search.slowlog: TRACE, index_search_slow_log_file
  index.indexing.slowlog: TRACE, index_indexing_slow_log_file

  # search-guard
  com.floragunn.searchguard: WARN

additivity:
  index.search.slowlog: false
  index.indexing.slowlog: false

appender:
  console:
    type: console
    layout:
      type: consolePattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

  file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

  # Use the following log4j-extras RollingFileAppender to enable gzip compression of log files.
  # For more information see https://logging.apache.org/log4j/extras/apidocs/org/apache/log4j/rolling/RollingFileAppender.html
  #file:
    #type: extrasRollingFile
    #file: ${path.logs}/${cluster.name}.log
    #rollingPolicy: timeBased
    #rollingPolicy.FileNamePattern: ${path.logs}/${cluster.name}.log.%d{yyyy-MM-dd}.gz
    #layout:
      #type: pattern
      #conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

  index_search_slow_log_file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}_index_search_slowlog.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

  index_indexing_slow_log_file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}_index_indexing_slowlog.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
79 changes: 79 additions & 0 deletions roles/openshift_logging/files/es_migration.sh
@@ -0,0 +1,79 @@
#!/bin/bash

CA=${1:-/etc/openshift/logging/ca.crt}
KEY=${2:-/etc/openshift/logging/system.admin.key}
CERT=${3:-/etc/openshift/logging/system.admin.crt}
openshift_logging_es_host=${4:-logging-es}
openshift_logging_es_port=${5:-9200}
namespace=${6:-logging}

# for each index in _cat/indices
# skip indices that begin with . - .kibana, .operations, etc.
# skip indices that contain a uuid
# get a list of unique project
# daterx - the date regex that matches the .%Y.%m.%d at the end of the indices
# we are interested in - the awk will strip that part off
function get_list_of_indices() {
  curl -s --cacert $CA --key $KEY --cert $CERT https://$openshift_logging_es_host:$openshift_logging_es_port/_cat/indices | \
    awk -v daterx='[.]20[0-9]{2}[.][0-1]?[0-9][.][0-9]{1,2}$' \
      '$3 !~ "^[.]" && $3 !~ "^[^.]+[.][^.]+"daterx && $3 !~ "^project." && $3 ~ daterx {print gensub(daterx, "", "", $3)}' | \
    sort -u
}

# for each index in _cat/indices
# skip indices that begin with . - .kibana, .operations, etc.
# get a list of unique project.uuid
# daterx - the date regex that matches the .%Y.%m.%d at the end of the indices
# we are interested in - the awk will strip that part off
function get_list_of_proj_uuid_indices() {
  curl -s --cacert $CA --key $KEY --cert $CERT https://$openshift_logging_es_host:$openshift_logging_es_port/_cat/indices | \
    awk -v daterx='[.]20[0-9]{2}[.][0-1]?[0-9][.][0-9]{1,2}$' \
      '$3 !~ "^[.]" && $3 ~ "^[^.]+[.][^.]+"daterx && $3 !~ "^project." && $3 ~ daterx {print gensub(daterx, "", "", $3)}' | \
    sort -u
}

if [[ -z "$(oc get pods -l component=es -o jsonpath='{.items[?(@.status.phase == "Running")].metadata.name}')" ]]; then
  echo "No Elasticsearch pods found running. Cannot update common data model."
  exit 1
fi

count=$(get_list_of_indices | wc -l)
if [ $count -eq 0 ]; then
  echo No matching indices found - skipping update_for_uuid
else
  echo Creating aliases for $count index patterns . . .
  {
    echo '{"actions":['
    get_list_of_indices | \
    while IFS=. read proj ; do
      # e.g. make test.uuid.* an alias of test.* so we can search for
      # /test.uuid.*/_search and get both the test.uuid.* and
      # the test.* indices
      uid=$(oc get project "$proj" -o jsonpath='{.metadata.uid}' 2>/dev/null)
      [ -n "$uid" ] && echo "{\"add\":{\"index\":\"$proj.*\",\"alias\":\"$proj.$uid.*\"}}"
    done
    echo ']}'
  } | curl -s --cacert $CA --key $KEY --cert $CERT -XPOST -d @- "https://$openshift_logging_es_host:$openshift_logging_es_port/_aliases"
fi

count=$(get_list_of_proj_uuid_indices | wc -l)
if [ $count -eq 0 ] ; then
  echo No matching indices found - skipping update_for_common_data_model
  exit 0
fi

echo Creating aliases for $count index patterns . . .
# for each index in _cat/indices
# skip indices that begin with . - .kibana, .operations, etc.
# get a list of unique project.uuid
# daterx - the date regex that matches the .%Y.%m.%d at the end of the indices
# we are interested in - the awk will strip that part off
{
  echo '{"actions":['
  get_list_of_proj_uuid_indices | \
  while IFS=. read proj uuid ; do
    # e.g. make project.test.uuid.* an alias of test.uuid.* so we can search for
    # /project.test.uuid.*/_search and get both the test.uuid.* and
    # the project.test.uuid.* indices
    echo "{\"add\":{\"index\":\"$proj.$uuid.*\",\"alias\":\"${PROJ_PREFIX}$proj.$uuid.*\"}}"
  done
  echo ']}'
} | curl -s --cacert $CA --key $KEY --cert $CERT -XPOST -d @- "https://$openshift_logging_es_host:$openshift_logging_es_port/_aliases"
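A hypothetical standalone invocation of the migration script, passing the same values as the positional defaults above:

```sh
# Illustrative only; the script falls back to these values when no arguments are given.
bash es_migration.sh /etc/openshift/logging/ca.crt \
  /etc/openshift/logging/system.admin.key \
  /etc/openshift/logging/system.admin.crt \
  logging-es 9200 logging
```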