From 1e4cd71b7b25941e44ec85883f02b187c173ac18 Mon Sep 17 00:00:00 2001 From: Bjoern Teipel Date: Wed, 24 Oct 2018 15:03:00 -0500 Subject: [PATCH] Report conntrack count for all namespaces In addition to checking the container root namespace, nested namespaces inside the container are checked against the container maximum configured connection tracking count. Only the highest connection tracking count is reported, as MAAS limits the number of metrics it can process. Closes-Bug: TURTLES-1006 --- .../files/rax-maas/plugins/conntrack_count.py | 60 ++++++++++++++++++- .../rax-maas/conntrack_count.yaml.j2 | 4 +- ...ecks-conntack-plugin-5dcd0ff5de96a3b2.yaml | 10 ++++ 3 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 releasenotes/notes/TURTLES-1006-add-ns-checks-conntack-plugin-5dcd0ff5de96a3b2.yaml diff --git a/playbooks/files/rax-maas/plugins/conntrack_count.py b/playbooks/files/rax-maas/plugins/conntrack_count.py index 5d1e6a60a..8cec92339 100755 --- a/playbooks/files/rax-maas/plugins/conntrack_count.py +++ b/playbooks/files/rax-maas/plugins/conntrack_count.py @@ -24,6 +24,8 @@ pass import maas_common +import os +import subprocess import tempfile @@ -63,9 +65,29 @@ def get_metrics(): 'nf_conntrack_max': { 'path': '/proc/sys/net/netfilter/nf_conntrack_max'}} + # Retrieve root namespace count for data in metrics.viewvalues(): data['value'] = get_value(data['path']) + # Retrieve conntrack count per namespace + # and report the namespace with the highest count. + # This is necessary to limit the number of metrics to report to MAAS, + # as we can not report a metric per namespace, which by nature are + # also volatile. 
+ try: + namespaces = os.listdir('/var/run/netns') + for ns in namespaces: + ps = subprocess.check_output(['ip', 'netns', 'exec', + ns, 'cat', + '/proc/sys/net/netfilter/' + 'nf_conntrack_count']) + nscount = int(ps.strip(os.linesep)) + + if nscount > metrics['nf_conntrack_count']['value']: + metrics['nf_conntrack_count']['value'] = nscount + except (OSError): + pass + return metrics @@ -89,11 +111,13 @@ def get_metrics_lxc_container(container_name=''): # Check if container is even running try: with tempfile.TemporaryFile() as tmpfile: + # Retrieve root namespace count if cont.attach_wait(lxc.attach_run_command, ['cat', '/proc/sys/net/netfilter/nf_conntrack_count', '/proc/sys/net/netfilter/nf_conntrack_max'], - stdout=tmpfile) > -1: + stdout=tmpfile, + stderr=tempfile.TemporaryFile()) > -1: tmpfile.seek(0) output = tmpfile.read() @@ -101,7 +125,39 @@ def get_metrics_lxc_container(container_name=''): 'nf_conntrack_count': {'value': output.split('\n')[0]}, 'nf_conntrack_max': {'value': output.split('\n')[1]}} - return metrics + # Retrieve conntrack count per namespace + # and report the namespace with the highest count. + # This is necessary to limit the number of metrics to report to MAAS, + # as we can not report a metric per namespace, which by nature are + # also volatile. 
+ with tempfile.TemporaryFile() as nsfile: + if cont.attach_wait(lxc.attach_run_command, + ['ls', + '-1', + '/var/run/netns'], + stdout=nsfile, + stderr=tempfile.TemporaryFile()) > -1: + nsfile.seek(0) + + for line in nsfile.readlines(): + ns = line.strip(os.linesep) + nscountfile = tempfile.TemporaryFile() + + if cont.attach_wait(lxc.attach_run_command, + ['ip', 'netns', 'exec', + ns, 'cat', + '/proc/sys/net/netfilter/' + 'nf_conntrack_count'], + stdout=nscountfile, + stderr=tempfile.TemporaryFile()) > -1: + + nscountfile.seek(0) + nscount = int(nscountfile.read().strip(os.linesep)) + + if nscount > metrics['nf_conntrack_count']['value']: + metrics['nf_conntrack_count']['value'] = nscount + + return metrics except maas_common.MaaSException as e: maas_common.status_err(str(e), m_name='maas_conntrack') diff --git a/playbooks/templates/rax-maas/conntrack_count.yaml.j2 b/playbooks/templates/rax-maas/conntrack_count.yaml.j2 index f296b566c..2ac6f928b 100644 --- a/playbooks/templates/rax-maas/conntrack_count.yaml.j2 +++ b/playbooks/templates/rax-maas/conntrack_count.yaml.j2 @@ -20,8 +20,8 @@ alarms : criteria : | :set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (percentage(metric["nf_conntrack_count"] , metric["nf_conntrack_max"]) > {{ maas_nf_conntrack_critical_threshold }}) { - return new AlarmStatus(CRITICAL, "Connection count is > {{ maas_nf_conntrack_critical_threshold }}% of maximum allowed."); + return new AlarmStatus(CRITICAL, "Connection tracking count is > {{ maas_nf_conntrack_critical_threshold }}% of the critical threshold. 
Please check all namespaces listed at /var/run/netns including the host."); } if (percentage(metric["nf_conntrack_count"] , metric["nf_conntrack_max"]) > {{ maas_nf_conntrack_warning_threshold }}) { - return new AlarmStatus(WARNING, "Connection count is > {{ maas_nf_conntrack_warning_threshold }}% of maximum allowed."); + return new AlarmStatus(WARNING, "Connection tracking count is > {{ maas_nf_conntrack_warning_threshold }}% of the warning threshold. Please check all namespaces inside listed at /var/run/netns including the host."); } diff --git a/releasenotes/notes/TURTLES-1006-add-ns-checks-conntack-plugin-5dcd0ff5de96a3b2.yaml b/releasenotes/notes/TURTLES-1006-add-ns-checks-conntack-plugin-5dcd0ff5de96a3b2.yaml new file mode 100644 index 000000000..e76e6bb25 --- /dev/null +++ b/releasenotes/notes/TURTLES-1006-add-ns-checks-conntack-plugin-5dcd0ff5de96a3b2.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + * The `conntrack_count.py` plugin is now checking for network namespaces + listed at `/var/run/netns` and retrieves the iptables connection + tracking information for each namespace. + This ensures that embedded network namespaces are alerted in case + connection tracking hashes are about to exceed a configurable threshold. + Due to the limited availability of MAAS metrics per alarm, only the + namespace with the highest connection tracking count is reported.