Skip to content

Commit

Permalink
Report conntrack count for all namespaces
Browse files Browse the repository at this point in the history
In addition to checking the container root namespace,
nested namespaces inside the container are checked against the
container maximum configured connection tracking count.
Only the highest connection tracking count is reported, as
MAAS limits the number of metrics it can process.

Closes-Bug: TURTLES-1006
  • Loading branch information
BjoernT committed Oct 25, 2018
1 parent 203c56b commit 1e4cd71
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 4 deletions.
60 changes: 58 additions & 2 deletions playbooks/files/rax-maas/plugins/conntrack_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
pass

import maas_common
import os
import subprocess
import tempfile


Expand Down Expand Up @@ -63,9 +65,29 @@ def get_metrics():
'nf_conntrack_max': {
'path': '/proc/sys/net/netfilter/nf_conntrack_max'}}

# Retrieve root namespace count
for data in metrics.viewvalues():
data['value'] = get_value(data['path'])

# Retrieve conntrack count per namespace
# and report the namespace with the highest count.
# This is necessary to limit the number of metrics to report to MAAS,
# as we can not report a metric per namespace, which by nature are
# also volatile.
try:
namespaces = os.listdir('/var/run/netns')
for ns in namespaces:
ps = subprocess.check_output(['ip', 'netns', 'exec',
ns, 'cat',
'/proc/sys/net/netfilter/'
'nf_conntrack_count'])
nscount = int(ps.strip(os.linesep))

if nscount > metrics['nf_conntrack_count']['value']:
metrics['nf_conntrack_count']['value'] = nscount
except (OSError):
pass

return metrics


Expand All @@ -89,19 +111,53 @@ def get_metrics_lxc_container(container_name=''):
# Check if container is even running
try:
with tempfile.TemporaryFile() as tmpfile:
# Retrieve root namespace count
if cont.attach_wait(lxc.attach_run_command,
['cat',
'/proc/sys/net/netfilter/nf_conntrack_count',
'/proc/sys/net/netfilter/nf_conntrack_max'],
stdout=tmpfile) > -1:
stdout=tmpfile,
stderr=tempfile.TemporaryFile()) > -1:

tmpfile.seek(0)
output = tmpfile.read()
metrics = {
'nf_conntrack_count': {'value': output.split('\n')[0]},
'nf_conntrack_max': {'value': output.split('\n')[1]}}

return metrics
# Retrieve conntrack count per namespace
# and report the namespace with the highest count.
# This is necessary to limit the number of metrics to report to MAAS,
# as we can not report a metric per namespace, which by nature are
# also volatile.
with tempfile.TemporaryFile() as nsfile:
if cont.attach_wait(lxc.attach_run_command,
['ls',
'-1',
'/var/run/netns'],
stdout=nsfile,
stderr=tempfile.TemporaryFile()) > -1:
nsfile.seek(0)

for line in nsfile.readlines():
ns = line.strip(os.linesep)
nscountfile = tempfile.TemporaryFile()

if cont.attach_wait(lxc.attach_run_command,
['ip', 'netns', 'exec',
ns, 'cat',
'/proc/sys/net/netfilter/'
'nf_conntrack_count'],
stdout=nscountfile,
stderr=tempfile.TemporaryFile()) > -1:

nscountfile.seek(0)
nscount = int(nscountfile.read().strip(os.linesep))

if nscount > metrics['nf_conntrack_count']['value']:
metrics['nf_conntrack_count']['value'] = nscount

return metrics

except maas_common.MaaSException as e:
maas_common.status_err(str(e), m_name='maas_conntrack')
Expand Down
4 changes: 2 additions & 2 deletions playbooks/templates/rax-maas/conntrack_count.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ alarms :
criteria : |
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
if (percentage(metric["nf_conntrack_count"] , metric["nf_conntrack_max"]) > {{ maas_nf_conntrack_critical_threshold }}) {
return new AlarmStatus(CRITICAL, "Connection count is > {{ maas_nf_conntrack_critical_threshold }}% of maximum allowed.");
return new AlarmStatus(CRITICAL, "Connection tracking count is > {{ maas_nf_conntrack_critical_threshold }}% of the critical threshold. Please check all namespaces listed at /var/run/netns including the host.");
}
if (percentage(metric["nf_conntrack_count"] , metric["nf_conntrack_max"]) > {{ maas_nf_conntrack_warning_threshold }}) {
return new AlarmStatus(WARNING, "Connection count is > {{ maas_nf_conntrack_warning_threshold }}% of maximum allowed.");
return new AlarmStatus(WARNING, "Connection tracking count is > {{ maas_nf_conntrack_warning_threshold }}% of the warning threshold. Please check all namespaces inside listed at /var/run/netns including the host.");
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
fixes:
- |
* The `conntrack_count.py` plugin now checks for network namespaces
listed at `/var/run/netns` and retrieves the iptables connection
tracking information for each namespace.
This ensures that embedded network namespaces are alerted in case
connection tracking hashes are about to exceed a configurable threshold.
Due to the limited availability of MAAS metrics per alarm, only the
namespace with the highest connection tracking count is reported.

0 comments on commit 1e4cd71

Please sign in to comment.