Skip to content

Commit 75b3410

Browse files
committed
adding kv_engine profiling using linux perf profiling
Change-Id: I582bf31264a542dd87137659aec1fe448205c831 Reviewed-on: http://review.couchbase.org/c/perfrunner/+/145759 Reviewed-by: Bo-Chun Wang <[email protected]> Reviewed-by: <[email protected]> Tested-by: <[email protected]>
1 parent 41cd58a commit 75b3410

File tree

9 files changed

+164
-2
lines changed

9 files changed

+164
-2
lines changed

perfrunner/helpers/cluster.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,3 +519,23 @@ def set_min_tls_version(self):
519519
)
520520
check_tls_version = self.rest.get_minimum_tls_version(self.master_node)
521521
logger.info('new tls version: {}'.format(check_tls_version))
522+
523+
def get_debug_rpm_url(self):
    """Return the download URL of the debuginfo RPM matching ``self.build``.

    ``self.build`` is split on ``-`` into a dotted numeric version and a
    build number (e.g. ``7.0.0-4000``); both parts must be integers.
    The version selects the release codename used as a directory on the
    build server, and the CentOS release reported by
    ``self.remote.detect_centos_release()`` selects the package flavour.

    Raises:
        ValueError: if ``self.build`` does not have the
            ``<dotted version>-<build number>`` shape.
    """
    version, build_number = self.build.split('-')
    build = tuple(map(int, version.split('.'))) + (int(build_number),)

    # Inclusive lower bounds with a final ``else`` make the mapping total:
    # a build sitting exactly on a boundary (e.g. 7.0.0-0) used to match no
    # branch and leaked the numeric version into the URL path.
    if build >= (7, 0, 0, 0):
        codename = 'cheshire-cat'
    elif build >= (6, 5, 0, 0):
        codename = 'mad-hatter'
    else:
        codename = 'alice'

    centos_version = self.remote.detect_centos_release()

    rpm_url = 'http://latestbuilds.service.couchbase.com/builds/' \
              'latestbuilds/couchbase-server/{}/{}/' \
              'couchbase-server-enterprise-debuginfo-{}-centos{}.x86_64.rpm' \
              ''.format(codename, build_number, self.build, centos_version)
    return rpm_url
539+
540+
def install_cb_debug_rpm(self):
    """Resolve the debuginfo RPM URL and hand it to the remote installer."""
    rpm_url = self.get_debug_rpm_url()
    self.remote.install_cb_debug_rpm(url=rpm_url)

perfrunner/helpers/profiler.py

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import threading
22
from typing import Callable
33

4+
import paramiko
45
import requests
56
from decorator import decorator
67
from sshtunnel import SSHTunnelForwarder
@@ -41,7 +42,8 @@ class Profiler:
4142
DEBUG_PORTS = {
4243
'fts': 8094,
4344
'index': 9102,
44-
'kv': 9998, # goxdcr
45+
'goxdcr': 9998,
46+
'kv': 9998, # will be deprecated in future
4547
'n1ql': 8093,
4648
}
4749

@@ -60,6 +62,12 @@ def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig):
6062

6163
self.ssh_username, self.ssh_password = cluster_spec.ssh_credentials
6264

65+
self.cluster_spec = cluster_spec
66+
67+
self.profiling_settings = test_config.profiling_settings
68+
69+
self.linux_perf_path = '/opt/couchbase/var/lib/couchbase/logs/'
70+
6371
def new_tunnel(self, host: str, port: int) -> SSHTunnelForwarder:
6472
return SSHTunnelForwarder(
6573
ssh_address_or_host=host,
@@ -74,12 +82,59 @@ def save(self, host: str, service: str, profile: str, content: bytes):
7482
with open(fname, 'wb') as fh:
7583
fh.write(content)
7684

85+
def linux_perf_profile(self, host: str, fname: str, path: str):
    """Run ``perf record`` against memcached on ``host`` over SSH.

    Opens a paramiko SSH session with the stored SSH credentials, runs a
    single ``perf record`` command and blocks until it exits. Sampling
    frequency, call-graph mode and duration come from
    ``self.profiling_settings``.

    Args:
        host: address of the node to profile.
        fname: name of the perf data file to create on the node.
        path: remote directory prefix; it is concatenated with ``fname``
            without a separator, so it must end with ``/``.

    Raises:
        SystemExit: if the SSH connection cannot be established.
    """
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    try:
        client.connect(hostname=host, username=self.ssh_username,
                       password=self.ssh_password)
    except Exception:
        logger.info('Cannot connect to the "{}" via SSH Server'.format(host))
        client.close()
        # Terminates the same way ``exit()`` did, without relying on the
        # helper that the ``site`` module injects.
        raise SystemExit

    # From here on the connection is open; always release it, even when
    # the remote command or the channel raises.
    try:
        logger.info('Capturing linux profile using linux perf record ')

        cmd = 'perf record -a -F {} -g --call-graph {} ' \
              '-p $(pgrep memcached) -o {}{} ' \
              '-- sleep {}'.format(self.profiling_settings.linux_perf_frequency,
                                   self.profiling_settings.linux_perf_callgraph,
                                   path,
                                   fname,
                                   self.profiling_settings.linux_perf_profile_duration)
        stdin, stdout, stderr = client.exec_command(cmd)
        # Blocks until the remote command has finished.
        exit_status = stdout.channel.recv_exit_status()

        if exit_status == 0:
            logger.info("linux perf record: linux perf profile capture completed")
        else:
            # The status used to be passed as a stray positional argument
            # with no placeholder, so it never reached the log.
            logger.info("perf record failed , exit_status : {}".format(exit_status))
    finally:
        client.close()
116+
77117
def profile(self, host: str, service: str, profile: str):
78118
logger.info('Collecting {} profile on {}'.format(profile, host))
79119

80120
endpoint = self.ENDPOINTS[profile]
81121
port = self.DEBUG_PORTS[service]
82122

123+
if self.profiling_settings.linux_perf_profile_flag:
124+
logger.info('Collecting {} profile on {} using linux perf '
125+
'reccord'.format(profile, host))
126+
127+
fname = 'linux_{}_{}_{}_perf.data'.format(host, profile, uhex()[:4])
128+
self.linux_perf_profile(host=host, fname=fname, path=self.linux_perf_path)
129+
130+
else:
131+
logger.info('Collecting {} profile on {}'.format(profile, host))
132+
133+
with self.new_tunnel(host, port) as tunnel:
134+
url = endpoint.format(tunnel.local_bind_port)
135+
response = requests.get(url=url, auth=self.rest.auth)
136+
self.save(host, service, profile, response.content)
137+
83138
with self.new_tunnel(host, port) as tunnel:
84139
url = endpoint.format(tunnel.local_bind_port)
85140
response = requests.get(url=url, auth=self.rest.auth)

perfrunner/remote/linux.py

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import os
2+
import time
23
from collections import defaultdict
34
from typing import Dict, List
45
from urllib.parse import urlparse
56

67
from fabric.api import cd, get, put, quiet, run, settings
78
from fabric.exceptions import CommandTimeout, NetworkError
8-
99
from logger import logger
1010
from perfrunner.helpers.misc import uhex
1111
from perfrunner.remote import Remote
@@ -29,6 +29,10 @@ class RemoteLinux(Remote):
2929

3030
PROCESS_PATTERNS = ('cbas', )
3131

32+
LINUX_PERF_PROFILES_PATH = '/opt/couchbase/var/lib/couchbase/logs/'
33+
34+
LINUX_PERF_DELAY = 30
35+
3236
@property
3337
def package(self):
3438
if self.os.upper() in ('UBUNTU', 'DEBIAN'):
@@ -812,3 +816,47 @@ def cbbackupmgr_restore(self, master_node: str, cluster_spec: ClusterSpec,
812816

813817
logger.info('Running: {}'.format(cmd))
814818
run(cmd)
819+
820+
@all_servers
def install_cb_debug_rpm(self, url):
    """Install the Couchbase debuginfo RPM located at ``url`` with ``rpm -iv``."""
    logger.info('Installing Couchbase Debug rpm on all servers')
    install_cmd = 'rpm -iv {}'.format(url)
    run(install_cmd, quiet=True)
824+
825+
@all_servers
def generate_linux_perf_script(self):
    """Convert captured perf data files to text and zip the result.

    Lists ``*_perf.data`` files under ``LINUX_PERF_PROFILES_PATH``. For each
    one it runs ``perf script`` into ``<file>.txt``, with a 600 second
    timeout followed by a pause of ``LINUX_PERF_DELAY`` seconds, then zips
    the text file next to it. Command failures are tolerated
    (``warn_only``); a timeout is logged and the zip step still runs.
    """
    profiles_dir = self.LINUX_PERF_PROFILES_PATH
    list_cmd = 'for i in {}*_perf.data; do echo $i; done'.format(profiles_dir)
    listing = run(list_cmd)

    for perf_data in listing.replace("\r", "").split("\n"):
        base_name = perf_data.split('/')[-1]
        # When nothing matches, the shell echoes the glob pattern itself.
        if base_name == '*_perf.data':
            continue

        script_cmd = 'perf script -i {} --no-inline > {}.txt'.format(perf_data, perf_data)
        logger.info('Generating linux script data : {}'.format(script_cmd))
        try:
            with settings(warn_only=True):
                run(script_cmd, timeout=600, pty=False)
                time.sleep(self.LINUX_PERF_DELAY)
        except CommandTimeout:
            logger.error('linux perf script timed out')

        zip_cmd = 'cd {}; zip -q {}.zip {}.txt'.format(
            self.LINUX_PERF_PROFILES_PATH, base_name, base_name)
        with settings(warn_only=True):
            run(zip_cmd, pty=False)
854+
855+
@all_servers
def get_linuxperf_files(self):
    """Download ``*.zip`` archives from ``LINUX_PERF_PROFILES_PATH``.

    The download is skipped when ``stat *.zip`` reports a non-zero return
    code. Files are saved to the local current directory.
    """
    logger.info('Collecting linux perf files from kv nodes')
    with cd(self.LINUX_PERF_PROFILES_PATH):
        stat_result = run('stat *.zip', quiet=True, warn_only=True)
        if stat_result.return_code:
            return
        get('*.zip', local_path='.')

perfrunner/settings.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,14 @@ class ProfilingSettings:
290290

291291
SERVICES = ''
292292

293+
LINUX_PERF_PROFILE_DURATION = 10 # seconds
294+
295+
LINUX_PERF_FREQUENCY = 99
296+
297+
LINUX_PERF_CALLGRAPH = 'lbr' # optional lbr, dwarf
298+
299+
LINUX_PERF_DELAY_MULTIPLIER = 2
300+
293301
def __init__(self, options: dict):
294302
self.services = options.get('services',
295303
self.SERVICES).split()
@@ -299,6 +307,19 @@ def __init__(self, options: dict):
299307
self.NUM_PROFILES))
300308
self.profiles = options.get('profiles',
301309
self.PROFILES).split(',')
310+
self.linux_perf_profile_duration = int(options.get('linux_perf_profile_duration',
311+
self.LINUX_PERF_PROFILE_DURATION))
312+
313+
self.linux_perf_profile_flag = bool(options.get('linux_perf_profile_flag'))
314+
315+
self.linux_perf_frequency = int(options.get('linux_perf_frequency',
316+
self.LINUX_PERF_FREQUENCY))
317+
318+
self.linux_perf_callgraph = options.get('linux_perf_callgraph',
319+
self.LINUX_PERF_CALLGRAPH)
320+
321+
self.linux_perf_delay_multiplier = int(options.get('linux_perf_delay_multiplier',
322+
self.LINUX_PERF_DELAY_MULTIPLIER))
302323

303324

304325
class BucketSettings:

perfrunner/tests/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ def eventing_nodes(self) -> List[str]:
9595
return self.rest.get_active_nodes_by_role(self.master_node, 'eventing')
9696

9797
def tear_down(self):
98+
if self.test_config.profiling_settings.linux_perf_profile_flag:
99+
self.collect_linux_perf_profiles()
100+
98101
if self.test_config.test_case.use_workers:
99102
self.worker_manager.download_celery_logs()
100103
self.worker_manager.terminate()
@@ -107,6 +110,10 @@ def tear_down(self):
107110

108111
self.cluster.reset_memory_settings()
109112

113+
def collect_linux_perf_profiles(self):
    """Generate the perf script output remotely, then fetch the archives."""
    remote = self.remote
    remote.generate_linux_perf_script()
    remote.get_linuxperf_files()
116+
110117
def collect_logs(self):
111118
self.remote.collect_info()
112119

perfrunner/tests/dcp.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ def _report_kpi(self, time_elapsed: float, clients: int, stream: str):
1414

1515
@with_stats
1616
@timeit
17+
@with_profiles
1718
def access(self, *args):
1819
username, password = self.cluster_spec.rest_credentials
1920

@@ -64,6 +65,7 @@ def init_java_dcp_client(self):
6465

6566
@with_stats
6667
@timeit
68+
@with_profiles
6769
def access(self, *args):
6870
for target in self.target_iterator:
6971
local.run_java_dcp_client(

perfrunner/tests/kv.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from logger import logger
44
from perfrunner.helpers.cbmonitor import timeit, with_stats
5+
from perfrunner.helpers.profiler import with_profiles
56
from perfrunner.helpers.worker import (
67
pillowfight_data_load_task,
78
pillowfight_task,
@@ -552,6 +553,7 @@ class CompressionTest(PillowFightTest):
552553

553554
@with_stats
554555
@timeit
556+
@with_profiles
555557
def wait_for_compression(self):
556558
for master in self.cluster_spec.masters:
557559
for bucket in self.test_config.buckets:
@@ -578,6 +580,7 @@ class CompactionTest(KVTest):
578580

579581
@with_stats
580582
@timeit
583+
@with_profiles
581584
def compact(self):
582585
self.compact_bucket()
583586

perfrunner/tests/rebalance.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def post_rebalance(self):
9999
time.sleep(self.rebalance_settings.stop_after)
100100

101101
@with_stats
102+
@with_profiles
102103
def rebalance(self, services=None):
103104
self.pre_rebalance()
104105
self.rebalance_time = self._rebalance(services)
@@ -137,6 +138,7 @@ class RebalanceDurabilityTest(RebalanceTest):
137138
COLLECTORS = {'latency': True}
138139

139140
@with_stats
141+
@with_profiles
140142
def rebalance(self, services=None):
141143
self.access_bg()
142144
self.rebalance_time = self._rebalance(services)
@@ -177,6 +179,7 @@ def rebalance(self, services=None):
177179
self.post_rebalance()
178180

179181
@with_stats
182+
@with_profiles
180183
def create_fts_index(self):
181184
st = time.time()
182185
self.create_index()

perfrunner/utils/cluster.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ def main():
8181
cm.tweak_memory()
8282
cm.enable_n2n_encryption()
8383

84+
if cm.test_config.profiling_settings.linux_perf_profile_flag:
85+
cm.install_cb_debug_rpm()
86+
8487

8588
if __name__ == '__main__':
8689
main()

0 commit comments

Comments
 (0)