Add ability to collect perf data of process by specifying its listening port (#14)

* perf: add arguments to collect perf data by process's listening port
* logfiles & config: adjust output path to proper sub-directory
* perf: add argument to run `perf archive` after collecting data
* perf: fix compatibility with Python 2
* insight: disable collector and pdctl by default, if calling a specific task
* process: minor cleanup of code
* process: unify argument names
pingcap · Jul 3, 2018 · 02e982c · 02e982c
1 parent 9decce9
commit 02e982c
Show file tree

Hide file tree

Showing 7 changed files with 124 additions and 40 deletions.
diff --git a/insight.py b/insight.py
@@ -107,11 +107,21 @@ def run_perf(self, args):
             perf_proc = self.format_proc_info("name")
             self.insight_perf = perf.InsightPerf(perf_proc, args)
         # parse pid list
-        elif len(args.pid) > 0:
+        elif args.pid:
             perf_proc = {}
             for _pid in args.pid:
                 perf_proc[_pid] = None
             self.insight_perf = perf.InsightPerf(perf_proc, args)
+        # find process by port
+        elif args.proc_listen_port:
+            perf_proc = {}
+            pid_list = proc_meta.find_process_by_port(
+                args.proc_listen_port, args.proc_listen_proto)
+            if not pid_list or len(pid_list) < 1:
+                return
+            for _pid in pid_list:
+                perf_proc[_pid] = None
+            self.insight_perf = perf.InsightPerf(perf_proc, args)
         else:
             self.insight_perf = perf.InsightPerf(options=args)
         self.insight_perf.run(self.full_outdir)
@@ -170,10 +180,11 @@ def save_logfiles(self, args):
         proc_cmdline = self.format_proc_info("cmd")  # cmdline of process
         if args.log_auto:
             self.insight_logfiles.save_logfiles_auto(
-                proc_cmdline=proc_cmdline, outputdir=self.outdir)
+                proc_cmdline=proc_cmdline, outputdir=self.full_outdir)
         else:
-            self.insight_logfiles.save_tidb_logfiles(outputdir=self.outdir)
-        self.insight_logfiles.save_system_log(outputdir=self.outdir)
+            self.insight_logfiles.save_tidb_logfiles(
+                outputdir=self.full_outdir)
+        self.insight_logfiles.save_system_log(outputdir=self.full_outdir)
 
     def save_configs(self, args):
         if not args.config_file:
@@ -182,14 +193,15 @@ def save_configs(self, args):
 
         self.insight_configfiles = configfiles.InsightConfigFiles(options=args)
         if args.config_sysctl:
-            self.insight_configfiles.save_sysconf(outputdir=self.outdir)
+            self.insight_configfiles.save_sysconf(outputdir=self.full_outdir)
         # collect TiDB configs
         if args.config_auto:
             proc_cmdline = self.format_proc_info("cmd")  # cmdline of process
             self.insight_configfiles.save_configs_auto(
-                proc_cmdline=proc_cmdline, outputdir=self.outdir)
+                proc_cmdline=proc_cmdline, outputdir=self.full_outdir)
         else:
-            self.insight_configfiles.save_tidb_configs(outputdir=self.outdir)
+            self.insight_configfiles.save_tidb_configs(
+                outputdir=self.full_outdir)
 
     def read_pdctl(self, args):
         self.insight_pdctl = pdctl.PDCtl(host=args.pd_host, port=args.pd_port)
@@ -211,19 +223,24 @@ def read_pdctl(self, args):
 
     insight = Insight(args)
 
-    insight.collector()
+    if (not args.pid and not args.proc_listen_port
+        and not args.log_auto and not args.config_auto
+        ):
+        insight.collector()
+        # check size of data folder of TiDB processes
+        insight.get_datadir_size()
+        # list files opened by TiDB processes
+        insight.get_lsof_tidb()
     # WIP: call scripts that collect metrics of the node
     insight.run_perf(args)
-    # check size of data folder of TiDB processes
-    insight.get_datadir_size()
-    # list files opened by TiDB processes
-    insight.get_lsof_tidb()
     # save log files
     insight.save_logfiles(args)
     # save config files
     insight.save_configs(args)
-    # read and save `pd-ctl` info
-    insight.read_pdctl(args)
+
+    if args.pdctl:
+        # read and save `pd-ctl` info
+        insight.read_pdctl(args)
 
     # compress all output to tarball
     if args.compress:

diff --git a/measurement/files/configfiles.py b/measurement/files/configfiles.py
@@ -72,8 +72,9 @@ def list_config_files(base_dir, prefix):
             return file_list
 
         source_dir = self.config_options.config_dir
-        if not os.path.isdir(source_dir):
-            logging.fatal("Source config path is not a directory.")
+        if not source_dir or not os.path.isdir(source_dir):
+            logging.fatal(
+                "Source config path is not a directory. Did you set correct `--config-dir`?")
             return
         output_base = outputdir
         if not output_base:

diff --git a/measurement/files/fileutils.py b/measurement/files/fileutils.py
@@ -3,9 +3,13 @@
 
 import logging
 import os
+import sys
 
 from measurement import util
 
+# Numeric Python version computed as major + minor / 10 (e.g. 2.7, 3.5).
+# The divisor is a float so that Python 2's integer `/` does not truncate the
+# minor part (2 + 7 / 10 would evaluate to 2 there).
+# NOTE: two-digit minor versions overflow this scheme (3.10 evaluates to 4.0).
+python_version = sys.version_info[0] + sys.version_info[1] / 10.0
+
 
 # read data from file
 def read_file(filename):
@@ -45,6 +49,28 @@ def create_dir(path):
     return None
 
 
+# os.scandir() is added in Python 3.5 and has better performance than os.listdir()
+# so we try to use it if available, and fall back to os.listdir() for older versions
+def list_dir(path):
+    """Return "<path>/<name>" for every entry directly under `path`.
+
+    Best effort: OSError is never propagated. When `path` cannot be read,
+    whatever was collected so far (usually an empty list) is returned.
+    """
+    file_list = []
+    try:
+        # detect the feature itself instead of comparing a float version number
+        if hasattr(os, "scandir"):
+            for entry in os.scandir(path):
+                file_list.append("%s/%s" % (path, entry.name))
+        else:
+            for name in os.listdir(path):
+                file_list.append("%s/%s" % (path, name))
+    except OSError as e:
+        # There is PermissionError in Python 3.3+, but only OSError in Python 2
+        import errno
+        if e.errno in (errno.EACCES, errno.EPERM):
+            # logging.warn() is a deprecated alias; warning() works everywhere
+            logging.warning("Permission Denied reading %s" % path)
+        elif e.errno == errno.ENOENT:
+            # when a process just exited
+            pass
+        else:
+            # still best effort, but leave a trace instead of failing silently
+            logging.warning("Failed to read %s: %s" % (path, e))
+    return file_list
+
+
 def build_full_output_dir(basedir=None, subdir=None):
     if basedir is None and subdir is None:
         # default to current working directory

diff --git a/measurement/files/logfiles.py b/measurement/files/logfiles.py
@@ -105,8 +105,9 @@ def save_system_log(self, outputdir=None):
     def save_tidb_logfiles(self, outputdir=None):
         # init values of args
         source_dir = self.log_options.log_dir
-        if not os.path.isdir(source_dir):
-            logging.fatal("Source log path is not a directory.")
+        if not source_dir or not os.path.isdir(source_dir):
+            logging.fatal(
+                "Source log path is not a directory. Did you set correct `--log-dir`?")
             return
         output_base = outputdir
         if not output_base:

diff --git a/measurement/perf.py b/measurement/perf.py
@@ -26,7 +26,7 @@ def __init__(self, process={}, options={}):
         self.perf_options = options
 
     # set params of perf
-    def build_cmd(self, pid=None, outfile=None, outdir=None):
+    def build_record_cmd(self, pid=None, outfile=None, outdir=None):
         cmd = ["perf",    # default executable name
                "record",  # default action of perf
                "-g",
@@ -44,17 +44,17 @@ def build_cmd(self, pid=None, outfile=None, outdir=None):
         except (KeyError, TypeError):
             cmd.append("120")  # default to 120Hz
 
-        if pid is not None:
+        if pid:
             cmd.append("-p")
             cmd.append("%d" % pid)
         else:
             cmd.append("-a")  # default to whole system
 
         # default will be perf.data if nothing specified
-        if outfile is not None:
+        if outfile:
             cmd.append("-o")
             cmd.append("%s/%s.data" % (outdir, outfile))
-        elif outfile is None and pid is not None:
+        elif not outfile and pid:
             cmd.append("-o")
             cmd.append("%s/%d.data" % (outdir, pid))
 
@@ -66,6 +66,20 @@ def build_cmd(self, pid=None, outfile=None, outdir=None):
 
         return cmd
 
+    def build_archive_cmd(self, pid=None, outfile=None, outdir=None):
+        """Build the argument list of `perf archive` for one data file.
+
+        The data file name is "<outfile>.data" when `outfile` is set,
+        "<pid>.data" when only `pid` is set, and "perf.data" otherwise.
+        It is prefixed with `outdir` only when `outdir` is given, so a call
+        without arguments yields "perf.data" rather than "None/perf.data".
+
+        Returns the command as a list of strings.
+        """
+        cmd = ["perf",
+               "archive"]
+
+        # default will be perf.data if nothing specified
+        if outfile:
+            data_file = "%s.data" % outfile
+        elif pid:
+            data_file = "%d.data" % pid
+        else:
+            data_file = "perf.data"
+
+        # only prepend the directory when there is one
+        if outdir:
+            data_file = "%s/%s" % (outdir, data_file)
+        cmd.append(data_file)
+
+        return cmd
+
     def run(self, outputdir=None):
         # set output path of perf data
         full_outputdir = fileutils.build_full_output_dir(
@@ -79,7 +93,7 @@ def run(self, outputdir=None):
         if len(self.process_info) > 0:
             # perf on given process(es)
             for pid, pname in self.process_info.items():
-                cmd = self.build_cmd(pid, pname, full_outputdir)
+                cmd = self.build_record_cmd(pid, pname, full_outputdir)
                 # TODO: unified output: "Now perf recording %s(%d)..." % (pname, pid)
                 stdout, stderr = util.run_cmd(cmd)
                 if stdout:
@@ -88,13 +102,25 @@ def run(self, outputdir=None):
                 if stderr:
                     fileutils.write_file(
                         path.join(full_outputdir, "%s.stderr" % pname), stderr)
+                if self.perf_options.perf_archive:
+                    cmd = self.build_archive_cmd(pid, pname, full_outputdir)
+                    stdout, stderr = util.run_cmd(cmd)
+                    if stderr:
+                        fileutils.write_file(
+                            path.join(full_outputdir, "%s.archive.stderr" % pname), stderr)
         else:
             # perf the entire system
-            cmd = self.build_cmd()
+            cmd = self.build_record_cmd()
             stdout, stderr = util.run_cmd(cmd)
             if stdout:
                 fileutils.write_file(
                     path.join(full_outputdir, "perf.stdout"), stdout)
             if stderr:
                 fileutils.write_file(
                     path.join(full_outputdir, "perf.stderr"), stderr)
+            if self.perf_options.perf_archive:
+                cmd = self.build_archive_cmd()
+                stdout, stderr = util.run_cmd(cmd)
+                if stderr:
+                    fileutils.write_file(
+                        path.join(full_outputdir, "perf.archive.stderr"), stderr)
diff --git a/measurement/process/meta.py b/measurement/process/meta.py
@@ -5,7 +5,9 @@
 from measurement.files import fileutils
 
 
-def find_process_by_port(port=None, protocol="tcp"):
+def find_process_by_port(port=None, protocol=None):
+    if not protocol:
+        protocol = "tcp"
     process_list = []
     if not port:
         logging.fatal("No process listening port specified.")
@@ -14,22 +16,25 @@ def find_process_by_port(port=None, protocol="tcp"):
     # iterate over all file descriptors and build a socket address -> pid map
     def build_inode_to_pid_map():
+        """Scan /proc/<pid>/fd and map each socket id to the PIDs holding it.
+
+        Keys are the text between ":[" and the final character of every fd
+        link target that starts with "socket"; values are lists of int PIDs.
+        File descriptors that disappear while scanning are skipped.
+        """
         result = {}
-        for entry in os.scandir("/proc"):
+        for entry in fileutils.list_dir("/proc"):
             # find all PIDs
-            if str.isdigit(entry.name):
-                try:
-                    for _fd in os.scandir("/proc/%s/fd" % entry.name):
-                        _fd_target = os.readlink(_fd.path)
-                        if not str.startswith(_fd_target, "socket"):
-                            continue
-                        _socket = _fd_target.split(":[")[-1][:-1]
-                        try:
-                            result[_socket].append(entry.name)
-                        except KeyError:
-                            result[_socket] = [entry.name]
-                except PermissionError:
-                    logging.warn(
-                        "Permission Denied reading /proc/%s/fd" % entry.name)
+            fname = entry.split('/')[-1]
+            if str.isdigit(fname):
+                for _fd in fileutils.list_dir("/proc/%s/fd" % fname):
+                    try:
+                        _fd_target = os.readlink(_fd)
+                    except OSError as e:
+                        import errno
+                        if e.errno == errno.ENOENT:
+                            # the fd was closed (or its process exited)
+                            # between listing and readlink: skip it
+                            continue
+                        raise
+                    if not str.startswith(_fd_target, "socket"):
+                        continue
+                    _socket = _fd_target.split(":[")[-1][:-1]
+                    result.setdefault(_socket, []).append(int(fname))
         return result
 
     def find_inode_by_port(port, protocol):

diff --git a/measurement/util.py b/measurement/util.py
@@ -66,6 +66,10 @@ def parse_insight_opts():
                         help="Collect trace info using perf. Disabled by default.")
     parser.add_argument("--pid", type=int, action="append", default=None,
                         help="""PID of process to run perf on. If `-p`/`--perf` is not set, this value will not take effect. Multiple PIDs can be set by using more than one `--pid` argument. `None` by default which means the whole system.""")
+    parser.add_argument("--proc-listen-port", action="store", type=int, default=None,
+                        help="Collect perf data of process that listen on given port. This value will be ignored if `--pid` is set.")
+    parser.add_argument("--proc-listen-proto", action="store", default=None,
+                        help="Protocol type of listen port, available values are: tcp/udp. If not set, only TCP listening ports are checked.")
     parser.add_argument("--tidb-proc", action="store_true", default=False,
                         help="Collect perf data for PD/TiDB/TiKV processes instead of the whole system.")
     parser.add_argument("--perf-exec", type=int, action="store", default=None,
@@ -74,6 +78,8 @@ def parse_insight_opts():
                         help="Event sampling frequency of perf-record, in Hz.")
     parser.add_argument("--perf-time", type=int, action="store", default=None,
                         help="Time period of perf recording, in seconds.")
+    parser.add_argument("--perf-archive", action="store_true", default=False,
+                        help="Run `perf archive` after collecting data, useful when reading data on another machine. Disabled by default.")
 
     parser.add_argument("-l", "--log", action="store_true", default=False,
                         help="Collect log files in output. PD/TiDB/TiKV logs are included by default.")
@@ -99,6 +105,8 @@ def parse_insight_opts():
     parser.add_argument("--config-prefix", action="store", default=None,
                         help="The prefix of config files, will be directory name of all config files, will be in the name of output tarball. If `--config-auto` is set, the value will be ignored.")
 
+    parser.add_argument("--pdctl", action="store_true", default=False,
+                        help="Enable collecting data from PD API. Disabled by default.")
     parser.add_argument("--pd-host", action="store", default=None,
                         help="The host of the PD server. `localhost` by default.")
     parser.add_argument("--pd-port", type=int, action="store", default=None,