From b97921551fe1c048504241aacae16a2c85539ad0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 8 Oct 2015 15:39:32 -0700
Subject: [PATCH] toplev: Add support for --bottleneck

---
 README.md |  4 +++-
 tl-tester |  1 +
 toplev.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 707a496e..f3baac6f 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,9 @@ analysis on Intel CPUs on top of [Linux perf](https://perf.wiki.kernel.org/index
 
 # Recent new features:
 
-* jevents now has a perf stat like tool called jstat.
+* toplev can print the critical bottleneck with --bottleneck
+* The tools can now find event lists for the original user of sudo
+* jevents now has a perf stat like tool called jstat (renamed to jestat)
 * jevents can now list and automatically resolve perf style events and aliases
 * simple-pebs is a simple reference Linux PEBS driver
 * ocperf now supports uncore events on some systems (experimential)
diff --git a/tl-tester b/tl-tester
index 464542f2..33081f5b 100755
--- a/tl-tester
+++ b/tl-tester
@@ -100,6 +100,7 @@ EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py -d --stats --metrics --no-m
 EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py -d --no-desc --power -l4 $LOAD
 EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py -d --no-desc $ALL --no-group $LOAD
 EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py -d --no-desc --sw -l4 $LOAD
+EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py -d --bottleneck -l4 $LOAD
 if python -c 'import matplotlib.pyplot' ; then
 EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py --graph -o x.png -d --metrics -l4 $LOAD
 fi
diff --git a/toplev.py b/toplev.py
index 3e4b255f..19a1f26a 100755
--- a/toplev.py
+++ b/toplev.py
@@ -294,6 +294,7 @@ def exe_dir():
 p.add_argument('--columns', help='Print CPU output in multiple columns', action='store_true')
 p.add_argument('--nodes', help='Include or exclude nodes (with + to add, ^ to remove, comma separated list, wildcards allowed)')
 p.add_argument('--quiet', help='Avoid unnecessary status output', action='store_true')
+p.add_argument('--bottleneck', help='Show critical bottleneck', action='store_true')
 args, rest = p.parse_known_args()
 
 if len(rest) > 0 and rest[0] == "--":
@@ -407,6 +408,9 @@ def metric(self, area, name, l, timestamp, desc, title, unit, valstat):
     def flush(self):
 	pass
 
+    def bottleneck(self, key, name, val):
+	"""Bottleneck reporting hook; this implementation outputs nothing."""
+
 class OutputHuman(Output):
     """Generate human readable single-column output."""
     def __init__(self, logfile):
@@ -460,6 +464,11 @@ def metric(self, area, name, l, timestamp, desc, title, unit, valstat):
         self.item(area, name, val, timestamp, unit, desc, title,
                   None, valstat)
 
+    def bottleneck(self, key, name, val):
+	"""Write one BOTTLENECK line for node name; val is a fraction, printed as a percentage."""
+	prefix = key + " " if key else key
+	print >>self.logf, "%sBOTTLENECK %s %.2f%%" % (prefix, name, val * 100.)
+
 class OutputColumns(OutputHuman):
     """Human-readable output data in per-cpu columns."""
     def __init__(self, logfile):
@@ -902,6 +911,8 @@ def print_keys(runner, res, rev, valstats, out, interval, env):
             st = [combine_valstat(z) for z in itertools.izip(*[valstats[j] for j in cpus])]
             runner.compute(r, rev[cpus[0]], st, env, smt_node, stat)
             runner.print_res(out, interval, core_fmt(core), smt_node)
+	    if args.bottleneck:
+		runner.print_bottleneck(out, core_fmt(core), smt_node)
 
         # print the non SMT nodes
         # recompute the nodes so we get up-to-date values
@@ -910,12 +921,16 @@ def print_keys(runner, res, rev, valstats, out, interval, env):
                 continue
             runner.compute(res[j], rev[j], valstats[j], env, not_smt_node, stat)
             runner.print_res(out, interval, thread_fmt(j), not_smt_node)
+	    if args.bottleneck:
+		runner.print_bottleneck(out, thread_fmt(j), not_smt_node) # label with this loop's thread j
     else:
         for j in sorted(res.keys()):
             if j != "" and int(j) not in runner.allowed_threads:
                 continue
             runner.compute(res[j], rev[j], valstats[j], env, lambda obj: True, stat)
             runner.print_res(out, interval, j, lambda obj: True)
+	    if args.bottleneck:
+		runner.print_bottleneck(out, j, lambda obj: True)
     out.flush()
     stat.referenced_check(res)
     stat.compute_errors()
@@ -1302,6 +1317,28 @@ def match(m):
                 return True
     return test()
 
+SIB_THRESH = 0.05 # node values are fractions (printed as val * 100), so this is 5 points
+
+# look for highest sibling, or parent if siblings are inconclusive
+def find_final(bn):
+    """Return the (name, value) entry of bn to report as the bottleneck.
+
+    bn is walked in the order given, descending while each name starts with
+    the previously accepted name. Returns None when bn is empty."""
+    pct = lambda x: float(x[1])
+    prefix, prev = "", None
+    for j in bn:
+	if not j[0].startswith(prefix):
+	    return prev
+	depth = j[0].count(".")
+	siblings = [x for x in bn if x[0].startswith(prefix) and x[0].count(".") == depth]
+	siblings.sort(key=pct, reverse=True)
+	# ambiguous? use parent
+	if prev and len(siblings) > 1 and pct(siblings[0]) - pct(siblings[1]) <= SIB_THRESH:
+	    return prev
+	prefix, prev = j[0], j
+    return prev
+
 class Runner:
     """Schedule measurements of event groups. Map events to groups."""
 
@@ -1535,6 +1572,15 @@ def print_res(self, out, timestamp, title, match):
 		    if obj.thresh or args.verbose:
 			self.sample_obj.add(obj)
 
+    def print_bottleneck(self, out, key, match):
+	"""Pick the bottleneck among matching non-metric nodes with thresh set and pass it to out.bottleneck."""
+	bn = [(full_name(o), o.val) for o in self.olist if match(o) and o.thresh and not o.metric]
+	if not bn:
+	    return
+	b = sorted(bn, key=lambda x: (-float(x[1]), x[0].count("."))) # highest value first, shallower node first on ties
+	final = find_final(b)
+	out.bottleneck(key, final[0], final[1])
+
 def remove_pp(s):
     if s.endswith(":pp"):
 	return s[:-3]