From b97921551fe1c048504241aacae16a2c85539ad0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 8 Oct 2015 15:39:32 -0700 Subject: [PATCH] toplev: Add support for --bottleneck --- README.md | 4 +++- tl-tester | 1 + toplev.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 707a496e..f3baac6f 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,9 @@ analysis on Intel CPUs on top of [Linux perf](https://perf.wiki.kernel.org/index # Recent new features: -* jevents now has a perf stat like tool called jstat. +* toplev can print the critical bottleneck with --bottleneck +* The tools can now find event lists for the original user of sudo +* jevents now has a perf stat like tool called jstat (renamed to jestat) * jevents can now list and automatically resolve perf style events and aliases * simple-pebs is a simple reference Linux PEBS driver * ocperf now supports uncore events on some systems (experimential) diff --git a/tl-tester b/tl-tester index 464542f2..33081f5b 100755 --- a/tl-tester +++ b/tl-tester @@ -100,6 +100,7 @@ EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py -d --stats --metrics --no-m EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py -d --no-desc --power -l4 $LOAD EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py -d --no-desc $ALL --no-group $LOAD EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py -d --no-desc --sw -l4 $LOAD +EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py -d --bottleneck -l4 $LOAD if python -c 'import matplotlib.pyplot' ; then EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py --graph -o x.png -d --metrics -l4 $LOAD fi diff --git a/toplev.py b/toplev.py index 3e4b255f..19a1f26a 100755 --- a/toplev.py +++ b/toplev.py @@ -294,6 +294,7 @@ def exe_dir(): p.add_argument('--columns', help='Print CPU output in multiple columns', action='store_true') p.add_argument('--nodes', help='Include or exclude nodes (with + to add, ^ to 
remove, comma separated list, wildcards allowed)') p.add_argument('--quiet', help='Avoid unnecessary status output', action='store_true') +p.add_argument('--bottleneck', help='Show critical bottleneck', action='store_true') args, rest = p.parse_known_args() if len(rest) > 0 and rest[0] == "--": @@ -407,6 +408,9 @@ def metric(self, area, name, l, timestamp, desc, title, unit, valstat): def flush(self): pass + def bottleneck(self, key, name, val): + pass + class OutputHuman(Output): """Generate human readable single-column output.""" def __init__(self, logfile): @@ -460,6 +464,11 @@ def metric(self, area, name, l, timestamp, desc, title, unit, valstat): self.item(area, name, val, timestamp, unit, desc, title, None, valstat) + def bottleneck(self, key, name, val): + if key: + key += " " + print >>self.logf, "%sBOTTLENECK %s %.2f%%" % (key, name, val * 100.) + class OutputColumns(OutputHuman): """Human-readable output data in per-cpu columns.""" def __init__(self, logfile): @@ -902,6 +911,8 @@ def print_keys(runner, res, rev, valstats, out, interval, env): st = [combine_valstat(z) for z in itertools.izip(*[valstats[j] for j in cpus])] runner.compute(r, rev[cpus[0]], st, env, smt_node, stat) runner.print_res(out, interval, core_fmt(core), smt_node) + if args.bottleneck: + runner.print_bottleneck(out, core_fmt(core), smt_node) # print the non SMT nodes # recompute the nodes so we get up-to-date values @@ -910,12 +921,16 @@ def print_keys(runner, res, rev, valstats, out, interval, env): continue runner.compute(res[j], rev[j], valstats[j], env, not_smt_node, stat) runner.print_res(out, interval, thread_fmt(j), not_smt_node) + if args.bottleneck: + runner.print_bottleneck(out, thread_fmt(j), not_smt_node) else: for j in sorted(res.keys()): if j != "" and int(j) not in runner.allowed_threads: continue runner.compute(res[j], rev[j], valstats[j], env, lambda obj: True, stat) runner.print_res(out, interval, j, lambda obj: True) + if args.bottleneck: + 
runner.print_bottleneck(out, j, lambda obj: True) out.flush() stat.referenced_check(res) stat.compute_errors() @@ -1302,6 +1317,28 @@ def match(m): return True return test() +SIB_THRESH = 5.0 + +# look for highest sibling, or parent if siblings are inconclusive +def find_final(bn): + pct = lambda x: float(x[1]) * 100. # values are ratios; SIB_THRESH is in percent + prefix = "" + prev = None + for j in bn: + if not j[0].startswith(prefix): + return prev + siblings = [x for x in bn + if x[0].startswith(prefix) and x[0].count('.') == j[0].count(".")] + siblings = sorted(siblings, key=pct, reverse=True) + # ambiguous (top two siblings within SIB_THRESH percent)? use parent + if (prev and + len(siblings) > 1 and + pct(siblings[0]) - pct(siblings[1]) <= SIB_THRESH): + return prev + prefix = j[0] + prev = j + return j + class Runner: """Schedule measurements of event groups. Map events to groups.""" @@ -1535,6 +1572,15 @@ def print_res(self, out, timestamp, title, match): if obj.thresh or args.verbose: self.sample_obj.add(obj) + def print_bottleneck(self, out, key, match): + bn = [(full_name(o), o.val) for o in self.olist if match(o) and o.thresh and not o.metric] + if not bn: + return + b = sorted(bn, key=lambda x: x[0].count(".")) + b = sorted(b, key=lambda x: float(x[1]), reverse=True) + final = find_final(b) + out.bottleneck(key, final[0], final[1]) + def remove_pp(s): if s.endswith(":pp"): return s[:-3]