Skip to content

Commit

Permalink
Merge branch 'master' into tmam-3.1pr
Browse files Browse the repository at this point in the history
  • Loading branch information
Andi Kleen committed Apr 22, 2016
2 parents 3c6e8e6 + c669696 commit 245fb85
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 30 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ analysis on Intel CPUs on top of [Linux perf](https://perf.wiki.kernel.org/index

# Recent new features:

* toplev now supports --sample-basename to specify the basename of the
sample perf.data file
* toplev now supports --sample-repeat to interleave measuring and sampling for
longer workloads.
* toplev now supports --sample-args to pass different arguments to the sample perf.
The arguments need to be specified with + instead of - (--sample-args "+b")
* toplev now automatically includes cycles with sampling
* toplev now checks for event errata and automatically disables affected nodes, unless --ignore-errata is specified.
* ucevents now supports Broadwell Xeon-D
* jevents has now limited support for Uncore events
* toplev updated to Ahmad Yasin's TopDown/TMAM 3.02:
Expand Down
6 changes: 6 additions & 0 deletions ocperf.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,10 @@ def __init__(self, name, row):
e.msr = None
e.overflow = 0
e.counter = "1" # dummy for toplev
if 'Errata' in row:
e.errata = row['Errata']
else:
e.errata = None

# {
# "Unit": "CBO",
Expand Down Expand Up @@ -464,9 +468,11 @@ def read_table(self, r, m):
d += " (Uses PEBS)"
else:
d = d.replace("(Precise Event)","") + " (Supports PEBS)"
e.errata = None
try:
if get('errata') != "null":
d += " Errata: " + get('errata')
e.errata = get('errata')
except KeyError:
pass
e.desc = d
Expand Down
6 changes: 6 additions & 0 deletions tl-tester
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ EVENTMAP=${cpus[$DCPU]} FORCECPU=$DCPU $WRAP ./toplev.py --no-desc --stats -d $A
grep :k log
grep /k log
EVENTMAP=${cpus[$DCPU]} FORCECPU=simple $WRAP ./toplev.py --no-desc -l1 $LOAD
# check errata handling
EVENTMAP=GenuineIntel-6-56 FORCECPU=bdw $WRAP ./toplev.py --no-desc --all $LOAD | tee log
grep BDE70 log

# test L1 uses a single group
onegroup() {
Expand Down Expand Up @@ -115,6 +118,9 @@ EVENTMAP=${cpus[ivb]} FORCECPU=ivb $WRAP ./toplev.py -d -l4 -I 100 --columns -x,
EVENTMAP=${cpus[ivb]} FORCECPU=ivb $WRAP ./toplev.py -d -l4 --valcsv val.csv --user $LOAD | tee log
grep :u log
grep /u log
EVENTMAP=${cpus[hsw]} FORCECPU=hsw $WRAP ./toplev.py --all --sample-repeat 10 -a sleep 0.1 | tee log
# do all perf.data exist?
rm perf.data.{1,2,3,4,5,6,7,8,9,10}

# need new perf
# test other perf output formats
Expand Down
114 changes: 84 additions & 30 deletions toplev.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@

smt_mode = False

errata_events = dict()

perf = os.getenv("PERF")
if not perf:
perf = "perf"
Expand Down Expand Up @@ -290,6 +292,9 @@ def exe_dir():
action='store_true')
p.add_argument('--show-sample', help='Show command line to rerun workload with sampling', action='store_true')
p.add_argument('--run-sample', help='Automatically rerun workload with sampling', action='store_true')
p.add_argument('--sample-args', help='Extra rguments to pass to perf record for sampling. Use + to specify -', default='-g')
p.add_argument('--sample-repeat', help='Repeat measurement and sampling N times. This interleaves counting and sampling', type=int)
p.add_argument('--sample-basename', help='Base name of sample perf.data files', default="perf.data")
p.add_argument('--valcsv', '-V', help='Write raw counter values into CSV file', type=argparse.FileType('w'))
p.add_argument('--stats', help='Show statistics on what events counted', action='store_true')
p.add_argument('--power', help='Display power metrics', action='store_true')
Expand All @@ -304,6 +309,7 @@ def exe_dir():
p.add_argument('--nodes', help='Include or exclude nodes (with + to add, ^ to remove, comma separated list, wildcards allowed)')
p.add_argument('--quiet', help='Avoid unnecessary status output', action='store_true')
p.add_argument('--bottleneck', help='Show critical bottleneck', action='store_true')
p.add_argument('--ignore-errata', help='Do not disable events with errata', action='store_true')
args, rest = p.parse_known_args()

rest = [x for x in rest if x != "--"]
Expand Down Expand Up @@ -345,6 +351,9 @@ def exe_dir():
print cmd
args.output = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE).stdin

if args.sample_repeat:
args.run_sample = True

print_all = args.verbose # or args.csv
dont_hide = args.verbose
detailed_model = (args.level > 1) or args.detailed
Expand Down Expand Up @@ -433,6 +442,7 @@ def add_filter(s):
notfound_cache = set()

def raw_event(i, name="", period=False):
orig_i = i
if i.count(".") > 0:
if i in fixed_counters:
return fixed_counters[i]
Expand All @@ -451,7 +461,7 @@ def raw_event(i, name="", period=False):
print "Event", oi, "maps to multiple units. Ignored."
return "dummy" # FIXME
emap.update_event(e.output(noname=True), e)
# next two things should be moved somewhere else
# next three things should be moved somewhere else
if i.startswith("uncore"):
outgroup_events.add(i)
if e.counter != cpu.standard_counters and not e.counter.startswith("Fixed"):
Expand All @@ -460,6 +470,8 @@ def raw_event(i, name="", period=False):
# CPUs
limited_counters[i] = int(e.counter.split(",")[0])
limited_set.add(i)
if e.errata:
errata_events[orig_i] = e.errata
return i

# generate list of converted raw events from events string
Expand All @@ -472,9 +484,13 @@ def mark_fixed(s):
return "%s[F]" % s
return s

def pwrap(s, linelen=60, indent=""):
def pwrap(s, linelen=70, indent=""):
print indent + ("\n" + indent).join(textwrap.wrap(s, linelen, break_long_words=False))

def pwrap_not_quiet(s, linelen=70, indent=""):
if not args.quiet:
pwrap(s, linelen, indent)

def has(obj, name):
return name in obj.__class__.__dict__

Expand Down Expand Up @@ -925,6 +941,7 @@ class BadEvent:
def __init__(self, name):
self.event = name

# XXX check for errata
def sample_event(e):
ev = emap.getevent(e.replace("_PS", ""))
if not ev:
Expand Down Expand Up @@ -1157,36 +1174,50 @@ def collect(self):
bad_nodes = set()
bad_events = set()
unsup_nodes = set()
errata_nodes = set()
errata_names = set()
min_kernel = []
for obj in self.olist:
obj.evlevels = []
obj.compute(lambda ev, level: ev_append(ev, level, obj))
obj.evlist = [x[0] for x in obj.evlevels]
obj.evnum = raw_events(obj.evlist)
obj.nc = needed_counters(obj.evnum)

# work arounds for lots of different problems
unsup = [x for x in obj.evlist if unsup_event(x, unsup_events, min_kernel)]
if any(unsup):
bad_nodes.add(obj)
bad_events |= set(unsup)
unsup = [x for x in obj.evlist if missing_pmu(x)]
if any(unsup):
unsup_nodes.add(obj)
if len(bad_nodes) > 0 and not args.quiet:
errata = [errata_events[x] for x in obj.evlist if x in errata_events]
if any(errata):
errata_nodes.add(obj)
errata_names |= set(errata)
if bad_nodes:
if args.force_events:
pwrap("warning: Using --force-events. Nodes: " +
" ".join([x.name for x in bad_nodes]) + " may be unreliable")
pwrap_not_quiet("warning: Using --force-events. Nodes: " +
" ".join([x.name for x in bad_nodes]) + " may be unreliable")
else:
pwrap("warning: removing " +
if not args.quiet:
pwrap("warning: removing " +
" ".join([x.name for x in bad_nodes]) +
" due to unsupported events in kernel: " +
" ".join(sorted(bad_events)), 80, "")
if min_kernel:
print "Fixed in kernel %d.%d" % (sorted(min_kernel, key=kv_to_key, reverse=True)[0])
print "Use --force-events to override (may result in wrong measurements)"
if min_kernel:
print "Fixed in kernel %d.%d" % (sorted(min_kernel, key=kv_to_key, reverse=True)[0])
print "Use --force-events to override (may result in wrong measurements)"
self.olist = [x for x in self.olist if x not in bad_nodes]
if len(unsup_nodes) > 0 and not args.quiet:
pwrap("Nodes " + " ".join(x.name for x in unsup_nodes) + " has unsupported PMUs")
if unsup_nodes:
pwrap_not_quiet("Nodes " + " ".join(x.name for x in unsup_nodes) + " has unsupported PMUs")
self.olist = [x for x in self.olist if x not in unsup_nodes]
if errata_nodes and not args.ignore_errata:
pwrap_not_quiet("Nodes " + " ".join(x.name for x in errata_nodes) + " have errata " +
" ".join(errata_names) + " and were disabled. " +
"Override with --ignore-errata")
self.olist = [x for x in self.olist if x in errata_nodes]

# fit events into available counters
# simple first fit algorithm
Expand Down Expand Up @@ -1241,6 +1272,8 @@ def compute(self, res, rev, valstats, env, match, stat):
# step 1: compute
for obj in self.olist:
obj.errcount = 0
if not obj.metric:
obj.thresh = False

if not match(obj):
continue
Expand Down Expand Up @@ -1282,7 +1315,8 @@ def print_res(self, out, timestamp, title, match):
continue
desc = obj_desc(obj, self.olist[1 + 1:])
if obj.metric:
out.metric(obj.area if has(obj, 'area') else None,
if obj.val != 0:
out.metric(obj.area if has(obj, 'area') else None,
obj.name, val, timestamp,
desc,
title,
Expand Down Expand Up @@ -1313,13 +1347,12 @@ def remove_pp(s):
return s[:-3]
return s

def print_sample(sample_obj, rest):
samples = []
def do_sample(sample_obj, rest, count):
# XXX use :ppp if available
samples = [("cycles:pp", "Precise cycles", )]
for obj in sample_obj:
for s in obj.sample:
samples.append((s, obj.name))
if len(samples) == 0:
return
nsamp = [x for x in samples if not unsup_event(x[0], unsup_events)]
nsamp = [(remove_pp(x[0]), x[1]) if unsup_event(x[0], unsup_pebs) else x
for x in nsamp]
Expand All @@ -1331,14 +1364,23 @@ def print_sample(sample_obj, rest):
sl = [raw_event(s[0], s[1] + "_" + remove_pp(s[0]).replace(".", "_"), period=True) for s in nsamp]
sl = add_filter(sl)
sample = ",".join([x for x in sl if x])
print "Sampling:"
sperf = [perf, "record", "-g", "-e", sample] + [x for x in rest if x != "-A"]
print " ".join(sperf)
if not args.quiet:
print "Sampling:"
extra_args = args.sample_args.replace("+", "-").split()
perf_data = args.sample_basename
if count:
perf_data += ".%d" % count
sperf = [perf, "record" ] + extra_args + ["-e", sample, "-o", perf_data] + [x for x in rest if x != "-A"]
if not args.quiet:
print " ".join(sperf)
if args.run_sample:
ret = os.system(" ".join(sperf))
if ret:
sys.exit(ret)
print "Run `" + perf + " report' to show the sampling results"
if not args.quiet:
print "Run `" + perf + " report -i %s%s' to show the sampling results" % (
perf_data,
" --no-branch-history" if "-b" in extra_args else "")

def sysctl(name):
try:
Expand Down Expand Up @@ -1517,14 +1559,26 @@ def setup_with_metrics(p, runner):
else:
out = tl_output.OutputHuman(args.output, args, version)
runner.schedule()
try:
if args.no_multiplex:
ret = execute_no_multiplex(runner, out, rest)
else:
ret = execute(runner, out, rest)
except KeyboardInterrupt:
sys.exit(1)
runner.stat.compute_errors()
if args.show_sample or args.run_sample:
print_sample(runner.sample_obj, rest)

def measure_and_sample(count):
try:
if args.no_multiplex:
ret = execute_no_multiplex(runner, out, rest)
else:
ret = execute(runner, out, rest)
except KeyboardInterrupt:
sys.exit(1)
runner.stat.compute_errors()
if args.show_sample or args.run_sample:
do_sample(runner.sample_obj, rest, count)
return ret

if args.sample_repeat:
for j in range(args.sample_repeat):
ret = measure_and_sample(j + 1)
if ret:
break
else:
ret = measure_and_sample(None)

sys.exit(ret)

0 comments on commit 245fb85

Please sign in to comment.