Skip to content

Commit

Permalink
toplev: Support automatic sampling
Browse files Browse the repository at this point in the history
TopDown 2.9 added sampling events for nodes.
Add new --run-sample and --show-sample options to toplev that
run perf (or show the perf command line) to sample the nodes over threshold.
  • Loading branch information
Andi Kleen committed Mar 3, 2015
1 parent 1871645 commit 0ebf735
Show file tree
Hide file tree
Showing 10 changed files with 80 additions and 20 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ analysis on Intel CPUs on top of [Linux perf](https://perf.wiki.kernel.org/index

# Recent new features:

* toplev can now automatically sample workloads with --run-sample
* Added cputop utility to easily enable/disable hyper threading
* toplev updated to TopDown 2.9:
- Many fixes to SMT support. SMT now supported on Haswell.
Expand Down
2 changes: 1 addition & 1 deletion hsw_client_ratios.py
Original file line number Diff line number Diff line change
Expand Up @@ -1482,7 +1482,7 @@ def __init__(self, r):
o["MEM_Bandwidth"].sample = []
o["MEM_Latency"].sample = []
o["Stores_Bound"].sample = ['MEM_UOPS_RETIRED.ALL_STORES:pp']
o["False_Sharing"].sample = [' MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM:pp', 'MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM:pp', 'OFFCORE_RESPONSE:request=DEMAND_RFO:response=L3_HIT.SNOOP_HITM']
o["False_Sharing"].sample = ['MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM:pp', 'OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE']
o["Split_Stores"].sample = ['MEM_UOPS_RETIRED.SPLIT_STORES:pp']
o["DTLB_Store"].sample = ['MEM_UOPS_RETIRED.STLB_MISS_STORES:pp']
o["Core_Bound"].sample = []
Expand Down
2 changes: 1 addition & 1 deletion hsx_server_ratios.py
Original file line number Diff line number Diff line change
Expand Up @@ -1528,7 +1528,7 @@ def __init__(self, r):
o["Split_Stores"].sample = ['MEM_UOPS_RETIRED.SPLIT_STORES:pp']
o["DTLB_Store"].sample = ['MEM_UOPS_RETIRED.STLB_MISS_STORES:pp']
o["Core_Bound"].sample = []
o["Divider"].sample = ['ARITH.FPU_DIV_ACTIVE']
o["Divider"].sample = []
o["Ports_Utilization"].sample = []
o["G0_Ports_Utilized"].sample = []
o["G1_Port_Utilized"].sample = []
Expand Down
4 changes: 2 additions & 2 deletions ivb_client_ratios.py
Original file line number Diff line number Diff line change
Expand Up @@ -1852,12 +1852,12 @@ def __init__(self, r):
o["Data_Sharing"].sample = ['MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM:pp']
o["L3_Latency"].sample = ['MEM_LOAD_UOPS_RETIRED.LLC_HIT:pp']
o["SQ_Full"].sample = []
o["MEM_Bound"].sample = ['MEM_LOAD_UOPS_RETIRED.L3_MISS:pp']
o["MEM_Bound"].sample = ['MEM_LOAD_UOPS_RETIRED.LLC_MISS:pp']
o["MEM_Bandwidth"].sample = []
o["MEM_Latency"].sample = []
o["Stores_Bound"].sample = ['MEM_UOPS_RETIRED.ALL_STORES:pp']
o["Store_Latency"].sample = []
o["False_Sharing"].sample = ['MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM:pp', 'MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM:pp', 'OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_Other_CORE_0']
o["False_Sharing"].sample = ['MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM:pp', 'OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_Other_CORE_0']
o["Split_Stores"].sample = ['MEM_UOPS_RETIRED.SPLIT_STORES:pp']
o["DTLB_Store"].sample = ['MEM_UOPS_RETIRED.STLB_MISS_STORES:pp']
o["Core_Bound"].sample = []
Expand Down
2 changes: 1 addition & 1 deletion ivb_server_ratios.py
Original file line number Diff line number Diff line change
Expand Up @@ -1895,7 +1895,7 @@ def __init__(self, r):
o["Data_Sharing"].sample = ['MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM:pp']
o["L3_Latency"].sample = ['MEM_LOAD_UOPS_RETIRED.LLC_HIT:pp']
o["SQ_Full"].sample = []
o["MEM_Bound"].sample = ['MEM_LOAD_UOPS_RETIRED.L3_MISS:pp']
o["MEM_Bound"].sample = ['MEM_LOAD_UOPS_RETIRED.LLC_MISS:pp']
o["MEM_Bandwidth"].sample = []
o["MEM_Latency"].sample = []
o["Local_DRAM"].sample = ['MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM:pp']
Expand Down
2 changes: 1 addition & 1 deletion jkt_server_ratios.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ def __init__(self, r):
# sampling events

o["Frontend_Bound"].sample = []
o["Frontend_Latency"].sample = ['RS_EVENTS.EMPTY_END']
o["Frontend_Latency"].sample = []
o["ITLB_Misses"].sample = ['ITLB_MISSES.WALK_COMPLETED']
o["DSB_Switches"].sample = []
o["LCP"].sample = []
Expand Down
13 changes: 8 additions & 5 deletions ocperf.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,21 +121,24 @@ def __init__(self, name, val, desc):
self.msrval = 0
self.desc = desc

def output_newstyle(self, newextra="", noname=False, period=False):
def output_newstyle(self, newextra="", noname=False, period=False, name=""):
"""Format a perf event for output and return it as a perf event string.
Always uses new style (cpu/.../).

newextra: extra terms appended after self.newextra, separated by a comma.
noname: when true, and no explicit name is given, omit the name= term.
period: when true, append a period= term if self.period is set.
name: explicit value for the name= term; takes precedence over the
default name (self.name with "." replaced by "_").
A name= term is only emitted when version.has_name is true."""
val = self.val
extra = self.newextra
if newextra:
extra += "," + newextra
# event code is the low byte of val, umask is the next byte
e = "event=0x%x,umask=0x%x%s" % (val & 0xff, (val >> 8) & 0xff, extra)
if version.has_name and not noname:
e += ",name=%s" % (self.name.replace(".", "_"),)
if version.has_name:
# an explicit name wins over both the default name and noname
if name:
e += ",name=" + name
elif not noname:
e += ",name=%s" % (self.name.replace(".", "_"),)
if period and self.period:
e += ",period=%d" % self.period
return e

def output(self, use_raw=False, flags="", noname=False, period=False):
def output(self, use_raw=False, flags="", noname=False, period=False, name=""):
"""Format an event for output and return as perf event string.
use_raw when true return old style perf string (rXXX).
Otherwise chose between old and new style based on the
Expand Down Expand Up @@ -165,7 +168,7 @@ def output(self, use_raw=False, flags="", noname=False, period=False):
if extra:
ename += ":" + extra
else:
ename = "cpu/%s/" % (self.output_newstyle(newextra=",".join(newe), noname=noname, period=period)) + extra
ename = "cpu/%s/" % (self.output_newstyle(newextra=",".join(newe), noname=noname, period=period, name=name)) + extra
return ename

box_to_perf = {
Expand Down
2 changes: 1 addition & 1 deletion snb_client_ratios.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ def __init__(self, r):
# sampling events

o["Frontend_Bound"].sample = []
o["Frontend_Latency"].sample = ['RS_EVENTS.EMPTY_END']
o["Frontend_Latency"].sample = []
o["ITLB_Misses"].sample = ['ITLB_MISSES.WALK_COMPLETED']
o["DSB_Switches"].sample = []
o["LCP"].sample = []
Expand Down
7 changes: 7 additions & 0 deletions tl-tester
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ EVENTMAP=${cpus[snb]} FORCEHT=0 FORCECPU=snb $WRAP ./toplev.py -d -l4 -I 1000 -a
EVENTMAP=${cpus[snb]} FORCEHT=0 FORCECPU=snb $WRAP ./toplev.py -d -l4 -I 1000 -a --per-socket sleep 1
EVENTMAP=${cpus[snb]} FORCEHT=0 FORCECPU=snb $WRAP ./toplev.py -d --no-desc -l4 -I 1000 -a -A sleep 1

# Run toplev with --run-sample on $LOAD (no FORCECPU/EVENTMAP override).
$WRAP ./toplev.py -o /dev/null --no-desc -v -l5 --run-sample $LOAD
# For each CPU model in $ALLCPUS, generate the sampling command line with
# --show-sample and abort the test run if the captured output (stdout and
# stderr, saved in ./log) contains a "not found" message.
for cpu in $ALLCPUS ; do
EVENTMAP=${cpus[$cpu]} FORCECPU=$cpu $WRAP ./toplev.py -o /dev/null --no-desc -v --all --show-sample $LOAD >&log
cat log
grep "not found" log && exit 1
done

trap "" ERR 0

echo
Expand Down
65 changes: 57 additions & 8 deletions toplev.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,17 @@
"cpu/event=0x0,umask=0x3,any=1/" : 2,
}

# Handle kernels that don't support all events.
#
# Both tables map an event name to (cpus, (major, minor)); unsup_event()
# reads an entry as "on these CPUs, not usable on kernels older than
# major.minor".
_HSW_BEFORE_3_18 = (("hsw",), (3, 18))

# Events looked up here keep being sampled, but without the ":pp" suffix.
unsup_pebs = dict.fromkeys(
    (
        "BR_MISP_RETIRED.ALL_BRANCHES:pp",
        "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM:pp",
        "MEM_LOAD_UOPS_RETIRED.L3_MISS:pp",
    ),
    _HSW_BEFORE_3_18,
)

# Events looked up here are dropped from the sample list entirely.
unsup_events = dict.fromkeys(
    ("OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",),
    _HSW_BEFORE_3_18,
)

ingroup_events = frozenset(fixed_to_num.keys())

outgroup_events = set()
Expand Down Expand Up @@ -83,6 +94,14 @@ def __init__(self):
sys.exit("perf binary is too old. please upgrade")
self.supports_power = works(perf + " list | grep -q power/")

def unsup_event(e, table):
    """Return True when event `e` is unsupported on the running CPU and kernel.

    `table` maps an event name to a pair (cpus, (major, minor)). The event
    counts as unsupported when the current CPU (`cpu.cpu`) is listed in
    `cpus` and the running kernel (`kernel_version`) is older than
    major.minor. Events that are not in `table` are always supported.
    """
    if e not in table:
        return False
    entry = table[e]
    # Compare (major, minor) as one unit. Checking the two fields
    # independently gives the wrong answer whenever the running minor is
    # not below the required minor on an older major (3.19 vs. a 4.2
    # requirement). Both sides are converted to tuples so a list-valued
    # kernel_version compares element-wise as well.
    return (cpu.cpu in entry[0] and
            tuple(kernel_version[:2]) < tuple(entry[1]))

def needed_limited_counter(evlist, limit_table, limit_set):
limited_only = set(evlist) & set(limit_set)
assigned = Counter([limit_table[x] for x in limited_only]).values()
Expand Down Expand Up @@ -191,8 +210,6 @@ def event_group(evlist):
formatter_class=argparse.RawDescriptionHelpFormatter)
p.add_argument('--verbose', '-v', help='Print all results even when below threshold',
action='store_true')
p.add_argument('--force', help='Force potentially broken configurations',
action='store_true')
p.add_argument('--kernel', help='Only measure kernel code', action='store_true')
p.add_argument('--user', help='Only measure user code', action='store_true')
p.add_argument('--print-group', '-g', help='Print event group assignments',
Expand Down Expand Up @@ -221,6 +238,8 @@ def event_group(evlist):
p.add_argument('--no-multiplex',
help='Do not multiplex, but run the workload multiple times as needed. Requires reproducible workloads.',
action='store_true')
p.add_argument('--show-sample', help='Show command line to rerun workload with sampling', action='store_true')
p.add_argument('--run-sample', help='Automatically rerun workload with sampling', action='store_true')
p.add_argument('--stats', help='Show statistics on what events counted', action='store_true')
p.add_argument('--power', help='Display power metrics', action='store_true')
p.add_argument('--version', help=argparse.SUPPRESS, action='store_true')
Expand Down Expand Up @@ -262,7 +281,6 @@ def event_group(evlist):
detailed_model = (args.level > 1) or args.detailed
csv_mode = args.csv
interval_mode = args.interval
force = args.force
ring_filter = ""
if args.kernel:
ring_filter = 'k'
Expand Down Expand Up @@ -496,7 +514,7 @@ def add_filter(s):
s = [x + separator(x) + ring_filter for x in s]
return s

def raw_event(i):
def raw_event(i, name="", period=False):
if i.count(".") > 0:
if i in fixed_counters:
return fixed_counters[i]
Expand All @@ -506,10 +524,8 @@ def raw_event(i):
e = emap.getevent(event_fixes[i])
if e is None:
print >>sys.stderr, "%s not found" % (i,)
if not force:
sys.exit(1)
return "cycles" # XXX
i = e.output(noname=True)
return None
i = e.output(noname=True, name=name, period=period)
emap.update_event(e.output(noname=True), e)
if e.counter != cpu.standard_counters:
# for now only use the first counter only to simplify
Expand Down Expand Up @@ -914,6 +930,7 @@ def __init__(self, max_level):
self.max_level = max_level
self.missed = 0
self.already_warned = []
self.sample_obj = set()

def do_run(self, obj):
obj.res = None
Expand Down Expand Up @@ -1092,6 +1109,36 @@ def print_res(self, res, rev, out, timestamp, title, env, smt, referenced):
desc + disclaimer,
title,
sample_desc(obj.sample) if obj.sample else "")
if obj.thresh or args.verbose:
self.sample_obj.add(obj)

def remove_pp(s):
    """Return `s` without a trailing ":pp"; other strings pass through unchanged."""
    suffix = ":pp"
    return s[:-len(suffix)] if s.endswith(suffix) else s

def print_sample(sample_obj, rest):
    """Print, and with --run-sample execute, a perf record command line.

    sample_obj: iterable of nodes; each node's `sample` list (event names)
        and `name` are read.
    rest: workload command line as a list of arguments; it is appended to
        the perf command with any "-A" argument removed.

    Does nothing when the nodes carry no sample events. Events listed in
    `unsup_events` for this CPU/kernel are dropped, events listed in
    `unsup_pebs` lose their ":pp" suffix, and a warning naming the affected
    events is written to stderr in either case.
    """
    # Local import: only needed here, and keeps this function self-contained.
    import subprocess

    samples = [(s, obj.name) for obj in sample_obj for s in obj.sample]
    if not samples:
        return
    nsamp = [x for x in samples if not unsup_event(x[0], unsup_events)]
    nsamp = [(remove_pp(x[0]), x[1]) if unsup_event(x[0], unsup_pebs) else x
             for x in nsamp]
    if nsamp != samples:
        # Sorted so the warning is stable from run to run.
        missing = sorted(x[0] for x in set(samples) - set(nsamp))
        sys.stderr.write("warning: update kernel to handle sample events:\n")
        sys.stderr.write("\n".join(missing) + "\n")
    # Events for which raw_event() returns a false value are skipped.
    sl = [raw_event(s[0], s[1], period=True) for s in nsamp]
    sample = ",".join([x for x in sl if x])
    if not sample:
        # Nothing left to sample; an empty -e argument would only make perf fail.
        sys.stderr.write("warning: no usable sample events, not sampling\n")
        return
    print("Sampling:")
    sperf = [perf, "record", "-g", "-e", sample] + [x for x in rest if x != "-A"]
    print(" ".join(sperf))
    if args.run_sample:
        # Run with an argument list instead of a shell string so workload
        # arguments containing spaces or shell metacharacters reach perf intact.
        subprocess.call(sperf)
        print("Run `" + perf + " report' to show the sampling results")

def sysctl(name):
try:
Expand Down Expand Up @@ -1222,4 +1269,6 @@ def setup_with_metrics(p, runner):
ret = execute_no_multiplex(runner, out, rest)
else:
ret = execute(runner, out, rest)
if args.show_sample or args.run_sample:
print_sample(runner.sample_obj, rest)
sys.exit(ret)

0 comments on commit 0ebf735

Please sign in to comment.