diff --git a/README.md b/README.md
index f97877f3..44eb248e 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,7 @@ analysis on Intel CPUs on top of [Linux perf](https://perf.wiki.kernel.org/index
 
 # Recent new features:
 
+* toplev can now automatically sample workloads with --run-sample
 * Added cputop utility to easily enable/disable hyper threading
 * toplev updated to TopDown 2.9:
     - Many fixes to SMT support. SMT now supported on Haswell.
diff --git a/hsw_client_ratios.py b/hsw_client_ratios.py
index fb4fe8ce..5fcf0418 100644
--- a/hsw_client_ratios.py
+++ b/hsw_client_ratios.py
@@ -1482,7 +1482,7 @@ def __init__(self, r):
         o["MEM_Bandwidth"].sample = []
         o["MEM_Latency"].sample = []
         o["Stores_Bound"].sample = ['MEM_UOPS_RETIRED.ALL_STORES:pp']
-        o["False_Sharing"].sample = [' MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM:pp', 'MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM:pp', 'OFFCORE_RESPONSE:request=DEMAND_RFO:response=L3_HIT.SNOOP_HITM']
+	o["False_Sharing"].sample = ['MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM:pp', 'OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE']
         o["Split_Stores"].sample = ['MEM_UOPS_RETIRED.SPLIT_STORES:pp']
         o["DTLB_Store"].sample = ['MEM_UOPS_RETIRED.STLB_MISS_STORES:pp']
         o["Core_Bound"].sample = []
diff --git a/hsx_server_ratios.py b/hsx_server_ratios.py
index 4a354a5a..a5cb5528 100644
--- a/hsx_server_ratios.py
+++ b/hsx_server_ratios.py
@@ -1528,7 +1528,7 @@ def __init__(self, r):
         o["Split_Stores"].sample = ['MEM_UOPS_RETIRED.SPLIT_STORES:pp']
         o["DTLB_Store"].sample = ['MEM_UOPS_RETIRED.STLB_MISS_STORES:pp']
         o["Core_Bound"].sample = []
-        o["Divider"].sample = ['ARITH.FPU_DIV_ACTIVE']
+        o["Divider"].sample = []
         o["Ports_Utilization"].sample = []
         o["G0_Ports_Utilized"].sample = []
         o["G1_Port_Utilized"].sample = []
diff --git a/ivb_client_ratios.py b/ivb_client_ratios.py
index 2881660d..7373a4af 100644
--- a/ivb_client_ratios.py
+++ b/ivb_client_ratios.py
@@ -1852,12 +1852,12 @@ def __init__(self, r):
         o["Data_Sharing"].sample = ['MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM:pp']
         o["L3_Latency"].sample = ['MEM_LOAD_UOPS_RETIRED.LLC_HIT:pp']
         o["SQ_Full"].sample = []
-        o["MEM_Bound"].sample = ['MEM_LOAD_UOPS_RETIRED.L3_MISS:pp']
+        o["MEM_Bound"].sample = ['MEM_LOAD_UOPS_RETIRED.LLC_MISS:pp']
         o["MEM_Bandwidth"].sample = []
         o["MEM_Latency"].sample = []
         o["Stores_Bound"].sample = ['MEM_UOPS_RETIRED.ALL_STORES:pp']
         o["Store_Latency"].sample = []
-        o["False_Sharing"].sample = ['MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM:pp', 'MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM:pp', 'OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_Other_CORE_0']
+        o["False_Sharing"].sample = ['MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM:pp', 'OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_Other_CORE_0']
         o["Split_Stores"].sample = ['MEM_UOPS_RETIRED.SPLIT_STORES:pp']
         o["DTLB_Store"].sample = ['MEM_UOPS_RETIRED.STLB_MISS_STORES:pp']
         o["Core_Bound"].sample = []
diff --git a/ivb_server_ratios.py b/ivb_server_ratios.py
index 881f2baa..348d1eec 100644
--- a/ivb_server_ratios.py
+++ b/ivb_server_ratios.py
@@ -1895,7 +1895,7 @@ def __init__(self, r):
         o["Data_Sharing"].sample = ['MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM:pp']
         o["L3_Latency"].sample = ['MEM_LOAD_UOPS_RETIRED.LLC_HIT:pp']
         o["SQ_Full"].sample = []
-        o["MEM_Bound"].sample = ['MEM_LOAD_UOPS_RETIRED.L3_MISS:pp']
+        o["MEM_Bound"].sample = ['MEM_LOAD_UOPS_RETIRED.LLC_MISS:pp']
         o["MEM_Bandwidth"].sample = []
         o["MEM_Latency"].sample = []
         o["Local_DRAM"].sample = ['MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM:pp']
diff --git a/jkt_server_ratios.py b/jkt_server_ratios.py
index 9d03e131..02bf569d 100644
--- a/jkt_server_ratios.py
+++ b/jkt_server_ratios.py
@@ -749,7 +749,7 @@ def __init__(self, r):
         # sampling events
 
         o["Frontend_Bound"].sample = []
-        o["Frontend_Latency"].sample = ['RS_EVENTS.EMPTY_END']
+        o["Frontend_Latency"].sample = []
         o["ITLB_Misses"].sample = ['ITLB_MISSES.WALK_COMPLETED']
         o["DSB_Switches"].sample = []
         o["LCP"].sample = []
diff --git a/ocperf.py b/ocperf.py
index c1c05e62..b45017c2 100755
--- a/ocperf.py
+++ b/ocperf.py
@@ -121,7 +121,7 @@ def __init__(self, name, val, desc):
         self.msrval = 0
         self.desc = desc
 
-    def output_newstyle(self, newextra="", noname=False, period=False):
+    def output_newstyle(self, newextra="", noname=False, period=False, name=""):
         """Format an perf event for output and return as perf event string.
            Always uses new style (cpu/.../)."""
         val = self.val
@@ -129,13 +129,16 @@ def output_newstyle(self, newextra="", noname=False, period=False):
         if newextra:
             extra += "," + newextra
         e = "event=0x%x,umask=0x%x%s" % (val & 0xff, (val >> 8) & 0xff, extra)
-        if version.has_name and not noname:
-            e += ",name=%s" % (self.name.replace(".", "_"),)
+	if version.has_name:
+	    if name:
+		e += ",name=" + name
+	    elif not noname:
+		e += ",name=%s" % (self.name.replace(".", "_"),)
         if period and self.period:
             e += ",period=%d" % self.period
         return e
 
-    def output(self, use_raw=False, flags="", noname=False, period=False):
+    def output(self, use_raw=False, flags="", noname=False, period=False, name=""):
         """Format an event for output and return as perf event string.
            use_raw when true return old style perf string (rXXX).
            Otherwise chose between old and new style based on the 
@@ -165,7 +168,7 @@ def output(self, use_raw=False, flags="", noname=False, period=False):
             if extra:
                 ename += ":" + extra
         else:
-            ename = "cpu/%s/" % (self.output_newstyle(newextra=",".join(newe), noname=noname, period=period)) + extra
+	    ename = "cpu/%s/" % (self.output_newstyle(newextra=",".join(newe), noname=noname, period=period, name=name)) + extra
         return ename
 
 box_to_perf = {
diff --git a/snb_client_ratios.py b/snb_client_ratios.py
index b5acb576..5c1a58f5 100644
--- a/snb_client_ratios.py
+++ b/snb_client_ratios.py
@@ -749,7 +749,7 @@ def __init__(self, r):
         # sampling events
 
         o["Frontend_Bound"].sample = []
-        o["Frontend_Latency"].sample = ['RS_EVENTS.EMPTY_END']
+        o["Frontend_Latency"].sample = []
         o["ITLB_Misses"].sample = ['ITLB_MISSES.WALK_COMPLETED']
         o["DSB_Switches"].sample = []
         o["LCP"].sample = []
diff --git a/tl-tester b/tl-tester
index 5b7e2578..cdbbafb9 100755
--- a/tl-tester
+++ b/tl-tester
@@ -102,6 +102,13 @@ EVENTMAP=${cpus[snb]} FORCEHT=0 FORCECPU=snb $WRAP ./toplev.py -d -l4 -I 1000 -a
 EVENTMAP=${cpus[snb]} FORCEHT=0 FORCECPU=snb $WRAP ./toplev.py -d -l4 -I 1000 -a --per-socket sleep 1
 EVENTMAP=${cpus[snb]} FORCEHT=0 FORCECPU=snb $WRAP ./toplev.py -d --no-desc -l4 -I 1000 -a -A sleep 1
 
+$WRAP ./toplev.py -o /dev/null --no-desc -v -l5 --run-sample $LOAD
+for cpu in $ALLCPUS ; do
+EVENTMAP=${cpus[$cpu]} FORCECPU=$cpu $WRAP ./toplev.py -o /dev/null --no-desc -v --all --show-sample $LOAD >&log
+cat log
+grep "not found" log && exit 1
+done
+
 trap "" ERR 0
 
 echo
diff --git a/toplev.py b/toplev.py
index 79098d34..93b33b10 100755
--- a/toplev.py
+++ b/toplev.py
@@ -45,6 +45,17 @@
     "cpu/event=0x0,umask=0x3,any=1/" : 2,
 }
 
+# handle kernels that don't support all events: event -> ((cpus,), (major, minor) of the first kernel treated as supporting it)
+unsup_pebs = { # ":pp" events that print_sample falls back to sampling without ":pp"
+    "BR_MISP_RETIRED.ALL_BRANCHES:pp": (("hsw",), (3, 18)),
+    "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM:pp": (("hsw",), (3, 18)),
+    "MEM_LOAD_UOPS_RETIRED.L3_MISS:pp": (("hsw",), (3, 18)),
+}
+
+unsup_events = { # events that print_sample leaves out of the sampling command line
+    "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE": (("hsw",), (3, 18)),
+}
+
 ingroup_events = frozenset(fixed_to_num.keys())
 
 outgroup_events = set()
@@ -83,6 +94,14 @@ def __init__(self):
             sys.exit("perf binary is too old. please upgrade")
         self.supports_power = works(perf + " list  | grep -q power/")
 
+def unsup_event(e, table):
+    """Return True if table lists event e as unsupported on the current CPU and running kernel."""
+    if e not in table:
+        return False
+    cpus, min_kernel = table[e]
+    # Compare (major, minor) as one tuple: testing each field on its own misorders versions.
+    return cpu.cpu in cpus and tuple(kernel_version[:2]) < tuple(min_kernel)
+
 def needed_limited_counter(evlist, limit_table, limit_set):
     limited_only = set(evlist) & set(limit_set)
     assigned = Counter([limit_table[x] for x in limited_only]).values()
@@ -191,8 +210,6 @@ def event_group(evlist):
 formatter_class=argparse.RawDescriptionHelpFormatter)
 p.add_argument('--verbose', '-v', help='Print all results even when below threshold',
                action='store_true')
-p.add_argument('--force', help='Force potentially broken configurations',
-               action='store_true')
 p.add_argument('--kernel', help='Only measure kernel code', action='store_true')
 p.add_argument('--user', help='Only measure user code', action='store_true')
 p.add_argument('--print-group', '-g', help='Print event group assignments',
@@ -221,6 +238,8 @@ def event_group(evlist):
 p.add_argument('--no-multiplex',
                help='Do not multiplex, but run the workload multiple times as needed. Requires reproducible workloads.',
                action='store_true')
+p.add_argument('--show-sample', help='Show command line to rerun workload with sampling', action='store_true')
+p.add_argument('--run-sample', help='Automatically rerun workload with sampling', action='store_true')
 p.add_argument('--stats', help='Show statistics on what events counted', action='store_true')
 p.add_argument('--power', help='Display power metrics', action='store_true')
 p.add_argument('--version', help=argparse.SUPPRESS, action='store_true')
@@ -262,7 +281,6 @@ def event_group(evlist):
 detailed_model = (args.level > 1) or args.detailed
 csv_mode = args.csv
 interval_mode = args.interval
-force = args.force
 ring_filter = ""
 if args.kernel:
     ring_filter = 'k'
@@ -496,7 +514,7 @@ def add_filter(s):
         s = [x + separator(x) + ring_filter for x in s]
     return s
 
-def raw_event(i):
+def raw_event(i, name="", period=False):
     if i.count(".") > 0:
         if i in fixed_counters:
             return fixed_counters[i]
@@ -506,10 +524,8 @@ def raw_event(i):
                 e = emap.getevent(event_fixes[i])
         if e is None:
             print >>sys.stderr, "%s not found" % (i,)
-            if not force:
-                sys.exit(1)
-            return "cycles" # XXX
-        i = e.output(noname=True)
+	    return None
+	i = e.output(noname=True, name=name, period=period)
         emap.update_event(e.output(noname=True), e)
         if e.counter != cpu.standard_counters:
             # for now only use the first counter only to simplify
@@ -914,6 +930,7 @@ def __init__(self, max_level):
         self.max_level = max_level
         self.missed = 0
         self.already_warned = []
+	self.sample_obj = set()
 
     def do_run(self, obj):
         obj.res = None
@@ -1092,6 +1109,36 @@ def print_res(self, res, rev, out, timestamp, title, env, smt, referenced):
                         desc + disclaimer,
                         title,
                         sample_desc(obj.sample) if obj.sample else "")
+		    if obj.thresh or args.verbose:
+			self.sample_obj.add(obj)
+
+def remove_pp(s):
+    """Return s with a trailing ":pp" suffix removed; other strings are returned unchanged."""
+    suffix = ":pp"
+    return s[:-len(suffix)] if s.endswith(suffix) else s
+
+def print_sample(sample_obj, rest):
+    """Print the perf record command line that samples the events of the nodes
+    in sample_obj for the workload in rest; with --run-sample also run it."""
+    from pipes import quote
+    samples = [(s, obj.name) for obj in sample_obj for s in obj.sample]
+    if not samples:
+        return
+    # Skip events listed in unsup_events; sample those in unsup_pebs without :pp.
+    nsamp = [(remove_pp(ev), nm) if unsup_event(ev, unsup_pebs) else (ev, nm)
+             for ev, nm in samples if not unsup_event(ev, unsup_events)]
+    if nsamp != samples:
+        missing = [x[0] for x in set(samples) - set(nsamp)]
+        print >>sys.stderr, "warning: update kernel to handle sample events:\n" + "\n".join(missing)
+    sample = ",".join(filter(None, [raw_event(ev, nm, period=True) for ev, nm in nsamp]))
+    if not sample:
+        return # no event could be resolved; perf record with an empty -e list would only fail
+    sargs = ["record", "-g", "-e", sample] + [x for x in rest if x != "-A"]
+    sperf = " ".join([perf] + [quote(x) for x in sargs]) # quote args (not perf itself) so the shell cannot re-split them
+    print "Sampling:\n" + sperf
+    if args.run_sample:
+        os.system(sperf)
+        print "Run `" + perf + " report' to show the sampling results"
 
 def sysctl(name):
     try:
@@ -1222,4 +1269,6 @@ def setup_with_metrics(p, runner):
     ret = execute_no_multiplex(runner, out, rest)
 else:
     ret = execute(runner, out, rest)
+if args.show_sample or args.run_sample:
+    print_sample(runner.sample_obj, rest)
 sys.exit(ret)