Skip to content

Commit

Permalink
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux…
Browse files Browse the repository at this point in the history
…/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

  * Fix memcpy benchmark for large sizes, from Andi Kleen.

  * Support callchain sorting based on addresses, from Andi Kleen

  * Move weight back to common sort keys, From Andi Kleen.

  * Fix named threads support in 'perf script', from David Ahern.

  * Handle ENODEV on default cycles event, fix from David Ahern.

  * More install tests, from Jiri Olsa.

  * Fix build with perl 5.18, from Kirill A. Shutemov.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Jul 23, 2013
2 parents ea8596b + f9ea55d commit 4f16d61
Show file tree
Hide file tree
Showing 16 changed files with 124 additions and 40 deletions.
8 changes: 6 additions & 2 deletions tools/perf/Documentation/perf-report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ OPTIONS
--dump-raw-trace::
Dump raw trace in ASCII.

-g [type,min[,limit],order]::
-g [type,min[,limit],order[,key]]::
--call-graph::
Display call chains using type, min percent threshold, optional print
limit and order.
Expand All @@ -129,7 +129,11 @@ OPTIONS
- callee: callee based call graph.
- caller: inverted caller based call graph.

Default: fractal,0.5,callee.
key can be:
- function: compare on functions
- address: compare on individual code addresses

Default: fractal,0.5,callee,function.

-G::
--inverted::
Expand Down
4 changes: 2 additions & 2 deletions tools/perf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -631,10 +631,10 @@ $(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<

$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $<

$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $<

$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
Expand Down
2 changes: 2 additions & 0 deletions tools/perf/bench/mem-memcpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ static void alloc_mem(void **dst, void **src, size_t length)
*src = zalloc(length);
if (!*src)
die("memory allocation failed - maybe length is too large?\n");
/* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
memset(*src, 0, length);
}

static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
Expand Down
19 changes: 15 additions & 4 deletions tools/perf/builtin-report.c
Original file line number Diff line number Diff line change
Expand Up @@ -667,12 +667,23 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
}

/* get the call chain order */
if (!strcmp(tok2, "caller"))
if (!strncmp(tok2, "caller", strlen("caller")))
callchain_param.order = ORDER_CALLER;
else if (!strcmp(tok2, "callee"))
else if (!strncmp(tok2, "callee", strlen("callee")))
callchain_param.order = ORDER_CALLEE;
else
return -1;

/* Get the sort key */
tok2 = strtok(NULL, ",");
if (!tok2)
goto setup;
if (!strncmp(tok2, "function", strlen("function")))
callchain_param.key = CCKEY_FUNCTION;
else if (!strncmp(tok2, "address", strlen("address")))
callchain_param.key = CCKEY_ADDRESS;
else
return -1;
setup:
if (callchain_register_param(&callchain_param) < 0) {
fprintf(stderr, "Can't register callchain params\n");
Expand Down Expand Up @@ -784,8 +795,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
"Only display entries with parent-match"),
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit and callchain order. "
"Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
"Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
"alias for inverted call graph"),
OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
Expand Down
6 changes: 3 additions & 3 deletions tools/perf/builtin-script.c
Original file line number Diff line number Diff line change
Expand Up @@ -397,10 +397,10 @@ static void print_sample_bts(union perf_event *event,

static void process_event(union perf_event *event, struct perf_sample *sample,
struct perf_evsel *evsel, struct machine *machine,
struct addr_location *al)
struct thread *thread,
struct addr_location *al __maybe_unused)
{
struct perf_event_attr *attr = &evsel->attr;
struct thread *thread = al->thread;

if (output[attr->type].fields == 0)
return;
Expand Down Expand Up @@ -511,7 +511,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
return 0;

scripting_ops->process_event(event, sample, evsel, machine, &al);
scripting_ops->process_event(event, sample, evsel, machine, thread, &al);

evsel->hists.stats.total_period += sample->period;
return 0;
Expand Down
67 changes: 59 additions & 8 deletions tools/perf/tests/make
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
PERF := .
MK := Makefile

has = $(shell which $1 2>/dev/null)

# standard single make variable specified
make_clean_all := clean all
make_python_perf_so := python/perf.so
Expand All @@ -25,6 +27,13 @@ make_help := help
make_doc := doc
make_perf_o := perf.o
make_util_map_o := util/map.o
make_install := install
make_install_bin := install-bin
make_install_doc := install-doc
make_install_man := install-man
make_install_html := install-html
make_install_info := install-info
make_install_pdf := install-pdf

# all the NO_* variable combined
make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
Expand All @@ -50,14 +59,27 @@ run += make_no_backtrace
run += make_no_libnuma
run += make_no_libaudit
run += make_no_libbionic
run += make_tags
run += make_cscope
run += make_help
run += make_doc
run += make_perf_o
run += make_util_map_o
run += make_install
run += make_install_bin
# FIXME 'install-*' commented out till they're fixed
# run += make_install_doc
# run += make_install_man
# run += make_install_html
# run += make_install_info
# run += make_install_pdf
run += make_minimal

ifneq ($(call has,ctags),)
run += make_tags
endif
ifneq ($(call has,cscope),)
run += make_cscope
endif

# $(run_O) contains same portion of $(run) tests with '_O' attached
# to distinguish O=... tests
run_O := $(addsuffix _O,$(run))
Expand All @@ -84,6 +106,31 @@ test_make_python_perf_so := test -f $(PERF)/python/perf.so
test_make_perf_o := test -f $(PERF)/perf.o
test_make_util_map_o := test -f $(PERF)/util/map.o

test_make_install := test -x $$TMP_DEST/bin/perf
test_make_install_O := $(test_make_install)
test_make_install_bin := $(test_make_install)
test_make_install_bin_O := $(test_make_install)

# FIXME nothing gets installed
test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1
test_make_install_man_O := $(test_make_install_man)

# FIXME nothing gets installed
test_make_install_doc := $(test_ok)
test_make_install_doc_O := $(test_ok)

# FIXME nothing gets installed
test_make_install_html := $(test_ok)
test_make_install_html_O := $(test_ok)

# FIXME nothing gets installed
test_make_install_info := $(test_ok)
test_make_install_info_O := $(test_ok)

# FIXME nothing gets installed
test_make_install_pdf := $(test_ok)
test_make_install_pdf_O := $(test_ok)

# Kbuild tests only
#test_make_python_perf_so_O := test -f $$TMP/tools/perf/python/perf.so
#test_make_perf_o_O := test -f $$TMP/tools/perf/perf.o
Expand All @@ -95,7 +142,7 @@ test_make_util_map_o_O := true
test_default = test -x $(PERF)/perf
test = $(if $(test_$1),$(test_$1),$(test_default))

test_default_O = test -x $$TMP/perf
test_default_O = test -x $$TMP_O/perf
test_O = $(if $(test_$1),$(test_$1),$(test_default_O))

all:
Expand All @@ -111,23 +158,27 @@ clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null)

$(run):
$(call clean)
@cmd="cd $(PERF) && make -f $(MK) $($@)"; \
@TMP_DEST=$$(mktemp -d); \
cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \
echo "- $@: $$cmd" && echo $$cmd > $@ && \
( eval $$cmd ) >> $@ 2>&1; \
echo " test: $(call test,$@)"; \
$(call test,$@) && \
rm -f $@
rm -f $@ \
rm -rf $$TMP_DEST

$(run_O):
$(call clean)
@TMP=$$(mktemp -d); \
cmd="cd $(PERF) && make -f $(MK) $($(patsubst %_O,%,$@)) O=$$TMP"; \
@TMP_O=$$(mktemp -d); \
TMP_DEST=$$(mktemp -d); \
cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
echo "- $@: $$cmd" && echo $$cmd > $@ && \
( eval $$cmd ) >> $@ 2>&1 && \
echo " test: $(call test_O,$@)"; \
$(call test_O,$@) && \
rm -f $@ && \
rm -rf $$TMP
rm -rf $$TMP_O \
rm -rf $$TMP_DEST

all: $(run) $(run_O)
@echo OK
Expand Down
7 changes: 5 additions & 2 deletions tools/perf/util/callchain.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <errno.h>
#include <math.h>

#include "hist.h"
#include "util.h"
#include "callchain.h"

Expand Down Expand Up @@ -327,7 +328,8 @@ append_chain(struct callchain_node *root,
/*
* Lookup in the current node
* If we have a symbol, then compare the start to match
* anywhere inside a function.
* anywhere inside a function, unless function
* mode is disabled.
*/
list_for_each_entry(cnode, &root->val, list) {
struct callchain_cursor_node *node;
Expand All @@ -339,7 +341,8 @@ append_chain(struct callchain_node *root,

sym = node->sym;

if (cnode->ms.sym && sym) {
if (cnode->ms.sym && sym &&
callchain_param.key == CCKEY_FUNCTION) {
if (cnode->ms.sym->start != sym->start)
break;
} else if (cnode->ip != node->ip)
Expand Down
6 changes: 6 additions & 0 deletions tools/perf/util/callchain.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,18 @@ struct callchain_param;
typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *,
u64, struct callchain_param *);

enum chain_key {
CCKEY_FUNCTION,
CCKEY_ADDRESS
};

struct callchain_param {
enum chain_mode mode;
u32 print_limit;
double min_percent;
sort_chain_func_t sort;
enum chain_order order;
enum chain_key key;
};

struct callchain_list {
Expand Down
2 changes: 1 addition & 1 deletion tools/perf/util/evsel.c
Original file line number Diff line number Diff line change
Expand Up @@ -1482,7 +1482,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
char *msg, size_t msgsize)
{
if ((err == ENOENT || err == ENXIO) &&
if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
evsel->attr.type == PERF_TYPE_HARDWARE &&
evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
/*
Expand Down
3 changes: 2 additions & 1 deletion tools/perf/util/hist.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ enum hist_filter {
struct callchain_param callchain_param = {
.mode = CHAIN_GRAPH_REL,
.min_percent = 0.5,
.order = ORDER_CALLEE
.order = ORDER_CALLEE,
.key = CCKEY_FUNCTION
};

u16 hists__col_len(struct hists *hists, enum hist_column col)
Expand Down
14 changes: 8 additions & 6 deletions tools/perf/util/scripting-engines/trace-event-perl.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,8 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine __maybe_unused,
struct addr_location *al)
struct thread *thread,
struct addr_location *al)
{
struct format_field *field;
static char handler[256];
Expand All @@ -272,7 +273,6 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused,
int cpu = sample->cpu;
void *data = sample->raw_data;
unsigned long long nsecs = sample->time;
struct thread *thread = al->thread;
char *comm = thread->comm;

dSP;
Expand Down Expand Up @@ -351,7 +351,8 @@ static void perl_process_event_generic(union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine __maybe_unused,
struct addr_location *al __maybe_unused)
struct thread *thread __maybe_unused,
struct addr_location *al __maybe_unused)
{
dSP;

Expand All @@ -377,10 +378,11 @@ static void perl_process_event(union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine,
struct addr_location *al)
struct thread *thread,
struct addr_location *al)
{
perl_process_tracepoint(event, sample, evsel, machine, al);
perl_process_event_generic(event, sample, evsel, machine, al);
perl_process_tracepoint(event, sample, evsel, machine, thread, al);
perl_process_event_generic(event, sample, evsel, machine, thread, al);
}

static void run_start_sub(void)
Expand Down
9 changes: 5 additions & 4 deletions tools/perf/util/scripting-engines/trace-event-python.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ static void python_process_tracepoint(union perf_event *perf_event
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine __maybe_unused,
struct thread *thread,
struct addr_location *al)
{
PyObject *handler, *retval, *context, *t, *obj, *dict = NULL;
Expand All @@ -238,7 +239,6 @@ static void python_process_tracepoint(union perf_event *perf_event
int cpu = sample->cpu;
void *data = sample->raw_data;
unsigned long long nsecs = sample->time;
struct thread *thread = al->thread;
char *comm = thread->comm;

t = PyTuple_New(MAX_FIELDS);
Expand Down Expand Up @@ -345,12 +345,12 @@ static void python_process_general_event(union perf_event *perf_event
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine __maybe_unused,
struct thread *thread,
struct addr_location *al)
{
PyObject *handler, *retval, *t, *dict;
static char handler_name[64];
unsigned n = 0;
struct thread *thread = al->thread;

/*
* Use the MAX_FIELDS to make the function expandable, though
Expand Down Expand Up @@ -404,17 +404,18 @@ static void python_process_event(union perf_event *perf_event,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine,
struct thread *thread,
struct addr_location *al)
{
switch (evsel->attr.type) {
case PERF_TYPE_TRACEPOINT:
python_process_tracepoint(perf_event, sample, evsel,
machine, al);
machine, thread, al);
break;
/* Reserve for future process_hw/sw/raw APIs */
default:
python_process_general_event(perf_event, sample, evsel,
machine, al);
machine, thread, al);
}
}

Expand Down
Loading

0 comments on commit 4f16d61

Please sign in to comment.