forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
tracing, perf: Implement BPF programs attached to kprobes
BPF programs, attached to kprobes, provide a safe way to execute user-defined BPF byte-code programs without being able to crash or hang the kernel in any way. The BPF engine makes sure that such programs have a finite execution time and that they cannot break out of their sandbox. The user interface is to attach to a kprobe via the perf syscall: struct perf_event_attr attr = { .type = PERF_TYPE_TRACEPOINT, .config = event_id, ... }; event_fd = perf_event_open(&attr,...); ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); 'prog_fd' is a file descriptor associated with a previously loaded BPF program. 'event_id' is an ID of the kprobe created. Closing 'event_fd': close(event_fd); ... automatically detaches the BPF program from it. BPF programs can call in-kernel helper functions to: - lookup/update/delete elements in maps - probe_read - wrapper of probe_kernel_read() used to access any kernel data structures BPF programs receive 'struct pt_regs *' as an input ('struct pt_regs' is architecture dependent) and return 0 to ignore the event and 1 to store the kprobe event into the ring buffer. Note, kprobes are fundamentally _not_ a stable kernel ABI, so BPF programs attached to kprobes must be recompiled for every kernel version and the user must supply the correct LINUX_VERSION_CODE in attr.kern_version during the bpf_prog_load() call. Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Reviewed-by: Steven Rostedt <rostedt@goodmis.org> Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Arnaldo Carvalho de Melo <acme@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David S. 
Miller <davem@davemloft.net> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1427312966-8434-4-git-send-email-ast@plumgrid.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
- Loading branch information
Alexei Starovoitov
authored and
Ingo Molnar
committed
Apr 2, 2015
1 parent
72cbbc8
commit 2541517
Showing
8 changed files
with
219 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com | ||
* | ||
* This program is free software; you can redistribute it and/or | ||
* modify it under the terms of version 2 of the GNU General Public | ||
* License as published by the Free Software Foundation. | ||
*/ | ||
#include <linux/kernel.h> | ||
#include <linux/types.h> | ||
#include <linux/slab.h> | ||
#include <linux/bpf.h> | ||
#include <linux/filter.h> | ||
#include <linux/uaccess.h> | ||
#include "trace.h" | ||
|
||
static DEFINE_PER_CPU(int, bpf_prog_active); | ||
|
||
/** | ||
* trace_call_bpf - invoke BPF program | ||
* @prog: BPF program | ||
* @ctx: opaque context pointer | ||
* | ||
* kprobe handlers execute BPF programs via this helper. | ||
* Can be used from static tracepoints in the future. | ||
* | ||
* Return: BPF programs always return an integer which is interpreted by | ||
* kprobe handler as: | ||
* 0 - return from kprobe (event is filtered out) | ||
* 1 - store kprobe event into ring buffer | ||
* Other values are reserved and currently alias to 1 | ||
*/ | ||
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) | ||
{ | ||
unsigned int ret; | ||
|
||
if (in_nmi()) /* not supported yet */ | ||
return 1; | ||
|
||
preempt_disable(); | ||
|
||
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { | ||
/* | ||
* since some bpf program is already running on this cpu, | ||
* don't call into another bpf program (same or different) | ||
* and don't send kprobe event into ring-buffer, | ||
* so return zero here | ||
*/ | ||
ret = 0; | ||
goto out; | ||
} | ||
|
||
rcu_read_lock(); | ||
ret = BPF_PROG_RUN(prog, ctx); | ||
rcu_read_unlock(); | ||
|
||
out: | ||
__this_cpu_dec(bpf_prog_active); | ||
preempt_enable(); | ||
|
||
return ret; | ||
} | ||
EXPORT_SYMBOL_GPL(trace_call_bpf); | ||
|
||
/*
 * bpf_probe_read - BPF helper wrapping probe_kernel_read()
 * @r1: destination pointer, passed as a 64-bit register value
 * @r2: number of bytes to read (truncated to int)
 * @r3: source pointer to read from, passed as a 64-bit register value
 * @r4: unused
 * @r5: unused
 *
 * Return: the result of probe_kernel_read(), converted to u64.
 */
static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	void *to = (void *) (long) r1;
	void *from = (void *) (long) r3;
	int len = (int) r2;

	return probe_kernel_read(to, from, len);
}
|
||
static const struct bpf_func_proto bpf_probe_read_proto = { | ||
.func = bpf_probe_read, | ||
.gpl_only = true, | ||
.ret_type = RET_INTEGER, | ||
.arg1_type = ARG_PTR_TO_STACK, | ||
.arg2_type = ARG_CONST_STACK_SIZE, | ||
.arg3_type = ARG_ANYTHING, | ||
}; | ||
|
||
static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) | ||
{ | ||
switch (func_id) { | ||
case BPF_FUNC_map_lookup_elem: | ||
return &bpf_map_lookup_elem_proto; | ||
case BPF_FUNC_map_update_elem: | ||
return &bpf_map_update_elem_proto; | ||
case BPF_FUNC_map_delete_elem: | ||
return &bpf_map_delete_elem_proto; | ||
case BPF_FUNC_probe_read: | ||
return &bpf_probe_read_proto; | ||
default: | ||
return NULL; | ||
} | ||
} | ||
|
||
/* bpf+kprobe programs can access fields of 'struct pt_regs' */ | ||
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type) | ||
{ | ||
/* check bounds */ | ||
if (off < 0 || off >= sizeof(struct pt_regs)) | ||
return false; | ||
|
||
/* only read is allowed */ | ||
if (type != BPF_READ) | ||
return false; | ||
|
||
/* disallow misaligned access */ | ||
if (off % size != 0) | ||
return false; | ||
|
||
return true; | ||
} | ||
|
||
static struct bpf_verifier_ops kprobe_prog_ops = { | ||
.get_func_proto = kprobe_prog_func_proto, | ||
.is_valid_access = kprobe_prog_is_valid_access, | ||
}; | ||
|
||
static struct bpf_prog_type_list kprobe_tl = { | ||
.ops = &kprobe_prog_ops, | ||
.type = BPF_PROG_TYPE_KPROBE, | ||
}; | ||
|
||
/*
 * register_kprobe_prog_ops - register the kprobe program type
 *
 * Hooked up as a late initcall; hands kprobe_tl to bpf_register_prog_type().
 *
 * Return: always 0.
 */
static int __init register_kprobe_prog_ops(void)
{
	bpf_register_prog_type(&kprobe_tl);

	return 0;
}
late_initcall(register_kprobe_prog_ops);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters