Skip to content

Commit

Permalink
bpf: Add map side support for bpf timers.
Browse files Browse the repository at this point in the history
Restrict bpf timers to array, hash (both preallocated and kmalloced), and
lru map types. The per-cpu maps with timers don't make sense, since 'struct
bpf_timer' is a part of map value. bpf timers in per-cpu maps would mean that
the number of timers depends on number of possible cpus and timers would not be
accessible from all cpus. lpm map support can be added in the future.
The timers in inner maps are supported.

The bpf_map_update/delete_elem() helpers and sys_bpf commands cancel and free
bpf_timer in a given map element.

Similar to 'struct bpf_spin_lock' BTF is required and it is used to validate
that map element indeed contains 'struct bpf_timer'.

Make check_and_init_map_value() init both bpf_spin_lock and bpf_timer when
map element data is reused in preallocated htab and lru maps.

Teach copy_map_value() to support both bpf_spin_lock and bpf_timer in a single
map element. There could be one of each, but not more than one. Due to 'one
bpf_timer in one element' restriction do not support timers in global data,
since global data is a map of single element, but from bpf program side it's
seen as many global variables and restriction of single global timer would be
odd. The sys_bpf map_freeze and sys_mmap syscalls are not allowed on maps with
timers, since user space could have corrupted mmap element and crashed the
kernel. The maps with timers cannot be readonly. Due to these restrictions
search for bpf_timer in datasec BTF in case it was placed in the global data to
report clear error.

The previous patch allowed 'struct bpf_timer' as a first field in a map
element only. Relax this restriction.

Refactor lru map to s/bpf_lru_push_free/htab_lru_push_free/ to cancel and free
the timer when lru map deletes an element as a part of it eviction algorithm.

Make sure that bpf program cannot access 'struct bpf_timer' via direct load/store.
The timer operation are done through helpers only.
This is similar to 'struct bpf_spin_lock'.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210715005417.78572-5-alexei.starovoitov@gmail.com
  • Loading branch information
Alexei Starovoitov authored and borkmann committed Jul 15, 2021
1 parent b00628b commit 6813466
Show file tree
Hide file tree
Showing 9 changed files with 259 additions and 46 deletions.
44 changes: 33 additions & 11 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,24 +198,46 @@ static inline bool map_value_has_spin_lock(const struct bpf_map *map)
return map->spin_lock_off >= 0;
}

static inline void check_and_init_map_lock(struct bpf_map *map, void *dst)
static inline bool map_value_has_timer(const struct bpf_map *map)
{
if (likely(!map_value_has_spin_lock(map)))
return;
*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
(struct bpf_spin_lock){};
return map->timer_off >= 0;
}

/* copy everything but bpf_spin_lock */
static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
{
if (unlikely(map_value_has_spin_lock(map)))
*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
(struct bpf_spin_lock){};
if (unlikely(map_value_has_timer(map)))
*(struct bpf_timer *)(dst + map->timer_off) =
(struct bpf_timer){};
}

/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;

if (unlikely(map_value_has_spin_lock(map))) {
u32 off = map->spin_lock_off;
s_off = map->spin_lock_off;
s_sz = sizeof(struct bpf_spin_lock);
} else if (unlikely(map_value_has_timer(map))) {
t_off = map->timer_off;
t_sz = sizeof(struct bpf_timer);
}

memcpy(dst, src, off);
memcpy(dst + off + sizeof(struct bpf_spin_lock),
src + off + sizeof(struct bpf_spin_lock),
map->value_size - off - sizeof(struct bpf_spin_lock));
if (unlikely(s_sz || t_sz)) {
if (s_off < t_off || !s_sz) {
swap(s_off, t_off);
swap(s_sz, t_sz);
}
memcpy(dst, src, t_off);
memcpy(dst + t_off + t_sz,
src + t_off + t_sz,
s_off - t_off - t_sz);
memcpy(dst + s_off + s_sz,
src + s_off + s_sz,
map->value_size - s_off - s_sz);
} else {
memcpy(dst, src, map->value_size);
}
Expand Down
1 change: 1 addition & 0 deletions include/linux/btf.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
const struct btf_member *m,
u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
Expand Down
21 changes: 21 additions & 0 deletions kernel/bpf/arraymap.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,12 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key
return 0;
}

static void check_and_free_timer_in_array(struct bpf_array *arr, void *val)
{
if (unlikely(map_value_has_timer(&arr->map)))
bpf_timer_cancel_and_free(val + arr->map.timer_off);
}

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
Expand Down Expand Up @@ -321,6 +327,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
copy_map_value_locked(map, val, value, false);
else
copy_map_value(map, val, value);
check_and_free_timer_in_array(array, val);
}
return 0;
}
Expand Down Expand Up @@ -374,6 +381,19 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
return (void *)round_down((unsigned long)array, PAGE_SIZE);
}

static void array_map_free_timers(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;

if (likely(!map_value_has_timer(map)))
return;

for (i = 0; i < array->map.max_entries; i++)
bpf_timer_cancel_and_free(array->value + array->elem_size * i +
map->timer_off);
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
Expand Down Expand Up @@ -668,6 +688,7 @@ const struct bpf_map_ops array_map_ops = {
.map_alloc = array_map_alloc,
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_release_uref = array_map_free_timers,
.map_lookup_elem = array_map_lookup_elem,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
Expand Down
77 changes: 63 additions & 14 deletions kernel/bpf/btf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3046,43 +3046,92 @@ static void btf_struct_log(struct btf_verifier_env *env,
btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
}

/* find 'struct bpf_spin_lock' in map value.
* return >= 0 offset if found
* and < 0 in case of error
*/
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t,
const char *name, int sz, int align)
{
const struct btf_member *member;
u32 i, off = -ENOENT;

if (!__btf_type_is_struct(t))
return -EINVAL;

for_each_member(i, t, member) {
const struct btf_type *member_type = btf_type_by_id(btf,
member->type);
if (!__btf_type_is_struct(member_type))
continue;
if (member_type->size != sizeof(struct bpf_spin_lock))
if (member_type->size != sz)
continue;
if (strcmp(__btf_name_by_offset(btf, member_type->name_off),
"bpf_spin_lock"))
if (strcmp(__btf_name_by_offset(btf, member_type->name_off), name))
continue;
if (off != -ENOENT)
/* only one 'struct bpf_spin_lock' is allowed */
/* only one such field is allowed */
return -E2BIG;
off = btf_member_bit_offset(t, member);
if (off % 8)
/* valid C code cannot generate such BTF */
return -EINVAL;
off /= 8;
if (off % __alignof__(struct bpf_spin_lock))
/* valid struct bpf_spin_lock will be 4 byte aligned */
if (off % align)
return -EINVAL;
}
return off;
}

static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
const char *name, int sz, int align)
{
const struct btf_var_secinfo *vsi;
u32 i, off = -ENOENT;

for_each_vsi(i, t, vsi) {
const struct btf_type *var = btf_type_by_id(btf, vsi->type);
const struct btf_type *var_type = btf_type_by_id(btf, var->type);

if (!__btf_type_is_struct(var_type))
continue;
if (var_type->size != sz)
continue;
if (vsi->size != sz)
continue;
if (strcmp(__btf_name_by_offset(btf, var_type->name_off), name))
continue;
if (off != -ENOENT)
/* only one such field is allowed */
return -E2BIG;
off = vsi->offset;
if (off % align)
return -EINVAL;
}
return off;
}

static int btf_find_field(const struct btf *btf, const struct btf_type *t,
const char *name, int sz, int align)
{

if (__btf_type_is_struct(t))
return btf_find_struct_field(btf, t, name, sz, align);
else if (btf_type_is_datasec(t))
return btf_find_datasec_var(btf, t, name, sz, align);
return -EINVAL;
}

/* find 'struct bpf_spin_lock' in map value.
* return >= 0 offset if found
* and < 0 in case of error
*/
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
{
return btf_find_field(btf, t, "bpf_spin_lock",
sizeof(struct bpf_spin_lock),
__alignof__(struct bpf_spin_lock));
}

int btf_find_timer(const struct btf *btf, const struct btf_type *t)
{
return btf_find_field(btf, t, "bpf_timer",
sizeof(struct bpf_timer),
__alignof__(struct bpf_timer));
}

static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
u32 type_id, void *data, u8 bits_offset,
struct btf_show *show)
Expand Down
Loading

0 comments on commit 6813466

Please sign in to comment.