diff --git a/bpf/process/bpf_exit.c b/bpf/process/bpf_exit.c index 90282467f38..bd3460a4334 100644 --- a/bpf/process/bpf_exit.c +++ b/bpf/process/bpf_exit.c @@ -8,12 +8,9 @@ char _license[] __attribute__((section("license"), used)) = "GPL"; /* - * Hooking on do_task_dead kernel function, which is the last one the - * task would execute after exiting. It's stable since v4.19, so it's - * safe to hook for us. - * - * To find out if we are the last thread of execution in the task we - * use current->signal->live counter (thanks Djalal! ;-) ) + * Hooking on acct_process kernel function, which is called on the task's + * exit path once the task is the last one in the group. It's stable since + * v4.19, so it's safe to hook for us. * * It's initialized for thread leader: * @@ -35,29 +32,21 @@ char _license[] __attribute__((section("license"), used)) = "GPL"; * Decremented for each exiting thread: * * do_exit { - * atomic_dec_and_test(&tsk->signal->live); + * group_dead = atomic_dec_and_test(&tsk->signal->live); + * ... + * if (group_dead) + * acct_process(); * ... - * do_task_dead - * __schedule - * BUG * } * - * If task->signal->live == 0 we are the last thread of execution and we - * won't race with another clone, because there's no other thread to call - * it (current thread is in do_exit). + * Hooking to acct_process we ensure tsk->signal->live is 0 and + * we are the last one of the thread group. 
*/ -__attribute__((section("kprobe/do_task_dead"), used)) int +__attribute__((section("kprobe/acct_process"), used)) int event_exit(struct pt_regs *ctx) { - struct task_struct *task = (struct task_struct *)get_current_task(); __u64 pid_tgid = get_current_pid_tgid(); - struct signal_struct *signal; - atomic_t live; - - probe_read(&signal, sizeof(signal), _(&task->signal)); - probe_read(&live, sizeof(live), _(&signal->live)); - if (live.counter == 0) - event_exit_send(ctx, pid_tgid >> 32); + event_exit_send(ctx, pid_tgid >> 32); return 0; } diff --git a/contrib/tester-progs/Makefile b/contrib/tester-progs/Makefile index 96cc038f105..e2a67f1cbd6 100644 --- a/contrib/tester-progs/Makefile +++ b/contrib/tester-progs/Makefile @@ -16,7 +16,8 @@ PROGS = sigkill-tester \ uprobe-test-2 \ lseek-pipe \ threads-tester \ - bench-reader + bench-reader \ + threads-exit all: $(PROGS) @@ -29,6 +30,9 @@ bench-reader: bench-reader.c threads-tester: threads-tester.c $(GCC) -Wall -fno-inline $< -o $@ -lcap -lpthread +threads-exit: threads-exit.c + $(GCC) -Wall -fno-inline $< -o $@ -lcap -lpthread + capabilities-tester: capabilities-tester.c $(GCC) -Wall $< -o $@ -lcap diff --git a/contrib/tester-progs/threads-exit.c b/contrib/tester-progs/threads-exit.c new file mode 100644 index 00000000000..2614c576d06 --- /dev/null +++ b/contrib/tester-progs/threads-exit.c @@ -0,0 +1,71 @@ +#define _GNU_SOURCE +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <sched.h> +#include <sys/syscall.h> +#include <sys/sysinfo.h> + +static int goo; + +static void *worker(void *ctx) +{ + int ready_out = (intptr_t) ctx; + + write(ready_out, "R", 1); + + while (!goo) {} + syscall(SYS_exit, 0); + return NULL; +} + +int main(void) +{ + int ncpus = get_nprocs(), nthreads = ncpus * 10; + int i, err, readyfds[2]; + pthread_t th[nthreads]; + cpu_set_t set; + char dummy; + + /* make sure we can run on all cpus */ + CPU_ZERO(&set); + for (i = 0; i < ncpus; i++) + CPU_SET(i, &set); + if (sched_setaffinity(0, sizeof(set), &set) == -1) { + 
perror("sched_setaffinity"); + return -1; + } + + + if (pipe(readyfds)) { + perror("pipe"); + return -1; + } + + /* print out group leader for test checker */ + printf("TGID %d\n", getpid()); + fflush(NULL); + + for (i = 0; i < nthreads; i++) { + err = pthread_create(&th[i], NULL, worker, (void*)(intptr_t) readyfds[1]); + if (err) { + perror("pthread_create"); + return -1; + } + } + + /* Make sure all threads started.. */ + for (i = 0; i < nthreads; i++) { + if (read(readyfds[0], &dummy, 1) != 1) { + perror("read"); + return -1; + } + } + + /* .. and then tell threads to exit */ + goo = 1; + syscall(SYS_exit, 0); +} diff --git a/pkg/sensors/base/base.go b/pkg/sensors/base/base.go index 28b830135d0..5db31754768 100644 --- a/pkg/sensors/base/base.go +++ b/pkg/sensors/base/base.go @@ -20,8 +20,8 @@ var ( Exit = program.Builder( "bpf_exit.o", - "do_task_dead", - "kprobe/do_task_dead", + "acct_process", + "kprobe/acct_process", "event_exit", "kprobe", ) diff --git a/pkg/sensors/exec/exec_test.go b/pkg/sensors/exec/exec_test.go index 3ad1435dd65..e3532fa237b 100644 --- a/pkg/sensors/exec/exec_test.go +++ b/pkg/sensors/exec/exec_test.go @@ -15,6 +15,7 @@ import ( "time" "github.com/cilium/ebpf" + "github.com/cilium/tetragon/api/v1/tetragon" ec "github.com/cilium/tetragon/api/v1/tetragon/codegen/eventchecker" "github.com/cilium/tetragon/pkg/api" "github.com/cilium/tetragon/pkg/api/dataapi" @@ -154,6 +155,78 @@ func TestNamespaces(t *testing.T) { assert.NoError(t, err) } +func TestEventExitThreads(t *testing.T) { + var doneWG, readyWG sync.WaitGroup + defer doneWG.Wait() + + ctx, cancel := context.WithTimeout(context.Background(), tus.Conf().CmdWaitTime) + defer cancel() + + obs, err := observertesthelper.GetDefaultObserver(t, ctx, tus.Conf().TetragonLib, observertesthelper.WithMyPid()) + if err != nil { + t.Fatalf("Failed to run observer: %s", err) + } + observertesthelper.LoopEvents(ctx, t, &doneWG, &readyWG, obs) + readyWG.Wait() + + testThreadsExit := 
testutils.RepoRootPath("contrib/tester-progs/threads-exit") + + // array of all pids we should receive in exit events + tgids := make(map[int]bool) + + // running the workload 10 times to make the chance we hit the race + // window bigger and collect all tgids from testThreadsExit output + for i := 0; i < 10; i++ { + out, err := exec.Command(testThreadsExit).Output() + if err != nil { + t.Fatalf("Failed to execute test binary: %s\n", err) + } + + tgid := 0 + if n, err := fmt.Sscanf(string(out[:]), "TGID %d", &tgid); n != 1 || err != nil { + t.Fatalf("Failed to parse test binary output: %s\n", err) + } + tgids[tgid] = false + } + + // check we got a single exit event for each testThreadsExit + // execution and no more + nextCheck := func(event ec.Event, l *logrus.Logger) (bool, error) { + switch ev := event.(type) { + case *tetragon.ProcessExit: + if ev.Process.Binary != testThreadsExit { + return false, nil + } + // Make sure there's only single exit event with given pid + pid := int(ev.Process.Pid.GetValue()) + assert.False(t, tgids[pid], "got extra exit event with pid %d", pid) + tgids[pid] = true + return false, nil + default: + return false, nil + + } + } + + finalCheck := func(l *logrus.Logger) error { + // Make sure we saw all pids + for pid, used := range tgids { + assert.True(t, used, "did not see exit event for pid %d", pid) + } + return nil + } + + checker_ := ec.FnEventChecker{ + NextCheckFn: nextCheck, + FinalCheckFn: finalCheck, + } + + checker := testsensor.NewTestChecker(&checker_) + + err = jsonchecker.JsonTestCheck(t, checker) + assert.NoError(t, err) +} + func TestEventExecve(t *testing.T) { var doneWG, readyWG sync.WaitGroup defer doneWG.Wait()