From e5875ba3415b8f1a57ef3f04d36c9ba31db6c66d Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 26 Sep 2023 20:15:43 +0000
Subject: [PATCH] tetragon: Hook exit sensor on acct_process

[upstream commit 0c60ef2015b0b16e5222d070f0dc0b5f74acb5b4]

Djalal and Anastasios found another way we could race in the exit
event hook, so we could receive multiple exit events with the same
pid value.

Anastasios suggested to hook acct_process instead, which is
called only for the last task in the thread group.

The acct_process function depends on the CONFIG_BSD_PROCESS_ACCT config
option, but it seems to be present on all supported kernels.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 bpf/process/bpf_exit.c   | 33 +++++++++++----------------------
 pkg/sensors/base/base.go |  4 ++--
 2 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/bpf/process/bpf_exit.c b/bpf/process/bpf_exit.c
index 90282467f38..bd3460a4334 100644
--- a/bpf/process/bpf_exit.c
+++ b/bpf/process/bpf_exit.c
@@ -8,12 +8,9 @@
 char _license[] __attribute__((section("license"), used)) = "GPL";
 
 /*
- * Hooking on do_task_dead kernel function, which is the last one the
- * task would execute after exiting. It's stable since v4.19, so it's
- * safe to hook for us.
- *
- * To find out if we are the last thread of execution in the task we
- * use current->signal->live counter (thanks Djalal! ;-) )
+ * Hooking on the acct_process kernel function, which is called on the
+ * task's exit path once the task is the last one in the group. It has been
+ * stable since v4.19, so it's safe for us to hook.
  *
  * It's initialized for thread leader:
  *
@@ -35,29 +32,21 @@ char _license[] __attribute__((section("license"), used)) = "GPL";
  * Decremented for each exiting thread:
  *
  *   do_exit {
- *     atomic_dec_and_test(&tsk->signal->live);
+ *     group_dead = atomic_dec_and_test(&tsk->signal->live);
+ *     ...
+ *     if (group_dead)
+ *              acct_process();
  *     ...
- *     do_task_dead
- *       __schedule
- *       BUG
  *   }
  *
- * If task->signal->live == 0 we are the last thread of execution and we
- * won't race with another clone, because there's no other thread to call
- * it (current thread is in do_exit).
+ * By hooking acct_process we ensure tsk->signal->live is 0 and
+ * we are the last task of the thread group.
  */
-__attribute__((section("kprobe/do_task_dead"), used)) int
+__attribute__((section("kprobe/acct_process"), used)) int
 event_exit(struct pt_regs *ctx)
 {
-	struct task_struct *task = (struct task_struct *)get_current_task();
 	__u64 pid_tgid = get_current_pid_tgid();
-	struct signal_struct *signal;
-	atomic_t live;
-
-	probe_read(&signal, sizeof(signal), _(&task->signal));
-	probe_read(&live, sizeof(live), _(&signal->live));
 
-	if (live.counter == 0)
-		event_exit_send(ctx, pid_tgid >> 32);
+	event_exit_send(ctx, pid_tgid >> 32);
 	return 0;
 }
diff --git a/pkg/sensors/base/base.go b/pkg/sensors/base/base.go
index 9089d385e4d..ff416deb10f 100644
--- a/pkg/sensors/base/base.go
+++ b/pkg/sensors/base/base.go
@@ -22,8 +22,8 @@ var (
 
 	Exit = program.Builder(
 		"bpf_exit.o",
-		"do_task_dead",
-		"kprobe/do_task_dead",
+		"acct_process",
+		"kprobe/acct_process",
 		"event_exit",
 		"kprobe",
 	)