diff --git a/bpf/process/bpf_execve_event.c b/bpf/process/bpf_execve_event.c
index d1b736767e6..919338e7e65 100644
--- a/bpf/process/bpf_execve_event.c
+++ b/bpf/process/bpf_execve_event.c
@@ -177,10 +177,14 @@ event_execve(struct sched_execve_args *ctx)
 	p = &event->process;
 	p->flags = EVENT_EXECVE;
-	/* Send the TGID and TID, as during an execve all threads other
-	 * than the calling thread are destroyed, but since we hook late
-	 * during the execve then the calling thread at the hook time is
-	 * already the new thread group leader.
+	/**
+	 * Per thread tracking rules TID == PID:
+	 * At exec all threads other than the calling one are destroyed, so
+	 * current becomes the new thread leader since we hook late during
+	 * execve.
+	 *
+	 * TODO: for now we send the TID but later we should remove this
+	 * and set it directly inside user space.
 	 */
 	p->pid = pid >> 32;
 	p->tid = (__u32)pid;
diff --git a/bpf/process/bpf_exit.h b/bpf/process/bpf_exit.h
index de6436cfca6..20ac56764cb 100644
--- a/bpf/process/bpf_exit.h
+++ b/bpf/process/bpf_exit.h
@@ -56,7 +56,15 @@ static inline __attribute__((always_inline)) void event_exit_send(void *ctx, __u
 	exit->current.pad[3] = 0;
 	exit->current.ktime = enter->key.ktime;
-	/* We track and report only thread leader so here tgid == tid */
+	/**
+	 * Per thread tracking rules TID == PID:
+	 * We want the exit event to match the exec one, and since during exec
+	 * we report the thread group leader, do the same here: we read the exec
+	 * entry from the execve_map anyway and explicitly set the tid to the tgid.
+	 *
+	 * TODO: for now we send the TID but later we should remove this
+	 * and set it directly inside user space.
+	 */
 	exit->info.tid = tgid;
 	probe_read(&exit->info.code, sizeof(exit->info.code), _(&task->exit_code));
diff --git a/bpf/process/bpf_fork.c b/bpf/process/bpf_fork.c
index e27138856b1..75ef663bd6b 100644
--- a/bpf/process/bpf_fork.c
+++ b/bpf/process/bpf_fork.c
@@ -75,9 +75,14 @@ BPF_KPROBE(event_wake_up_new_task, struct task_struct *task)
 		.common.ktime = curr->key.ktime,
 		.parent = curr->pkey,
 		.tgid = curr->key.pid,
-		/* Since we generate one event per thread group, then when
-		 * the task wakes up, there will be only one thread here:
-		 * the thread group leader. Pass its thread id to user-space.
+		/**
+		 * Per thread tracking rules TID == PID:
+		 * Since we generate one event per thread group, when this task
+		 * wakes up it is the only thread in the group, and it is the
+		 * leader. Make sure to pass its TID to user space.
+		 *
+		 * TODO: for now we send the TID but later we should remove this
+		 * and set it directly inside user space.
 		 */
 		.tid = BPF_CORE_READ(task, pid),
 		.ktime = curr->key.ktime,
diff --git a/bpf/process/generic_calls.h b/bpf/process/generic_calls.h
index be05ab6f2eb..cd448d9fff5 100644
--- a/bpf/process/generic_calls.h
+++ b/bpf/process/generic_calls.h
@@ -79,7 +79,10 @@ generic_process_init(struct msg_generic_kprobe *e, u8 op, struct event_config *c
 	e->action = 0;
-	/* Initialize with the calling TID */
+	/**
+	 * Per thread tracking rules TID is the calling thread:
+	 * At kprobes, tracepoints etc. we report the calling thread ID to user space.
+	 */
 	e->tid = (__u32)get_current_pid_tgid();
 }
diff --git a/pkg/eventcache/eventcache.go b/pkg/eventcache/eventcache.go
index 502d32570fb..37084635f22 100644
--- a/pkg/eventcache/eventcache.go
+++ b/pkg/eventcache/eventcache.go
@@ -69,10 +69,17 @@ func HandleGenericInternal(ev notify.Event, pid uint32, tid *uint32, timestamp u
 	}
 	if internal != nil {
+		// When we report the per thread fields, take a copy
+		// of the thread leader from the cache then update the corresponding
+		// per thread fields.
+		//
+		// The cost to get this is relatively high because it requires a
+		// deep copy of all the fields of the thread leader from the cache in
+		// order to safely modify them, to not corrupt gRPC streams.
+		//
+		// TODO: improve this so it copies only the shared fields and directly
+		// updates the per thread fields with values recorded from BPF.
 		proc := internal.GetProcessCopy()
-		// The TID of the cached process can be different from the
-		// TID that triggered the event, so always use the recorded
-		// one from bpf.
 		process.UpdateEventProcessTid(proc, tid)
 		ev.SetProcess(proc)
 	} else {
@@ -97,10 +104,17 @@ func HandleGenericEvent(internal *process.ProcessInternal, ev notify.Event, tid
 		return ErrFailedToGetPodInfo
 	}
+	// When we report the per thread fields, take a copy
+	// of the thread leader from the cache then update the corresponding
+	// per thread fields.
+	//
+	// The cost to get this is relatively high because it requires a
+	// deep copy of all the fields of the thread leader from the cache in
+	// order to safely modify them, to not corrupt gRPC streams.
+	//
+	// TODO: improve this so it copies only the shared fields and directly
+	// updates the per thread fields with values recorded from BPF.
 	proc := internal.GetProcessCopy()
-	// The TID of the cached process can be different from the
-	// TID that triggered the event, so always use the recorded
-	// one from bpf.
 	process.UpdateEventProcessTid(proc, tid)
 	ev.SetProcess(proc)
 	return nil
diff --git a/pkg/grpc/exec/exec.go b/pkg/grpc/exec/exec.go
index 57a27109654..5c1e318a8df 100644
--- a/pkg/grpc/exec/exec.go
+++ b/pkg/grpc/exec/exec.go
@@ -288,7 +288,9 @@ func GetProcessExit(event *MsgExitEventUnix) *tetragon.ProcessExit {
 	code := event.Info.Code >> 8
 	signal := readerexec.Signal(event.Info.Code & 0xFF)
-	// Ensure that we get PID == TID
+	// Per thread tracking rules PID == TID: ensure that TID equals PID.
+	//
+	// TODO: remove the TID from the bpf side and the following check.
 	if event.ProcessKey.Pid != event.Info.Tid {
 		logger.GetLogger().WithFields(logrus.Fields{
 			"event.name": "Exit",
diff --git a/pkg/grpc/tracing/tracing.go b/pkg/grpc/tracing/tracing.go
index cf470ca6082..d9f22e0321d 100644
--- a/pkg/grpc/tracing/tracing.go
+++ b/pkg/grpc/tracing/tracing.go
@@ -256,8 +256,17 @@ func GetProcessKprobe(event *MsgGenericKprobeUnix) *tetragon.ProcessKprobe {
 	}
 	if proc != nil {
+		// At kprobes we report the per thread fields, so take a copy
+		// of the thread leader from the cache then update the corresponding
+		// per thread fields.
+		//
+		// The cost to get this is relatively high because it requires a
+		// deep copy of all the fields of the thread leader from the cache in
+		// order to safely modify them, to not corrupt gRPC streams.
+		//
+		// TODO: improve this so it copies only the shared fields and directly
+		// updates the per thread fields with values recorded from BPF.
 		tetragonEvent.Process = proc.GetProcessCopy()
-		// Use the bpf recorded TID to update the event
 		process.UpdateEventProcessTid(tetragonEvent.Process, &event.Tid)
 	}
 	if parent != nil {
@@ -356,10 +365,17 @@ func (msg *MsgGenericTracepointUnix) HandleMessage() *tetragon.GetEventsResponse
 	}
 	if proc != nil {
-		tetragonEvent.Process = proc.GetProcessCopy()
-		// Use the bpf recorded TID to update the event
+		// At tracepoints we report the per thread fields, so take a copy
+		// of the thread leader from the cache then update the corresponding
+		// per thread fields.
+		//
 		// The cost to get this is relatively high because it requires a
-		// deep copyo of the process in order to safely modify it.
+		// deep copy of all the fields of the thread leader from the cache in
+		// order to safely modify them, to not corrupt gRPC streams.
+		//
+		// TODO: improve this so it copies only the shared fields and directly
+		// updates the per thread fields with values recorded from BPF.
+		tetragonEvent.Process = proc.GetProcessCopy()
 		process.UpdateEventProcessTid(tetragonEvent.Process, &msg.Tid)
 	}
@@ -576,8 +592,17 @@ func GetProcessUprobe(event *MsgGenericUprobeUnix) *tetragon.ProcessUprobe {
 	}
 	if proc != nil {
+		// At uprobes we report the per thread fields, so take a copy
+		// of the thread leader from the cache then update the corresponding
+		// per thread fields.
+		//
+		// The cost to get this is relatively high because it requires a
+		// deep copy of all the fields of the thread leader from the cache in
+		// order to safely modify them, to not corrupt gRPC streams.
+		//
+		// TODO: improve this so it copies only the shared fields and directly
+		// updates the per thread fields with values recorded from BPF.
 		tetragonEvent.Process = proc.GetProcessCopy()
-		// Use the bpf recorded TID to update the event
 		process.UpdateEventProcessTid(tetragonEvent.Process, &event.Tid)
 	}
 	return tetragonEvent
diff --git a/pkg/process/process.go b/pkg/process/process.go
index 343e1d7a1a1..4eab71b2d79 100644
--- a/pkg/process/process.go
+++ b/pkg/process/process.go
@@ -206,9 +206,13 @@ func initProcessInternalExec(
 	ns := namespace.GetMsgNamespaces(namespaces)
 	binary := path.GetBinaryAbsolutePath(process.Filename, cwd)
-	// Ensure that exported events have the TID set. For events from Kernel
+	// Per thread tracking rules PID == TID: ensure that TID equals PID.
+	//
+	// Also ensure that exported events have the TID set. For events from kernel
 	// we usually use PID == 0, so instead of checking against 0, assert that
-	// TGID == TID
+	// TGID == TID.
+	//
+	// TODO: remove the TID from the bpf side and the following check.
 	if process.PID != process.TID {
 		logger.GetLogger().WithFields(logrus.Fields{
 			"event.name": "Execve",
@@ -261,8 +265,11 @@ func initProcessInternalClone(event *tetragonAPI.MsgCloneEvent,
 	pi.process.ParentExecId = parentExecId
 	pi.process.ExecId = GetProcessID(event.PID, event.Ktime)
 	pi.process.Pid = &wrapperspb.UInt32Value{Value: event.PID}
-	// Since from BPF side we only generate one clone event per
-	// thread group that is for the leader, assert on that.
+	// Per thread tracking rules PID == TID: ensure that TID equals PID.
+	// Since from the BPF side we only generate one clone event per
+	// thread group, and it is for the leader, assert on that.
+	//
+	// TODO: remove the TID from the bpf side and the following check.
 	if event.PID != event.TID {
 		logger.GetLogger().WithFields(logrus.Fields{
 			"event.name": "Clone",
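
The comments added above describe a copy-then-update pattern: the cached thread-leader
process is deep-copied before the per-thread TID recorded by BPF is written into the
event, so the shared cache entry that may back other gRPC streams is never mutated in
place. Below is a minimal, self-contained Go sketch of that pattern for illustration
only; the Process type, Copy method, and updateEventTid helper are hypothetical
stand-ins, not Tetragon's actual types or APIs.

package main

import "fmt"

// Process is a hypothetical stand-in for the cached thread-leader entry; the
// real tetragon.Process carries many more shared fields (exec ID, pod info, ...).
type Process struct {
	Pid    uint32
	Tid    uint32
	Binary string
}

// Copy returns a deep copy so the cached entry, which may back other
// concurrent gRPC streams, is never mutated in place.
func (p *Process) Copy() *Process {
	c := *p
	return &c
}

// updateEventTid mirrors the copy-then-update pattern from the diff: the event
// must carry the TID recorded by BPF, which for kprobes, tracepoints and
// uprobes can differ from the thread leader's TID stored in the cache.
func updateEventTid(cached *Process, bpfTid uint32) *Process {
	ev := cached.Copy()
	ev.Tid = bpfTid
	return ev
}

func main() {
	leader := &Process{Pid: 1200, Tid: 1200, Binary: "/usr/bin/example"}
	ev := updateEventTid(leader, 1207) // e.g. a kprobe fired in a non-leader thread

	fmt.Printf("cache: pid=%d tid=%d\n", leader.Pid, leader.Tid) // cache entry unchanged
	fmt.Printf("event: pid=%d tid=%d\n", ev.Pid, ev.Tid)         // event carries the bpf TID
}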