From 509efb366189b63ba7271e2125143e9c16fc6dae Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Thu, 25 Apr 2024 23:02:20 +0800 Subject: [PATCH] Support tracing XDP Add an option --filter-trace-xdp to trace all XDP progs on the host by fentry-ing on the progs, in the same way as tracing tc-bpf: https://github.com/Asphaltt/pwru/commit/2347755900fffe100ab9eebaa180bb9f482df6fb. Differences from tracing tc-bpf: 1. Filtering by mark is not supported. 2. No mark in meta output. 3. No proto in meta output. 4. --output-skb is not supported. Signed-off-by: Leon Hwang --- bpf/kprobe_pwru.c | 142 +++++++++++++++++++-- internal/libpcap/inject.go | 4 + internal/pwru/output.go | 16 ++- internal/pwru/{tc_tracer.go => tracing.go} | 96 ++++++++++---- internal/pwru/types.go | 2 + main.go | 60 ++++++--- 6 files changed, 268 insertions(+), 52 deletions(-) rename internal/pwru/{tc_tracer.go => tracing.go} (59%) diff --git a/bpf/kprobe_pwru.c b/bpf/kprobe_pwru.c index 43896598..c5139fa0 100644 --- a/bpf/kprobe_pwru.c +++ b/bpf/kprobe_pwru.c @@ -6,6 +6,7 @@ #include "bpf/bpf_helpers.h" #include "bpf/bpf_core_read.h" #include "bpf/bpf_tracing.h" +#include "bpf/bpf_endian.h" #include "bpf/bpf_ipv6.h" #define PRINT_SKB_STR_SIZE 2048 @@ -13,6 +14,7 @@ #define ETH_P_IP 0x800 #define ETH_P_IPV6 0x86dd +#define ETH_P_8021Q 0x8100 const static bool TRUE = true; @@ -55,6 +57,12 @@ struct tuple { u64 print_skb_id = 0; u64 print_shinfo_id = 0; +enum event_type { + EVENT_TYPE_KPROBE = 0, + EVENT_TYPE_TC = 1, + EVENT_TYPE_XDP = 2, +}; + struct event_t { u32 pid; u32 type; @@ -233,24 +241,19 @@ set_meta(struct sk_buff *skb, struct skb_meta *meta) { } static __always_inline void -set_tuple(struct sk_buff *skb, struct tuple *tpl) { - void *skb_head = BPF_CORE_READ(skb, head); - u16 l3_off = BPF_CORE_READ(skb, network_header); +__set_tuple(struct tuple *tpl, void *data, u16 l3_off, bool is_ipv4) { u16 l4_off; - struct iphdr *l3_hdr = (struct iphdr *) (skb_head + l3_off); - u8 ip_vsn = BPF_CORE_READ_BITFIELD_PROBED(l3_hdr, 
version); - - if (ip_vsn == 4) { - struct iphdr *ip4 = (struct iphdr *) l3_hdr; + if (is_ipv4) { + struct iphdr *ip4 = (struct iphdr *) (data + l3_off); BPF_CORE_READ_INTO(&tpl->saddr, ip4, saddr); BPF_CORE_READ_INTO(&tpl->daddr, ip4, daddr); tpl->l4_proto = BPF_CORE_READ(ip4, protocol); tpl->l3_proto = ETH_P_IP; l4_off = l3_off + BPF_CORE_READ_BITFIELD_PROBED(ip4, ihl) * 4; - } else if (ip_vsn == 6) { - struct ipv6hdr *ip6 = (struct ipv6hdr *) l3_hdr; + } else { + struct ipv6hdr *ip6 = (struct ipv6hdr *) (data + l3_off); BPF_CORE_READ_INTO(&tpl->saddr, ip6, saddr); BPF_CORE_READ_INTO(&tpl->daddr, ip6, daddr); tpl->l4_proto = BPF_CORE_READ(ip6, nexthdr); // TODO: ipv6 l4 protocol @@ -259,16 +262,32 @@ set_tuple(struct sk_buff *skb, struct tuple *tpl) { } if (tpl->l4_proto == IPPROTO_TCP) { - struct tcphdr *tcp = (struct tcphdr *) (skb_head + l4_off); + struct tcphdr *tcp = (struct tcphdr *) (data + l4_off); tpl->sport= BPF_CORE_READ(tcp, source); tpl->dport= BPF_CORE_READ(tcp, dest); } else if (tpl->l4_proto == IPPROTO_UDP) { - struct udphdr *udp = (struct udphdr *) (skb_head + l4_off); + struct udphdr *udp = (struct udphdr *) (data + l4_off); tpl->sport= BPF_CORE_READ(udp, source); tpl->dport= BPF_CORE_READ(udp, dest); } } +static __always_inline void +set_tuple(struct sk_buff *skb, struct tuple *tpl) { + void *skb_head = BPF_CORE_READ(skb, head); + u16 l3_off = BPF_CORE_READ(skb, network_header); + + struct iphdr *l3_hdr = (struct iphdr *) (skb_head + l3_off); + u8 ip_vsn = BPF_CORE_READ_BITFIELD_PROBED(l3_hdr, version); + + if (ip_vsn !=4 && ip_vsn != 6) + return; + + bool is_ipv4 = ip_vsn == 4; + __set_tuple(tpl, skb_head, l3_off, is_ipv4); +} + + static __always_inline void set_skb_btf(struct sk_buff *skb, typeof(print_skb_id) *event_id) { #ifdef OUTPUT_SKB @@ -526,6 +545,105 @@ int BPF_PROG(fentry_tc, struct sk_buff *skb) { event.skb_addr = (u64) skb; event.addr = BPF_PROG_ADDR; + event.type = EVENT_TYPE_TC; + bpf_map_push_elem(&events, &event, BPF_EXIST); + 
+ return BPF_OK; +} + + +static __always_inline bool +filter_xdp_netns(struct xdp_buff *xdp) { + if (cfg->netns && BPF_CORE_READ(xdp, rxq, dev, nd_net.net, ns.inum) != cfg->netns) + return false; + + return true; +} + +static __always_inline bool +filter_xdp_ifindex(struct xdp_buff *xdp) { + if (cfg->ifindex && BPF_CORE_READ(xdp, rxq, dev, ifindex) != cfg->ifindex) + return false; + + return true; +} + +static __always_inline bool +filter_xdp_meta(struct xdp_buff *xdp) { + return filter_xdp_netns(xdp) && filter_xdp_ifindex(xdp); +} + +static __always_inline bool +filter_xdp_pcap(struct xdp_buff *xdp) { + void *data = (void *)(long) BPF_CORE_READ(xdp, data); + void *data_end = (void *)(long) BPF_CORE_READ(xdp, data_end); + return filter_pcap_ebpf_l2((void *)xdp, (void *)xdp, (void *)xdp, data, data_end); +} + +static __always_inline bool +filter_xdp(struct xdp_buff *xdp) { + return filter_xdp_pcap(xdp) && filter_xdp_meta(xdp); +} + +static __always_inline void +set_xdp_meta(struct xdp_buff *xdp, struct skb_meta *meta) { + struct net_device *dev = BPF_CORE_READ(xdp, rxq, dev); + meta->netns = BPF_CORE_READ(dev, nd_net.net, ns.inum); + meta->ifindex = BPF_CORE_READ(dev, ifindex); + meta->mtu = BPF_CORE_READ(dev, mtu); + meta->len = BPF_CORE_READ(xdp, data_end) - BPF_CORE_READ(xdp, data); +} + +static __always_inline void +set_xdp_tuple(struct xdp_buff *xdp, struct tuple *tpl) { + void *data = (void *)(long) BPF_CORE_READ(xdp, data); + void *data_end = (void *)(long) BPF_CORE_READ(xdp, data_end); + struct ethhdr *eth = (struct ethhdr *) data; + u16 l3_off = sizeof(*eth); + u16 l4_off; + + __be16 proto = BPF_CORE_READ(eth, h_proto); + if (proto == bpf_htons(ETH_P_8021Q)) { + struct vlan_hdr *vlan = (struct vlan_hdr *) (eth + 1); + proto = BPF_CORE_READ(vlan, h_vlan_encapsulated_proto); + l3_off += sizeof(*vlan); + } + if (proto != bpf_htons(ETH_P_IP) && proto != bpf_htons(ETH_P_IPV6)) + return; + + bool is_ipv4 = proto == bpf_htons(ETH_P_IP); + __set_tuple(tpl, data, 
l3_off, is_ipv4); +} + +static __always_inline void +set_xdp_output(void *ctx, struct xdp_buff *xdp, struct event_t *event) { + if (cfg->output_meta) + set_xdp_meta(xdp, &event->meta); + + if (cfg->output_tuple) + set_xdp_tuple(xdp, &event->tuple); + + if (cfg->output_stack) + event->print_stack_id = bpf_get_stackid(ctx, &print_stack_map, BPF_F_FAST_STACK_CMP); +} + +SEC("fentry/xdp") +int BPF_PROG(fentry_xdp, struct xdp_buff *xdp) { + struct event_t event = {}; + + if (cfg->is_set) { + if (!filter_xdp(xdp)) + return BPF_OK; + + set_xdp_output(ctx, xdp, &event); + } + + event.pid = bpf_get_current_pid_tgid() >> 32; + event.ts = bpf_ktime_get_ns(); + event.cpu_id = bpf_get_smp_processor_id(); + event.skb_addr = (u64) xdp; + event.addr = BPF_PROG_ADDR; + event.type = EVENT_TYPE_XDP; bpf_map_push_elem(&events, &event, BPF_EXIST); return BPF_OK; diff --git a/internal/libpcap/inject.go b/internal/libpcap/inject.go index e8955b73..ee5a5cbe 100644 --- a/internal/libpcap/inject.go +++ b/internal/libpcap/inject.go @@ -8,6 +8,10 @@ import ( "github.com/cloudflare/cbpfc" ) +func InjectL2Filter(program *ebpf.ProgramSpec, filterExpr string) (err error) { + return injectFilter(program, filterExpr, false) +} + func InjectFilters(program *ebpf.ProgramSpec, filterExpr string) (err error) { if err = injectFilter(program, filterExpr, false); err != nil { return diff --git a/internal/pwru/output.go b/internal/pwru/output.go index b65182f2..7b70b99b 100644 --- a/internal/pwru/output.go +++ b/internal/pwru/output.go @@ -31,6 +31,12 @@ import ( const absoluteTS string = "15:04:05.000" +const ( + eventTypeKprobe = 0 + eventTypeTracingTc = 1 + eventTypeTracingXdp = 2 +) + type output struct { flags *Flags lastSeenSkb map[uint64]uint64 // skb addr => last seen TS @@ -211,7 +217,6 @@ func (o *output) PrintJson(event *Event) { encoder.SetEscapeHTML(false) err := encoder.Encode(d) - if err != nil { log.Fatalf("Error encoding JSON: %s", err) } @@ -351,6 +356,15 @@ func getOutFuncName(o *output, 
event *Event, addr uint64) string { } } + if event.Type != eventTypeKprobe { + switch event.Type { + case eventTypeTracingTc: + outFuncName += "(tc)" + case eventTypeTracingXdp: + outFuncName += "(xdp)" + } + } + return outFuncName } diff --git a/internal/pwru/tc_tracer.go b/internal/pwru/tracing.go similarity index 59% rename from internal/pwru/tc_tracer.go rename to internal/pwru/tracing.go index a93cb989..7d9c16b8 100644 --- a/internal/pwru/tc_tracer.go +++ b/internal/pwru/tracing.go @@ -14,29 +14,55 @@ import ( "golang.org/x/sync/errgroup" ) -type tcTracer struct { +type tracing struct { sync.Mutex links []link.Link + progs []*ebpf.Program } -func (t *tcTracer) close() { +func (t *tracing) HaveTracing() bool { t.Lock() defer t.Unlock() + return len(t.links) > 0 +} + +func (t *tracing) Detach() { + t.Lock() + defer t.Unlock() + + t.detach() + + for _, p := range t.progs { + _ = p.Close() + } + t.progs = nil +} + +func (t *tracing) detach() { + var errg errgroup.Group + for _, l := range t.links { - _ = l.Close() + l := l + errg.Go(func() error { + _ = l.Close() + return nil + }) } + + _ = errg.Wait() } -func (t *tcTracer) addLink(l link.Link) { +func (t *tracing) addLink(l link.Link) { t.Lock() defer t.Unlock() t.links = append(t.links, l) } -func (t *tcTracer) trace(spec *ebpf.CollectionSpec, +func (t *tracing) traceProg(spec *ebpf.CollectionSpec, opts *ebpf.CollectionOptions, prog *ebpf.Program, n2a BpfProgName2Addr, + tracingName string, ) error { entryFn, name, err := getEntryFuncName(prog) if err != nil { @@ -58,7 +84,7 @@ func (t *tcTracer) trace(spec *ebpf.CollectionSpec, if !ok { addr, ok = n2a[name] if !ok { - return fmt.Errorf("failed to find address for function %s of bpf prog %s", name, prog) + return fmt.Errorf("failed to find address for function %s of bpf prog %v", name, prog) } } @@ -69,8 +95,8 @@ func (t *tcTracer) trace(spec *ebpf.CollectionSpec, return fmt.Errorf("failed to rewrite bpf prog addr: %w", err) } - 
spec.Programs["fentry_tc"].AttachTarget = prog - spec.Programs["fentry_tc"].AttachTo = entryFn + spec.Programs[tracingName].AttachTarget = prog + spec.Programs[tracingName].AttachTo = entryFn coll, err := ebpf.NewCollectionWithOptions(spec, *opts) if err != nil { var ( @@ -86,7 +112,7 @@ func (t *tcTracer) trace(spec *ebpf.CollectionSpec, defer coll.Close() tracing, err := link.AttachTracing(link.TracingOptions{ - Program: coll.Programs["fentry_tc"], + Program: coll.Programs[tracingName], }) if err != nil { return fmt.Errorf("failed to attach tracing: %w", err) @@ -97,12 +123,13 @@ func (t *tcTracer) trace(spec *ebpf.CollectionSpec, return nil } -func TraceTC(coll *ebpf.Collection, spec *ebpf.CollectionSpec, - opts *ebpf.CollectionOptions, outputSkb bool, outputShinfo bool, n2a BpfProgName2Addr, -) func() { - progs, err := listBpfProgs(ebpf.SchedCLS) +func (t *tracing) trace(coll *ebpf.Collection, spec *ebpf.CollectionSpec, + opts *ebpf.CollectionOptions, outputSkb bool, outputShinfo bool, + n2a BpfProgName2Addr, progType ebpf.ProgramType, tracingName string, +) error { + progs, err := listBpfProgs(progType) if err != nil { - log.Fatalf("Failed to list TC bpf progs: %v", err) + return fmt.Errorf("failed to list bpf progs: %w", err) } // Reusing maps from previous collection is to handle the events together @@ -119,27 +146,50 @@ func TraceTC(coll *ebpf.Collection, spec *ebpf.CollectionSpec, } opts.MapReplacements = replacedMaps - var tt tcTracer - tt.links = make([]link.Link, 0, len(progs)) + t.links = make([]link.Link, 0, len(progs)) + t.progs = progs var errg errgroup.Group for _, prog := range progs { prog := prog errg.Go(func() error { - return tt.trace(spec, opts, prog, n2a) + return t.traceProg(spec, opts, prog, n2a, tracingName) }) } if err := errg.Wait(); err != nil { - log.Fatalf("Failed to trace TC: %v", err) + t.Detach() + return fmt.Errorf("failed to trace bpf progs: %w", err) } - return func() { - tt.close() + return nil +} - for _, prog := range progs 
{ - _ = prog.Close() - } +func TraceTC(coll *ebpf.Collection, spec *ebpf.CollectionSpec, + opts *ebpf.CollectionOptions, outputSkb bool, outputShinfo bool, + n2a BpfProgName2Addr, +) *tracing { + log.Printf("Attaching tc-bpf progs...\n") + + var t tracing + if err := t.trace(coll, spec, opts, outputSkb, outputShinfo, n2a, ebpf.SchedCLS, "fentry_tc"); err != nil { + log.Fatalf("failed to trace TC progs: %v", err) } + + return &t +} + +func TraceXDP(coll *ebpf.Collection, spec *ebpf.CollectionSpec, + opts *ebpf.CollectionOptions, outputSkb bool, outputShinfo bool, + n2a BpfProgName2Addr, +) *tracing { + log.Printf("Attaching xdp progs...\n") + + var t tracing + if err := t.trace(coll, spec, opts, outputSkb, outputShinfo, n2a, ebpf.XDP, "fentry_xdp"); err != nil { + log.Fatalf("failed to trace XDP progs: %v", err) + } + + return &t } diff --git a/internal/pwru/types.go b/internal/pwru/types.go index 1ce4a946..414ece52 100644 --- a/internal/pwru/types.go +++ b/internal/pwru/types.go @@ -33,6 +33,7 @@ type Flags struct { FilterTrackSkb bool FilterTrackSkbByStackid bool FilterTraceTc bool + FilterTraceXdp bool FilterIfname string FilterPcap string FilterKprobeBatch uint @@ -69,6 +70,7 @@ func (f *Flags) SetFlags() { flag.BoolVar(&f.FilterTrackSkb, "filter-track-skb", false, "trace a packet even if it does not match given filters (e.g., after NAT or tunnel decapsulation)") flag.BoolVar(&f.FilterTrackSkbByStackid, "filter-track-skb-by-stackid", false, "trace a packet even after it is kfreed (e.g., traffic going through bridge)") flag.BoolVar(&f.FilterTraceTc, "filter-trace-tc", false, "trace TC bpf progs") + flag.BoolVar(&f.FilterTraceXdp, "filter-trace-xdp", false, "trace XDP bpf progs") flag.StringVar(&f.FilterIfname, "filter-ifname", "", "filter skb ifname in --filter-netns (if not specified, use current netns)") flag.UintVar(&f.FilterKprobeBatch, "filter-kprobe-batch", 10, "batch size for kprobe attaching/detaching") flag.StringVar(&f.OutputTS, "timestamp", "none", 
"print timestamp per skb (\"current\", \"relative\", \"absolute\", \"none\")") diff --git a/main.go b/main.go index e256cb17..31e79514 100644 --- a/main.go +++ b/main.go @@ -92,13 +92,14 @@ func main() { if err != nil { log.Fatalf("Failed to get skb-accepting functions: %s", err) } - if len(funcs) <= 0 { + if len(funcs) == 0 && !flags.FilterTraceTc && !flags.FilterTraceXdp { log.Fatalf("Cannot find a matching kernel function") } - // If --filter-trace-tc, it's to retrieve and print bpf prog's name. + // If --filter-trace-tc/--filter-trace-xdp, it's to retrieve and print bpf + // prog's name. addr2name, name2addr, err := pwru.ParseKallsyms(funcs, flags.OutputStack || - len(flags.KMods) != 0 || flags.FilterTraceTc || len(flags.FilterNonSkbFuncs) > 0 || - flags.OutputCaller) + len(flags.KMods) != 0 || flags.FilterTraceTc || flags.FilterTraceXdp || + len(flags.FilterNonSkbFuncs) > 0 || flags.OutputCaller) if err != nil { log.Fatalf("Failed to get function addrs: %s", err) } @@ -130,6 +131,12 @@ func main() { name == "fexit_skb_copy" { continue } + if name == "fentry_xdp" { + if err := libpcap.InjectL2Filter(program, flags.FilterPcap); err != nil { + log.Fatalf("Failed to inject filter ebpf for %s: %v", name, err) + } + continue + } if err = libpcap.InjectFilters(program, flags.FilterPcap); err != nil { log.Fatalf("Failed to inject filter ebpf for %s: %v", name, err) } @@ -146,25 +153,33 @@ func main() { } haveFexit := pwru.HaveBPFLinkTracing() - if flags.FilterTraceTc && !haveFexit { - log.Fatalf("Current kernel does not support fentry/fexit to run with --filter-trace-tc") + if (flags.FilterTraceTc || flags.FilterTraceXdp) && !haveFexit { + log.Fatalf("Current kernel does not support fentry/fexit to run with --filter-trace-tc/--filter-trace-xdp") } // As we know, for every fentry tracing program, there is a corresponding // bpf prog spec with attaching target and attaching function. 
So, we can - // just copy the spec and keep the fentry_tc program spec only in the copied - // spec. - var bpfSpecFentry *ebpf.CollectionSpec + // just copy the spec and keep the fentry_tc/fentry_xdp program spec only in + // the copied spec. + var bpfSpecFentryTc *ebpf.CollectionSpec if flags.FilterTraceTc { - bpfSpecFentry = bpfSpec.Copy() - bpfSpecFentry.Programs = map[string]*ebpf.ProgramSpec{ - "fentry_tc": bpfSpec.Programs["fentry_tc"], + bpfSpecFentryTc = bpfSpec.Copy() + bpfSpecFentryTc.Programs = map[string]*ebpf.ProgramSpec{ + "fentry_tc": bpfSpecFentryTc.Programs["fentry_tc"], + } + } + var bpfSpecFentryXdp *ebpf.CollectionSpec + if flags.FilterTraceXdp { + bpfSpecFentryXdp = bpfSpec.Copy() + bpfSpecFentryXdp.Programs = map[string]*ebpf.ProgramSpec{ + "fentry_xdp": bpfSpecFentryXdp.Programs["fentry_xdp"], } } - // fentry_tc is not used in the kprobe/kprobe-multi cases. So, it should be - // deleted from the spec. + // fentry_tc&fentry_xdp are not used in the kprobe/kprobe-multi cases. So, + // they should be deleted from the spec. delete(bpfSpec.Programs, "fentry_tc") + delete(bpfSpec.Programs, "fentry_xdp") // If not tracking skb, deleting the skb-tracking programs to reduce loading // time. @@ -191,9 +206,22 @@ func main() { } defer coll.Close() + traceTc := false if flags.FilterTraceTc { - close := pwru.TraceTC(coll, bpfSpecFentry, &opts, flags.OutputSkb, flags.OutputShinfo, name2addr) - defer close() + t := pwru.TraceTC(coll, bpfSpecFentryTc, &opts, flags.OutputSkb, flags.OutputShinfo, name2addr) + defer t.Detach() + traceTc = t.HaveTracing() + } + + traceXdp := false + if flags.FilterTraceXdp { + t := pwru.TraceXDP(coll, bpfSpecFentryXdp, &opts, flags.OutputSkb, flags.OutputShinfo, name2addr) + defer t.Detach() + traceXdp = t.HaveTracing() + } + + if !traceTc && !traceXdp && len(funcs) == 0 { + log.Fatalf("No kprobe/tc-bpf/xdp to trace!") } if flags.FilterTrackSkb || flags.FilterTrackSkbByStackid {