diff --git a/examples/README.md b/examples/README.md index 8cf3236c0..a8309ef03 100644 --- a/examples/README.md +++ b/examples/README.md @@ -20,7 +20,7 @@ Please see our [guide on what makes a good example](https://ebpf-go.dev/contribu * [tcp_connect](fentry/) - Trace outgoing IPv4 TCP connections. * [tcp_close](tcprtt/) - Log RTT of IPv4 TCP connections using eBPF CO-RE helpers. * TCx - Attach a program to Linux TC (Traffic Control) to process incoming and outgoing packets. - * [tcx](./tcx/) - monitor the number of incoming and outgoing packets for each network flow identified with the traditional 5-tuple session identifier (IP addresses, L4 Ports, IP protocol). + * [tcx](./tcx/) - Print packet counts for ingress and egress. * XDP - Attach a program to a network interface to process incoming packets. * [xdp](xdp/) - Print packet counts by IPv4 source address. diff --git a/examples/headers/common.h b/examples/headers/common.h index cf8c9ecb4..55d4fe9d9 100644 --- a/examples/headers/common.h +++ b/examples/headers/common.h @@ -24,15 +24,6 @@ typedef __u32 __wsum; #include "bpf_helpers.h" -// compatibility with kernel definitions -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -# define __LITTLE_ENDIAN_BITFIELD -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -# define __BIG_ENDIAN_BITFIELD -#else -# error "Fix your compiler's __BYTE_ORDER__?!" 
-#endif - enum bpf_map_type { BPF_MAP_TYPE_UNSPEC = 0, BPF_MAP_TYPE_HASH = 1, @@ -73,6 +64,19 @@ enum xdp_action { XDP_REDIRECT = 4, }; +enum tc_action { + TC_ACT_UNSPEC = -1, + TC_ACT_OK = 0, + TC_ACT_RECLASSIFY = 1, + TC_ACT_SHOT = 2, + TC_ACT_PIPE = 3, + TC_ACT_STOLEN = 4, + TC_ACT_QUEUED = 5, + TC_ACT_REPEAT = 6, + TC_ACT_REDIRECT = 7, + TC_ACT_JUMP = 0x10000000 +}; + struct xdp_md { __u32 data; __u32 data_end; @@ -84,10 +88,7 @@ struct xdp_md { typedef __u16 __sum16; -#define ETH_P_IP 0x0800 -#define IPPROTO_ICMP 1 -#define IPPROTO_TCP 6 -#define IPPROTO_UDP 17 +#define ETH_P_IP 0x0800 struct ethhdr { unsigned char h_dest[6]; @@ -109,128 +110,6 @@ struct iphdr { __be32 daddr; }; -/*struct __sk_buff https://github.com/torvalds/linux/blob/master/include/uapi/linux/bpf.h */ -#define __bpf_md_ptr(type, name) \ -union { \ - type name; \ - __u64 :64; \ -} __attribute__((aligned(8))) - -struct __sk_buff { - __u32 len; - __u32 pkt_type; - __u32 mark; - __u32 queue_mapping; - __u32 protocol; - __u32 vlan_present; - __u32 vlan_tci; - __u32 vlan_proto; - __u32 priority; - __u32 ingress_ifindex; - __u32 ifindex; - __u32 tc_index; - __u32 cb[5]; - __u32 hash; - __u32 tc_classid; - __u32 data; - __u32 data_end; - __u32 napi_id; - - /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ - __u32 family; - __u32 remote_ip4; /* Stored in network byte order */ - __u32 local_ip4; /* Stored in network byte order */ - __u32 remote_ip6[4]; /* Stored in network byte order */ - __u32 local_ip6[4]; /* Stored in network byte order */ - __u32 remote_port; /* Stored in network byte order */ - __u32 local_port; /* stored in host byte order */ - /* ... here. 
*/ - - __u32 data_meta; - __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); - __u64 tstamp; - __u32 wire_len; - __u32 gso_segs; - __bpf_md_ptr(struct bpf_sock *, sk); - __u32 gso_size; -}; - -/*TCP Header https://github.com/torvalds/linux/blob/master/include/uapi/linux/tcp.h */ -struct tcphdr { - __be16 source; - __be16 dest; - __be32 seq; - __be32 ack_seq; -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u16 res1:4, - doff:4, - fin:1, - syn:1, - rst:1, - psh:1, - ack:1, - urg:1, - ece:1, - cwr:1; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u16 doff:4, - res1:4, - cwr:1, - ece:1, - urg:1, - ack:1, - psh:1, - rst:1, - syn:1, - fin:1; -#else -#error "Adjust your defines" -#endif - __be16 window; - __sum16 check; - __be16 urg_ptr; -}; - -/*UDP Header https://github.com/torvalds/linux/blob/master/include/uapi/linux/udp.h */ -struct udphdr { - __be16 source; - __be16 dest; - __be16 len; - __sum16 check; -}; - -/*ICMP Header https://github.com/torvalds/linux/blob/master/include/uapi/linux/icmp.h */ -struct icmphdr { - __u8 type; - __u8 code; - __sum16 checksum; - union { - struct { - __be16 id; - __be16 sequence; - } echo; - __be32 gateway; - struct { - __be16 __unused; - __be16 mtu; - } frag; - __u8 reserved[4]; - } un; -}; - -enum tc_action { - TC_ACT_UNSPEC = -1, - TC_ACT_OK = 0, - TC_ACT_RECLASSIFY = 1, - TC_ACT_SHOT = 2, - TC_ACT_PIPE = 3, - TC_ACT_STOLEN = 4, - TC_ACT_QUEUED = 5, - TC_ACT_REPEAT = 6, - TC_ACT_REDIRECT = 7, - TC_ACT_JUMP = 0x10000000 -}; - enum { BPF_ANY = 0, BPF_NOEXIST = 1, diff --git a/examples/tcx/bpf_bpfeb.go b/examples/tcx/bpf_bpfeb.go index e83ac6ea3..36b1dceb9 100644 --- a/examples/tcx/bpf_bpfeb.go +++ b/examples/tcx/bpf_bpfeb.go @@ -12,20 +12,6 @@ import ( "github.com/cilium/ebpf" ) -type bpfSessionKey struct { - Saddr uint32 - Daddr uint32 - Sport uint16 - Dport uint16 - Proto uint8 - _ [3]byte -} - -type bpfSessionValue struct { - InCount uint32 - EgCount uint32 -} - // loadBpf returns the embedded CollectionSpec for bpf. 
func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) @@ -75,7 +61,8 @@ type bpfProgramSpecs struct { // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { - StatsMap *ebpf.MapSpec `ebpf:"stats_map"` + EgressPktCount *ebpf.MapSpec `ebpf:"egress_pkt_count"` + IngressPktCount *ebpf.MapSpec `ebpf:"ingress_pkt_count"` } // bpfObjects contains all objects after they have been loaded into the kernel. @@ -97,12 +84,14 @@ func (o *bpfObjects) Close() error { // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { - StatsMap *ebpf.Map `ebpf:"stats_map"` + EgressPktCount *ebpf.Map `ebpf:"egress_pkt_count"` + IngressPktCount *ebpf.Map `ebpf:"ingress_pkt_count"` } func (m *bpfMaps) Close() error { return _BpfClose( - m.StatsMap, + m.EgressPktCount, + m.IngressPktCount, ) } diff --git a/examples/tcx/bpf_bpfeb.o b/examples/tcx/bpf_bpfeb.o index 9e23b089d..ba3fc848f 100644 Binary files a/examples/tcx/bpf_bpfeb.o and b/examples/tcx/bpf_bpfeb.o differ diff --git a/examples/tcx/bpf_bpfel.go b/examples/tcx/bpf_bpfel.go index 945151de5..7767e316d 100644 --- a/examples/tcx/bpf_bpfel.go +++ b/examples/tcx/bpf_bpfel.go @@ -12,20 +12,6 @@ import ( "github.com/cilium/ebpf" ) -type bpfSessionKey struct { - Saddr uint32 - Daddr uint32 - Sport uint16 - Dport uint16 - Proto uint8 - _ [3]byte -} - -type bpfSessionValue struct { - InCount uint32 - EgCount uint32 -} - // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) @@ -75,7 +61,8 @@ type bpfProgramSpecs struct { // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { - StatsMap *ebpf.MapSpec `ebpf:"stats_map"` + EgressPktCount *ebpf.MapSpec `ebpf:"egress_pkt_count"` + IngressPktCount *ebpf.MapSpec `ebpf:"ingress_pkt_count"` } // bpfObjects contains all objects after they have been loaded into the kernel. 
@@ -97,12 +84,14 @@ func (o *bpfObjects) Close() error { // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { - StatsMap *ebpf.Map `ebpf:"stats_map"` + EgressPktCount *ebpf.Map `ebpf:"egress_pkt_count"` + IngressPktCount *ebpf.Map `ebpf:"ingress_pkt_count"` } func (m *bpfMaps) Close() error { return _BpfClose( - m.StatsMap, + m.EgressPktCount, + m.IngressPktCount, ) } diff --git a/examples/tcx/bpf_bpfel.o b/examples/tcx/bpf_bpfel.o index 314606148..b2df0607a 100644 Binary files a/examples/tcx/bpf_bpfel.o and b/examples/tcx/bpf_bpfel.o differ diff --git a/examples/tcx/main.go b/examples/tcx/main.go index 70814946f..04a27364a 100644 --- a/examples/tcx/main.go +++ b/examples/tcx/main.go @@ -1,30 +1,22 @@ // This program demonstrates attaching an eBPF program to a network interface -// with Linux TC. The program parses the IPv4 source address -// from packets and writes the Ingress and Egress packet count to an Hash map. -// The userspace program (Go code in this file) prints the content of the map to stdout. +// with Linux TC (Traffic Control). The program counts ingress and egress +// packets using two ARRAY maps. +// The userspace program (Go code in this file) prints the contents +// of the two maps to stdout every second. +// This example depends on tcx bpf_link, available in Linux kernel version 6.6 or newer. 
package main import ( - "encoding/binary" "fmt" "log" "net" - "net/netip" "os" - "strings" "time" "github.com/cilium/ebpf" "github.com/cilium/ebpf/link" ) -// mapping between integer value and L4 protocol string -var protoMap = map[uint8]string{ - 1: "ICMP", - 6: "TCP", - 17: "UDP", -} - //go:generate go run github.com/cilium/ebpf/cmd/bpf2go bpf tcx.c -- -I../headers func main() { if len(os.Args) < 2 { @@ -72,49 +64,36 @@ func main() { log.Printf("Attached TCx program to EGRESS iface %q (index %d)", iface.Name, iface.Index) log.Printf("Press Ctrl-C to exit and remove the program") - // Print the contents of the BPF hash map. + // Print the contents of the counters maps. ticker := time.NewTicker(1 * time.Second) defer ticker.Stop() for range ticker.C { - s, err := formatMapContent(objs.StatsMap) + s, err := formatCounters(objs.IngressPktCount, objs.EgressPktCount) if err != nil { log.Printf("Error reading map: %s", err) continue } - log.Printf("Map contents:\n%s", s) + log.Printf("Packet Count: %s\n", s) } } -// formatMapContent prints the content of the map into a string. 
-func formatMapContent(m *ebpf.Map) (string, error) { +func formatCounters(ingressMap, egressMap *ebpf.Map) (string, error) { var ( - sb strings.Builder - key bpfSessionKey - val bpfSessionValue + ingressPacketCount uint64 + egressPacketCount uint64 + key int32 ) - iter := m.Iterate() - for iter.Next(&key, &val) { - sb.WriteString(fmt.Sprintf("\t%15s:%5d - %15s:%5d Proto:%4s => Ingress:%10d Egress:%10d\n", - intToIp(key.Saddr), portToLittleEndian(key.Sport), - intToIp(key.Daddr), portToLittleEndian(key.Dport), - protoMap[key.Proto], val.InCount, val.EgCount)) + // retrieve value from the ingress map + if err := ingressMap.Lookup(&key, &ingressPacketCount); err != nil { + return "", err } - return sb.String(), iter.Err() -} - -// intToIp convert an int32 value retrieved from the network traffic (big endian) into a netip.Addr -func intToIp(val uint32) netip.Addr { - a4 := [4]byte{} - binary.LittleEndian.PutUint32(a4[:], val) - return netip.AddrFrom4(a4) -} + // retrieve value from the egress map + if err := egressMap.Lookup(&key, &egressPacketCount); err != nil { + return "", err + } -// portToLittleEndian convert a uint16 value retrieved from the network traffic (big endian) into a little endian -func portToLittleEndian(val uint16) uint16 { - p2 := [2]byte{} - binary.LittleEndian.PutUint16(p2[:], val) - return binary.LittleEndian.Uint16(p2[:]) + return fmt.Sprintf("%10v Ingress, %10v Egress", ingressPacketCount, egressPacketCount), nil } diff --git a/examples/tcx/tcx.c b/examples/tcx/tcx.c index d6b00c117..82161d82c 100644 --- a/examples/tcx/tcx.c +++ b/examples/tcx/tcx.c @@ -1,155 +1,47 @@ //go:build ignore #include "common.h" -#include "bpf_endian.h" char __license[] SEC("license") = "Dual MIT/GPL"; -// Session identifier -struct session_key { - __u32 saddr; // IP source address - __u32 daddr; // IP dest address - __u16 sport; // Source port (set to 0 if ICMP) - __u16 dport; // Dest port (set to 0 if ICMP) - __u8 proto; // Protocol ID -}; - -// Session value 
-struct session_value { - __u32 in_count; // Ingress packet count - __u32 eg_count; // Egress packet count -}; - -#define MAX_MAP_ENTRIES 16 - -// Define an Hash map for storing packet Ingress and Egress count by 5-tuple session identifier -// User-space logic is responsible for cleaning the map, if potentially new entries needs to be monitored. +/* Define an ARRAY map for storing ingress packet count */ struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, MAX_MAP_ENTRIES); - __type(key, struct session_key); - __type(value, struct session_value); -} stats_map SEC(".maps"); - -// Attempt to parse the 5-tuple session identifier from the packet. -// Returns 0 if the operation failed, i.e. not IPv4 packet or not UDP, TCP or ICMP. -static __always_inline int parse_session_identifier(void *data, void *data_end, struct session_key *key) { - // First, parse the ethernet header. - struct ethhdr *eth = data; - if ((void *)(eth + 1) > data_end) { - return 0; - } - - // Check for IPv4 packet. - if (eth->h_proto != bpf_htons(ETH_P_IP)) { - return 0; - } - - // Then parse the IP header. - struct iphdr *ip = (void *)(eth + 1); - if ((void *)(ip + 1) > data_end) { - return 0; - } + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u64); + __uint(max_entries, 1); +} ingress_pkt_count SEC(".maps"); - // Then parse the L4 header. - switch (ip->protocol) { - case IPPROTO_TCP: { - // TCP protocol carried, parse TCP header. - struct tcphdr *tcp = (void *)(ip + 1); - if ((void *)(tcp + 1) > data_end) - return 0; - key->sport = (__u16)(tcp->source); - key->dport = (__u16)(tcp->dest); - break; - } - case IPPROTO_UDP: { - // UDP protocol carried, parse UDP header. - struct udphdr *udp = (void *)(ip + 1); - if ((void *)(udp + 1) > data_end) - return 0; - key->sport = (__u16)(udp->source); - key->dport = (__u16)(udp->dest); - break; - } - case IPPROTO_ICMP: { - // ICMP protocol carried, no source/dest port. 
- break; - } - // Unchecked protocols, ignore packet and return. - default: { - return 0; - } - } - - // Fill session key with IP header data - key->proto = (__u8)(ip->protocol); - key->saddr = (__u32)(ip->saddr); - key->daddr = (__u32)(ip->daddr); +/* Define an ARRAY map for storing egress packet count */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u64); + __uint(max_entries, 1); +} egress_pkt_count SEC(".maps"); + + +/* +Upon arrival of each network packet, retrieve and increment +the packet count from the provided map. +Returns TC_ACT_OK, allowing the packet to proceed. +*/ +static __always_inline int update_map_pkt_count(void *map_fd) { + __u32 key = 0; + __u64 *count = bpf_map_lookup_elem(map_fd, &key); + if (count) { + __sync_fetch_and_add(count, 1); + } - return 1; + return TC_ACT_OK; } -// TC Ingress hook, to monitoring TCP/UDP/ICMP network connections and count packets. SEC("tc") int ingress_prog_func(struct __sk_buff *skb) { - void *data = (void *)(long)skb->data; - void *data_end = (void *)(long)skb->data_end; - - struct session_key key = {}; - if (!parse_session_identifier(data, data_end, &key)) { - goto ingress_done; - } - - struct session_value *val = bpf_map_lookup_elem(&stats_map, &key); - if (!val) { - // No entry in the map for this 5-tuple identifier yet, so set the initial value to 1. - struct session_value new_val = {.in_count = 1}; - bpf_map_update_elem(&stats_map, &key, &new_val, BPF_ANY); - goto ingress_done; - } - - // Entry already exists for this 5-tuple identifier, so increment it atomically using an LLVM built-in. - __sync_fetch_and_add(&val->in_count, 1); - -ingress_done: - - // Return code corresponds to the PASS action in TC - return TC_ACT_OK; + return update_map_pkt_count(&ingress_pkt_count); } -// TC Egress hook, same as Ingress but with IPs and Ports inverted in the key. -// This way, the connections match the same entry for the Ingress in the bpf map. 
SEC("tc") int egress_prog_func(struct __sk_buff *skb) { - void *data = (void *)(long)skb->data; - void *data_end = (void *)(long)skb->data_end; - - struct session_key key = {}; - if (!parse_session_identifier(data, data_end, &key)) { - goto egress_done; - } - - // Swap addresses and L4 port before doing the map lookup. - __u32 tmp = key.saddr; - __u16 tmp2 = key.sport; - key.saddr = key.daddr; - key.sport = key.dport; - key.daddr = tmp; - key.dport = tmp2; - - struct session_value *val = bpf_map_lookup_elem(&stats_map, &key); - if (!val) { - // No entry in the map for this 5-tuple identifier yet, so set the initial value to 1. - struct session_value new_val = {.eg_count = 1}; - bpf_map_update_elem(&stats_map, &key, &new_val, BPF_ANY); - goto egress_done; - } - - // Entry already exists for this 5-tuple identifier, so increment it atomically using an LLVM built-in. - __sync_fetch_and_add(&val->eg_count, 1); - -egress_done: - - // Return code corresponds to the PASS action in TC - return TC_ACT_OK; + return update_map_pkt_count(&egress_pkt_count); }