-
Notifications
You must be signed in to change notification settings - Fork 181
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Intro libpcap pkg for ebpf compilation and injection
The new libpcap pkg mainly provides two functions: 1. Compile pcap-filter expression to ebpf instructions: a. Libpcap compiles filter expression to cbpf b. Cloudflare/cbpfc converts cbpf to ebpf c. We adjust the generated ebpf to pass verifier 2. Inject filter ebpf instructions: a. Find the injection position and needed registers b. Make preparation: adjust jump offsets c. Inject filter ebpf Find more details in the code comments. Signed-off-by: Zhichuan Liang <gray.liang@isovalent.com>
- Loading branch information
1 parent
ddddb13
commit 7dab5f1
Showing
2 changed files
with
391 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
package libpcap | ||
|
||
import ( | ||
"fmt" | ||
"unsafe" | ||
|
||
"github.com/cilium/ebpf/asm" | ||
"github.com/cloudflare/cbpfc" | ||
"golang.org/x/net/bpf" | ||
) | ||
|
||
/* | ||
#cgo LDFLAGS: -L/usr/local/lib -lpcap -static | ||
#include <stdlib.h> | ||
#include <pcap.h> | ||
*/ | ||
import "C" | ||
|
||
// pcapBpfProgram is a Go-side name for libpcap's struct bpf_program. It is
// the value pcap_compile() fills in and pcap_freecode() releases in
// CompileCbpf.
type pcapBpfProgram C.struct_bpf_program
|
||
const (
	// MAXIMUM_SNAPLEN is the snapshot length handed to pcap_open_dead().
	MAXIMUM_SNAPLEN = 262144

	// MaxBpfInstructions is the instruction count from which
	// bpfInstructionBufferSize is derived.
	MaxBpfInstructions = 4096

	// bpfInstructionBufferSize is the length of the array type through which
	// CompileCbpf views the instruction buffer returned by libpcap.
	bpfInstructionBufferSize = MaxBpfInstructions * 8
)
|
||
func CompileCbpf(expr string) (insts []bpf.Instruction, err error) { | ||
if len(expr) == 0 { | ||
return | ||
} | ||
|
||
/* | ||
DLT_RAW linktype tells pcap_compile() to generate cbpf instructions for | ||
skb without link layer. This is because kernel doesn't supply L2 data | ||
for many of functions, where skb->mac_len == 0, while the default | ||
pcap_compile mode only works for a complete frame data, so we have to | ||
specify this linktype to tell pcap that the data starts from L3 network | ||
header. | ||
*/ | ||
pcap := C.pcap_open_dead(C.DLT_RAW, MAXIMUM_SNAPLEN) | ||
if pcap == nil { | ||
return nil, fmt.Errorf("failed to pcap_open_dead: %+v\n", C.PCAP_ERROR) | ||
} | ||
defer C.pcap_close(pcap) | ||
|
||
cexpr := C.CString(expr) | ||
defer C.free(unsafe.Pointer(cexpr)) | ||
|
||
var bpfProg pcapBpfProgram | ||
if C.pcap_compile(pcap, (*C.struct_bpf_program)(&bpfProg), cexpr, 1, C.PCAP_NETMASK_UNKNOWN) < 0 { | ||
return nil, fmt.Errorf("failed to pcap_compile: %+v", C.GoString(C.pcap_geterr(pcap))) | ||
} | ||
defer C.pcap_freecode((*C.struct_bpf_program)(&bpfProg)) | ||
|
||
for _, v := range (*[bpfInstructionBufferSize]C.struct_bpf_insn)(unsafe.Pointer(bpfProg.bf_insns))[0:bpfProg.bf_len:bpfProg.bf_len] { | ||
insts = append(insts, bpf.RawInstruction{ | ||
Op: uint16(v.code), | ||
Jt: uint8(v.jt), | ||
Jf: uint8(v.jf), | ||
K: uint32(v.k), | ||
}.Disassemble()) | ||
} | ||
return | ||
} | ||
|
||
/* | ||
Steps: | ||
1. Compile pcap expresion to cbpf using libpcap | ||
2. Convert cbpf to ebpf using cloudflare/cbpfc | ||
3. Convert direct memory load to bpf_probe_read_kernel | ||
The conversion to ebpf requires two registers pointing to the start and | ||
end of the packet data. As we mentioned in the comment of DLT_RAW, | ||
packet data starts from L3 network header, rather than L2 ethernet | ||
header, caller should make sure to pass the correct arguments. | ||
*/ | ||
func CompileEbpf(expr string, opts cbpfc.EBPFOpts) (insts asm.Instructions, err error) { | ||
cbpfInsts, err := CompileCbpf(expr) | ||
if err != nil { | ||
return | ||
} | ||
|
||
ebpfInsts, err := cbpfc.ToEBPF(cbpfInsts, opts) | ||
if err != nil { | ||
return | ||
} | ||
|
||
return adjustEbpf(ebpfInsts, opts) | ||
} | ||
|
||
/* | ||
We have to adjust the ebpf instructions because verifier prevents us from | ||
directly loading data from memory. For example, the instruction "r0 = *(u8 *)(r9 +0)" | ||
will break verifier with error "R9 invalid mem access 'scalar", we therefore | ||
need to convert this direct memory load to bpf_probe_read_kernel function call: | ||
- r1 = r10 // r10 is stack top | ||
- r1 += -8 // r1 = r10-8 | ||
- r2 = 1 // r2 = sizeof(u8) | ||
- r3 = r9 // r9 is start of packet data, aka L3 header | ||
- r3 += 0 // r3 = r9+0 | ||
- call bpf_probe_read_kernel // *(r10-8) = *(u8 *)(r9+0) | ||
- r0 = *(u8 *)(r10 -8) // r0 = *(r10-8) | ||
To safely borrow R1, R2 and R3 for setting up the arguments for | ||
bpf_probe_read_kernel(), we need to save the original values of R1, R2 and R3 | ||
on stack, and restore them after the function call. | ||
More details in the comments below. | ||
*/ | ||
func adjustEbpf(insts asm.Instructions, opts cbpfc.EBPFOpts) (newInsts asm.Instructions, err error) { | ||
replaceIdx := []int{} | ||
replaceInsts := map[int]asm.Instructions{} | ||
for idx, inst := range insts { | ||
if inst.OpCode.Class().IsLoad() { | ||
replaceIdx = append(replaceIdx, idx) | ||
replaceInsts[idx] = append(replaceInsts[idx], | ||
|
||
/* | ||
Store R1, R2, R3 on stack. Offsets -16, -24, | ||
-32 are used to store R1, R2, R3 | ||
respectively, we consider these stack area | ||
safe to write for now, because: | ||
1. bpf_probe_read_kernel uses offset -8 as | ||
R1, our choice of -16, -24, and -32 doesn't | ||
overlap that; | ||
2. [r10-32, r10] stack area has been | ||
initialized by "struct event_t event = {}" | ||
in the very first of handle_everything(), | ||
with nothing set on that so far, so we can | ||
borrow this stack temporarily. | ||
*/ | ||
asm.StoreMem(asm.RFP, -16, asm.R1, asm.DWord), | ||
asm.StoreMem(asm.RFP, -24, asm.R2, asm.DWord), | ||
asm.StoreMem(asm.RFP, -32, asm.R3, asm.DWord), | ||
|
||
// bpf_probe_read_kernel(RFP-8, size, inst.Src) | ||
asm.Mov.Reg(asm.R1, asm.RFP), | ||
asm.Add.Imm(asm.R1, -8), | ||
asm.Mov.Imm(asm.R2, int32(inst.OpCode.Size().Sizeof())), | ||
asm.Mov.Reg(asm.R3, inst.Src), | ||
asm.Add.Imm(asm.R3, int32(inst.Offset)), | ||
asm.FnProbeReadKernel.Call(), | ||
|
||
// inst.Dst = *(RFP-8) | ||
asm.LoadMem(inst.Dst, asm.RFP, -8, inst.OpCode.Size()), | ||
) | ||
|
||
/* | ||
Restore R1, R2, R3 from stack, special handling when | ||
inst.Dst is R1, R2 or R3, as we don't want to overwrite | ||
its value by mistake. | ||
*/ | ||
restoreInsts := asm.Instructions{ | ||
asm.LoadMem(asm.R1, asm.RFP, -16, asm.DWord), | ||
asm.LoadMem(asm.R2, asm.RFP, -24, asm.DWord), | ||
asm.LoadMem(asm.R3, asm.RFP, -32, asm.DWord), | ||
} | ||
switch inst.Dst { | ||
case asm.R1, asm.R2, asm.R3: | ||
restoreInsts = append(restoreInsts[:inst.Dst-1], restoreInsts[inst.Dst:]...) | ||
} | ||
replaceInsts[idx] = append(replaceInsts[idx], restoreInsts...) | ||
|
||
/* | ||
Metadata is crucial for adjusting jump offsets. We | ||
ditched original instructions, which could hold symbol | ||
names targeted by other jump instructions, so here we | ||
inherit the metadata from the ditched ones. | ||
*/ | ||
replaceInsts[idx][0].Metadata = inst.Metadata | ||
} | ||
} | ||
|
||
// Replace the memory load instructions with the new ones | ||
for i := len(replaceIdx) - 1; i >= 0; i-- { | ||
idx := replaceIdx[i] | ||
insts = append(insts[:idx], append(replaceInsts[idx], insts[idx+1:]...)...) | ||
} | ||
|
||
/* | ||
Prepend instructions to init R1, R2, R3 so as to avoid verifier error: | ||
permission denied: *(u64 *)(r10 -24) = r2: R2 !read_ok | ||
*/ | ||
insts = append([]asm.Instruction{ | ||
asm.Mov.Imm(asm.R1, 0), | ||
asm.Mov.Imm(asm.R2, 0), | ||
asm.Mov.Imm(asm.R3, 0), | ||
}, insts...) | ||
|
||
// Append instructions to implement "exit immediately if not matched" | ||
insts = append(insts, | ||
asm.Mov.Imm(asm.R0, 0).WithSymbol("result"), // r0 = 0 | ||
asm.JNE.Imm(opts.Result, 0, "continue"), // if %result != 0 (match): jump to continue | ||
asm.Return().WithSymbol("return"), // else return TC_ACT_OK | ||
asm.Mov.Imm(asm.R0, 0).WithSymbol("continue"), | ||
) | ||
|
||
return insts, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
package libpcap | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
|
||
"github.com/cilium/ebpf" | ||
"github.com/cilium/ebpf/asm" | ||
"github.com/cloudflare/cbpfc" | ||
) | ||
|
||
/* | ||
Steps: | ||
1. Find the injection position, which is the bpf_printk call | ||
2. Make some necessary preparations for the injection | ||
3. Compile the filter expression into ebpf instructions | ||
4. Inject the instructions | ||
*/ | ||
func InjectFilter(program *ebpf.ProgramSpec, filterExpr string) (err error) { | ||
/* | ||
First let's mark references and symbols for the jump instructions. | ||
This is even required when filterExpr is empty, because we still | ||
need to remove the bpf_printk call in that case, which breaks the | ||
jump instructions as well. | ||
*/ | ||
injectIdx := 0 | ||
for idx, inst := range program.Instructions { | ||
// In the kprobe_pwru.c, we deliberately put a bpf_printk call to mark the injection position, see the comments over there. | ||
if inst.OpCode.JumpOp() == asm.Call && inst.Constant == int64(asm.FnTracePrintk) { | ||
injectIdx = idx | ||
break | ||
} | ||
|
||
/* | ||
As we are injecting a bunch of instructions into the | ||
program, the jump instructions are likely to require | ||
adjustments on their pc-related offsets. For example, | ||
we have the original bpf program as follows: | ||
26: if r9 >= r8 goto +384 <LBB1_39> | ||
... | ||
96: call bpf_trace_printk#6 | ||
... | ||
After the injection, the instruction No.96 is replaced | ||
by multiple instructions, leaving the instruction No.26 | ||
jumping to a wrong instruction. The offset should be | ||
adjusted accordingly! | ||
We solve this problem smart way by using references and | ||
symbols. The code below sets -1 to the affected jump | ||
instructions' offsets, adds necessary symbols and | ||
references, let cilium/ebpf collectionLoader adjust the | ||
offsets according to these additional information. This | ||
way, we don't have to calculate the new offsets by | ||
hand, which is extremely likely to mess up. | ||
*/ | ||
if inst.OpCode.Class().IsJump() { | ||
// Zero jump offset implies a function call, leave it alone | ||
if inst.Offset == 0 { | ||
continue | ||
} | ||
|
||
// If there already is a reference and corresponding | ||
// symbol, we don't have to create new symbol, just set -1 | ||
// to the offset so that cilium/ebpf loader can adjust it. | ||
if inst.Reference() != "" { | ||
program.Instructions[idx].Offset = -1 | ||
continue | ||
} | ||
|
||
var gotoIns *asm.Instruction | ||
iter := asm.Instructions(program.Instructions[idx+1:]).Iterate() | ||
for iter.Next() { | ||
if int16(iter.Offset) == inst.Offset { | ||
gotoIns = iter.Ins | ||
break | ||
} | ||
} | ||
if gotoIns == nil { | ||
return errors.New("Cannot find the jump target") | ||
} | ||
symbol := gotoIns.Symbol() | ||
if symbol == "" { | ||
symbol = fmt.Sprintf("PWRU_%d", idx) | ||
*gotoIns = gotoIns.WithSymbol(symbol) | ||
} | ||
program.Instructions[idx] = program.Instructions[idx].WithReference(symbol) | ||
program.Instructions[idx].Offset = -1 | ||
} | ||
} | ||
if injectIdx == 0 { | ||
return errors.New("Cannot find the injection position") | ||
} | ||
|
||
if filterExpr == "" { | ||
/* | ||
No need to inject anything, just remove the bpf_printk call | ||
to avoid the unnecessary overhead. | ||
bpf_printk() compiles to 5 instructions from index idx-4 to | ||
idx: the former 4 are setting up registers from R1 to R4, | ||
the last one calls printk(). | ||
But we can't delete former 4 instructions, otherwise we'll | ||
hit verifier with "R1 !read_ok"; they're required to stay | ||
there for register initialization. | ||
*/ | ||
program.Instructions = append(program.Instructions[:injectIdx], | ||
program.Instructions[injectIdx+1:]..., | ||
) | ||
return | ||
} | ||
|
||
/* | ||
Conversion from cbpf to ebpf requires indication of the packet | ||
start and end positions. These two position should be held by | ||
two registers, thanks to the `bpf_printk("..", start, end)` | ||
statement, which makes it clear that start is at R3 and end is | ||
at R4. | ||
The code below searches the instructions prior to the | ||
injection position to find the registers holding the packet | ||
start and end positions, by looking for the mov instructions | ||
targeting R3 and R4. | ||
*/ | ||
var ( | ||
dataReg asm.Register = 255 | ||
dataEndReg asm.Register = 255 | ||
) | ||
for idx := injectIdx - 1; idx >= 0; idx-- { | ||
inst := program.Instructions[idx] | ||
if inst.OpCode.ALUOp() == asm.Mov { | ||
if inst.Dst == asm.R3 { | ||
dataReg = inst.Src | ||
} else if inst.Dst == asm.R4 { | ||
dataEndReg = inst.Src | ||
} | ||
} | ||
if dataReg != 255 && dataEndReg != 255 { | ||
break | ||
} | ||
} | ||
if dataReg == 255 || dataEndReg == 255 { | ||
return errors.New("Cannot find the data / data_end registers") | ||
} | ||
|
||
filterEbpf, err := CompileEbpf(filterExpr, cbpfc.EBPFOpts{ | ||
PacketStart: dataReg, | ||
PacketEnd: dataEndReg, | ||
// R4 is safe to use, because at the injection position, we are | ||
// originally preparing to perform a bpf-helper func call with 4 | ||
// arguments, which leaves r0, r1, r2, r3 and r4 registers ready | ||
// to use. | ||
Result: asm.R4, | ||
ResultLabel: "result", | ||
// Same reason stated above, r0, r1, r2, r3 are safe to use. | ||
Working: [4]asm.Register{asm.R0, asm.R1, asm.R2, asm.R3}, | ||
LabelPrefix: "filter", | ||
// In the kprobe_pwru.c:handle_everything, the first line of | ||
// code `struct event_t event = {}` initializes stack [r10-136, | ||
// r10-16] with zero value, so during the filtering stage, this | ||
// stack area is safe to use. Here we use stack from -40 | ||
// because -32, -24, -16 are reserved for pcap-filter ebpf, see | ||
// the comments in compile.go | ||
StackOffset: -40, | ||
}) | ||
if err != nil { | ||
return | ||
} | ||
/* | ||
; bpf_printk("%d %d", data, data_end); | ||
injectIdx-4 -> 88: r1 = 54 ll | ||
90: r2 = 6 | ||
91: r3 = r9 | ||
92: r4 = r8 | ||
injectIdx -> 93: call 6 | ||
; return filter_pcap(skb) && filter_meta(skb); | ||
injectIdx+1 -> 94: if r9 >= r8 goto +384 <LBB1_39> | ||
[injectIdx-4:injectIdx] is compiled from bpf_printk(); | ||
[injectIdx+1] is from `return data < data_end` statement; | ||
both statements shall be replaced by pcap filter instructions. | ||
*/ | ||
program.Instructions = append(program.Instructions[:injectIdx-4], | ||
append(filterEbpf, program.Instructions[injectIdx+2:]...)..., | ||
) | ||
|
||
return nil | ||
} |