From d184704de4dc65c0e4e73c9e47c198a79dd82dac Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Wed, 8 Nov 2023 10:24:26 +0000 Subject: [PATCH 1/6] asm: rename SMov to MovSX Signed-off-by: Lorenz Bauer --- asm/alu.go | 12 ++++++------ asm/alu_string.go | 14 +++++++------- asm/instruction.go | 12 ++++++------ asm/instruction_test.go | 10 +++++----- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/asm/alu.go b/asm/alu.go index 31b1e4cea..52be63a23 100644 --- a/asm/alu.go +++ b/asm/alu.go @@ -79,12 +79,12 @@ const ( Xor ALUOp = 0x00a0 // Mov - move value from one place to another Mov ALUOp = 0x00b0 - // SMov8 - move lower 8 bits, sign extended upper bits of target - SMov8 ALUOp = 0x08b0 - // SMov16 - move lower 16 bits, sign extended upper bits of target - SMov16 ALUOp = 0x10b0 - // SMov32 - move lower 32 bits, sign extended upper bits of target - SMov32 ALUOp = 0x20b0 + // MovSX8 - move lower 8 bits, sign extended upper bits of target + MovSX8 ALUOp = 0x08b0 + // MovSX16 - move lower 16 bits, sign extended upper bits of target + MovSX16 ALUOp = 0x10b0 + // MovSX32 - move lower 32 bits, sign extended upper bits of target + MovSX32 ALUOp = 0x20b0 // ArSh - arithmetic shift ArSh ALUOp = 0x00c0 // Swap - endian conversions diff --git a/asm/alu_string.go b/asm/alu_string.go index 8ce8d8af3..110c8348f 100644 --- a/asm/alu_string.go +++ b/asm/alu_string.go @@ -77,14 +77,14 @@ func _() { _ = x[SMod-400] _ = x[Xor-160] _ = x[Mov-176] - _ = x[SMov8-2224] - _ = x[SMov16-4272] - _ = x[SMov32-8368] + _ = x[MovSX8-2224] + _ = x[MovSX16-4272] + _ = x[MovSX32-8368] _ = x[ArSh-192] _ = x[Swap-208] } -const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOpSDivSModSMov8SMov16SMov32" +const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOpSDivSModMovSX8MovSX16MovSX32" var _ALUOp_map = map[ALUOp]string{ 0: _ALUOp_name[0:3], @@ -104,9 +104,9 @@ var _ALUOp_map = map[ALUOp]string{ 255: _ALUOp_name[43:55], 304: _ALUOp_name[55:59], 400: _ALUOp_name[59:63], - 2224: _ALUOp_name[63:68], - 4272: _ALUOp_name[68:74], - 8368: _ALUOp_name[74:80], + 2224: _ALUOp_name[63:69], + 4272: _ALUOp_name[69:76], + 8368: _ALUOp_name[76:83], } func (i ALUOp) String() string { diff --git a/asm/instruction.go b/asm/instruction.go index d21a21a72..51d8a9b1f 100644 --- a/asm/instruction.go +++ b/asm/instruction.go @@ -76,13 +76,13 @@ func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, err case Mov: switch ins.Offset { case 8: - ins.OpCode = ins.OpCode.SetALUOp(SMov8) + ins.OpCode = ins.OpCode.SetALUOp(MovSX8) ins.Offset = 0 case 16: - ins.OpCode = ins.OpCode.SetALUOp(SMov16) + ins.OpCode = ins.OpCode.SetALUOp(MovSX16) ins.Offset = 0 case 32: - ins.OpCode = ins.OpCode.SetALUOp(SMov32) + ins.OpCode = ins.OpCode.SetALUOp(MovSX32) ins.Offset = 0 } } @@ -142,13 +142,13 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) case SMod: ins.OpCode = ins.OpCode.SetALUOp(Mod) ins.Offset = 1 - case SMov8: + case MovSX8: ins.OpCode = ins.OpCode.SetALUOp(Mov) ins.Offset = 8 - case SMov16: + case MovSX16: ins.OpCode = ins.OpCode.SetALUOp(Mov) ins.Offset = 16 - case SMov32: + case MovSX32: ins.OpCode = ins.OpCode.SetALUOp(Mov) ins.Offset = 32 } diff --git a/asm/instruction_test.go b/asm/instruction_test.go index 653e60727..48109fa56 100644 --- a/asm/instruction_test.go +++ b/asm/instruction_test.go @@ -384,11 +384,11 @@ func TestISAv4(t *testing.T) { "LdXMemSXW dst: r3 src: r6 off: 8 imm: 0", "LdXMemSXB dst: r1 src: r4 off: 0 imm: 0", "LdXMemSXH dst: r2 src: r5 off: 4 imm: 0", - "SMov8Reg dst: r1 src: r4", - "SMov16Reg dst: r2 src: r5", - "SMov32Reg dst: r3 src: r6", - "SMov8Reg32 dst: r1 src: r3", - "SMov16Reg32 dst: r2 src: r4", + "MovSX8Reg dst: r1 src: r4", + "MovSX16Reg dst: r2 src: r5", + "MovSX32Reg dst: r3 src: r6", + "MovSX8Reg32 dst: r1 src: r3", + "MovSX16Reg32 dst: r2 src: r4", "Ja32 imm: 3", "SDivReg dst: r1 src: r3", "SModReg dst: r2 src: r4", From 84862e879a13532152b533d9202831d2bc7a3d69 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Wed, 8 Nov 2023 10:24:55 +0000 Subject: [PATCH 2/6] asm: return an error when extended opcodes have offset Signed-off-by: Lorenz Bauer --- asm/instruction.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/asm/instruction.go b/asm/instruction.go index 51d8a9b1f..ae96d622f 100644 --- a/asm/instruction.go +++ b/asm/instruction.go @@ -135,23 +135,28 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) } if ins.OpCode.Class().IsALU() { + newOffset := int16(0) switch ins.OpCode.ALUOp() { case SDiv: ins.OpCode = ins.OpCode.SetALUOp(Div) - ins.Offset = 1 + newOffset = 1 case SMod: ins.OpCode = ins.OpCode.SetALUOp(Mod) - ins.Offset = 1 + newOffset = 1 case MovSX8: ins.OpCode = ins.OpCode.SetALUOp(Mov) - ins.Offset = 8 + newOffset = 8 case MovSX16: ins.OpCode = ins.OpCode.SetALUOp(Mov) - ins.Offset = 16 + newOffset = 16 case MovSX32: ins.OpCode = ins.OpCode.SetALUOp(Mov) - ins.Offset = 32 + newOffset = 32 } + if newOffset != 0 && ins.Offset != 0 { + return 0, fmt.Errorf("extended ALU opcodes should have an .Offset of 0: %s", ins) + } + ins.Offset = newOffset } data := make([]byte, InstructionSize) From 9450fc38ea0249022bc3e8ac24f9171d6d0ccbe5 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Wed, 8 Nov 2023 10:29:17 +0000 Subject: [PATCH 3/6] asm: make extended ALUOp easier to understand Rejig the extended ALUOp a litte to make the encoding a little bit easier to understand. For MovSX we don't need to encode the width directly in the extended Op, so simplify the encoding to just be consecutive numbers. Signed-off-by: Lorenz Bauer --- asm/alu.go | 14 ++++++------- asm/alu_string.go | 50 +++++++++++++++++++++++------------------------ 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/asm/alu.go b/asm/alu.go index 52be63a23..282233d32 100644 --- a/asm/alu.go +++ b/asm/alu.go @@ -45,12 +45,12 @@ const ( // +-------+----+-+---+ type ALUOp uint16 -const aluMask OpCode = 0xfff0 +const aluMask OpCode = 0x3ff0 const ( // InvalidALUOp is returned by getters when invoked // on non ALU OpCodes - InvalidALUOp ALUOp = 0xff + InvalidALUOp ALUOp = 0xffff // Add - addition Add ALUOp = 0x0000 // Sub - subtraction @@ -60,7 +60,7 @@ const ( // Div - division Div ALUOp = 0x0030 // SDiv - signed division - SDiv ALUOp = 0x0130 + SDiv ALUOp = Div + 0x0100 // Or - bitwise or Or ALUOp = 0x0040 // And - bitwise and @@ -74,17 +74,17 @@ const ( // Mod - modulo Mod ALUOp = 0x0090 // SMod - signed modulo - SMod ALUOp = 0x0190 + SMod ALUOp = Mod + 0x0100 // Xor - bitwise xor Xor ALUOp = 0x00a0 // Mov - move value from one place to another Mov ALUOp = 0x00b0 // MovSX8 - move lower 8 bits, sign extended upper bits of target - MovSX8 ALUOp = 0x08b0 + MovSX8 ALUOp = Mov + 0x0100 // MovSX16 - move lower 16 bits, sign extended upper bits of target - MovSX16 ALUOp = 0x10b0 + MovSX16 ALUOp = Mov + 0x0200 // MovSX32 - move lower 32 bits, sign extended upper bits of target - MovSX32 ALUOp = 0x20b0 + MovSX32 ALUOp = Mov + 0x0300 // ArSh - arithmetic shift ArSh ALUOp = 0x00c0 // Swap - endian conversions diff --git a/asm/alu_string.go b/asm/alu_string.go index 110c8348f..35b406bf3 100644 --- a/asm/alu_string.go +++ b/asm/alu_string.go @@ -62,7 +62,7 @@ func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. var x [1]struct{} - _ = x[InvalidALUOp-255] + _ = x[InvalidALUOp-65535] _ = x[Add-0] _ = x[Sub-16] _ = x[Mul-32] @@ -77,36 +77,36 @@ func _() { _ = x[SMod-400] _ = x[Xor-160] _ = x[Mov-176] - _ = x[MovSX8-2224] - _ = x[MovSX16-4272] - _ = x[MovSX32-8368] + _ = x[MovSX8-432] + _ = x[MovSX16-688] + _ = x[MovSX32-944] _ = x[ArSh-192] _ = x[Swap-208] } -const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOpSDivSModMovSX8MovSX16MovSX32" +const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapSDivSModMovSX8MovSX16MovSX32InvalidALUOp" var _ALUOp_map = map[ALUOp]string{ - 0: _ALUOp_name[0:3], - 16: _ALUOp_name[3:6], - 32: _ALUOp_name[6:9], - 48: _ALUOp_name[9:12], - 64: _ALUOp_name[12:14], - 80: _ALUOp_name[14:17], - 96: _ALUOp_name[17:20], - 112: _ALUOp_name[20:23], - 128: _ALUOp_name[23:26], - 144: _ALUOp_name[26:29], - 160: _ALUOp_name[29:32], - 176: _ALUOp_name[32:35], - 192: _ALUOp_name[35:39], - 208: _ALUOp_name[39:43], - 255: _ALUOp_name[43:55], - 304: _ALUOp_name[55:59], - 400: _ALUOp_name[59:63], - 2224: _ALUOp_name[63:69], - 4272: _ALUOp_name[69:76], - 8368: _ALUOp_name[76:83], + 0: _ALUOp_name[0:3], + 16: _ALUOp_name[3:6], + 32: _ALUOp_name[6:9], + 48: _ALUOp_name[9:12], + 64: _ALUOp_name[12:14], + 80: _ALUOp_name[14:17], + 96: _ALUOp_name[17:20], + 112: _ALUOp_name[20:23], + 128: _ALUOp_name[23:26], + 144: _ALUOp_name[26:29], + 160: _ALUOp_name[29:32], + 176: _ALUOp_name[32:35], + 192: _ALUOp_name[35:39], + 208: _ALUOp_name[39:43], + 304: _ALUOp_name[43:47], + 400: _ALUOp_name[47:51], + 432: _ALUOp_name[51:57], + 688: _ALUOp_name[57:64], + 944: _ALUOp_name[64:71], + 65535: _ALUOp_name[71:83], } func (i ALUOp) String() string { From 2b6e4c4a25ed50c130df9f45bbca57654ce3217e Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Wed, 8 Nov 2023 10:30:17 +0000 Subject: [PATCH 4/6] asm: correct jumpMask We don't need to extend the jump opcode yet, so we shouldn't reuse aluMask. Signed-off-by: Lorenz Bauer --- asm/jump.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asm/jump.go b/asm/jump.go index 0c69ec95d..2738d736b 100644 --- a/asm/jump.go +++ b/asm/jump.go @@ -10,7 +10,7 @@ package asm // +----+-+---+ type JumpOp uint8 -const jumpMask OpCode = aluMask +const jumpMask OpCode = 0xf0 const ( // InvalidJumpOp is returned by getters when invoked From 4cac57444b466b94fd972b137062243c555f1dc5 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Wed, 8 Nov 2023 10:33:36 +0000 Subject: [PATCH 5/6] asm: fix OpCode doc comment The diagrams are aligned in a way that makes it difficult to figure out which field a bit belongs to. Signed-off-by: Lorenz Bauer --- asm/opcode.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/asm/opcode.go b/asm/opcode.go index 3d408496d..bbda2107f 100644 --- a/asm/opcode.go +++ b/asm/opcode.go @@ -72,22 +72,22 @@ func (cls Class) isJumpOrALU() bool { // The encoding varies based on a 3-bit Class: // // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -// ??? | CLS +// ??? | CLS // // For ALUClass and ALUCLass32: // // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -// OPC |S| CLS +// OPC |S| CLS // // For LdClass, LdXclass, StClass and StXClass: // // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -// 0 | MDE |SIZ| CLS +// 0 | MDE |SIZ| CLS // // For JumpClass, Jump32Class: // // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -// 0 | OPC |S| CLS +// 0 | OPC |S| CLS type OpCode uint16 // InvalidOpCode is returned by setters on OpCode From 712458c36649b01e004288a6663a5834913f91c9 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Wed, 8 Nov 2023 11:03:20 +0000 Subject: [PATCH 6/6] asm: refuse marshaling unknown extended opcodes We currently rely on the low byte of OpCode to be a valid BPF opcode. Make this clearer by adding a method to OpCode. This allows us to reject invalid extended opcodes during marshaling. Signed-off-by: Lorenz Bauer --- asm/instruction.go | 7 ++++++- asm/opcode.go | 11 +++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/asm/instruction.go b/asm/instruction.go index ae96d622f..67cd39d6f 100644 --- a/asm/instruction.go +++ b/asm/instruction.go @@ -159,8 +159,13 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) ins.Offset = newOffset } + op, err := ins.OpCode.bpfOpCode() + if err != nil { + return 0, err + } + data := make([]byte, InstructionSize) - data[0] = byte(ins.OpCode) + data[0] = op data[1] = byte(regs) bo.PutUint16(data[2:4], uint16(ins.Offset)) bo.PutUint32(data[4:8], uint32(cons)) diff --git a/asm/opcode.go b/asm/opcode.go index bbda2107f..1dfd0b171 100644 --- a/asm/opcode.go +++ b/asm/opcode.go @@ -93,6 +93,17 @@ type OpCode uint16 // InvalidOpCode is returned by setters on OpCode const InvalidOpCode OpCode = 0xffff +// bpfOpCode returns the actual BPF opcode. +func (op OpCode) bpfOpCode() (byte, error) { + const opCodeMask = 0xff + + if !valid(op, opCodeMask) { + return 0, fmt.Errorf("invalid opcode %x", op) + } + + return byte(op & opCodeMask), nil +} + // rawInstructions returns the number of BPF instructions required // to encode this opcode. func (op OpCode) rawInstructions() int {