From 224747b21fd8d56756322d8d8b9003e5b86f19a1 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Thu, 10 Aug 2023 13:45:35 +0900 Subject: [PATCH 1/8] cleanup Signed-off-by: Takeshi Yoneda --- .../engine/wazevo/backend/backend_test.go | 14 ++-- .../engine/wazevo/backend/isa/arm64/instr.go | 22 ++--- .../backend/isa/arm64/instr_encoding.go | 8 +- .../backend/isa/arm64/instr_encoding_test.go | 5 +- .../wazevo/backend/isa/arm64/lower_instr.go | 30 +++++-- .../isa/arm64/machine_pro_epi_logue.go | 6 +- .../isa/arm64/machine_pro_epi_logue_test.go | 4 +- internal/engine/wazevo/frontend/frontend.go | 61 ++++---------- .../engine/wazevo/frontend/frontend_test.go | 31 +++---- internal/engine/wazevo/frontend/lower.go | 82 +++++++++++++++++-- internal/engine/wazevo/ssa/basic_block.go | 7 ++ internal/engine/wazevo/ssa/builder.go | 55 +++++++++---- internal/engine/wazevo/ssa/instructions.go | 65 ++++++++------- internal/engine/wazevo/ssa/pass_test.go | 2 +- internal/engine/wazevo/testcases/testcases.go | 63 ++++++++++++++ internal/engine/wazevo/wazevoapi/exitcode.go | 17 +++- .../engine/wazevo/wazevoapi/offsetdata.go | 13 +++ 17 files changed, 328 insertions(+), 157 deletions(-) diff --git a/internal/engine/wazevo/backend/backend_test.go b/internal/engine/wazevo/backend/backend_test.go index 6725ee8948..91fcb53800 100644 --- a/internal/engine/wazevo/backend/backend_test.go +++ b/internal/engine/wazevo/backend/backend_test.go @@ -224,14 +224,13 @@ L2 (SSA Block: blk1): afterLoweringARM64: ` L1 (SSA Block: blk0): mov x0?, x0 - mov x4?, xzr - cbz w4?, (L2) + mov x3?, xzr + cbz w3?, (L2) L3 (SSA Block: blk1): ret L2 (SSA Block: blk2): -L4 (SSA Block: blk3): - orr w5?, wzr, #0x2 - str w5?, [x0?] + movz x27, #0x2, LSL 0 + str w27, [x0?] trap_sequence w0? `, afterFinalizeARM64: ` @@ -243,9 +242,8 @@ L3 (SSA Block: blk1): ldr x30, [sp], #0x10 ret L2 (SSA Block: blk2): -L4 (SSA Block: blk3): - orr w8, wzr, #0x2 - str w8, [x0] + movz x27, #0x2, LSL 0 + str w27, [x0] trap_sequence w0 `, }, diff --git a/internal/engine/wazevo/backend/isa/arm64/instr.go b/internal/engine/wazevo/backend/isa/arm64/instr.go index 65f670373a..e3f667ed32 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -63,7 +63,7 @@ var defKinds = [numInstructionKinds]defKind{ ret: defKindNone, store32: defKindNone, store64: defKindNone, - trapSequence: defKindNone, + exitSequence: defKindNone, condBr: defKindNone, br: defKindNone, cSet: defKindRD, @@ -146,7 +146,7 @@ var useKinds = [numInstructionKinds]useKind{ ret: useKindRet, store32: useKindRNAMode, store64: useKindRNAMode, - trapSequence: useKindRN, + exitSequence: useKindRN, condBr: useKindCond, br: useKindNone, cSet: useKindNone, @@ -872,7 +872,7 @@ func (i *instruction) String() (str string) { panic("TODO") case loadAddr: panic("TODO") - case trapSequence: + case exitSequence: str = fmt.Sprintf("trap_sequence %s", formatVRegSized(i.rn.nr(), 32)) case udf: str = "udf" @@ -1056,9 +1056,9 @@ const ( jtSequence // loadAddr represents a load address instruction. loadAddr - // trapSequence consists of multiple instructions, and exits the execution immediately. - // See encodeTrapSequence. - trapSequence + // exitSequence consists of multiple instructions, and exits the execution immediately. + // See encodeExitSequence. + exitSequence // UDF is the undefined instruction. For debugging only. 
udf @@ -1070,8 +1070,8 @@ func (i *instruction) asUDF() { i.kind = udf } -func (i *instruction) asTrapSequence(ctx regalloc.VReg) { - i.kind = trapSequence +func (i *instruction) asExitSequence(ctx regalloc.VReg) { + i.kind = exitSequence i.rn = operandNR(ctx) } @@ -1358,13 +1358,13 @@ func binarySize(begin, end *instruction) (size int64) { return size } -const trapSequenceSize = 5 * 4 // 5 instructions as in encodeTrapSequence. +const exitSequenceSize = 5 * 4 // 5 instructions as in encodeExitSequence. // size returns the size of the instruction in encoded bytes. func (i *instruction) size() int64 { switch i.kind { - case trapSequence: - return trapSequenceSize // 5 instructions as in encodeTrapSequence. + case exitSequence: + return exitSequenceSize // 5 instructions as in encodeExitSequence. case nop0: return 0 case loadFpuConst32: diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go index 7c5f25a7bd..59cd01ebd1 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -20,8 +20,8 @@ func (m *machine) encode(root *instruction) { func (i *instruction) encode(c backend.Compiler) { switch kind := i.kind; kind { case nop0: - case trapSequence: - encodeTrapSequence(c, i.rn.reg()) + case exitSequence: + encodeExitSequence(c, i.rn.reg()) case ret: // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/RET--Return-from-subroutine-?lang=en c.Emit4Bytes(encodeRet()) @@ -740,8 +740,8 @@ func encodeAluRRImm(op aluOp, rd, rn, amount, _64bit uint32) uint32 { return _64bit<<31 | opc<<29 | 0b100110<<23 | _64bit<<22 | immr<<16 | imms<<10 | rn<<5 | rd } -// encodeTrapSequence matches the implementation detail of abiImpl.emitGoEntryPreamble. -func encodeTrapSequence(c backend.Compiler, ctxReg regalloc.VReg) { +// encodeExitSequence matches the implementation detail of abiImpl.emitGoEntryPreamble. 
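+// It emits five 4-byte instructions, matching exitSequenceSize (5 * 4 bytes).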
+func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) { // Restore the FP, SP and LR, and return to the Go code: // ldr fp, [savedExecutionContextPtr, #OriginalFramePointer] // ldr tmp, [savedExecutionContextPtr, #OriginalStackPointer] diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go index 7426a659d5..7123f967fe 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go @@ -749,13 +749,14 @@ func TestInstruction_encoding_store(t *testing.T) { } } -func Test_encodeTrapSequence(t *testing.T) { +func Test_encodeExitSequence(t *testing.T) { m := &mockCompiler{} - encodeTrapSequence(m, x22VReg) + encodeExitSequence(m, x22VReg) // ldr x29, [x22, #0x10] // ldr x27, [x22, #0x18] // mov sp, x27 // ldr x30, [x22, #0x20] // ret require.Equal(t, "dd0a40f9db0e40f97f030091de1240f9c0035fd6", hex.EncodeToString(m.buf)) + require.Equal(t, len(m.buf), exitSequenceSize) } diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index e8b48eb8e6..2d3089f994 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -9,6 +9,7 @@ package arm64 import ( "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" ) // LowerSingleBranch implements backend.Machine. @@ -126,8 +127,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { case ssa.OpcodeFadd, ssa.OpcodeFsub, ssa.OpcodeFmul, ssa.OpcodeFdiv, ssa.OpcodeFmax, ssa.OpcodeFmin: m.lowerFpuBinOp(instr) case ssa.OpcodeIconst, ssa.OpcodeF32const, ssa.OpcodeF64const: // Constant instructions are inlined. - case ssa.OpcodeTrap: - m.lowerTrap(instr.Arg()) + case ssa.OpcodeExitWithCode: + execCtx, code := instr.ExitWithCodeData() + m.lowerExitWithCode(execCtx, code) case ssa.OpcodeStore, ssa.OpcodeIstore8, ssa.OpcodeIstore16, ssa.OpcodeIstore32: m.lowerStore(instr) case ssa.OpcodeLoad: @@ -308,10 +310,24 @@ func (m *machine) lowerImul(x, y, result ssa.Value) { m.insert(mul) } -// lowerTrap lowers the trap as trapSequence instruction that takes a context pointer as argument. -func (m *machine) lowerTrap(ctx ssa.Value) { +// lowerExitWithCode lowers the lowerExitWithCode takes a context pointer as argument. 
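+// It stores the given exit code into the execution context and then emits the exit sequence.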
+func (m *machine) lowerExitWithCode(ctx ssa.Value, code wazevoapi.ExitCode) { execCtxVReg := m.compiler.VRegOf(ctx) - instr := m.allocateInstr() - instr.asTrapSequence(execCtxVReg) - m.insert(instr) + + loadExitCodeConst := m.allocateInstr() + loadExitCodeConst.asMOVZ(tmpRegVReg, uint64(code), 0, true) + + setExitCode := m.allocateInstr() + setExitCode.asStore(operandNR(tmpRegVReg), + addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsets.ExitCodeOffset.I64(), + }, 32) + + exitSeq := m.allocateInstr() + exitSeq.asExitSequence(execCtxVReg) + + m.insert(loadExitCodeConst) + m.insert(setExitCode) + m.insert(exitSeq) } diff --git a/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go index 75e575583f..3b3623e552 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go @@ -375,7 +375,7 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi kind: addressModeKindRegUnsignedImm12, // Execution context is always the first argument. rn: x0VReg, imm: wazevoapi.ExecutionContextOffsets.ExitCodeOffset.I64(), - }, 64) + }, 32) setExistStatus.prev = cur cur.next = setExistStatus cur = setExistStatus @@ -405,7 +405,7 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi // Read the return address into tmp, and store it in the execution context. adr := m.allocateInstrAfterLowering() - adr.asAdr(tmpRegVReg, trapSequenceSize+8) + adr.asAdr(tmpRegVReg, exitSequenceSize+8) adr.prev = cur cur.next = adr cur = adr @@ -422,7 +422,7 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi // Exit the execution. trapSeq := m.allocateInstrAfterLowering() - trapSeq.asTrapSequence(x0VReg) + trapSeq.asExitSequence(x0VReg) trapSeq.prev = cur cur.next = trapSeq cur = trapSeq diff --git a/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue_test.go b/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue_test.go index ed33807e7f..19842b20c6 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue_test.go @@ -218,7 +218,7 @@ func TestMachine_insertStackBoundsCheck(t *testing.T) { mov x27, sp str x27, [x0, #0x38] movz x27, #0x1, LSL 0 - str x27, [x0] + str w27, [x0] movz x27, #0xfff0, LSL 0 str x27, [x0, #0x40] adr x27, #0x1c @@ -316,7 +316,7 @@ func TestMachine_insertStackBoundsCheck(t *testing.T) { mov x27, sp str x27, [x0, #0x38] movz x27, #0x1, LSL 0 - str x27, [x0] + str w27, [x0] orr x27, xzr, #0x10 str x27, [x0, #0x40] adr x27, #0x1c diff --git a/internal/engine/wazevo/frontend/frontend.go b/internal/engine/wazevo/frontend/frontend.go index 1c7c392d66..ecd08a521d 100644 --- a/internal/engine/wazevo/frontend/frontend.go +++ b/internal/engine/wazevo/frontend/frontend.go @@ -24,16 +24,14 @@ type Compiler struct { // wasmLocalToVariable maps the index (considered as wasm.Index of locals) // to the corresponding ssa.Variable. 
- wasmLocalToVariable map[wasm.Index]ssa.Variable - wasmLocalFunctionIndex wasm.Index - wasmFunctionTyp *wasm.FunctionType - wasmFunctionLocalTypes []wasm.ValueType - wasmFunctionBody []byte + wasmLocalToVariable map[wasm.Index]ssa.Variable + wasmLocalFunctionIndex wasm.Index + wasmFunctionTyp *wasm.FunctionType + wasmFunctionLocalTypes []wasm.ValueType + wasmFunctionBody []byte + memoryBaseVariable, memoryLenVariable ssa.Variable // br is reused during lowering. - br *bytes.Reader - // trapBlocks maps wazevoapi.ExitCode to the corresponding BasicBlock which - // exits the execution with the code. - trapBlocks [wazevoapi.ExitCodeCount]ssa.BasicBlock + br *bytes.Reader loweringState loweringState execCtxPtrValue, moduleCtxPtrValue ssa.Value @@ -76,7 +74,6 @@ func NewFrontendCompiler(m *wasm.Module, ssaBuilder ssa.Builder, offset *wazevoa func (c *Compiler) Init(idx wasm.Index, typ *wasm.FunctionType, localTypes []wasm.ValueType, body []byte) { c.ssaBuilder.Init(c.signatures[typ]) c.loweringState.reset() - c.trapBlocks = [wazevoapi.ExitCodeCount]ssa.BasicBlock{} c.wasmLocalFunctionIndex = idx c.wasmFunctionTyp = typ @@ -129,9 +126,9 @@ func (c *Compiler) LowerToSSA() error { c.wasmLocalToVariable[wasm.Index(i)] = variable } c.declareWasmLocals(entryBlock) + c.declareNecessaryVariables() c.lowerBody(entryBlock) - c.emitTrapBlocks() return nil } @@ -168,6 +165,12 @@ func (c *Compiler) declareWasmLocals(entry ssa.BasicBlock) { } } +func (c *Compiler) declareNecessaryVariables() { + c.memoryBaseVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64) + c.memoryLenVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64) + // TODO: add tables, globals. +} + // wasmToSSA converts wasm.ValueType to ssa.Type. func wasmToSSA(vt wasm.ValueType) ssa.Type { switch vt { @@ -197,39 +200,3 @@ func (c *Compiler) formatBuilder() string { // TODO: use source position to add the Wasm-level source info. return c.ssaBuilder.Format() } - -// getOrCreateTrapBlock returns the trap block for the given trap code. -func (c *Compiler) getOrCreateTrapBlock(code wazevoapi.ExitCode) ssa.BasicBlock { - blk := c.trapBlocks[code] - if blk == nil { - blk = c.ssaBuilder.AllocateBasicBlock() - c.trapBlocks[code] = blk - } - return blk -} - -// emitTrapBlocks emits the trap blocks. 
-func (c *Compiler) emitTrapBlocks() { - builder := c.ssaBuilder - for exitCode := wazevoapi.ExitCode(0); exitCode < wazevoapi.ExitCodeCount; exitCode++ { - blk := c.trapBlocks[exitCode] - if blk == nil { - continue - } - builder.SetCurrentBlock(blk) - - exitCodeInstr := builder.AllocateInstruction() - exitCodeInstr.AsIconst32(uint32(exitCode)) - builder.InsertInstruction(exitCodeInstr) - exitCodeVal := exitCodeInstr.Return() - - execCtx := c.execCtxPtrValue - store := builder.AllocateInstruction() - store.AsStore(exitCodeVal, execCtx, wazevoapi.ExecutionContextOffsets.ExitCodeOffset.U32()) - builder.InsertInstruction(store) - - trap := builder.AllocateInstruction() - trap.AsTrap(c.execCtxPtrValue) - builder.InsertInstruction(trap) - } -} diff --git a/internal/engine/wazevo/frontend/frontend_test.go b/internal/engine/wazevo/frontend/frontend_test.go index 5f84131acc..594bedd742 100644 --- a/internal/engine/wazevo/frontend/frontend_test.go +++ b/internal/engine/wazevo/frontend/frontend_test.go @@ -38,12 +38,7 @@ blk0: (exec_ctx:i64, module_ctx:i64) name: "unreachable", m: testcases.Unreachable.Module, exp: ` blk0: (exec_ctx:i64, module_ctx:i64) - Jump blk1 - -blk1: () <-- (blk0) - v2:i32 = Iconst_32 0x2 - Store v2, exec_ctx, 0x0 - Trap exec_ctx + Exit exec_ctx, unreachable `, }, { @@ -178,12 +173,7 @@ blk1: () <-- (blk0) Jump blk_ret blk2: () <-- (blk0) - Jump blk3 - -blk3: () <-- (blk2) - v3:i32 = Iconst_32 0x2 - Store v3, exec_ctx, 0x0 - Trap exec_ctx + Exit exec_ctx, unreachable `, }, { @@ -196,7 +186,6 @@ blk1: () <-- (blk0,blk1) Jump blk1 blk2: () - Jump blk_ret `, expAfterOpt: ` blk0: (exec_ctx:i64, module_ctx:i64) @@ -218,7 +207,6 @@ blk1: () <-- (blk0,blk1) Jump blk3 blk2: () - Jump blk_ret blk3: () <-- (blk1) Return @@ -246,11 +234,10 @@ blk0: (exec_ctx:i64, module_ctx:i64) v5:f64 = F64const 0.000000 Jump blk1 -blk1: () <-- (blk0,blk2) +blk1: () <-- (blk0) Jump blk_ret blk2: () - Jump blk1 `, expAfterOpt: ` blk0: (exec_ctx:i64, module_ctx:i64) @@ -376,8 +363,14 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) blk1: (v4:i32) <-- (blk0) Return v4 -blk2: (v5:i32) - Jump blk_ret, v5 +blk2: () +`, + expAfterOpt: ` +blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) + Jump blk1 + +blk1: () <-- (blk0) + Return v2 `, }, { @@ -432,7 +425,6 @@ blk1: (v3:i32) <-- (blk0,blk3) Jump blk4 blk2: () - Jump blk_ret blk3: () <-- (blk4) v4:i32 = Iconst_32 0x1 @@ -883,6 +875,7 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) Jump blk_ret, v5 `, }, + //{name: "memory_loads", m: testcases.MemoryLoads.Module,}, } { tc := tc t.Run(tc.name, func(t *testing.T) { diff --git a/internal/engine/wazevo/frontend/lower.go b/internal/engine/wazevo/frontend/lower.go index 00cc7c3fb3..35482e9629 100644 --- a/internal/engine/wazevo/frontend/lower.go +++ b/internal/engine/wazevo/frontend/lower.go @@ -118,7 +118,7 @@ func (l *loweringState) ctrlPeekAt(n int) (ret *controlFrame) { return &l.controlFrames[tail-n] } -const debug = false +const debug = true // lowerBody lowers the body of the Wasm function to the SSA form. 
func (c *Compiler) lowerBody(entryBlk ssa.BasicBlock) { @@ -424,7 +424,7 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) { return } variable := c.localVariable(index) - v := builder.FindValue(variable) + v := builder.MustFindValue(variable) state.push(v) case wasm.OpcodeLocalSet: index := c.readI32u() @@ -434,6 +434,26 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) { variable := c.localVariable(index) v := state.pop() builder.DefineVariableInCurrentBB(variable, v) + //case wasm.OpcodeI32Load, wasm.OpcodeI64Load, wasm.OpcodeF32Load, wasm.OpcodeF64Load: + // _, offset, _ := c.readMemArg() + // if state.unreachable { + // return + // } + // + // var ceil = offset + // switch op { + // case wasm.OpcodeI32Load, wasm.OpcodeF32Load: + // ceil += 4 + // case wasm.OpcodeI64Load, wasm.OpcodeF64Load: + // ceil += 8 + // } + // + // baseAdr := state.pop() + // memBase := c.getMemoryBaseValue() + // memLen := c.getMemoryLenValue() + // + // // Check for out of bounds memory access. + case wasm.OpcodeBlock: // Note: we do not need to create a BB for this as that would always have only one predecessor // which is the current BB, and therefore it's always ok to merge them in any way. @@ -659,10 +679,9 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) { state.unreachable = true case wasm.OpcodeUnreachable: - // TODO: in order to assign the correct source address, we need to have - // a dedicated block before jumping to `trapBlk` which is shared across functions. - trapBlk := c.getOrCreateTrapBlock(wazevoapi.ExitCodeUnreachable) - c.insertJumpToBlock(nil, trapBlk) + exit := builder.AllocateInstruction() + exit.AsExitWithCode(c.execCtxPtrValue, wazevoapi.ExitCodeUnreachable) + builder.InsertInstruction(exit) state.unreachable = true case wasm.OpcodeCall: @@ -720,6 +739,33 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) { } } +func (c *Compiler) getMemoryBaseValue() ssa.Value { + if c.offset.LocalMemoryBegin < 0 { + panic("TODO: imported memory") + } + return c.getModuleCtxValue(c.memoryBaseVariable, c.offset.LocalMemoryBase(), ssa.TypeI64) +} + +func (c *Compiler) getMemoryLenValue() ssa.Value { + if c.offset.LocalMemoryBegin < 0 { + panic("TODO: imported memory") + } + return c.getModuleCtxValue(c.memoryLenVariable, c.offset.LocalMemoryLen(), ssa.TypeI64) +} + +func (c *Compiler) getModuleCtxValue(variable ssa.Variable, offset wazevoapi.Offset, typ ssa.Type) ssa.Value { + builder := c.ssaBuilder + if v := builder.FindValue(variable); v.Valid() { + return v + } + load := builder.AllocateInstruction() + load.AsLoad(c.moduleCtxPtrValue, uint32(offset), typ) + builder.InsertInstruction(load) + ret := load.Return() + builder.DefineVariableInCurrentBB(variable, ret) + return ret +} + func (c *Compiler) insertIcmp(cond ssa.IntegerCmpCond) { state, builder := &c.loweringState, c.ssaBuilder y, x := state.pop(), state.pop() @@ -802,6 +848,26 @@ func (c *Compiler) readBlockType() *wasm.FunctionType { return bt } +func (c *Compiler) readMemArg() (align, offset uint32, err error) { + state := &c.loweringState + + align, num, err := leb128.LoadUint32(c.wasmFunctionBody[state.pc+1:]) + if err != nil { + err = fmt.Errorf("read memory align: %v", err) + return + } + + state.pc += int(num) + offset, num, err = leb128.LoadUint32(c.wasmFunctionBody[state.pc+1:]) + if err != nil { + err = fmt.Errorf("read memory offset: %v", err) + return + } + + state.pc += int(num) + return align, offset, nil +} + // insertJumpToBlock inserts a jump instruction to the given block in the current block. 
func (c *Compiler) insertJumpToBlock(args []ssa.Value, targetBlk ssa.BasicBlock) { builder := c.ssaBuilder @@ -826,6 +892,10 @@ func (c *Compiler) insertIntegerExtend(signed bool, from, to byte) { } func (c *Compiler) switchTo(originalStackLen int, targetBlk ssa.BasicBlock) { + if targetBlk.Preds() == 0 { + c.loweringState.unreachable = true + } + // Now we should adjust the stack and start translating the continuation block. c.loweringState.values = c.loweringState.values[:originalStackLen] diff --git a/internal/engine/wazevo/ssa/basic_block.go b/internal/engine/wazevo/ssa/basic_block.go index 0cbedb8b2c..63258b29d3 100644 --- a/internal/engine/wazevo/ssa/basic_block.go +++ b/internal/engine/wazevo/ssa/basic_block.go @@ -56,6 +56,8 @@ type BasicBlock interface { BeginPredIterator() BasicBlock // NextPredIterator returns the next predecessor of this block. NextPredIterator() BasicBlock + // Preds returns the number of predecessors of this block. + Preds() int } type ( @@ -217,6 +219,11 @@ func (bb *basicBlock) NextPredIterator() BasicBlock { return pred } +// Preds implements BasicBlock.Preds. +func (bb *basicBlock) Preds() int { + return len(bb.preds) +} + // Root implements BasicBlock.Root. func (bb *basicBlock) Root() *Instruction { return bb.rootInstr diff --git a/internal/engine/wazevo/ssa/builder.go b/internal/engine/wazevo/ssa/builder.go index 8750ed7b36..db796c319e 100644 --- a/internal/engine/wazevo/ssa/builder.go +++ b/internal/engine/wazevo/ssa/builder.go @@ -48,7 +48,11 @@ type Builder interface { // allocateValue allocates an unused Value. allocateValue(typ Type) Value - // FindValue searches the latest definition of the given Variable and returns the result. + // MustFindValue searches the latest definition of the given Variable and returns the result. + MustFindValue(variable Variable) Value + + // FindValue is the same as FindValue except that if the Value is not defined anywhere yet, + // the given callback `create` is called to create a new Value. FindValue(variable Variable) Value // Seal declares that we've known all the predecessors to this block and were added via AddPred. @@ -350,35 +354,50 @@ func (b *builder) allocateValue(typ Type) (v Value) { return } +// MustFindValue implements Builder.MustFindValue. +func (b *builder) MustFindValue(variable Variable) Value { + typ := b.definedVariableType(variable) + return b.findValue(typ, variable, b.currentBB, true) +} + // FindValue implements Builder.FindValue. func (b *builder) FindValue(variable Variable) Value { typ := b.definedVariableType(variable) - return b.findValue(typ, variable, b.currentBB) + return b.findValue(typ, variable, b.currentBB, false) } // findValue recursively tries to find the latest definition of a `variable`. The algorithm is described in // the section 2 of the paper https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf. // // TODO: reimplement this in iterative, not recursive, to avoid stack overflow. -func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value { +func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock, must bool) Value { if val, ok := blk.lastDefinitions[variable]; ok { // The value is already defined in this block! return val } else if !blk.sealed { // Incomplete CFG as in the paper. - // If this is not sealed, that means it might have additional unknown predecessor later on. - // So we temporarily define the placeholder value here (not add as a parameter yet!), - // and record it as unknown. 
- // The unknown values are resolved when we call seal this block via BasicBlock.Seal(). - value := b.allocateValue(typ) - blk.lastDefinitions[variable] = value - blk.unknownValues[variable] = value - return value + if must { + // If this is not sealed, that means it might have additional unknown predecessor later on. + // So we temporarily define the placeholder value here (not add as a parameter yet!), + // and record it as unknown. + // The unknown values are resolved when we call seal this block via BasicBlock.Seal(). + value := b.allocateValue(typ) + blk.lastDefinitions[variable] = value + blk.unknownValues[variable] = value + return value + } + return ValueInvalid } if pred := blk.singlePred; pred != nil { // If this block is sealed and have only one predecessor, // we can use the value in that block without ambiguity on definition. - return b.findValue(typ, variable, pred) + return b.findValue(typ, variable, pred, must) + } else if len(blk.preds) == 0 { + // This case the value is not defined. + if must { + panic("BUG: value is not defined for " + variable.String()) + } + return ValueInvalid } // If this block has multiple predecessors, we have to gather the definitions, @@ -393,7 +412,7 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value for i := range blk.preds { pred := &blk.preds[i] // Find the definition in the predecessor recursively. - value := b.findValue(typ, variable, pred.blk) + value := b.findValue(typ, variable, pred.blk, must) pred.branch.addArgumentBranchInst(value) } return paramValue @@ -412,7 +431,10 @@ func (b *builder) Seal(raw BasicBlock) { blk.addParamOn(typ, phiValue) for i := range blk.preds { pred := &blk.preds[i] - predValue := b.findValue(typ, variable, pred.blk) + predValue := b.findValue(typ, variable, pred.blk, false) + if !predValue.Valid() { + panic("BUG: value is not defined anywhere in the predecessors in the CFG") + } pred.branch.addArgumentBranchInst(predValue) } } @@ -655,8 +677,9 @@ func (b *builder) LayoutBlocks() { } for sidx, succ := range blk.success { - if len(succ.preds) < 2 { - // If there's no multiple incoming edges to this successor, (pred, succ) is not critical. + if !succ.ReturnBlock() && // If the successor is a return block, we need to split the edge any way because we need "epilogue" to be inserted. + // Plus if there's no multiple incoming edges to this successor, (pred, succ) is not critical. + len(succ.preds) < 2 { continue } diff --git a/internal/engine/wazevo/ssa/instructions.go b/internal/engine/wazevo/ssa/instructions.go index 4373640255..4153626bfa 100644 --- a/internal/engine/wazevo/ssa/instructions.go +++ b/internal/engine/wazevo/ssa/instructions.go @@ -2,6 +2,7 @@ package ssa import ( "fmt" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" "math" "strings" ) @@ -127,8 +128,8 @@ const ( // `BrTable x, block, JT`. OpcodeBrTable - // OpcodeTrap exit the execution immediately. - OpcodeTrap + // OpcodeExitWithCode exit the execution immediately. + OpcodeExitWithCode // OpcodeReturn returns from the function: `return rvalues`. 
OpcodeReturn @@ -852,7 +853,7 @@ var instructionSideEffects = [opcodeEnd]sideEffect{ OpcodeSshr: sideEffectFalse, OpcodeUshr: sideEffectFalse, OpcodeStore: sideEffectTrue, - OpcodeTrap: sideEffectTrue, + OpcodeExitWithCode: sideEffectTrue, OpcodeReturn: sideEffectTrue, OpcodeBrz: sideEffectTrue, OpcodeBrnz: sideEffectTrue, @@ -912,25 +913,25 @@ var instructionReturnTypes = [opcodeEnd]returnTypesFn{ } return }, - OpcodeLoad: returnTypesFnSingle, - OpcodeIadd: returnTypesFnSingle, - OpcodeIsub: returnTypesFnSingle, - OpcodeImul: returnTypesFnSingle, - OpcodeIcmp: returnTypesFnI32, - OpcodeFcmp: returnTypesFnI32, - OpcodeFadd: returnTypesFnSingle, - OpcodeFsub: returnTypesFnSingle, - OpcodeFdiv: returnTypesFnSingle, - OpcodeFmul: returnTypesFnSingle, - OpcodeFmax: returnTypesFnSingle, - OpcodeFmin: returnTypesFnSingle, - OpcodeF32const: returnTypesFnF32, - OpcodeF64const: returnTypesFnF64, - OpcodeStore: returnTypesFnNoReturns, - OpcodeTrap: returnTypesFnNoReturns, - OpcodeReturn: returnTypesFnNoReturns, - OpcodeBrz: returnTypesFnNoReturns, - OpcodeBrnz: returnTypesFnNoReturns, + OpcodeLoad: returnTypesFnSingle, + OpcodeIadd: returnTypesFnSingle, + OpcodeIsub: returnTypesFnSingle, + OpcodeImul: returnTypesFnSingle, + OpcodeIcmp: returnTypesFnI32, + OpcodeFcmp: returnTypesFnI32, + OpcodeFadd: returnTypesFnSingle, + OpcodeFsub: returnTypesFnSingle, + OpcodeFdiv: returnTypesFnSingle, + OpcodeFmul: returnTypesFnSingle, + OpcodeFmax: returnTypesFnSingle, + OpcodeFmin: returnTypesFnSingle, + OpcodeF32const: returnTypesFnF32, + OpcodeF64const: returnTypesFnF64, + OpcodeStore: returnTypesFnNoReturns, + OpcodeExitWithCode: returnTypesFnNoReturns, + OpcodeReturn: returnTypesFnNoReturns, + OpcodeBrz: returnTypesFnNoReturns, + OpcodeBrnz: returnTypesFnNoReturns, } // AsLoad initializes this instruction as a store instruction with OpcodeLoad. @@ -1127,10 +1128,16 @@ func (i *Instruction) ReturnVals() []Value { return i.vs } -// AsTrap initializes this instruction as a trap instruction with OpcodeTrap. -func (i *Instruction) AsTrap(ctx Value) { - i.opcode = OpcodeTrap +// AsExitWithCode initializes this instruction as a trap instruction with OpcodeExitWithCode. +func (i *Instruction) AsExitWithCode(ctx Value, code wazevoapi.ExitCode) { + i.opcode = OpcodeExitWithCode i.v = ctx + i.u64 = uint64(code) +} + +// ExitWithCodeData returns the context and exit code of OpcodeExitWithCode. +func (i *Instruction) ExitWithCodeData() (ctx Value, code wazevoapi.ExitCode) { + return i.v, wazevoapi.ExitCode(i.u64) } // InvertBrx inverts either OpcodeBrz or OpcodeBrnz to the other. 
@@ -1287,8 +1294,8 @@ func (i *Instruction) ExtendFromToBits() (from, to byte) { func (i *Instruction) Format(b Builder) string { var instSuffix string switch i.opcode { - case OpcodeTrap: - instSuffix = fmt.Sprintf(" %s", i.v.Format(b)) + case OpcodeExitWithCode: + instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), wazevoapi.ExitCode(i.u64)) case OpcodeIadd, OpcodeIsub, OpcodeImul, OpcodeFadd, OpcodeFsub, OpcodeFmin, OpcodeFmax, OpcodeFdiv, OpcodeFmul: instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), i.v2.Format(b)) case OpcodeIcmp: @@ -1419,8 +1426,8 @@ func (o Opcode) String() (ret string) { return "Brnz" case OpcodeBrTable: return "BrTable" - case OpcodeTrap: - return "Trap" + case OpcodeExitWithCode: + return "Exit" case OpcodeReturn: return "Return" case OpcodeCall: diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 668f8c30d3..572d6ddf1c 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -134,7 +134,7 @@ blk3: () <-- (blk1,blk2) { // At this point, loop is not sealed, so PHI will be added to this header. However, the only // input to the PHI is iConst above, so there must be an alias to iConst from the PHI value. - value := b.FindValue(var1) + value := b.MustFindValue(var1) tmpInst := b.AllocateInstruction() tmpInst.AsIconst32(0xff) diff --git a/internal/engine/wazevo/testcases/testcases.go b/internal/engine/wazevo/testcases/testcases.go index a628db360c..082d5020ae 100644 --- a/internal/engine/wazevo/testcases/testcases.go +++ b/internal/engine/wazevo/testcases/testcases.go @@ -1,6 +1,7 @@ package testcases import ( + "github.com/tetratelabs/wazero/internal/leb128" "math" "github.com/tetratelabs/wazero/internal/wasm" @@ -778,6 +779,53 @@ var ( }}}, }, } + MemoryLoads = TestCase{ + Name: "memory_loads", + Module: &wasm.Module{ + TypeSection: []wasm.FunctionType{{ + Params: []wasm.ValueType{i32}, + Results: []wasm.ValueType{ + i32, i64, f32, f64, i32, i64, f32, f64, + }, + }}, + MemorySection: &wasm.Memory{Min: 1}, + FunctionSection: []wasm.Index{0}, + CodeSection: []wasm.Code{{Body: []byte{ + wasm.OpcodeLocalGet, 0, + // Basic loads (without extensions). 
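+				// Each load opcode is followed by its memarg: an alignment exponent and a static byte offset (both LEB128-encoded).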
+ wasm.OpcodeI32Load, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load, 0x3, 0x0, // alignment=4 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeF32Load, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeF64Load, 0x3, 0x0, // alignment=4 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI32Load, 0x2, 0xf, // alignment=2 (natural alignment) staticOffset=16 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load, 0x3, 0xf, // alignment=4 (natural alignment) staticOffset=16 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeF32Load, 0x2, 0xf, // alignment=2 (natural alignment) staticOffset=16 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeF64Load, 0x3, 0xf, // alignment=4 (natural alignment) staticOffset=16 + + // TODO: + // wasm.OpcodeI32Load8S + // wasm.OpcodeI32Load8U + // wasm.OpcodeI32Load16S + // wasm.OpcodeI32Load16U + // wasm.OpcodeI64Load8S + // wasm.OpcodeI64Load8U + // wasm.OpcodeI64Load16S + // wasm.OpcodeI64Load16U + // wasm.OpcodeI64Load32S + // wasm.OpcodeI64Load32U + + wasm.OpcodeEnd, + }}}, + DataSection: []wasm.DataSegment{{OffsetExpression: constOffsetExpr(0), Init: maskedBuf(int(wasm.MemoryPageSize))}}, + }, + } ) type TestCase struct { @@ -818,3 +866,18 @@ const ( blockSignature_vv = 0x40 // 0x40 is the v_v signature in 33-bit signed. See wasm.DecodeBlockType. ) + +func maskedBuf(size int) []byte { + ret := make([]byte, size) + for i := range ret { + ret[i] = byte(i) + } + return ret +} + +func constOffsetExpr(i int32) wasm.ConstantExpression { + return wasm.ConstantExpression{ + Opcode: wasm.OpcodeI32Const, + Data: leb128.EncodeInt32(i), + } +} diff --git a/internal/engine/wazevo/wazevoapi/exitcode.go b/internal/engine/wazevo/wazevoapi/exitcode.go index 9f93b90220..2df01ff574 100644 --- a/internal/engine/wazevo/wazevoapi/exitcode.go +++ b/internal/engine/wazevo/wazevoapi/exitcode.go @@ -1,10 +1,23 @@ package wazevoapi -type ExitCode byte +// ExitCode is an exit code of an execution of a function. +type ExitCode uint32 const ( ExitCodeOK ExitCode = iota ExitCodeGrowStack ExitCodeUnreachable - ExitCodeCount ) + +// String implements fmt.Stringer. +func (e ExitCode) String() string { + switch e { + case ExitCodeOK: + return "ok" + case ExitCodeGrowStack: + return "grow_stack" + case ExitCodeUnreachable: + return "unreachable" + } + panic("TODO") +} diff --git a/internal/engine/wazevo/wazevoapi/offsetdata.go b/internal/engine/wazevo/wazevoapi/offsetdata.go index 26f4e3676d..8fd21fe2e4 100644 --- a/internal/engine/wazevo/wazevoapi/offsetdata.go +++ b/internal/engine/wazevo/wazevoapi/offsetdata.go @@ -65,6 +65,19 @@ func (o Offset) I64() int64 { return int64(o) } +// LocalMemoryBase returns an offset of the first byte of the local memory. +func (m *ModuleContextOffsetData) LocalMemoryBase() Offset { + return m.LocalMemoryBegin +} + +// LocalMemoryLen returns an offset of the length of the local memory buffer. +func (m *ModuleContextOffsetData) LocalMemoryLen() Offset { + if l := m.LocalMemoryBegin; l >= 0 { + return l + 8 + } + return -1 +} + // NewModuleContextOffsetData creates a ModuleContextOffsetData determining the structure of moduleContextOpaque for the given Module. // The structure is described in the comment of wazevo.moduleContextOpaque. 
func NewModuleContextOffsetData(m *wasm.Module) ModuleContextOffsetData { From b7447f6fd915762a34060f90d984161ed60046f7 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Thu, 10 Aug 2023 15:53:24 +0900 Subject: [PATCH 2/8] frontend Signed-off-by: Takeshi Yoneda --- .../engine/wazevo/frontend/frontend_test.go | 178 +++++++++++++++++- internal/engine/wazevo/frontend/lower.go | 142 +++++++++++--- internal/engine/wazevo/ssa/instructions.go | 170 ++++++++++------- internal/engine/wazevo/testcases/testcases.go | 66 +++++-- internal/engine/wazevo/wazevoapi/exitcode.go | 3 + 5 files changed, 441 insertions(+), 118 deletions(-) diff --git a/internal/engine/wazevo/frontend/frontend_test.go b/internal/engine/wazevo/frontend/frontend_test.go index 594bedd742..12c7f351a5 100644 --- a/internal/engine/wazevo/frontend/frontend_test.go +++ b/internal/engine/wazevo/frontend/frontend_test.go @@ -875,7 +875,183 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) Jump blk_ret, v5 `, }, - //{name: "memory_loads", m: testcases.MemoryLoads.Module,}, + { + name: "memory_loads", m: testcases.MemoryLoads.Module, + exp: ` +blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) + v3:i64 = Iconst_64 0x4 + v4:i64 = UExtend v2, 32->64 + v5:i64 = Uload32 module_ctx, 0x8 + v6:i64 = Iadd v4, v3 + v7:i32 = Icmp lt_u, v5, v6 + ExitIfNotZero v7, exec_ctx, memory_out_of_bounds + v8:i64 = Uload32 module_ctx, 0x0 + v9:i32 = Load v8, 0x0 + v10:i64 = Iconst_64 0x8 + v11:i64 = UExtend v2, 32->64 + v12:i64 = Iadd v11, v10 + v13:i32 = Icmp lt_u, v5, v12 + ExitIfNotZero v13, exec_ctx, memory_out_of_bounds + v14:i64 = Load v8, 0x0 + v15:i64 = Iconst_64 0x4 + v16:i64 = UExtend v2, 32->64 + v17:i64 = Iadd v16, v15 + v18:i32 = Icmp lt_u, v5, v17 + ExitIfNotZero v18, exec_ctx, memory_out_of_bounds + v19:f32 = Load v8, 0x0 + v20:i64 = Iconst_64 0x8 + v21:i64 = UExtend v2, 32->64 + v22:i64 = Iadd v21, v20 + v23:i32 = Icmp lt_u, v5, v22 + ExitIfNotZero v23, exec_ctx, memory_out_of_bounds + v24:f64 = Load v8, 0x0 + v25:i64 = Iconst_64 0x13 + v26:i64 = UExtend v2, 32->64 + v27:i64 = Iadd v26, v25 + v28:i32 = Icmp lt_u, v5, v27 + ExitIfNotZero v28, exec_ctx, memory_out_of_bounds + v29:i32 = Load v8, 0xf + v30:i64 = Iconst_64 0x17 + v31:i64 = UExtend v2, 32->64 + v32:i64 = Iadd v31, v30 + v33:i32 = Icmp lt_u, v5, v32 + ExitIfNotZero v33, exec_ctx, memory_out_of_bounds + v34:i64 = Load v8, 0xf + v35:i64 = Iconst_64 0x13 + v36:i64 = UExtend v2, 32->64 + v37:i64 = Iadd v36, v35 + v38:i32 = Icmp lt_u, v5, v37 + ExitIfNotZero v38, exec_ctx, memory_out_of_bounds + v39:f32 = Load v8, 0xf + v40:i64 = Iconst_64 0x17 + v41:i64 = UExtend v2, 32->64 + v42:i64 = Iadd v41, v40 + v43:i32 = Icmp lt_u, v5, v42 + ExitIfNotZero v43, exec_ctx, memory_out_of_bounds + v44:f64 = Load v8, 0xf + v45:i64 = Iconst_64 0x1 + v46:i64 = UExtend v2, 32->64 + v47:i64 = Iadd v46, v45 + v48:i32 = Icmp lt_u, v5, v47 + ExitIfNotZero v48, exec_ctx, memory_out_of_bounds + v49:i32 = Sload8 v8, 0x0 + v50:i64 = Iconst_64 0x10 + v51:i64 = UExtend v2, 32->64 + v52:i64 = Iadd v51, v50 + v53:i32 = Icmp lt_u, v5, v52 + ExitIfNotZero v53, exec_ctx, memory_out_of_bounds + v54:i32 = Sload8 v8, 0xf + v55:i64 = Iconst_64 0x1 + v56:i64 = UExtend v2, 32->64 + v57:i64 = Iadd v56, v55 + v58:i32 = Icmp lt_u, v5, v57 + ExitIfNotZero v58, exec_ctx, memory_out_of_bounds + v59:i32 = Uload8 v8, 0x0 + v60:i64 = Iconst_64 0x10 + v61:i64 = UExtend v2, 32->64 + v62:i64 = Iadd v61, v60 + v63:i32 = Icmp lt_u, v5, v62 + ExitIfNotZero v63, exec_ctx, memory_out_of_bounds + v64:i32 = Uload8 v8, 0xf + v65:i64 = Iconst_64 0x2 + 
v66:i64 = UExtend v2, 32->64 + v67:i64 = Iadd v66, v65 + v68:i32 = Icmp lt_u, v5, v67 + ExitIfNotZero v68, exec_ctx, memory_out_of_bounds + v69:i32 = Sload16 v8, 0x0 + v70:i64 = Iconst_64 0x11 + v71:i64 = UExtend v2, 32->64 + v72:i64 = Iadd v71, v70 + v73:i32 = Icmp lt_u, v5, v72 + ExitIfNotZero v73, exec_ctx, memory_out_of_bounds + v74:i32 = Sload16 v8, 0xf + v75:i64 = Iconst_64 0x2 + v76:i64 = UExtend v2, 32->64 + v77:i64 = Iadd v76, v75 + v78:i32 = Icmp lt_u, v5, v77 + ExitIfNotZero v78, exec_ctx, memory_out_of_bounds + v79:i32 = Uload16 v8, 0x0 + v80:i64 = Iconst_64 0x11 + v81:i64 = UExtend v2, 32->64 + v82:i64 = Iadd v81, v80 + v83:i32 = Icmp lt_u, v5, v82 + ExitIfNotZero v83, exec_ctx, memory_out_of_bounds + v84:i32 = Uload16 v8, 0xf + v85:i64 = Iconst_64 0x1 + v86:i64 = UExtend v2, 32->64 + v87:i64 = Iadd v86, v85 + v88:i32 = Icmp lt_u, v5, v87 + ExitIfNotZero v88, exec_ctx, memory_out_of_bounds + v89:i64 = Sload8 v8, 0x0 + v90:i64 = Iconst_64 0x10 + v91:i64 = UExtend v2, 32->64 + v92:i64 = Iadd v91, v90 + v93:i32 = Icmp lt_u, v5, v92 + ExitIfNotZero v93, exec_ctx, memory_out_of_bounds + v94:i64 = Sload8 v8, 0xf + v95:i64 = Iconst_64 0x1 + v96:i64 = UExtend v2, 32->64 + v97:i64 = Iadd v96, v95 + v98:i32 = Icmp lt_u, v5, v97 + ExitIfNotZero v98, exec_ctx, memory_out_of_bounds + v99:i64 = Uload8 v8, 0x0 + v100:i64 = Iconst_64 0x10 + v101:i64 = UExtend v2, 32->64 + v102:i64 = Iadd v101, v100 + v103:i32 = Icmp lt_u, v5, v102 + ExitIfNotZero v103, exec_ctx, memory_out_of_bounds + v104:i64 = Uload8 v8, 0xf + v105:i64 = Iconst_64 0x2 + v106:i64 = UExtend v2, 32->64 + v107:i64 = Iadd v106, v105 + v108:i32 = Icmp lt_u, v5, v107 + ExitIfNotZero v108, exec_ctx, memory_out_of_bounds + v109:i64 = Sload16 v8, 0x0 + v110:i64 = Iconst_64 0x11 + v111:i64 = UExtend v2, 32->64 + v112:i64 = Iadd v111, v110 + v113:i32 = Icmp lt_u, v5, v112 + ExitIfNotZero v113, exec_ctx, memory_out_of_bounds + v114:i64 = Sload16 v8, 0xf + v115:i64 = Iconst_64 0x2 + v116:i64 = UExtend v2, 32->64 + v117:i64 = Iadd v116, v115 + v118:i32 = Icmp lt_u, v5, v117 + ExitIfNotZero v118, exec_ctx, memory_out_of_bounds + v119:i64 = Uload16 v8, 0x0 + v120:i64 = Iconst_64 0x11 + v121:i64 = UExtend v2, 32->64 + v122:i64 = Iadd v121, v120 + v123:i32 = Icmp lt_u, v5, v122 + ExitIfNotZero v123, exec_ctx, memory_out_of_bounds + v124:i64 = Uload16 v8, 0xf + v125:i64 = Iconst_64 0x4 + v126:i64 = UExtend v2, 32->64 + v127:i64 = Iadd v126, v125 + v128:i32 = Icmp lt_u, v5, v127 + ExitIfNotZero v128, exec_ctx, memory_out_of_bounds + v129:i64 = Sload32 v8, 0x0 + v130:i64 = Iconst_64 0x13 + v131:i64 = UExtend v2, 32->64 + v132:i64 = Iadd v131, v130 + v133:i32 = Icmp lt_u, v5, v132 + ExitIfNotZero v133, exec_ctx, memory_out_of_bounds + v134:i64 = Sload32 v8, 0xf + v135:i64 = Iconst_64 0x4 + v136:i64 = UExtend v2, 32->64 + v137:i64 = Iadd v136, v135 + v138:i32 = Icmp lt_u, v5, v137 + ExitIfNotZero v138, exec_ctx, memory_out_of_bounds + v139:i64 = Uload32 v8, 0x0 + v140:i64 = Iconst_64 0x13 + v141:i64 = UExtend v2, 32->64 + v142:i64 = Iadd v141, v140 + v143:i32 = Icmp lt_u, v5, v142 + ExitIfNotZero v143, exec_ctx, memory_out_of_bounds + v144:i64 = Uload32 v8, 0xf + Jump blk_ret, v9, v14, v19, v24, v29, v34, v39, v44, v49, v54, v59, v64, v69, v74, v79, v84, v89, v94, v99, v104, v109, v114, v119, v124, v129, v134, v139, v144 +`, + }, } { tc := tc t.Run(tc.name, func(t *testing.T) { diff --git a/internal/engine/wazevo/frontend/lower.go b/internal/engine/wazevo/frontend/lower.go index 35482e9629..c42808d88a 100644 --- 
a/internal/engine/wazevo/frontend/lower.go +++ b/internal/engine/wazevo/frontend/lower.go @@ -118,7 +118,7 @@ func (l *loweringState) ctrlPeekAt(n int) (ret *controlFrame) { return &l.controlFrames[tail-n] } -const debug = true +const debug = false // lowerBody lowers the body of the Wasm function to the SSA form. func (c *Compiler) lowerBody(entryBlk ssa.BasicBlock) { @@ -434,26 +434,108 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) { variable := c.localVariable(index) v := state.pop() builder.DefineVariableInCurrentBB(variable, v) - //case wasm.OpcodeI32Load, wasm.OpcodeI64Load, wasm.OpcodeF32Load, wasm.OpcodeF64Load: - // _, offset, _ := c.readMemArg() - // if state.unreachable { - // return - // } - // - // var ceil = offset - // switch op { - // case wasm.OpcodeI32Load, wasm.OpcodeF32Load: - // ceil += 4 - // case wasm.OpcodeI64Load, wasm.OpcodeF64Load: - // ceil += 8 - // } - // - // baseAdr := state.pop() - // memBase := c.getMemoryBaseValue() - // memLen := c.getMemoryLenValue() - // - // // Check for out of bounds memory access. - + case wasm.OpcodeI32Load, + wasm.OpcodeI64Load, + wasm.OpcodeF32Load, + wasm.OpcodeF64Load, + wasm.OpcodeI32Load8S, + wasm.OpcodeI32Load8U, + wasm.OpcodeI32Load16S, + wasm.OpcodeI32Load16U, + wasm.OpcodeI64Load8S, + wasm.OpcodeI64Load8U, + wasm.OpcodeI64Load16S, + wasm.OpcodeI64Load16U, + wasm.OpcodeI64Load32S, + wasm.OpcodeI64Load32U: + _, offset := c.readMemArg() + if state.unreachable { + return + } + + var ceil = offset + switch op { + case wasm.OpcodeI32Load, wasm.OpcodeF32Load: + ceil += 4 + case wasm.OpcodeI64Load, wasm.OpcodeF64Load: + ceil += 8 + case wasm.OpcodeI32Load8S, wasm.OpcodeI32Load8U: + ceil += 1 + case wasm.OpcodeI32Load16S, wasm.OpcodeI32Load16U: + ceil += 2 + case wasm.OpcodeI64Load8S, wasm.OpcodeI64Load8U: + ceil += 1 + case wasm.OpcodeI64Load16S, wasm.OpcodeI64Load16U: + ceil += 2 + case wasm.OpcodeI64Load32S, wasm.OpcodeI64Load32U: + ceil += 4 + default: + panic("BUG") + } + + ceilConst := builder.AllocateInstruction() + ceilConst.AsIconst64(uint64(ceil)) + builder.InsertInstruction(ceilConst) + + // We calculate the offset in 64-bit space. + baseAddr := state.pop() + extBaseAddr := builder.AllocateInstruction() + extBaseAddr.AsUExtend(baseAddr, 32, 64) + builder.InsertInstruction(extBaseAddr) + + // Note: memLen is already zero extended to 64-bit space at the load time. + memLen := c.getMemoryLenValue() + + // baseAddrPlusCeil = baseAddr + ceil + baseAddrPlusCeil := builder.AllocateInstruction() + baseAddrPlusCeil.AsIadd(extBaseAddr.Return(), ceilConst.Return()) + builder.InsertInstruction(baseAddrPlusCeil) + + // Check for out of bounds memory access: `baseAddrPlusCeil > memLen`. + cmp := builder.AllocateInstruction() + cmp.AsIcmp(memLen, baseAddrPlusCeil.Return(), ssa.IntegerCmpCondUnsignedLessThan) + builder.InsertInstruction(cmp) + exitIfNZ := builder.AllocateInstruction() + exitIfNZ.AsExitIfNotZeroWithCode(c.execCtxPtrValue, cmp.Return(), wazevoapi.ExitCodeMemoryOutOfBounds) + builder.InsertInstruction(exitIfNZ) + + // Load the value. 
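+		// Plain loads lower to AsLoad with the value type; the narrower integer loads lower to AsExtLoad
+		// with the corresponding sign- or zero-extending SSA opcode (Sload8/Uload8 through Uload32).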
+ memBase := c.getMemoryBaseValue() + load := builder.AllocateInstruction() + switch op { + case wasm.OpcodeI32Load: + load.AsLoad(memBase, offset, ssa.TypeI32) + case wasm.OpcodeI64Load: + load.AsLoad(memBase, offset, ssa.TypeI64) + case wasm.OpcodeF32Load: + load.AsLoad(memBase, offset, ssa.TypeF32) + case wasm.OpcodeF64Load: + load.AsLoad(memBase, offset, ssa.TypeF64) + case wasm.OpcodeI32Load8S: + load.AsExtLoad(ssa.OpcodeSload8, memBase, offset, false) + case wasm.OpcodeI32Load8U: + load.AsExtLoad(ssa.OpcodeUload8, memBase, offset, false) + case wasm.OpcodeI32Load16S: + load.AsExtLoad(ssa.OpcodeSload16, memBase, offset, false) + case wasm.OpcodeI32Load16U: + load.AsExtLoad(ssa.OpcodeUload16, memBase, offset, false) + case wasm.OpcodeI64Load8S: + load.AsExtLoad(ssa.OpcodeSload8, memBase, offset, true) + case wasm.OpcodeI64Load8U: + load.AsExtLoad(ssa.OpcodeUload8, memBase, offset, true) + case wasm.OpcodeI64Load16S: + load.AsExtLoad(ssa.OpcodeSload16, memBase, offset, true) + case wasm.OpcodeI64Load16U: + load.AsExtLoad(ssa.OpcodeUload16, memBase, offset, true) + case wasm.OpcodeI64Load32S: + load.AsExtLoad(ssa.OpcodeSload32, memBase, offset, true) + case wasm.OpcodeI64Load32U: + load.AsExtLoad(ssa.OpcodeUload32, memBase, offset, true) + default: + panic("BUG") + } + builder.InsertInstruction(load) + state.push(load.Return()) case wasm.OpcodeBlock: // Note: we do not need to create a BB for this as that would always have only one predecessor // which is the current BB, and therefore it's always ok to merge them in any way. @@ -743,23 +825,23 @@ func (c *Compiler) getMemoryBaseValue() ssa.Value { if c.offset.LocalMemoryBegin < 0 { panic("TODO: imported memory") } - return c.getModuleCtxValue(c.memoryBaseVariable, c.offset.LocalMemoryBase(), ssa.TypeI64) + return c.getModuleCtxValueI32ZeroExt(c.memoryBaseVariable, c.offset.LocalMemoryBase()) } func (c *Compiler) getMemoryLenValue() ssa.Value { if c.offset.LocalMemoryBegin < 0 { panic("TODO: imported memory") } - return c.getModuleCtxValue(c.memoryLenVariable, c.offset.LocalMemoryLen(), ssa.TypeI64) + return c.getModuleCtxValueI32ZeroExt(c.memoryLenVariable, c.offset.LocalMemoryLen()) } -func (c *Compiler) getModuleCtxValue(variable ssa.Variable, offset wazevoapi.Offset, typ ssa.Type) ssa.Value { +func (c *Compiler) getModuleCtxValueI32ZeroExt(variable ssa.Variable, offset wazevoapi.Offset) ssa.Value { builder := c.ssaBuilder if v := builder.FindValue(variable); v.Valid() { return v } load := builder.AllocateInstruction() - load.AsLoad(c.moduleCtxPtrValue, uint32(offset), typ) + load.AsExtLoad(ssa.OpcodeUload32, c.moduleCtxPtrValue, uint32(offset), true) builder.InsertInstruction(load) ret := load.Return() builder.DefineVariableInCurrentBB(variable, ret) @@ -848,24 +930,22 @@ func (c *Compiler) readBlockType() *wasm.FunctionType { return bt } -func (c *Compiler) readMemArg() (align, offset uint32, err error) { +func (c *Compiler) readMemArg() (align, offset uint32) { state := &c.loweringState align, num, err := leb128.LoadUint32(c.wasmFunctionBody[state.pc+1:]) if err != nil { - err = fmt.Errorf("read memory align: %v", err) - return + panic(fmt.Errorf("read memory align: %v", err)) } state.pc += int(num) offset, num, err = leb128.LoadUint32(c.wasmFunctionBody[state.pc+1:]) if err != nil { - err = fmt.Errorf("read memory offset: %v", err) - return + panic(fmt.Errorf("read memory offset: %v", err)) } state.pc += int(num) - return align, offset, nil + return align, offset } // insertJumpToBlock inserts a jump instruction to the given block 
in the current block. diff --git a/internal/engine/wazevo/ssa/instructions.go b/internal/engine/wazevo/ssa/instructions.go index 4153626bfa..6715ef5a2e 100644 --- a/internal/engine/wazevo/ssa/instructions.go +++ b/internal/engine/wazevo/ssa/instructions.go @@ -131,6 +131,9 @@ const ( // OpcodeExitWithCode exit the execution immediately. OpcodeExitWithCode + // OpcodeExitIfNotZeroWithCode exits the execution immediately if the value `c` is not zero. + OpcodeExitIfNotZeroWithCode + // OpcodeReturn returns from the function: `return rvalues`. OpcodeReturn @@ -199,48 +202,37 @@ const ( // `v = ssub_sat x, y`. OpcodeSsubSat - // OpcodeLoad ... - // `v = load MemFlags, p, Offset`. + // OpcodeLoad loads a Type value from the [base + offset] address: `v = Load base, offset`. OpcodeLoad - // OpcodeStore ... - // `store MemFlags, x, p, Offset`. + // OpcodeStore stores a Type value to the [base + offset] address: `Store v, base, offset`. OpcodeStore - // OpcodeUload8 ... - // `v = uload8 MemFlags, p, Offset`. + // OpcodeUload8 loads the 8-bit value from the [base + offset] address, zero-extended to 64 bits: `v = Uload8 base, offset`. OpcodeUload8 - // OpcodeSload8 ... - // `v = sload8 MemFlags, p, Offset`. + // OpcodeSload8 loads the 8-bit value from the [base + offset] address, sign-extended to 64 bits: `v = Sload8 base, offset`. OpcodeSload8 - // OpcodeIstore8 ... - // `istore8 MemFlags, x, p, Offset`. + // OpcodeIstore8 stores the 8-bit value to the [base + offset] address, sign-extended to 64 bits: `Istore8 v, base, offset`. OpcodeIstore8 - // OpcodeUload16 ... - // `v = uload16 MemFlags, p, Offset`. + // OpcodeUload16 loads the 16-bit value from the [base + offset] address, zero-extended to 64 bits: `v = Uload16 base, offset`. OpcodeUload16 - // OpcodeSload16 ... - // `v = sload16 MemFlags, p, Offset`. + // OpcodeSload16 loads the 16-bit value from the [base + offset] address, sign-extended to 64 bits: `v = Sload16 base, offset`. OpcodeSload16 - // OpcodeIstore16 ... - // `istore16 MemFlags, x, p, Offset`. + // OpcodeIstore16 stores the 16-bit value to the [base + offset] address, zero-extended to 64 bits: `Istore16 v, base, offset`. OpcodeIstore16 - // OpcodeUload32 ... - // `v = uload32 MemFlags, p, Offset`. + // OpcodeUload32 loads the 32-bit value from the [base + offset] address, zero-extended to 64 bits: `v = Uload32 base, offset`. OpcodeUload32 - // OpcodeSload32 ... - // `v = sload32 MemFlags, p, Offset`. + // OpcodeSload32 loads the 32-bit value from the [base + offset] address, sign-extended to 64 bits: `v = Sload32 base, offset`. OpcodeSload32 - // OpcodeIstore32 ... - // `istore32 MemFlags, x, p, Offset`. + // OpcodeIstore32 stores the 32-bit value to the [base + offset] address, zero-extended to 64 bits: `Istore16 v, base, offset`. OpcodeIstore32 // OpcodeUload8x8 ... @@ -833,34 +825,41 @@ const ( // instructionSideEffects provides the info to determine if an instruction has side effects. // Instructions with side effects must not be eliminated regardless whether the result is used or not. 
var instructionSideEffects = [opcodeEnd]sideEffect{ - OpcodeJump: sideEffectTrue, - OpcodeIconst: sideEffectFalse, - OpcodeCall: sideEffectTrue, - OpcodeCallIndirect: sideEffectTrue, - OpcodeIadd: sideEffectFalse, - OpcodeImul: sideEffectFalse, - OpcodeIsub: sideEffectFalse, - OpcodeIcmp: sideEffectFalse, - OpcodeFcmp: sideEffectFalse, - OpcodeFadd: sideEffectFalse, - OpcodeLoad: sideEffectFalse, - OpcodeSExtend: sideEffectFalse, - OpcodeUExtend: sideEffectFalse, - OpcodeFsub: sideEffectFalse, - OpcodeF32const: sideEffectFalse, - OpcodeF64const: sideEffectFalse, - OpcodeIshl: sideEffectFalse, - OpcodeSshr: sideEffectFalse, - OpcodeUshr: sideEffectFalse, - OpcodeStore: sideEffectTrue, - OpcodeExitWithCode: sideEffectTrue, - OpcodeReturn: sideEffectTrue, - OpcodeBrz: sideEffectTrue, - OpcodeBrnz: sideEffectTrue, - OpcodeFdiv: sideEffectFalse, - OpcodeFmul: sideEffectFalse, - OpcodeFmax: sideEffectFalse, - OpcodeFmin: sideEffectFalse, + OpcodeJump: sideEffectTrue, + OpcodeIconst: sideEffectFalse, + OpcodeCall: sideEffectTrue, + OpcodeCallIndirect: sideEffectTrue, + OpcodeIadd: sideEffectFalse, + OpcodeImul: sideEffectFalse, + OpcodeIsub: sideEffectFalse, + OpcodeIcmp: sideEffectFalse, + OpcodeFcmp: sideEffectFalse, + OpcodeFadd: sideEffectFalse, + OpcodeLoad: sideEffectFalse, + OpcodeUload8: sideEffectFalse, + OpcodeUload16: sideEffectFalse, + OpcodeUload32: sideEffectFalse, + OpcodeSload8: sideEffectFalse, + OpcodeSload16: sideEffectFalse, + OpcodeSload32: sideEffectFalse, + OpcodeSExtend: sideEffectFalse, + OpcodeUExtend: sideEffectFalse, + OpcodeFsub: sideEffectFalse, + OpcodeF32const: sideEffectFalse, + OpcodeF64const: sideEffectFalse, + OpcodeIshl: sideEffectFalse, + OpcodeSshr: sideEffectFalse, + OpcodeUshr: sideEffectFalse, + OpcodeStore: sideEffectTrue, + OpcodeExitWithCode: sideEffectTrue, + OpcodeExitIfNotZeroWithCode: sideEffectTrue, + OpcodeReturn: sideEffectTrue, + OpcodeBrz: sideEffectTrue, + OpcodeBrnz: sideEffectTrue, + OpcodeFdiv: sideEffectFalse, + OpcodeFmul: sideEffectFalse, + OpcodeFmax: sideEffectFalse, + OpcodeFmin: sideEffectFalse, } // HasSideEffects returns true if this instruction has side effects. 
@@ -913,25 +912,32 @@ var instructionReturnTypes = [opcodeEnd]returnTypesFn{ } return }, - OpcodeLoad: returnTypesFnSingle, - OpcodeIadd: returnTypesFnSingle, - OpcodeIsub: returnTypesFnSingle, - OpcodeImul: returnTypesFnSingle, - OpcodeIcmp: returnTypesFnI32, - OpcodeFcmp: returnTypesFnI32, - OpcodeFadd: returnTypesFnSingle, - OpcodeFsub: returnTypesFnSingle, - OpcodeFdiv: returnTypesFnSingle, - OpcodeFmul: returnTypesFnSingle, - OpcodeFmax: returnTypesFnSingle, - OpcodeFmin: returnTypesFnSingle, - OpcodeF32const: returnTypesFnF32, - OpcodeF64const: returnTypesFnF64, - OpcodeStore: returnTypesFnNoReturns, - OpcodeExitWithCode: returnTypesFnNoReturns, - OpcodeReturn: returnTypesFnNoReturns, - OpcodeBrz: returnTypesFnNoReturns, - OpcodeBrnz: returnTypesFnNoReturns, + OpcodeLoad: returnTypesFnSingle, + OpcodeIadd: returnTypesFnSingle, + OpcodeIsub: returnTypesFnSingle, + OpcodeImul: returnTypesFnSingle, + OpcodeIcmp: returnTypesFnI32, + OpcodeFcmp: returnTypesFnI32, + OpcodeFadd: returnTypesFnSingle, + OpcodeFsub: returnTypesFnSingle, + OpcodeFdiv: returnTypesFnSingle, + OpcodeFmul: returnTypesFnSingle, + OpcodeFmax: returnTypesFnSingle, + OpcodeFmin: returnTypesFnSingle, + OpcodeF32const: returnTypesFnF32, + OpcodeF64const: returnTypesFnF64, + OpcodeStore: returnTypesFnNoReturns, + OpcodeExitWithCode: returnTypesFnNoReturns, + OpcodeExitIfNotZeroWithCode: returnTypesFnNoReturns, + OpcodeReturn: returnTypesFnNoReturns, + OpcodeBrz: returnTypesFnNoReturns, + OpcodeBrnz: returnTypesFnNoReturns, + OpcodeUload8: returnTypesFnSingle, + OpcodeUload16: returnTypesFnSingle, + OpcodeUload32: returnTypesFnSingle, + OpcodeSload8: returnTypesFnSingle, + OpcodeSload16: returnTypesFnSingle, + OpcodeSload32: returnTypesFnSingle, } // AsLoad initializes this instruction as a store instruction with OpcodeLoad. @@ -942,6 +948,18 @@ func (i *Instruction) AsLoad(ptr Value, offset uint32, typ Type) { i.typ = typ } +// AsExtLoad initializes this instruction as a store instruction with OpcodeLoad. +func (i *Instruction) AsExtLoad(op Opcode, ptr Value, offset uint32, dst64bit bool) { + i.opcode = op + i.v = ptr + i.u64 = uint64(offset) + if dst64bit { + i.typ = TypeI64 + } else { + i.typ = TypeI32 + } +} + // LoadData returns the operands for a load instruction. func (i *Instruction) LoadData() (ptr Value, offset uint32, typ Type) { return i.v, uint32(i.u64), i.typ @@ -1135,6 +1153,14 @@ func (i *Instruction) AsExitWithCode(ctx Value, code wazevoapi.ExitCode) { i.u64 = uint64(code) } +// AsExitIfNotZeroWithCode initializes this instruction as a trap instruction with OpcodeExitIfNotZeroWithCode. +func (i *Instruction) AsExitIfNotZeroWithCode(ctx, c Value, code wazevoapi.ExitCode) { + i.opcode = OpcodeExitIfNotZeroWithCode + i.v = ctx + i.v2 = c + i.u64 = uint64(code) +} + // ExitWithCodeData returns the context and exit code of OpcodeExitWithCode. 
func (i *Instruction) ExitWithCodeData() (ctx Value, code wazevoapi.ExitCode) { return i.v, wazevoapi.ExitCode(i.u64) @@ -1296,6 +1322,8 @@ func (i *Instruction) Format(b Builder) string { switch i.opcode { case OpcodeExitWithCode: instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), wazevoapi.ExitCode(i.u64)) + case OpcodeExitIfNotZeroWithCode: + instSuffix = fmt.Sprintf(" %s, %s, %s", i.v2.Format(b), i.v.Format(b), wazevoapi.ExitCode(i.u64)) case OpcodeIadd, OpcodeIsub, OpcodeImul, OpcodeFadd, OpcodeFsub, OpcodeFmin, OpcodeFmax, OpcodeFdiv, OpcodeFmul: instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), i.v2.Format(b)) case OpcodeIcmp: @@ -1318,6 +1346,8 @@ func (i *Instruction) Format(b Builder) string { instSuffix = fmt.Sprintf(" %s, %s, %#x", i.v.Format(b), i.v2.Format(b), int32(i.u64)) case OpcodeLoad: instSuffix = fmt.Sprintf(" %s, %#x", i.v.Format(b), int32(i.u64)) + case OpcodeUload8, OpcodeUload16, OpcodeUload32, OpcodeSload8, OpcodeSload16, OpcodeSload32: + instSuffix = fmt.Sprintf(" %s, %#x", i.v.Format(b), int32(i.u64)) case OpcodeIconst: switch i.typ { case TypeI32: @@ -1428,6 +1458,8 @@ func (o Opcode) String() (ret string) { return "BrTable" case OpcodeExitWithCode: return "Exit" + case OpcodeExitIfNotZeroWithCode: + return "ExitIfNotZero" case OpcodeReturn: return "Return" case OpcodeCall: diff --git a/internal/engine/wazevo/testcases/testcases.go b/internal/engine/wazevo/testcases/testcases.go index 082d5020ae..b788af426a 100644 --- a/internal/engine/wazevo/testcases/testcases.go +++ b/internal/engine/wazevo/testcases/testcases.go @@ -786,40 +786,72 @@ var ( Params: []wasm.ValueType{i32}, Results: []wasm.ValueType{ i32, i64, f32, f64, i32, i64, f32, f64, + i32, i32, i32, i32, i32, i32, i32, i32, + i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, }, }}, MemorySection: &wasm.Memory{Min: 1}, FunctionSection: []wasm.Index{0}, CodeSection: []wasm.Code{{Body: []byte{ - wasm.OpcodeLocalGet, 0, // Basic loads (without extensions). + wasm.OpcodeLocalGet, 0, wasm.OpcodeI32Load, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 wasm.OpcodeLocalGet, 0, - wasm.OpcodeI64Load, 0x3, 0x0, // alignment=4 (natural alignment) staticOffset=0 + wasm.OpcodeI64Load, 0x3, 0x0, // alignment=3 (natural alignment) staticOffset=0 wasm.OpcodeLocalGet, 0, wasm.OpcodeF32Load, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 wasm.OpcodeLocalGet, 0, - wasm.OpcodeF64Load, 0x3, 0x0, // alignment=4 (natural alignment) staticOffset=0 + wasm.OpcodeF64Load, 0x3, 0x0, // alignment=3 (natural alignment) staticOffset=0 wasm.OpcodeLocalGet, 0, wasm.OpcodeI32Load, 0x2, 0xf, // alignment=2 (natural alignment) staticOffset=16 wasm.OpcodeLocalGet, 0, - wasm.OpcodeI64Load, 0x3, 0xf, // alignment=4 (natural alignment) staticOffset=16 + wasm.OpcodeI64Load, 0x3, 0xf, // alignment=3 (natural alignment) staticOffset=16 wasm.OpcodeLocalGet, 0, wasm.OpcodeF32Load, 0x2, 0xf, // alignment=2 (natural alignment) staticOffset=16 wasm.OpcodeLocalGet, 0, - wasm.OpcodeF64Load, 0x3, 0xf, // alignment=4 (natural alignment) staticOffset=16 - - // TODO: - // wasm.OpcodeI32Load8S - // wasm.OpcodeI32Load8U - // wasm.OpcodeI32Load16S - // wasm.OpcodeI32Load16U - // wasm.OpcodeI64Load8S - // wasm.OpcodeI64Load8U - // wasm.OpcodeI64Load16S - // wasm.OpcodeI64Load16U - // wasm.OpcodeI64Load32S - // wasm.OpcodeI64Load32U + wasm.OpcodeF64Load, 0x3, 0xf, // alignment=3 (natural alignment) staticOffset=16 + + // Extension integer loads. 
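+				// Each extending load below is exercised twice: once with a zero static offset and once with a non-zero one.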
+ wasm.OpcodeLocalGet, 0, + wasm.OpcodeI32Load8S, 0x0, 0x0, // alignment=0 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI32Load8S, 0x0, 0xf, // alignment=0 (natural alignment) staticOffset=16 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI32Load8U, 0x0, 0x0, // alignment=0 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI32Load8U, 0x0, 0xf, // alignment=0 (natural alignment) staticOffset=16 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI32Load16S, 0x1, 0x0, // alignment=1 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI32Load16S, 0x1, 0xf, // alignment=1 (natural alignment) staticOffset=16 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI32Load16U, 0x1, 0x0, // alignment=1 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI32Load16U, 0x1, 0xf, // alignment=1 (natural alignment) staticOffset=16 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load8S, 0x0, 0x0, // alignment=0 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load8S, 0x0, 0xf, // alignment=0 (natural alignment) staticOffset=16 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load8U, 0x0, 0x0, // alignment=0 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load8U, 0x0, 0xf, // alignment=0 (natural alignment) staticOffset=16 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load16S, 0x1, 0x0, // alignment=1 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load16S, 0x1, 0xf, // alignment=1 (natural alignment) staticOffset=16 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load16U, 0x1, 0x0, // alignment=1 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load16U, 0x1, 0xf, // alignment=1 (natural alignment) staticOffset=16 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load32S, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load32S, 0x2, 0xf, // alignment=2 (natural alignment) staticOffset=16 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load32U, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI64Load32U, 0x2, 0xf, // alignment=2 (natural alignment) staticOffset=16 wasm.OpcodeEnd, }}}, diff --git a/internal/engine/wazevo/wazevoapi/exitcode.go b/internal/engine/wazevo/wazevoapi/exitcode.go index 2df01ff574..8cee3ea344 100644 --- a/internal/engine/wazevo/wazevoapi/exitcode.go +++ b/internal/engine/wazevo/wazevoapi/exitcode.go @@ -7,6 +7,7 @@ const ( ExitCodeOK ExitCode = iota ExitCodeGrowStack ExitCodeUnreachable + ExitCodeMemoryOutOfBounds ) // String implements fmt.Stringer. 
@@ -18,6 +19,8 @@ func (e ExitCode) String() string { return "grow_stack" case ExitCodeUnreachable: return "unreachable" + case ExitCodeMemoryOutOfBounds: + return "memory_out_of_bounds" } panic("TODO") } From 431a1b77419aec2d6b9fd5ade08213582216cad9 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Thu, 10 Aug 2023 16:10:31 +0900 Subject: [PATCH 3/8] progress Signed-off-by: Takeshi Yoneda --- .../engine/wazevo/backend/backend_test.go | 3 +++ .../engine/wazevo/backend/isa/arm64/instr.go | 15 +++++++++++ .../wazevo/backend/isa/arm64/lower_instr.go | 9 +++++++ .../wazevo/backend/isa/arm64/lower_mem.go | 26 +++++++++++++++++++ internal/engine/wazevo/ssa/instructions.go | 5 ++++ 5 files changed, 58 insertions(+) diff --git a/internal/engine/wazevo/backend/backend_test.go b/internal/engine/wazevo/backend/backend_test.go index 91fcb53800..31727a5772 100644 --- a/internal/engine/wazevo/backend/backend_test.go +++ b/internal/engine/wazevo/backend/backend_test.go @@ -1883,6 +1883,9 @@ L1 (SSA Block: blk0): ret `, }, + { + name: "imported_function_call", m: testcases.MemoryLoads.Module, + }, } { t.Run(tc.name, func(t *testing.T) { ssab := ssa.NewBuilder() diff --git a/internal/engine/wazevo/backend/isa/arm64/instr.go b/internal/engine/wazevo/backend/isa/arm64/instr.go index e3f667ed32..2c6202497d 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -383,6 +383,21 @@ func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { i.amode = amode } +func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { + switch sizeInBits { + case 8: + i.kind = sLoad8 + case 16: + i.kind = sLoad16 + case 32: + i.kind = sLoad32 + default: + panic("BUG") + } + i.rd = dst + i.amode = amode +} + func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { switch sizeInBits { case 8: diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 2d3089f994..9c203b1fc5 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -130,10 +130,15 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { case ssa.OpcodeExitWithCode: execCtx, code := instr.ExitWithCodeData() m.lowerExitWithCode(execCtx, code) + case ssa.OpcodeExitIfNotZeroWithCode: + execCtx, c, code := instr.ExitIfNotZeroWithCodeData() + m.lowerExitIfNotZeroWithCode(execCtx, c, code) case ssa.OpcodeStore, ssa.OpcodeIstore8, ssa.OpcodeIstore16, ssa.OpcodeIstore32: m.lowerStore(instr) case ssa.OpcodeLoad: m.lowerLoad(instr) + case ssa.OpcodeUload8, ssa.OpcodeUload16, ssa.OpcodeUload32, ssa.OpcodeSload8, ssa.OpcodeSload16, ssa.OpcodeSload32: + m.lowerExtLoad(instr) case ssa.OpcodeCall, ssa.OpcodeCallIndirect: m.lowerCall(instr) case ssa.OpcodeIcmp: @@ -331,3 +336,7 @@ func (m *machine) lowerExitWithCode(ctx ssa.Value, code wazevoapi.ExitCode) { m.insert(setExitCode) m.insert(exitSeq) } + +func (m *machine) lowerExitIfNotZeroWithCode(ctx ssa.Value, c ssa.Value, code wazevoapi.ExitCode) { + +} diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_mem.go b/internal/engine/wazevo/backend/isa/arm64/lower_mem.go index 78ecd71d9b..efbdca2048 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_mem.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_mem.go @@ -175,6 +175,32 @@ func (a addressMode) sizeInBitsToShiftAmount(sizeInBits byte) (lsl byte) { return } +func (m *machine) 
lowerExtLoad(si *ssa.Instruction) {
+	ptr, offset, _ := si.LoadData()
+
+	// Extension loads always have a 64-bit destination.
+	amode := m.lowerToAddressMode(ptr, offset, 64)
+
+	load := m.allocateInstr()
+	switch si.Opcode() {
+	case ssa.OpcodeUload8:
+		load.asULoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 8)
+	case ssa.OpcodeUload16:
+		load.asULoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 16)
+	case ssa.OpcodeUload32:
+		load.asULoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 32)
+	case ssa.OpcodeSload8:
+		load.asULoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 8)
+	case ssa.OpcodeSload16:
+		load.asULoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 16)
+	case ssa.OpcodeSload32:
+		load.asULoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 32)
+	default:
+		panic("BUG")
+	}
+	m.insert(load)
+}
+
 func (m *machine) lowerLoad(si *ssa.Instruction) {
 	// TODO: merge consecutive loads into a single pair store instruction.
 	ptr, offset, typ := si.LoadData()
diff --git a/internal/engine/wazevo/ssa/instructions.go b/internal/engine/wazevo/ssa/instructions.go
index 6715ef5a2e..5dcdeec460 100644
--- a/internal/engine/wazevo/ssa/instructions.go
+++ b/internal/engine/wazevo/ssa/instructions.go
@@ -1166,6 +1166,11 @@ func (i *Instruction) ExitWithCodeData() (ctx Value, code wazevoapi.ExitCode) {
 	return i.v, wazevoapi.ExitCode(i.u64)
 }
 
+// ExitIfNotZeroWithCodeData returns the context, the condition value, and the exit code of OpcodeExitIfNotZeroWithCode.
+func (i *Instruction) ExitIfNotZeroWithCodeData() (ctx, c Value, code wazevoapi.ExitCode) {
+	return i.v, i.v2, wazevoapi.ExitCode(i.u64)
+}
+
 // InvertBrx inverts either OpcodeBrz or OpcodeBrnz to the other.
 func (i *Instruction) InvertBrx() {
 	switch i.opcode {

From 7ec63e7c31f55f7e56b169e75180ee926364f786 Mon Sep 17 00:00:00 2001
From: Takeshi Yoneda
Date: Thu, 10 Aug 2023 16:36:30 +0900
Subject: [PATCH 4/8] more

Signed-off-by: Takeshi Yoneda
---
 .../engine/wazevo/backend/isa/arm64/instr.go  | 10 ++--
 .../backend/isa/arm64/instr_encoding_test.go  | 10 ++++
 .../wazevo/backend/isa/arm64/lower_instr.go   | 50 +++++++++++++++++--
 .../engine/wazevo/backend/isa/arm64/reg.go    |  3 +-
 4 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/internal/engine/wazevo/backend/isa/arm64/instr.go b/internal/engine/wazevo/backend/isa/arm64/instr.go
index 2c6202497d..9c3f8b776d 100644
--- a/internal/engine/wazevo/backend/isa/arm64/instr.go
+++ b/internal/engine/wazevo/backend/isa/arm64/instr.go
@@ -681,17 +681,17 @@ func (i *instruction) String() (str string) {
 	case bitRR:
 		panic("TODO")
 	case uLoad8:
-		panic("TODO")
+		str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
 	case sLoad8:
-		panic("TODO")
+		str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
 	case uLoad16:
-		panic("TODO")
+		str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
 	case sLoad16:
-		panic("TODO")
+		str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
 	case uLoad32:
 		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
 	case sLoad32:
-		panic("TODO")
+		str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
 	case uLoad64:
 		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64))
 	case store8:
diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go
index 7123f967fe..22ac128f1b 100644
--- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go
+++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go
@@ -2,6 +2,7 @@ package arm64
 
 import (
 	"encoding/hex"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
 	"math"
 	"testing"
 
@@ -760,3 +761,12 @@ func Test_encodeExitSequence(t *testing.T) {
 	require.Equal(t, "dd0a40f9db0e40f97f030091de1240f9c0035fd6", hex.EncodeToString(m.buf))
 	require.Equal(t, len(m.buf), exitSequenceSize)
 }
+
+func Test_lowerExitWithCodeEncodingSize(t *testing.T) {
+	compiler, _, m := newSetupWithMockContext()
+	m.lowerExitWithCode(x10VReg, wazevoapi.ExitCodeGrowStack)
+	m.FlushPendingInstructions()
+	require.NotNil(t, m.perBlockHead)
+	m.encode(m.perBlockHead)
+	require.Equal(t, exitWithCodeEncodingSize, len(compiler.Buf()))
+}
diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
index 9c203b1fc5..3cf56cefec 100644
--- a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
+++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
@@ -129,10 +129,10 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 	case ssa.OpcodeIconst, ssa.OpcodeF32const, ssa.OpcodeF64const: // Constant instructions are inlined.
 	case ssa.OpcodeExitWithCode:
 		execCtx, code := instr.ExitWithCodeData()
-		m.lowerExitWithCode(execCtx, code)
+		m.lowerExitWithCode(m.compiler.VRegOf(execCtx), code)
 	case ssa.OpcodeExitIfNotZeroWithCode:
 		execCtx, c, code := instr.ExitIfNotZeroWithCodeData()
-		m.lowerExitIfNotZeroWithCode(execCtx, c, code)
+		m.lowerExitIfNotZeroWithCode(m.compiler.VRegOf(execCtx), c, code)
 	case ssa.OpcodeStore, ssa.OpcodeIstore8, ssa.OpcodeIstore16, ssa.OpcodeIstore32:
 		m.lowerStore(instr)
 	case ssa.OpcodeLoad:
@@ -315,9 +315,10 @@ func (m *machine) lowerImul(x, y, result ssa.Value) {
 	m.insert(mul)
 }
 
+const exitWithCodeEncodingSize = exitSequenceSize + 8
+
 // lowerExitWithCode lowers the ExitWithCode instruction, which takes the execution context pointer as its argument.
-func (m *machine) lowerExitWithCode(ctx ssa.Value, code wazevoapi.ExitCode) {
-	execCtxVReg := m.compiler.VRegOf(ctx)
+func (m *machine) lowerExitWithCode(execCtxVReg regalloc.VReg, code wazevoapi.ExitCode) {
 	loadExitCodeConst := m.allocateInstr()
 	loadExitCodeConst.asMOVZ(tmpRegVReg, uint64(code), 0, true)
 
@@ -337,6 +338,45 @@ func (m *machine) lowerExitWithCode(ctx ssa.Value, code wazevoapi.ExitCode) {
 	m.insert(exitSeq)
 }
 
-func (m *machine) lowerExitIfNotZeroWithCode(ctx ssa.Value, c ssa.Value, code wazevoapi.ExitCode) {
+
+func (m *machine) lowerExitIfNotZeroWithCode(execCtxVReg regalloc.VReg, cond ssa.Value, code wazevoapi.ExitCode) {
+	condDef := m.compiler.ValueDefinition(cond)
+	if !m.compiler.MatchInstr(condDef, ssa.OpcodeIcmp) {
+		// We can have the general case just like machine.LowerConditionalBranch.
+		panic("TODO: OpcodeExitIfNotZeroWithCode must come after Icmp at the moment")
+	}
+	m.compiler.MarkLowered(condDef.Instr)
+
+	cvalInstr := condDef.Instr
+	x, y, c := cvalInstr.IcmpData()
+	cc, signed := condFlagFromSSAIntegerCmpCond(c), c.Signed()
+
+	if x.Type() != y.Type() {
+		panic("TODO(maybe): support icmp with different types")
+	}
+
+	extMod := extModeOf(x.Type(), signed)
+
+	// First operand must be in pure register form.
+	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extMod)
+	// Second operand can be in any of Imm12, ER, SR, or NR form supported by the SUBS instructions.
+ rm := m.getOperand_Imm12_ER_SR_NR(m.compiler.ValueDefinition(y), extMod) + + alu := m.allocateInstr() + // subs zr, rn, rm + alu.asALU( + aluOpSubS, + // We don't need the result, just need to set flags. + operandNR(xzrVReg), + rn, + rm, + x.Type().Bits() == 64, + ) + m.insert(alu) + // We have to skip the entire exit sequence if the condition is false. + cbr := m.allocateInstr() + cbr.asCondBr(cc.asCond(), invalidLabel, false /* ignored */) + cbr.condBrOffsetResolve(exitWithCodeEncodingSize) + m.insert(cbr) + m.lowerExitWithCode(execCtxVReg, code) } diff --git a/internal/engine/wazevo/backend/isa/arm64/reg.go b/internal/engine/wazevo/backend/isa/arm64/reg.go index d5cb807ede..bdfac565ce 100644 --- a/internal/engine/wazevo/backend/isa/arm64/reg.go +++ b/internal/engine/wazevo/backend/isa/arm64/reg.go @@ -2,6 +2,7 @@ package arm64 import ( "fmt" + "strconv" "strings" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" @@ -271,7 +272,7 @@ func formatVRegSized(r regalloc.VReg, size byte) (ret string) { case 64: ret = fmt.Sprintf("x%d?", r.ID()) default: - panic("BUG: invalid register size") + panic("BUG: invalid register size: " + strconv.Itoa(int(size))) } case regalloc.RegTypeFloat: switch size { From b37a0e6123d9eaf67d0d5a429d0b52525f028a18 Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Thu, 10 Aug 2023 16:50:06 +0900 Subject: [PATCH 5/8] more Signed-off-by: Takeshi Yoneda --- .../engine/wazevo/backend/backend_test.go | 530 ++++++++++++++++++ .../engine/wazevo/backend/isa/arm64/instr.go | 10 + .../wazevo/backend/isa/arm64/lower_instr.go | 2 +- .../engine/wazevo/frontend/frontend_test.go | 56 +- internal/engine/wazevo/frontend/lower.go | 2 +- 5 files changed, 570 insertions(+), 30 deletions(-) diff --git a/internal/engine/wazevo/backend/backend_test.go b/internal/engine/wazevo/backend/backend_test.go index 31727a5772..5ab53d8aef 100644 --- a/internal/engine/wazevo/backend/backend_test.go +++ b/internal/engine/wazevo/backend/backend_test.go @@ -1885,6 +1885,536 @@ L1 (SSA Block: blk0): }, { name: "imported_function_call", m: testcases.MemoryLoads.Module, + afterLoweringARM64: ` +L1 (SSA Block: blk0): + mov x0?, x0 + mov x1?, x1 + mov x2?, x2 + uxtw x4?, w2? + ldr w5?, [x1?, #0x8] + add x6?, x4?, #0x4 + subs xzr, x5?, x6? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldr w8?, [x1?] + ldr w9?, [x8?] + uxtw x11?, w2? + add x12?, x11?, #0x8 + subs xzr, x5?, x12? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldr x14?, [x8?] + uxtw x16?, w2? + add x17?, x16?, #0x4 + subs xzr, x5?, x17? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldr s19?, [x8?] + uxtw x21?, w2? + add x22?, x21?, #0x8 + subs xzr, x5?, x22? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldr d24?, [x8?] + uxtw x26?, w2? + add x27?, x26?, #0x13 + subs xzr, x5?, x27? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldr w29?, [x8?, #0xf] + uxtw x31?, w2? + add x32?, x31?, #0x17 + subs xzr, x5?, x32? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldr x34?, [x8?, #0xf] + uxtw x36?, w2? + add x37?, x36?, #0x13 + subs xzr, x5?, x37? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldr s39?, [x8?, #0xf] + uxtw x41?, w2? + add x42?, x41?, #0x17 + subs xzr, x5?, x42? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldr d44?, [x8?, #0xf] + uxtw x46?, w2? 
+ add x47?, x46?, #0x1 + subs xzr, x5?, x47? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrb w49?, [x8?] + uxtw x51?, w2? + add x52?, x51?, #0x10 + subs xzr, x5?, x52? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrb w54?, [x8?, #0xf] + uxtw x56?, w2? + add x57?, x56?, #0x1 + subs xzr, x5?, x57? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrb w59?, [x8?] + uxtw x61?, w2? + add x62?, x61?, #0x10 + subs xzr, x5?, x62? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrb w64?, [x8?, #0xf] + uxtw x66?, w2? + add x67?, x66?, #0x2 + subs xzr, x5?, x67? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrh w69?, [x8?] + uxtw x71?, w2? + add x72?, x71?, #0x11 + subs xzr, x5?, x72? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrh w74?, [x8?, #0xf] + uxtw x76?, w2? + add x77?, x76?, #0x2 + subs xzr, x5?, x77? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrh w79?, [x8?] + uxtw x81?, w2? + add x82?, x81?, #0x11 + subs xzr, x5?, x82? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrh w84?, [x8?, #0xf] + uxtw x86?, w2? + add x87?, x86?, #0x1 + subs xzr, x5?, x87? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrb w89?, [x8?] + uxtw x91?, w2? + add x92?, x91?, #0x10 + subs xzr, x5?, x92? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrb w94?, [x8?, #0xf] + uxtw x96?, w2? + add x97?, x96?, #0x1 + subs xzr, x5?, x97? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrb w99?, [x8?] + uxtw x101?, w2? + add x102?, x101?, #0x10 + subs xzr, x5?, x102? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrb w104?, [x8?, #0xf] + uxtw x106?, w2? + add x107?, x106?, #0x2 + subs xzr, x5?, x107? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrh w109?, [x8?] + uxtw x111?, w2? + add x112?, x111?, #0x11 + subs xzr, x5?, x112? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrh w114?, [x8?, #0xf] + uxtw x116?, w2? + add x117?, x116?, #0x2 + subs xzr, x5?, x117? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrh w119?, [x8?] + uxtw x121?, w2? + add x122?, x121?, #0x11 + subs xzr, x5?, x122? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldrh w124?, [x8?, #0xf] + uxtw x126?, w2? + add x127?, x126?, #0x4 + subs xzr, x5?, x127? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldr w129?, [x8?] + uxtw x131?, w2? + add x132?, x131?, #0x13 + subs xzr, x5?, x132? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldr w134?, [x8?, #0xf] + uxtw x136?, w2? + add x137?, x136?, #0x4 + subs xzr, x5?, x137? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldr w139?, [x8?] + uxtw x141?, w2? + add x142?, x141?, #0x13 + subs xzr, x5?, x142? + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + trap_sequence w0? + ldr w144?, [x8?, #0xf] + mov x0, x9? + mov x1, x14? + mov q0.8b, q19?.8b + mov q1.8b, q24?.8b + mov x2, x29? + mov x3, x34? + mov q2.8b, q39?.8b + mov q3.8b, q44?.8b + mov x4, x49? + mov x5, x54? + mov x6, x59? + mov x7, x64? 
+ str w69?, [#ret_space, #0x0] + str w74?, [#ret_space, #0x8] + str w79?, [#ret_space, #0x10] + str w84?, [#ret_space, #0x18] + str x89?, [#ret_space, #0x20] + str x94?, [#ret_space, #0x28] + str x99?, [#ret_space, #0x30] + str x104?, [#ret_space, #0x38] + str x109?, [#ret_space, #0x40] + str x114?, [#ret_space, #0x48] + str x119?, [#ret_space, #0x50] + str x124?, [#ret_space, #0x58] + str x129?, [#ret_space, #0x60] + str x134?, [#ret_space, #0x68] + str x139?, [#ret_space, #0x70] + str x144?, [#ret_space, #0x78] + ret +`, + + afterFinalizeARM64: ` +L1 (SSA Block: blk0): + str x30, [sp, #-0x10]! + str x18, [sp, #-0x10]! + str x19, [sp, #-0x10]! + str x20, [sp, #-0x10]! + str x21, [sp, #-0x10]! + str x22, [sp, #-0x10]! + str x23, [sp, #-0x10]! + str x24, [sp, #-0x10]! + str x25, [sp, #-0x10]! + str x26, [sp, #-0x10]! + mov x23, x0 + mov x25, x2 + uxtw x8, w25 + ldr w24, [x1, #0x8] + add x8, x8, #0x4 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldr w26, [x1] + ldr w0, [x26] + uxtw x8, w25 + add x8, x8, #0x8 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldr x1, [x26] + uxtw x8, w25 + add x8, x8, #0x4 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldr s0, [x26] + uxtw x8, w25 + add x8, x8, #0x8 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldr d1, [x26] + uxtw x8, w25 + add x8, x8, #0x13 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldr w2, [x26, #0xf] + uxtw x8, w25 + add x8, x8, #0x17 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldr x3, [x26, #0xf] + uxtw x8, w25 + add x8, x8, #0x13 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldr s2, [x26, #0xf] + uxtw x8, w25 + add x8, x8, #0x17 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldr d3, [x26, #0xf] + uxtw x8, w25 + add x8, x8, #0x1 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrb w4, [x26] + uxtw x8, w25 + add x8, x8, #0x10 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrb w5, [x26, #0xf] + uxtw x8, w25 + add x8, x8, #0x1 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrb w6, [x26] + uxtw x8, w25 + add x8, x8, #0x10 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrb w7, [x26, #0xf] + uxtw x8, w25 + add x8, x8, #0x2 + subs xzr, x24, x8 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrh w8, [x26] + uxtw x9, w25 + add x9, x9, #0x11 + subs xzr, x24, x9 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrh w9, [x26, #0xf] + uxtw x10, w25 + add x10, x10, #0x2 + subs xzr, x24, x10 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrh w10, [x26] + uxtw x11, w25 + add x11, x11, #0x11 + subs xzr, x24, x11 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrh w11, [x26, #0xf] + uxtw x12, w25 + add x12, x12, #0x1 + subs xzr, x24, x12 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrb w12, [x26] + uxtw x13, w25 + add x13, x13, #0x10 + subs xzr, x24, x13 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 
+ ldrb w13, [x26, #0xf] + uxtw x14, w25 + add x14, x14, #0x1 + subs xzr, x24, x14 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrb w14, [x26] + uxtw x15, w25 + add x15, x15, #0x10 + subs xzr, x24, x15 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrb w15, [x26, #0xf] + uxtw x16, w25 + add x16, x16, #0x2 + subs xzr, x24, x16 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrh w16, [x26] + uxtw x17, w25 + add x17, x17, #0x11 + subs xzr, x24, x17 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrh w17, [x26, #0xf] + uxtw x18, w25 + add x18, x18, #0x2 + subs xzr, x24, x18 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrh w18, [x26] + uxtw x19, w25 + add x19, x19, #0x11 + subs xzr, x24, x19 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldrh w19, [x26, #0xf] + uxtw x20, w25 + add x20, x20, #0x4 + subs xzr, x24, x20 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldr w20, [x26] + uxtw x21, w25 + add x21, x21, #0x13 + subs xzr, x24, x21 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldr w21, [x26, #0xf] + uxtw x22, w25 + add x22, x22, #0x4 + subs xzr, x24, x22 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldr w22, [x26] + uxtw x25, w25 + add x25, x25, #0x13 + subs xzr, x24, x25 + b.hi #0x20 + movz x27, #0x3, LSL 0 + str w27, [x23] + trap_sequence w23 + ldr w23, [x26, #0xf] + str w8, [sp, #0xa0] + str w9, [sp, #0xa8] + str w10, [sp, #0xb0] + str w11, [sp, #0xb8] + str x12, [sp, #0xc0] + str x13, [sp, #0xc8] + str x14, [sp, #0xd0] + str x15, [sp, #0xd8] + str x16, [sp, #0xe0] + str x17, [sp, #0xe8] + str x18, [sp, #0xf0] + str x19, [sp, #0xf8] + str x20, [sp, #0x100] + str x21, [sp, #0x108] + str x22, [sp, #0x110] + str x23, [sp, #0x118] + ldr x26, [sp], #0x10 + ldr x25, [sp], #0x10 + ldr x24, [sp], #0x10 + ldr x23, [sp], #0x10 + ldr x22, [sp], #0x10 + ldr x21, [sp], #0x10 + ldr x20, [sp], #0x10 + ldr x19, [sp], #0x10 + ldr x18, [sp], #0x10 + ldr x30, [sp], #0x10 + ret +`, }, } { t.Run(tc.name, func(t *testing.T) { diff --git a/internal/engine/wazevo/backend/isa/arm64/instr.go b/internal/engine/wazevo/backend/isa/arm64/instr.go index 9c3f8b776d..a4c1e55105 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -69,7 +69,12 @@ var defKinds = [numInstructionKinds]defKind{ cSet: defKindRD, extend: defKindRD, fpuCmp: defKindNone, + uLoad8: defKindRD, + uLoad16: defKindRD, uLoad32: defKindRD, + sLoad8: defKindRD, + sLoad16: defKindRD, + sLoad32: defKindRD, uLoad64: defKindRD, fpuLoad32: defKindRD, fpuLoad64: defKindRD, @@ -152,7 +157,12 @@ var useKinds = [numInstructionKinds]useKind{ cSet: useKindNone, extend: useKindRN, fpuCmp: useKindRNRM, + uLoad8: useKindAMode, + uLoad16: useKindAMode, uLoad32: useKindAMode, + sLoad8: useKindAMode, + sLoad16: useKindAMode, + sLoad32: useKindAMode, uLoad64: useKindAMode, fpuLoad32: useKindAMode, fpuLoad64: useKindAMode, diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 3cf56cefec..1b1aa92e86 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -376,7 +376,7 @@ func (m *machine) lowerExitIfNotZeroWithCode(execCtxVReg regalloc.VReg, cond ssa // We have to skip the entire exit 
sequence if the condition is false. cbr := m.allocateInstr() cbr.asCondBr(cc.asCond(), invalidLabel, false /* ignored */) - cbr.condBrOffsetResolve(exitWithCodeEncodingSize) + cbr.condBrOffsetResolve(exitWithCodeEncodingSize + 4) m.insert(cbr) m.lowerExitWithCode(execCtxVReg, code) } diff --git a/internal/engine/wazevo/frontend/frontend_test.go b/internal/engine/wazevo/frontend/frontend_test.go index 12c7f351a5..5c8ecf4ec2 100644 --- a/internal/engine/wazevo/frontend/frontend_test.go +++ b/internal/engine/wazevo/frontend/frontend_test.go @@ -883,170 +883,170 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) v4:i64 = UExtend v2, 32->64 v5:i64 = Uload32 module_ctx, 0x8 v6:i64 = Iadd v4, v3 - v7:i32 = Icmp lt_u, v5, v6 + v7:i32 = Icmp gt_u, v5, v6 ExitIfNotZero v7, exec_ctx, memory_out_of_bounds v8:i64 = Uload32 module_ctx, 0x0 v9:i32 = Load v8, 0x0 v10:i64 = Iconst_64 0x8 v11:i64 = UExtend v2, 32->64 v12:i64 = Iadd v11, v10 - v13:i32 = Icmp lt_u, v5, v12 + v13:i32 = Icmp gt_u, v5, v12 ExitIfNotZero v13, exec_ctx, memory_out_of_bounds v14:i64 = Load v8, 0x0 v15:i64 = Iconst_64 0x4 v16:i64 = UExtend v2, 32->64 v17:i64 = Iadd v16, v15 - v18:i32 = Icmp lt_u, v5, v17 + v18:i32 = Icmp gt_u, v5, v17 ExitIfNotZero v18, exec_ctx, memory_out_of_bounds v19:f32 = Load v8, 0x0 v20:i64 = Iconst_64 0x8 v21:i64 = UExtend v2, 32->64 v22:i64 = Iadd v21, v20 - v23:i32 = Icmp lt_u, v5, v22 + v23:i32 = Icmp gt_u, v5, v22 ExitIfNotZero v23, exec_ctx, memory_out_of_bounds v24:f64 = Load v8, 0x0 v25:i64 = Iconst_64 0x13 v26:i64 = UExtend v2, 32->64 v27:i64 = Iadd v26, v25 - v28:i32 = Icmp lt_u, v5, v27 + v28:i32 = Icmp gt_u, v5, v27 ExitIfNotZero v28, exec_ctx, memory_out_of_bounds v29:i32 = Load v8, 0xf v30:i64 = Iconst_64 0x17 v31:i64 = UExtend v2, 32->64 v32:i64 = Iadd v31, v30 - v33:i32 = Icmp lt_u, v5, v32 + v33:i32 = Icmp gt_u, v5, v32 ExitIfNotZero v33, exec_ctx, memory_out_of_bounds v34:i64 = Load v8, 0xf v35:i64 = Iconst_64 0x13 v36:i64 = UExtend v2, 32->64 v37:i64 = Iadd v36, v35 - v38:i32 = Icmp lt_u, v5, v37 + v38:i32 = Icmp gt_u, v5, v37 ExitIfNotZero v38, exec_ctx, memory_out_of_bounds v39:f32 = Load v8, 0xf v40:i64 = Iconst_64 0x17 v41:i64 = UExtend v2, 32->64 v42:i64 = Iadd v41, v40 - v43:i32 = Icmp lt_u, v5, v42 + v43:i32 = Icmp gt_u, v5, v42 ExitIfNotZero v43, exec_ctx, memory_out_of_bounds v44:f64 = Load v8, 0xf v45:i64 = Iconst_64 0x1 v46:i64 = UExtend v2, 32->64 v47:i64 = Iadd v46, v45 - v48:i32 = Icmp lt_u, v5, v47 + v48:i32 = Icmp gt_u, v5, v47 ExitIfNotZero v48, exec_ctx, memory_out_of_bounds v49:i32 = Sload8 v8, 0x0 v50:i64 = Iconst_64 0x10 v51:i64 = UExtend v2, 32->64 v52:i64 = Iadd v51, v50 - v53:i32 = Icmp lt_u, v5, v52 + v53:i32 = Icmp gt_u, v5, v52 ExitIfNotZero v53, exec_ctx, memory_out_of_bounds v54:i32 = Sload8 v8, 0xf v55:i64 = Iconst_64 0x1 v56:i64 = UExtend v2, 32->64 v57:i64 = Iadd v56, v55 - v58:i32 = Icmp lt_u, v5, v57 + v58:i32 = Icmp gt_u, v5, v57 ExitIfNotZero v58, exec_ctx, memory_out_of_bounds v59:i32 = Uload8 v8, 0x0 v60:i64 = Iconst_64 0x10 v61:i64 = UExtend v2, 32->64 v62:i64 = Iadd v61, v60 - v63:i32 = Icmp lt_u, v5, v62 + v63:i32 = Icmp gt_u, v5, v62 ExitIfNotZero v63, exec_ctx, memory_out_of_bounds v64:i32 = Uload8 v8, 0xf v65:i64 = Iconst_64 0x2 v66:i64 = UExtend v2, 32->64 v67:i64 = Iadd v66, v65 - v68:i32 = Icmp lt_u, v5, v67 + v68:i32 = Icmp gt_u, v5, v67 ExitIfNotZero v68, exec_ctx, memory_out_of_bounds v69:i32 = Sload16 v8, 0x0 v70:i64 = Iconst_64 0x11 v71:i64 = UExtend v2, 32->64 v72:i64 = Iadd v71, v70 - v73:i32 = Icmp lt_u, v5, v72 + v73:i32 = Icmp gt_u, v5, 
v72 ExitIfNotZero v73, exec_ctx, memory_out_of_bounds v74:i32 = Sload16 v8, 0xf v75:i64 = Iconst_64 0x2 v76:i64 = UExtend v2, 32->64 v77:i64 = Iadd v76, v75 - v78:i32 = Icmp lt_u, v5, v77 + v78:i32 = Icmp gt_u, v5, v77 ExitIfNotZero v78, exec_ctx, memory_out_of_bounds v79:i32 = Uload16 v8, 0x0 v80:i64 = Iconst_64 0x11 v81:i64 = UExtend v2, 32->64 v82:i64 = Iadd v81, v80 - v83:i32 = Icmp lt_u, v5, v82 + v83:i32 = Icmp gt_u, v5, v82 ExitIfNotZero v83, exec_ctx, memory_out_of_bounds v84:i32 = Uload16 v8, 0xf v85:i64 = Iconst_64 0x1 v86:i64 = UExtend v2, 32->64 v87:i64 = Iadd v86, v85 - v88:i32 = Icmp lt_u, v5, v87 + v88:i32 = Icmp gt_u, v5, v87 ExitIfNotZero v88, exec_ctx, memory_out_of_bounds v89:i64 = Sload8 v8, 0x0 v90:i64 = Iconst_64 0x10 v91:i64 = UExtend v2, 32->64 v92:i64 = Iadd v91, v90 - v93:i32 = Icmp lt_u, v5, v92 + v93:i32 = Icmp gt_u, v5, v92 ExitIfNotZero v93, exec_ctx, memory_out_of_bounds v94:i64 = Sload8 v8, 0xf v95:i64 = Iconst_64 0x1 v96:i64 = UExtend v2, 32->64 v97:i64 = Iadd v96, v95 - v98:i32 = Icmp lt_u, v5, v97 + v98:i32 = Icmp gt_u, v5, v97 ExitIfNotZero v98, exec_ctx, memory_out_of_bounds v99:i64 = Uload8 v8, 0x0 v100:i64 = Iconst_64 0x10 v101:i64 = UExtend v2, 32->64 v102:i64 = Iadd v101, v100 - v103:i32 = Icmp lt_u, v5, v102 + v103:i32 = Icmp gt_u, v5, v102 ExitIfNotZero v103, exec_ctx, memory_out_of_bounds v104:i64 = Uload8 v8, 0xf v105:i64 = Iconst_64 0x2 v106:i64 = UExtend v2, 32->64 v107:i64 = Iadd v106, v105 - v108:i32 = Icmp lt_u, v5, v107 + v108:i32 = Icmp gt_u, v5, v107 ExitIfNotZero v108, exec_ctx, memory_out_of_bounds v109:i64 = Sload16 v8, 0x0 v110:i64 = Iconst_64 0x11 v111:i64 = UExtend v2, 32->64 v112:i64 = Iadd v111, v110 - v113:i32 = Icmp lt_u, v5, v112 + v113:i32 = Icmp gt_u, v5, v112 ExitIfNotZero v113, exec_ctx, memory_out_of_bounds v114:i64 = Sload16 v8, 0xf v115:i64 = Iconst_64 0x2 v116:i64 = UExtend v2, 32->64 v117:i64 = Iadd v116, v115 - v118:i32 = Icmp lt_u, v5, v117 + v118:i32 = Icmp gt_u, v5, v117 ExitIfNotZero v118, exec_ctx, memory_out_of_bounds v119:i64 = Uload16 v8, 0x0 v120:i64 = Iconst_64 0x11 v121:i64 = UExtend v2, 32->64 v122:i64 = Iadd v121, v120 - v123:i32 = Icmp lt_u, v5, v122 + v123:i32 = Icmp gt_u, v5, v122 ExitIfNotZero v123, exec_ctx, memory_out_of_bounds v124:i64 = Uload16 v8, 0xf v125:i64 = Iconst_64 0x4 v126:i64 = UExtend v2, 32->64 v127:i64 = Iadd v126, v125 - v128:i32 = Icmp lt_u, v5, v127 + v128:i32 = Icmp gt_u, v5, v127 ExitIfNotZero v128, exec_ctx, memory_out_of_bounds v129:i64 = Sload32 v8, 0x0 v130:i64 = Iconst_64 0x13 v131:i64 = UExtend v2, 32->64 v132:i64 = Iadd v131, v130 - v133:i32 = Icmp lt_u, v5, v132 + v133:i32 = Icmp gt_u, v5, v132 ExitIfNotZero v133, exec_ctx, memory_out_of_bounds v134:i64 = Sload32 v8, 0xf v135:i64 = Iconst_64 0x4 v136:i64 = UExtend v2, 32->64 v137:i64 = Iadd v136, v135 - v138:i32 = Icmp lt_u, v5, v137 + v138:i32 = Icmp gt_u, v5, v137 ExitIfNotZero v138, exec_ctx, memory_out_of_bounds v139:i64 = Uload32 v8, 0x0 v140:i64 = Iconst_64 0x13 v141:i64 = UExtend v2, 32->64 v142:i64 = Iadd v141, v140 - v143:i32 = Icmp lt_u, v5, v142 + v143:i32 = Icmp gt_u, v5, v142 ExitIfNotZero v143, exec_ctx, memory_out_of_bounds v144:i64 = Uload32 v8, 0xf Jump blk_ret, v9, v14, v19, v24, v29, v34, v39, v44, v49, v54, v59, v64, v69, v74, v79, v84, v89, v94, v99, v104, v109, v114, v119, v124, v129, v134, v139, v144 diff --git a/internal/engine/wazevo/frontend/lower.go b/internal/engine/wazevo/frontend/lower.go index c42808d88a..8450b7e635 100644 --- a/internal/engine/wazevo/frontend/lower.go +++ 
b/internal/engine/wazevo/frontend/lower.go
@@ -493,7 +493,7 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) {
 
 		// Check for out of bounds memory access: `baseAddrPlusCeil > memLen`.
 		cmp := builder.AllocateInstruction()
-		cmp.AsIcmp(memLen, baseAddrPlusCeil.Return(), ssa.IntegerCmpCondUnsignedLessThan)
+		cmp.AsIcmp(memLen, baseAddrPlusCeil.Return(), ssa.IntegerCmpCondUnsignedGreaterThan)
 		builder.InsertInstruction(cmp)
 		exitIfNZ := builder.AllocateInstruction()
 		exitIfNZ.AsExitIfNotZeroWithCode(c.execCtxPtrValue, cmp.Return(), wazevoapi.ExitCodeMemoryOutOfBounds)

From 77e63644e09e574ce0152745802ad6b6485d2045 Mon Sep 17 00:00:00 2001
From: Takeshi Yoneda
Date: Thu, 10 Aug 2023 16:55:46 +0900
Subject: [PATCH 6/8] more

Signed-off-by: Takeshi Yoneda
---
 internal/engine/wazevo/frontend/frontend.go | 8 ++++++--
 internal/engine/wazevo/frontend/lower.go    | 8 ++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/internal/engine/wazevo/frontend/frontend.go b/internal/engine/wazevo/frontend/frontend.go
index ecd08a521d..473fd39def 100644
--- a/internal/engine/wazevo/frontend/frontend.go
+++ b/internal/engine/wazevo/frontend/frontend.go
@@ -30,6 +30,7 @@ type Compiler struct {
 	wasmFunctionLocalTypes []wasm.ValueType
 	wasmFunctionBody       []byte
 	memoryBaseVariable, memoryLenVariable ssa.Variable
+	needMemory bool
 	// br is reused during lowering.
 	br *bytes.Reader
 	loweringState loweringState
@@ -166,8 +167,11 @@ func (c *Compiler) declareWasmLocals(entry ssa.BasicBlock) {
 }
 
 func (c *Compiler) declareNecessaryVariables() {
-	c.memoryBaseVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64)
-	c.memoryLenVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64)
+	c.needMemory = len(c.m.ImportedMemories()) > 0 || c.m.MemorySection != nil
+	if c.needMemory {
+		c.memoryBaseVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64)
+		c.memoryLenVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64)
+	}
 	// TODO: add tables, globals.
 }
 
diff --git a/internal/engine/wazevo/frontend/lower.go b/internal/engine/wazevo/frontend/lower.go
index 8450b7e635..cc0feb72c1 100644
--- a/internal/engine/wazevo/frontend/lower.go
+++ b/internal/engine/wazevo/frontend/lower.go
@@ -814,6 +814,14 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) {
 		for _, v := range rest {
 			state.push(v)
 		}
+
+		// After calling any function, the memory buffer might have changed. So we need to re-define the variables.
+		if c.needMemory {
+			// When these are not used in the following instructions, they will be optimized out.
+			// So in any case, we define them here.
+ _ = c.getMemoryBaseValue() + _ = c.getMemoryLenValue() + } case wasm.OpcodeDrop: _ = state.pop() default: From ec9e15d809c54aeeb1d6afaed1ffab351c3c910a Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Fri, 11 Aug 2023 10:51:09 +0900 Subject: [PATCH 7/8] basic Signed-off-by: Takeshi Yoneda --- internal/engine/compiler/engine.go | 3 + internal/engine/interpreter/interpreter.go | 3 + .../engine/wazevo/backend/backend_test.go | 1601 +++++++++-------- .../engine/wazevo/backend/isa/arm64/abi.go | 7 +- .../wazevo/backend/isa/arm64/abi_go_entry.go | 34 +- .../backend/isa/arm64/abi_go_entry_test.go | 58 +- .../engine/wazevo/backend/isa/arm64/instr.go | 2 +- .../backend/isa/arm64/instr_encoding_test.go | 2 +- .../wazevo/backend/isa/arm64/lower_instr.go | 3 +- .../wazevo/backend/isa/arm64/lower_mem.go | 6 +- .../isa/arm64/machine_pro_epi_logue_test.go | 4 +- internal/engine/wazevo/call_engine.go | 2 + internal/engine/wazevo/e2e_test.go | 41 + internal/engine/wazevo/engine.go | 7 +- .../engine/wazevo/frontend/frontend_test.go | 352 ++-- internal/engine/wazevo/frontend/lower.go | 53 +- internal/engine/wazevo/module_engine.go | 15 +- internal/engine/wazevo/module_engine_test.go | 1 + internal/engine/wazevo/ssa/instructions.go | 3 +- internal/engine/wazevo/testcases/testcases.go | 82 +- internal/wasm/engine.go | 3 + internal/wasm/store.go | 2 + internal/wasm/store_test.go | 3 + 23 files changed, 1297 insertions(+), 990 deletions(-) diff --git a/internal/engine/compiler/engine.go b/internal/engine/compiler/engine.go index 4eaf906570..95b2688555 100644 --- a/internal/engine/compiler/engine.go +++ b/internal/engine/compiler/engine.go @@ -661,6 +661,9 @@ func (e *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Refe return uintptr(unsafe.Pointer(&e.functions[funcIndex])) } +// DoneInstantiation implements wasm.ModuleEngine. +func (e *moduleEngine) DoneInstantiation() {} + // NewFunction implements wasm.ModuleEngine. func (e *moduleEngine) NewFunction(index wasm.Index) api.Function { return e.newFunction(&e.functions[index]) diff --git a/internal/engine/interpreter/interpreter.go b/internal/engine/interpreter/interpreter.go index ad3e7f7603..81cb6b9538 100644 --- a/internal/engine/interpreter/interpreter.go +++ b/internal/engine/interpreter/interpreter.go @@ -442,6 +442,9 @@ func (e *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm e.functions[index] = imported.functions[indexInImportedModule] } +// DoneInstantiation implements wasm.ModuleEngine. +func (e *moduleEngine) DoneInstantiation() {} + // FunctionInstanceReference implements the same method as documented on wasm.ModuleEngine. func (e *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference { return uintptr(unsafe.Pointer(&e.functions[funcIndex])) diff --git a/internal/engine/wazevo/backend/backend_test.go b/internal/engine/wazevo/backend/backend_test.go index 5ab53d8aef..b24828c3a7 100644 --- a/internal/engine/wazevo/backend/backend_test.go +++ b/internal/engine/wazevo/backend/backend_test.go @@ -62,19 +62,19 @@ L1 (SSA Block: blk0): name: "consts", m: testcases.Constants.Module, afterLoweringARM64: ` L1 (SSA Block: blk0): - orr w0, wzr, #0x1 - orr x1, xzr, #0x2 - ldr s0, #8; b 8; data.f32 32.000000 ldr d1, #8; b 16; data.f64 64.000000 + ldr s0, #8; b 8; data.f32 32.000000 + orr x1, xzr, #0x2 + orr w0, wzr, #0x1 ret `, afterFinalizeARM64: ` L1 (SSA Block: blk0): str x30, [sp, #-0x10]! 
- orr w0, wzr, #0x1 - orr x1, xzr, #0x2 - ldr s0, #8; b 8; data.f32 32.000000 ldr d1, #8; b 16; data.f64 64.000000 + ldr s0, #8; b 8; data.f32 32.000000 + orr x1, xzr, #0x2 + orr w0, wzr, #0x1 ldr x30, [sp], #0x10 ret `, @@ -120,9 +120,9 @@ L1 (SSA Block: blk0): fdiv d16?, d15?, d4? fmax d17?, d16?, d4? fmin d18?, d17?, d4? - mov x0, x6? - mov q0.8b, q12?.8b mov q1.8b, q18?.8b + mov q0.8b, q12?.8b + mov x0, x6? ret `, afterFinalizeARM64: ` @@ -151,15 +151,15 @@ L1 (SSA Block: blk0): afterLoweringARM64: ` L1 (SSA Block: blk0): mov x2?, x2 - mov x0, x2? mov x1, xzr + mov x0, x2? ret `, afterFinalizeARM64: ` L1 (SSA Block: blk0): str x30, [sp, #-0x10]! - mov x0, x2 mov x1, xzr + mov x0, x2 ldr x30, [sp], #0x10 ret `, @@ -170,15 +170,15 @@ L1 (SSA Block: blk0): L1 (SSA Block: blk0): mov x2?, x2 mov x3?, x3 - mov x0, x3? mov x1, x2? + mov x0, x3? ret `, afterFinalizeARM64: ` L1 (SSA Block: blk0): str x30, [sp, #-0x10]! - mov x0, x3 mov x1, x2 + mov x0, x3 ldr x30, [sp], #0x10 ret `, @@ -190,16 +190,16 @@ L1 (SSA Block: blk0): mov x2?, x2 mov x3?, x3 L2 (SSA Block: blk1): - mov x0, x3? mov x1, x2? + mov x0, x3? ret `, afterFinalizeARM64: ` L1 (SSA Block: blk0): str x30, [sp, #-0x10]! L2 (SSA Block: blk1): - mov x0, x3 mov x1, x2 + mov x0, x3 ldr x30, [sp], #0x10 ret `, @@ -231,7 +231,7 @@ L3 (SSA Block: blk1): L2 (SSA Block: blk2): movz x27, #0x2, LSL 0 str w27, [x0?] - trap_sequence w0? + exit_sequence w0? `, afterFinalizeARM64: ` L1 (SSA Block: blk0): @@ -244,7 +244,7 @@ L3 (SSA Block: blk1): L2 (SSA Block: blk2): movz x27, #0x2, LSL 0 str w27, [x0] - trap_sequence w0 + exit_sequence w0 `, }, { @@ -551,8 +551,8 @@ L1 (SSA Block: blk0): bl f3 mov x5?, x0 mov x6?, x1 - mov x0, x5? mov x1, x6? + mov x0, x5? ret `, afterFinalizeARM64: ` @@ -762,46 +762,46 @@ L1 (SSA Block: blk0): ldr s44?, [sp, #0xb0] ldr d45?, [sp, #0xb8] add sp, sp, #0xc0 - mov x0, x6? - mov x1, x7? - mov q0.8b, q8?.8b - mov q1.8b, q9?.8b - mov x2, x10? - mov x3, x11? - mov q2.8b, q12?.8b - mov q3.8b, q13?.8b - mov x4, x14? - mov x5, x15? - mov q4.8b, q16?.8b - mov q5.8b, q17?.8b - mov x6, x18? - mov x7, x19? 
- mov q6.8b, q20?.8b - mov q7.8b, q21?.8b - str w22?, [#ret_space, #0x0] - str x23?, [#ret_space, #0x8] - str s24?, [#ret_space, #0x10] - str d25?, [#ret_space, #0x18] - str w26?, [#ret_space, #0x20] - str x27?, [#ret_space, #0x28] - str s28?, [#ret_space, #0x30] - str d29?, [#ret_space, #0x38] - str w30?, [#ret_space, #0x40] - str x31?, [#ret_space, #0x48] - str s32?, [#ret_space, #0x50] - str d33?, [#ret_space, #0x58] - str w34?, [#ret_space, #0x60] - str x35?, [#ret_space, #0x68] - str s36?, [#ret_space, #0x70] - str d37?, [#ret_space, #0x78] - str w38?, [#ret_space, #0x80] - str x39?, [#ret_space, #0x88] - str s40?, [#ret_space, #0x90] - str d41?, [#ret_space, #0x98] - str w42?, [#ret_space, #0xa0] - str x43?, [#ret_space, #0xa8] - str s44?, [#ret_space, #0xb0] str d45?, [#ret_space, #0xb8] + str s44?, [#ret_space, #0xb0] + str x43?, [#ret_space, #0xa8] + str w42?, [#ret_space, #0xa0] + str d41?, [#ret_space, #0x98] + str s40?, [#ret_space, #0x90] + str x39?, [#ret_space, #0x88] + str w38?, [#ret_space, #0x80] + str d37?, [#ret_space, #0x78] + str s36?, [#ret_space, #0x70] + str x35?, [#ret_space, #0x68] + str w34?, [#ret_space, #0x60] + str d33?, [#ret_space, #0x58] + str s32?, [#ret_space, #0x50] + str x31?, [#ret_space, #0x48] + str w30?, [#ret_space, #0x40] + str d29?, [#ret_space, #0x38] + str s28?, [#ret_space, #0x30] + str x27?, [#ret_space, #0x28] + str w26?, [#ret_space, #0x20] + str d25?, [#ret_space, #0x18] + str s24?, [#ret_space, #0x10] + str x23?, [#ret_space, #0x8] + str w22?, [#ret_space, #0x0] + mov q7.8b, q21?.8b + mov q6.8b, q20?.8b + mov x7, x19? + mov x6, x18? + mov q5.8b, q17?.8b + mov q4.8b, q16?.8b + mov x5, x15? + mov x4, x14? + mov q3.8b, q13?.8b + mov q2.8b, q12?.8b + mov x3, x11? + mov x2, x10? + mov q1.8b, q9?.8b + mov q0.8b, q8?.8b + mov x1, x7? + mov x0, x6? ret `, afterFinalizeARM64: ` @@ -839,30 +839,30 @@ L1 (SSA Block: blk0): ldr s9, [sp, #0xb0] ldr d8, [sp, #0xb8] add sp, sp, #0xc0 - str w19, [sp, #0x50] - str x18, [sp, #0x58] - str s19, [sp, #0x60] - str d18, [sp, #0x68] - str w17, [sp, #0x70] - str x16, [sp, #0x78] - str s17, [sp, #0x80] - str d16, [sp, #0x88] - str w15, [sp, #0x90] - str x14, [sp, #0x98] - str s15, [sp, #0xa0] - str d14, [sp, #0xa8] - str w13, [sp, #0xb0] - str x12, [sp, #0xb8] - str s13, [sp, #0xc0] - str d12, [sp, #0xc8] - str w11, [sp, #0xd0] - str x10, [sp, #0xd8] - str s11, [sp, #0xe0] - str d10, [sp, #0xe8] - str w9, [sp, #0xf0] - str x8, [sp, #0xf8] - str s9, [sp, #0x100] str d8, [sp, #0x108] + str s9, [sp, #0x100] + str x8, [sp, #0xf8] + str w9, [sp, #0xf0] + str d10, [sp, #0xe8] + str s11, [sp, #0xe0] + str x10, [sp, #0xd8] + str w11, [sp, #0xd0] + str d12, [sp, #0xc8] + str s13, [sp, #0xc0] + str x12, [sp, #0xb8] + str w13, [sp, #0xb0] + str d14, [sp, #0xa8] + str s15, [sp, #0xa0] + str x14, [sp, #0x98] + str w15, [sp, #0x90] + str d16, [sp, #0x88] + str s17, [sp, #0x80] + str x16, [sp, #0x78] + str w17, [sp, #0x70] + str d18, [sp, #0x68] + str s19, [sp, #0x60] + str x18, [sp, #0x58] + str w19, [sp, #0x50] ldr q19, [sp], #0x10 ldr q18, [sp], #0x10 ldr x19, [sp], #0x10 @@ -879,10 +879,10 @@ L1 (SSA Block: blk0): mov x11?, x7 ldr s20?, [#arg_space, #0x20] ldr d29?, [#arg_space, #0x68] - mov x0, x2? - mov x1, x11? - mov q0.8b, q20?.8b mov q1.8b, q29?.8b + mov q0.8b, q20?.8b + mov x1, x11? + mov x0, x2? ret `, afterFinalizeARM64: ` @@ -890,8 +890,8 @@ L1 (SSA Block: blk0): str x30, [sp, #-0x10]! 
ldr s0, [sp, #0x30] ldr d1, [sp, #0x78] - mov x0, x2 mov x1, x7 + mov x0, x2 ldr x30, [sp], #0x10 ret `, @@ -904,95 +904,89 @@ L1 (SSA Block: blk0): mov x3?, x3 mov q4?.8b, q0.8b mov q5?.8b, q1.8b - mov x0, x2? - mov x1, x3? - mov q0.8b, q4?.8b - mov q1.8b, q5?.8b - mov x2, x2? - mov x3, x3? - mov q2.8b, q4?.8b - mov q3.8b, q5?.8b - mov x4, x2? - mov x5, x3? - mov q4.8b, q4?.8b - mov q5.8b, q5?.8b - mov x6, x2? - mov x7, x3? - mov q6.8b, q4?.8b - mov q7.8b, q5?.8b - str w2?, [#ret_space, #0x0] - str x3?, [#ret_space, #0x8] - str s4?, [#ret_space, #0x10] - str d5?, [#ret_space, #0x18] - str w2?, [#ret_space, #0x20] - str x3?, [#ret_space, #0x28] - str s4?, [#ret_space, #0x30] - str d5?, [#ret_space, #0x38] - str w2?, [#ret_space, #0x40] - str x3?, [#ret_space, #0x48] - str s4?, [#ret_space, #0x50] - str d5?, [#ret_space, #0x58] - str w2?, [#ret_space, #0x60] - str x3?, [#ret_space, #0x68] - str s4?, [#ret_space, #0x70] - str d5?, [#ret_space, #0x78] - str w2?, [#ret_space, #0x80] - str x3?, [#ret_space, #0x88] - str s4?, [#ret_space, #0x90] - str d5?, [#ret_space, #0x98] - str w2?, [#ret_space, #0xa0] - str x3?, [#ret_space, #0xa8] - str s4?, [#ret_space, #0xb0] str d5?, [#ret_space, #0xb8] + str s4?, [#ret_space, #0xb0] + str x3?, [#ret_space, #0xa8] + str w2?, [#ret_space, #0xa0] + str d5?, [#ret_space, #0x98] + str s4?, [#ret_space, #0x90] + str x3?, [#ret_space, #0x88] + str w2?, [#ret_space, #0x80] + str d5?, [#ret_space, #0x78] + str s4?, [#ret_space, #0x70] + str x3?, [#ret_space, #0x68] + str w2?, [#ret_space, #0x60] + str d5?, [#ret_space, #0x58] + str s4?, [#ret_space, #0x50] + str x3?, [#ret_space, #0x48] + str w2?, [#ret_space, #0x40] + str d5?, [#ret_space, #0x38] + str s4?, [#ret_space, #0x30] + str x3?, [#ret_space, #0x28] + str w2?, [#ret_space, #0x20] + str d5?, [#ret_space, #0x18] + str s4?, [#ret_space, #0x10] + str x3?, [#ret_space, #0x8] + str w2?, [#ret_space, #0x0] + mov q7.8b, q5?.8b + mov q6.8b, q4?.8b + mov x7, x3? + mov x6, x2? + mov q5.8b, q5?.8b + mov q4.8b, q4?.8b + mov x5, x3? + mov x4, x2? + mov q3.8b, q5?.8b + mov q2.8b, q4?.8b + mov x3, x3? + mov x2, x2? + mov q1.8b, q5?.8b + mov q0.8b, q4?.8b + mov x1, x3? + mov x0, x2? ret `, afterFinalizeARM64: ` L1 (SSA Block: blk0): str x30, [sp, #-0x10]! 
- mov x8, x2 - mov x9, x3 - mov q8.8b, q0.8b - mov q9.8b, q1.8b - mov x0, x8 - mov x1, x9 - mov q0.8b, q8.8b - mov q1.8b, q9.8b - mov x2, x8 - mov x3, x9 - mov q2.8b, q8.8b - mov q3.8b, q9.8b - mov x4, x8 - mov x5, x9 - mov q4.8b, q8.8b - mov q5.8b, q9.8b - mov x6, x8 - mov x7, x9 - mov q6.8b, q8.8b - mov q7.8b, q9.8b - str w8, [sp, #0x10] - str x9, [sp, #0x18] - str s8, [sp, #0x20] - str d9, [sp, #0x28] - str w8, [sp, #0x30] - str x9, [sp, #0x38] - str s8, [sp, #0x40] - str d9, [sp, #0x48] - str w8, [sp, #0x50] - str x9, [sp, #0x58] - str s8, [sp, #0x60] - str d9, [sp, #0x68] - str w8, [sp, #0x70] - str x9, [sp, #0x78] - str s8, [sp, #0x80] - str d9, [sp, #0x88] - str w8, [sp, #0x90] - str x9, [sp, #0x98] - str s8, [sp, #0xa0] - str d9, [sp, #0xa8] - str w8, [sp, #0xb0] - str x9, [sp, #0xb8] - str s8, [sp, #0xc0] - str d9, [sp, #0xc8] + mov x0, x2 + mov x1, x3 + str d1, [sp, #0xc8] + str s0, [sp, #0xc0] + str x1, [sp, #0xb8] + str w0, [sp, #0xb0] + str d1, [sp, #0xa8] + str s0, [sp, #0xa0] + str x1, [sp, #0x98] + str w0, [sp, #0x90] + str d1, [sp, #0x88] + str s0, [sp, #0x80] + str x1, [sp, #0x78] + str w0, [sp, #0x70] + str d1, [sp, #0x68] + str s0, [sp, #0x60] + str x1, [sp, #0x58] + str w0, [sp, #0x50] + str d1, [sp, #0x48] + str s0, [sp, #0x40] + str x1, [sp, #0x38] + str w0, [sp, #0x30] + str d1, [sp, #0x28] + str s0, [sp, #0x20] + str x1, [sp, #0x18] + str w0, [sp, #0x10] + mov q7.8b, q1.8b + mov q6.8b, q0.8b + mov x7, x1 + mov x6, x0 + mov q5.8b, q1.8b + mov q4.8b, q0.8b + mov x5, x1 + mov x4, x0 + mov q3.8b, q1.8b + mov q2.8b, q0.8b + mov x3, x1 + mov x2, x0 ldr x30, [sp], #0x10 ret `, @@ -1018,69 +1012,69 @@ L1 (SSA Block: blk0): mov q16?.8b, q6.8b mov q17?.8b, q7.8b ldr w18?, [#arg_space, #0x10] - ldr x19?, [#arg_space, #0x18] - ldr s20?, [#arg_space, #0x20] - ldr d21?, [#arg_space, #0x28] - ldr w22?, [#arg_space, #0x30] - ldr x23?, [#arg_space, #0x38] - ldr s24?, [#arg_space, #0x40] - ldr d25?, [#arg_space, #0x48] - ldr w26?, [#arg_space, #0x50] - ldr x27?, [#arg_space, #0x58] - ldr s28?, [#arg_space, #0x60] - ldr d29?, [#arg_space, #0x68] - ldr w30?, [#arg_space, #0x70] - ldr x31?, [#arg_space, #0x78] - ldr s32?, [#arg_space, #0x80] - ldr d33?, [#arg_space, #0x88] - ldr w34?, [#arg_space, #0x90] - ldr x35?, [#arg_space, #0x98] - ldr s36?, [#arg_space, #0xa0] - ldr d37?, [#arg_space, #0xa8] - ldr w38?, [#arg_space, #0xb0] - ldr x39?, [#arg_space, #0xb8] - ldr s40?, [#arg_space, #0xc0] - ldr d41?, [#arg_space, #0xc8] - mov q0.8b, q41?.8b - mov q1.8b, q40?.8b - mov x0, x39? - mov x1, x38? - mov q2.8b, q37?.8b - mov q3.8b, q36?.8b - mov x2, x35? - mov x3, x34? - mov q4.8b, q33?.8b - mov q5.8b, q32?.8b - mov x4, x31? - mov x5, x30? - mov q6.8b, q29?.8b - mov q7.8b, q28?.8b - mov x6, x27? - mov x7, x26? 
- str d25?, [#ret_space, #0x0] - str s24?, [#ret_space, #0x8] - str x23?, [#ret_space, #0x10] - str w22?, [#ret_space, #0x18] - str d21?, [#ret_space, #0x20] - str s20?, [#ret_space, #0x28] - str x19?, [#ret_space, #0x30] - str w18?, [#ret_space, #0x38] - str d17?, [#ret_space, #0x40] - str s16?, [#ret_space, #0x48] - str x15?, [#ret_space, #0x50] - str w14?, [#ret_space, #0x58] - str d13?, [#ret_space, #0x60] - str s12?, [#ret_space, #0x68] - str x11?, [#ret_space, #0x70] - str w10?, [#ret_space, #0x78] - str d9?, [#ret_space, #0x80] - str s8?, [#ret_space, #0x88] - str x7?, [#ret_space, #0x90] - str w6?, [#ret_space, #0x98] - str d5?, [#ret_space, #0xa0] - str s4?, [#ret_space, #0xa8] - str x3?, [#ret_space, #0xb0] + ldr x19?, [#arg_space, #0x18] + ldr s20?, [#arg_space, #0x20] + ldr d21?, [#arg_space, #0x28] + ldr w22?, [#arg_space, #0x30] + ldr x23?, [#arg_space, #0x38] + ldr s24?, [#arg_space, #0x40] + ldr d25?, [#arg_space, #0x48] + ldr w26?, [#arg_space, #0x50] + ldr x27?, [#arg_space, #0x58] + ldr s28?, [#arg_space, #0x60] + ldr d29?, [#arg_space, #0x68] + ldr w30?, [#arg_space, #0x70] + ldr x31?, [#arg_space, #0x78] + ldr s32?, [#arg_space, #0x80] + ldr d33?, [#arg_space, #0x88] + ldr w34?, [#arg_space, #0x90] + ldr x35?, [#arg_space, #0x98] + ldr s36?, [#arg_space, #0xa0] + ldr d37?, [#arg_space, #0xa8] + ldr w38?, [#arg_space, #0xb0] + ldr x39?, [#arg_space, #0xb8] + ldr s40?, [#arg_space, #0xc0] + ldr d41?, [#arg_space, #0xc8] str w2?, [#ret_space, #0xb8] + str x3?, [#ret_space, #0xb0] + str s4?, [#ret_space, #0xa8] + str d5?, [#ret_space, #0xa0] + str w6?, [#ret_space, #0x98] + str x7?, [#ret_space, #0x90] + str s8?, [#ret_space, #0x88] + str d9?, [#ret_space, #0x80] + str w10?, [#ret_space, #0x78] + str x11?, [#ret_space, #0x70] + str s12?, [#ret_space, #0x68] + str d13?, [#ret_space, #0x60] + str w14?, [#ret_space, #0x58] + str x15?, [#ret_space, #0x50] + str s16?, [#ret_space, #0x48] + str d17?, [#ret_space, #0x40] + str w18?, [#ret_space, #0x38] + str x19?, [#ret_space, #0x30] + str s20?, [#ret_space, #0x28] + str d21?, [#ret_space, #0x20] + str w22?, [#ret_space, #0x18] + str x23?, [#ret_space, #0x10] + str s24?, [#ret_space, #0x8] + str d25?, [#ret_space, #0x0] + mov x7, x26? + mov x6, x27? + mov q7.8b, q28?.8b + mov q6.8b, q29?.8b + mov x5, x30? + mov x4, x31? + mov q5.8b, q32?.8b + mov q4.8b, q33?.8b + mov x3, x34? + mov x2, x35? + mov q3.8b, q36?.8b + mov q2.8b, q37?.8b + mov x1, x38? + mov x0, x39? 
+ mov q1.8b, q40?.8b + mov q0.8b, q41?.8b ret `, afterFinalizeARM64: ` @@ -1130,30 +1124,30 @@ L1 (SSA Block: blk0): ldr x0, [sp, #0x108] ldr s1, [sp, #0x110] ldr d0, [sp, #0x118] - str d15, [sp, #0x120] - str s16, [sp, #0x128] - str x13, [sp, #0x130] - str w14, [sp, #0x138] - str d17, [sp, #0x140] - str s18, [sp, #0x148] - str x15, [sp, #0x150] - str w16, [sp, #0x158] - str d19, [sp, #0x160] - str s14, [sp, #0x168] - str x17, [sp, #0x170] - str w18, [sp, #0x178] - str d13, [sp, #0x180] - str s12, [sp, #0x188] - str x19, [sp, #0x190] - str w12, [sp, #0x198] - str d11, [sp, #0x1a0] - str s10, [sp, #0x1a8] - str x11, [sp, #0x1b0] - str w10, [sp, #0x1b8] - str d9, [sp, #0x1c0] - str s8, [sp, #0x1c8] - str x9, [sp, #0x1d0] str w8, [sp, #0x1d8] + str x9, [sp, #0x1d0] + str s8, [sp, #0x1c8] + str d9, [sp, #0x1c0] + str w10, [sp, #0x1b8] + str x11, [sp, #0x1b0] + str s10, [sp, #0x1a8] + str d11, [sp, #0x1a0] + str w12, [sp, #0x198] + str x19, [sp, #0x190] + str s12, [sp, #0x188] + str d13, [sp, #0x180] + str w18, [sp, #0x178] + str x17, [sp, #0x170] + str s14, [sp, #0x168] + str d19, [sp, #0x160] + str w16, [sp, #0x158] + str x15, [sp, #0x150] + str s18, [sp, #0x148] + str d17, [sp, #0x140] + str w14, [sp, #0x138] + str x13, [sp, #0x130] + str s16, [sp, #0x128] + str d15, [sp, #0x120] ldr q19, [sp], #0x10 ldr q18, [sp], #0x10 ldr x19, [sp], #0x10 @@ -1211,26 +1205,26 @@ L1 (SSA Block: blk0): cset x24?, hs subs xzr, x4?, x5? cset x25?, hs - mov x0, x6? - mov x1, x7? - mov x2, x8? - mov x3, x9? - mov x4, x10? - mov x5, x11? - mov x6, x12? - mov x7, x13? - str w14?, [#ret_space, #0x0] - str w15?, [#ret_space, #0x8] - str w16?, [#ret_space, #0x10] - str w17?, [#ret_space, #0x18] - str w18?, [#ret_space, #0x20] - str w19?, [#ret_space, #0x28] - str w20?, [#ret_space, #0x30] - str w21?, [#ret_space, #0x38] - str w22?, [#ret_space, #0x40] - str w23?, [#ret_space, #0x48] - str w24?, [#ret_space, #0x50] str w25?, [#ret_space, #0x58] + str w24?, [#ret_space, #0x50] + str w23?, [#ret_space, #0x48] + str w22?, [#ret_space, #0x40] + str w21?, [#ret_space, #0x38] + str w20?, [#ret_space, #0x30] + str w19?, [#ret_space, #0x28] + str w18?, [#ret_space, #0x20] + str w17?, [#ret_space, #0x18] + str w16?, [#ret_space, #0x10] + str w15?, [#ret_space, #0x8] + str w14?, [#ret_space, #0x0] + mov x7, x13? + mov x6, x12? + mov x5, x11? + mov x4, x10? + mov x3, x9? + mov x2, x8? + mov x1, x7? + mov x0, x6? ret `, afterFinalizeARM64: ` @@ -1240,62 +1234,62 @@ L1 (SSA Block: blk0): str x19, [sp, #-0x10]! str x20, [sp, #-0x10]! str x21, [sp, #-0x10]! 
- mov x18, x2 + mov x8, x2 mov x20, x3 mov x19, x4 mov x21, x5 - subs wzr, w18, w20 + subs wzr, w8, w20 cset x0, eq subs xzr, x19, x21 cset x1, eq - subs wzr, w18, w20 + subs wzr, w8, w20 cset x2, ne subs xzr, x19, x21 cset x3, ne - subs wzr, w18, w20 + subs wzr, w8, w20 cset x4, lt subs xzr, x19, x21 cset x5, lt - subs wzr, w18, w20 + subs wzr, w8, w20 cset x6, lo subs xzr, x19, x21 cset x7, lo - subs wzr, w18, w20 + subs wzr, w8, w20 + cset x18, gt + subs xzr, x19, x21 cset x17, gt + subs wzr, w8, w20 + cset x16, hi subs xzr, x19, x21 - cset x16, gt - subs wzr, w18, w20 cset x15, hi + subs wzr, w8, w20 + cset x14, le subs xzr, x19, x21 - cset x14, hi - subs wzr, w18, w20 cset x13, le + subs wzr, w8, w20 + cset x12, ls subs xzr, x19, x21 - cset x12, le - subs wzr, w18, w20 cset x11, ls + subs wzr, w8, w20 + cset x10, ge subs xzr, x19, x21 - cset x10, ls - subs wzr, w18, w20 cset x9, ge - subs xzr, x19, x21 - cset x8, ge - subs wzr, w18, w20 - cset x18, hs + subs wzr, w8, w20 + cset x8, hs subs xzr, x19, x21 cset x19, hs - str w17, [sp, #0x50] - str w16, [sp, #0x58] - str w15, [sp, #0x60] - str w14, [sp, #0x68] - str w13, [sp, #0x70] - str w12, [sp, #0x78] - str w11, [sp, #0x80] - str w10, [sp, #0x88] - str w9, [sp, #0x90] - str w8, [sp, #0x98] - str w18, [sp, #0xa0] str w19, [sp, #0xa8] + str w8, [sp, #0xa0] + str w9, [sp, #0x98] + str w10, [sp, #0x90] + str w11, [sp, #0x88] + str w12, [sp, #0x80] + str w13, [sp, #0x78] + str w14, [sp, #0x70] + str w15, [sp, #0x68] + str w16, [sp, #0x60] + str w17, [sp, #0x58] + str w18, [sp, #0x50] ldr x21, [sp], #0x10 ldr x20, [sp], #0x10 ldr x19, [sp], #0x10 @@ -1325,43 +1319,43 @@ L1 (SSA Block: blk0): lsr w20?, w2?, 0x1f lsr x21?, x4?, x5? lsr x23?, x4?, 0x20 - mov x0, x6? - mov x1, x8? - mov x2, x9? - mov x3, x11? - mov x4, x12? - mov x5, x14? - mov x6, x15? - mov x7, x17? - str w18?, [#ret_space, #0x0] - str w20?, [#ret_space, #0x8] - str x21?, [#ret_space, #0x10] str x23?, [#ret_space, #0x18] + str x21?, [#ret_space, #0x10] + str w20?, [#ret_space, #0x8] + str w18?, [#ret_space, #0x0] + mov x7, x17? + mov x6, x15? + mov x5, x14? + mov x4, x12? + mov x3, x11? + mov x2, x9? + mov x1, x8? + mov x0, x6? ret `, afterFinalizeARM64: ` L1 (SSA Block: blk0): str x30, [sp, #-0x10]! mov x9, x2 - mov x8, x3 - mov x11, x4 - mov x10, x5 - lsl w0, w9, w8 + mov x10, x3 + mov x8, x4 + mov x11, x5 + lsl w0, w9, w10 lsl w1, w9, 0x1f - lsl x2, x11, x10 - lsl x3, x11, 0x20 - asr w4, w9, w8 + lsl x2, x8, x11 + lsl x3, x8, 0x20 + asr w4, w9, w10 asr w5, w9, 0x1f - asr x6, x11, x10 - asr x7, x11, 0x20 - lsr w8, w9, w8 + asr x6, x8, x11 + asr x7, x8, 0x20 + lsr w10, w9, w10 lsr w9, w9, 0x1f - lsr x10, x11, x10 - lsr x11, x11, 0x20 - str w8, [sp, #0x10] + lsr x11, x8, x11 + lsr x8, x8, 0x20 + str x8, [sp, #0x28] + str x11, [sp, #0x20] str w9, [sp, #0x18] - str x10, [sp, #0x20] - str x11, [sp, #0x28] + str w10, [sp, #0x10] ldr x30, [sp], #0x10 ret `, @@ -1380,13 +1374,13 @@ L1 (SSA Block: blk0): sxtw x8?, w3? sxtb w9?, w2? sxth w10?, w2? - mov x0, x4? - mov x1, x5? - mov x2, x6? - mov x3, x7? - mov x4, x8? - mov x5, x9? mov x6, x10? + mov x5, x9? + mov x4, x8? + mov x3, x7? + mov x2, x6? + mov x1, x5? + mov x0, x4? ret `, afterFinalizeARM64: ` @@ -1438,18 +1432,18 @@ L1 (SSA Block: blk0): cset x16?, ls fcmp s4?, s5? cset x17?, ge - mov x0, x6? - mov x1, x7? - mov x2, x8? - mov x3, x9? - mov x4, x10? - mov x5, x11? - mov x6, x12? - mov x7, x13? 
- str w14?, [#ret_space, #0x0] - str w15?, [#ret_space, #0x8] - str w16?, [#ret_space, #0x10] str w17?, [#ret_space, #0x18] + str w16?, [#ret_space, #0x10] + str w15?, [#ret_space, #0x8] + str w14?, [#ret_space, #0x0] + mov x7, x13? + mov x6, x12? + mov x5, x11? + mov x4, x10? + mov x3, x9? + mov x2, x8? + mov x1, x7? + mov x0, x6? ret `, afterFinalizeARM64: ` @@ -1479,10 +1473,10 @@ L1 (SSA Block: blk0): cset x9, ls fcmp s2, s3 cset x8, ge - str w11, [sp, #0x10] - str w10, [sp, #0x18] - str w9, [sp, #0x20] str w8, [sp, #0x28] + str w9, [sp, #0x20] + str w10, [sp, #0x18] + str w11, [sp, #0x10] ldr x30, [sp], #0x10 ret `, @@ -1612,8 +1606,8 @@ L1 (SSA Block: blk0): fadd s119?, s68?, s118? fadd s120?, s66?, s119? fadd s121?, s64?, s120? - mov x0, x62? mov q0.8b, q121?.8b + mov x0, x62? ret `, afterFinalizeARM64: ` @@ -1884,7 +1878,7 @@ L1 (SSA Block: blk0): `, }, { - name: "imported_function_call", m: testcases.MemoryLoads.Module, + name: "memory_load_basic", m: testcases.MemoryLoadBasic.Module, afterLoweringARM64: ` L1 (SSA Block: blk0): mov x0?, x0 @@ -1894,256 +1888,323 @@ L1 (SSA Block: blk0): ldr w5?, [x1?, #0x8] add x6?, x4?, #0x4 subs xzr, x5?, x6? - b.hi #0x20 + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldr w8?, [x1?] - ldr w9?, [x8?] - uxtw x11?, w2? - add x12?, x11?, #0x8 - subs xzr, x5?, x12? - b.hi #0x20 + exit_sequence w0? + ldr x8?, [x1?] + add x11?, x8?, x4? + ldr w10?, [x11?] + mov x0, x10? + ret +`, + afterFinalizeARM64: ` +L1 (SSA Block: blk0): + str x30, [sp, #-0x10]! + uxtw x8, w2 + ldr w10, [x1, #0x8] + add x9, x8, #0x4 + subs xzr, x10, x9 + b.hs #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0] + exit_sequence w0 + ldr x9, [x1] + add x8, x9, x8 + ldr w0, [x8] + ldr x30, [sp], #0x10 + ret +`, + }, + { + name: "memory_loads", m: testcases.MemoryLoads.Module, + afterLoweringARM64: ` +L1 (SSA Block: blk0): + mov x0?, x0 + mov x1?, x1 + mov x2?, x2 + uxtw x4?, w2? + ldr w5?, [x1?, #0x8] + add x6?, x4?, #0x4 + subs xzr, x5?, x6? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldr x14?, [x8?] - uxtw x16?, w2? - add x17?, x16?, #0x4 - subs xzr, x5?, x17? - b.hi #0x20 + exit_sequence w0? + ldr x8?, [x1?] + add x200?, x8?, x4? + ldr w10?, [x200?] + uxtw x12?, w2? + add x13?, x12?, #0x8 + subs xzr, x5?, x13? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldr s19?, [x8?] - uxtw x21?, w2? - add x22?, x21?, #0x8 - subs xzr, x5?, x22? - b.hi #0x20 + exit_sequence w0? + add x199?, x8?, x12? + ldr x16?, [x199?] + uxtw x18?, w2? + add x19?, x18?, #0x4 + subs xzr, x5?, x19? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldr d24?, [x8?] - uxtw x26?, w2? - add x27?, x26?, #0x13 - subs xzr, x5?, x27? - b.hi #0x20 + exit_sequence w0? + add x198?, x8?, x18? + ldr s22?, [x198?] + uxtw x24?, w2? + add x25?, x24?, #0x8 + subs xzr, x5?, x25? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldr w29?, [x8?, #0xf] - uxtw x31?, w2? - add x32?, x31?, #0x17 - subs xzr, x5?, x32? - b.hi #0x20 + exit_sequence w0? + add x197?, x8?, x24? + ldr d28?, [x197?] + uxtw x30?, w2? + add x31?, x30?, #0x13 + subs xzr, x5?, x31? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldr x34?, [x8?, #0xf] + exit_sequence w0? + add x196?, x8?, x30? + ldr w34?, [x196?, #0xf] uxtw x36?, w2? - add x37?, x36?, #0x13 + add x37?, x36?, #0x17 subs xzr, x5?, x37? - b.hi #0x20 - movz x27, #0x3, LSL 0 - str w27, [x0?] - trap_sequence w0? - ldr s39?, [x8?, #0xf] - uxtw x41?, w2? 
- add x42?, x41?, #0x17 - subs xzr, x5?, x42? - b.hi #0x20 + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldr d44?, [x8?, #0xf] - uxtw x46?, w2? - add x47?, x46?, #0x1 - subs xzr, x5?, x47? - b.hi #0x20 + exit_sequence w0? + add x195?, x8?, x36? + ldr x40?, [x195?, #0xf] + uxtw x42?, w2? + add x43?, x42?, #0x13 + subs xzr, x5?, x43? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrb w49?, [x8?] - uxtw x51?, w2? - add x52?, x51?, #0x10 - subs xzr, x5?, x52? - b.hi #0x20 + exit_sequence w0? + add x194?, x8?, x42? + ldr s46?, [x194?, #0xf] + uxtw x48?, w2? + add x49?, x48?, #0x17 + subs xzr, x5?, x49? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrb w54?, [x8?, #0xf] - uxtw x56?, w2? - add x57?, x56?, #0x1 - subs xzr, x5?, x57? - b.hi #0x20 + exit_sequence w0? + add x193?, x8?, x48? + ldr d52?, [x193?, #0xf] + uxtw x54?, w2? + add x55?, x54?, #0x1 + subs xzr, x5?, x55? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrb w59?, [x8?] - uxtw x61?, w2? - add x62?, x61?, #0x10 - subs xzr, x5?, x62? - b.hi #0x20 + exit_sequence w0? + add x192?, x8?, x54? + ldrsb w58?, [x192?] + uxtw x60?, w2? + add x61?, x60?, #0x10 + subs xzr, x5?, x61? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrb w64?, [x8?, #0xf] + exit_sequence w0? + add x191?, x8?, x60? + ldrsb w64?, [x191?, #0xf] uxtw x66?, w2? - add x67?, x66?, #0x2 + add x67?, x66?, #0x1 subs xzr, x5?, x67? - b.hi #0x20 + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrh w69?, [x8?] - uxtw x71?, w2? - add x72?, x71?, #0x11 - subs xzr, x5?, x72? - b.hi #0x20 + exit_sequence w0? + add x190?, x8?, x66? + ldrb w70?, [x190?] + uxtw x72?, w2? + add x73?, x72?, #0x10 + subs xzr, x5?, x73? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrh w74?, [x8?, #0xf] - uxtw x76?, w2? - add x77?, x76?, #0x2 - subs xzr, x5?, x77? - b.hi #0x20 + exit_sequence w0? + add x189?, x8?, x72? + ldrb w76?, [x189?, #0xf] + uxtw x78?, w2? + add x79?, x78?, #0x2 + subs xzr, x5?, x79? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrh w79?, [x8?] - uxtw x81?, w2? - add x82?, x81?, #0x11 - subs xzr, x5?, x82? - b.hi #0x20 + exit_sequence w0? + add x188?, x8?, x78? + ldrsh w82?, [x188?] + uxtw x84?, w2? + add x85?, x84?, #0x11 + subs xzr, x5?, x85? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrh w84?, [x8?, #0xf] - uxtw x86?, w2? - add x87?, x86?, #0x1 - subs xzr, x5?, x87? - b.hi #0x20 + exit_sequence w0? + add x187?, x8?, x84? + ldrsh w88?, [x187?, #0xf] + uxtw x90?, w2? + add x91?, x90?, #0x2 + subs xzr, x5?, x91? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrb w89?, [x8?] - uxtw x91?, w2? - add x92?, x91?, #0x10 - subs xzr, x5?, x92? - b.hi #0x20 - movz x27, #0x3, LSL 0 - str w27, [x0?] - trap_sequence w0? - ldrb w94?, [x8?, #0xf] + exit_sequence w0? + add x186?, x8?, x90? + ldrh w94?, [x186?] uxtw x96?, w2? - add x97?, x96?, #0x1 + add x97?, x96?, #0x11 subs xzr, x5?, x97? - b.hi #0x20 - movz x27, #0x3, LSL 0 - str w27, [x0?] - trap_sequence w0? - ldrb w99?, [x8?] - uxtw x101?, w2? - add x102?, x101?, #0x10 - subs xzr, x5?, x102? - b.hi #0x20 + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrb w104?, [x8?, #0xf] - uxtw x106?, w2? - add x107?, x106?, #0x2 - subs xzr, x5?, x107? - b.hi #0x20 + exit_sequence w0? + add x185?, x8?, x96? + ldrh w100?, [x185?, #0xf] + uxtw x102?, w2? 
+ add x103?, x102?, #0x1 + subs xzr, x5?, x103? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrh w109?, [x8?] - uxtw x111?, w2? - add x112?, x111?, #0x11 - subs xzr, x5?, x112? - b.hi #0x20 + exit_sequence w0? + add x184?, x8?, x102? + ldrsb w106?, [x184?] + uxtw x108?, w2? + add x109?, x108?, #0x10 + subs xzr, x5?, x109? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrh w114?, [x8?, #0xf] - uxtw x116?, w2? - add x117?, x116?, #0x2 - subs xzr, x5?, x117? - b.hi #0x20 + exit_sequence w0? + add x183?, x8?, x108? + ldrsb w112?, [x183?, #0xf] + uxtw x114?, w2? + add x115?, x114?, #0x1 + subs xzr, x5?, x115? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrh w119?, [x8?] - uxtw x121?, w2? - add x122?, x121?, #0x11 - subs xzr, x5?, x122? - b.hi #0x20 + exit_sequence w0? + add x182?, x8?, x114? + ldrb w118?, [x182?] + uxtw x120?, w2? + add x121?, x120?, #0x10 + subs xzr, x5?, x121? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldrh w124?, [x8?, #0xf] + exit_sequence w0? + add x181?, x8?, x120? + ldrb w124?, [x181?, #0xf] uxtw x126?, w2? - add x127?, x126?, #0x4 + add x127?, x126?, #0x2 subs xzr, x5?, x127? - b.hi #0x20 + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldr w129?, [x8?] - uxtw x131?, w2? - add x132?, x131?, #0x13 - subs xzr, x5?, x132? - b.hi #0x20 + exit_sequence w0? + add x180?, x8?, x126? + ldrsh w130?, [x180?] + uxtw x132?, w2? + add x133?, x132?, #0x11 + subs xzr, x5?, x133? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldr w134?, [x8?, #0xf] - uxtw x136?, w2? - add x137?, x136?, #0x4 - subs xzr, x5?, x137? - b.hi #0x20 + exit_sequence w0? + add x179?, x8?, x132? + ldrsh w136?, [x179?, #0xf] + uxtw x138?, w2? + add x139?, x138?, #0x2 + subs xzr, x5?, x139? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldr w139?, [x8?] - uxtw x141?, w2? - add x142?, x141?, #0x13 - subs xzr, x5?, x142? - b.hi #0x20 + exit_sequence w0? + add x178?, x8?, x138? + ldrh w142?, [x178?] + uxtw x144?, w2? + add x145?, x144?, #0x11 + subs xzr, x5?, x145? + b.hs #0x20 movz x27, #0x3, LSL 0 str w27, [x0?] - trap_sequence w0? - ldr w144?, [x8?, #0xf] - mov x0, x9? - mov x1, x14? - mov q0.8b, q19?.8b - mov q1.8b, q24?.8b - mov x2, x29? - mov x3, x34? - mov q2.8b, q39?.8b - mov q3.8b, q44?.8b - mov x4, x49? - mov x5, x54? - mov x6, x59? - mov x7, x64? - str w69?, [#ret_space, #0x0] - str w74?, [#ret_space, #0x8] - str w79?, [#ret_space, #0x10] - str w84?, [#ret_space, #0x18] - str x89?, [#ret_space, #0x20] - str x94?, [#ret_space, #0x28] - str x99?, [#ret_space, #0x30] - str x104?, [#ret_space, #0x38] - str x109?, [#ret_space, #0x40] - str x114?, [#ret_space, #0x48] - str x119?, [#ret_space, #0x50] - str x124?, [#ret_space, #0x58] - str x129?, [#ret_space, #0x60] - str x134?, [#ret_space, #0x68] - str x139?, [#ret_space, #0x70] - str x144?, [#ret_space, #0x78] + exit_sequence w0? + add x177?, x8?, x144? + ldrh w148?, [x177?, #0xf] + uxtw x150?, w2? + add x151?, x150?, #0x4 + subs xzr, x5?, x151? + b.hs #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + exit_sequence w0? + add x176?, x8?, x150? + ldrs w154?, [x176?] + uxtw x156?, w2? + add x157?, x156?, #0x13 + subs xzr, x5?, x157? + b.hs #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + exit_sequence w0? + add x175?, x8?, x156? + ldrs w160?, [x175?, #0xf] + uxtw x162?, w2? + add x163?, x162?, #0x4 + subs xzr, x5?, x163? + b.hs #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] 
+ exit_sequence w0? + add x174?, x8?, x162? + ldr w166?, [x174?] + uxtw x168?, w2? + add x169?, x168?, #0x13 + subs xzr, x5?, x169? + b.hs #0x20 + movz x27, #0x3, LSL 0 + str w27, [x0?] + exit_sequence w0? + add x173?, x8?, x168? + ldr w172?, [x173?, #0xf] + str x172?, [#ret_space, #0x78] + str x166?, [#ret_space, #0x70] + str x160?, [#ret_space, #0x68] + str x154?, [#ret_space, #0x60] + str x148?, [#ret_space, #0x58] + str x142?, [#ret_space, #0x50] + str x136?, [#ret_space, #0x48] + str x130?, [#ret_space, #0x40] + str x124?, [#ret_space, #0x38] + str x118?, [#ret_space, #0x30] + str x112?, [#ret_space, #0x28] + str x106?, [#ret_space, #0x20] + str w100?, [#ret_space, #0x18] + str w94?, [#ret_space, #0x10] + str w88?, [#ret_space, #0x8] + str w82?, [#ret_space, #0x0] + mov x7, x76? + mov x6, x70? + mov x5, x64? + mov x4, x58? + mov q3.8b, q52?.8b + mov q2.8b, q46?.8b + mov x3, x40? + mov x2, x34? + mov q1.8b, q28?.8b + mov q0.8b, q22?.8b + mov x1, x16? + mov x0, x10? ret `, @@ -2159,250 +2220,280 @@ L1 (SSA Block: blk0): str x24, [sp, #-0x10]! str x25, [sp, #-0x10]! str x26, [sp, #-0x10]! - mov x23, x0 - mov x25, x2 - uxtw x8, w25 - ldr w24, [x1, #0x8] - add x8, x8, #0x4 - subs xzr, x24, x8 - b.hi #0x20 + str x28, [sp, #-0x10]! + mov x8, x0 + uxtw x11, w2 + ldr w9, [x1, #0x8] + add x10, x11, #0x4 + subs xzr, x9, x10 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldr w26, [x1] - ldr w0, [x26] - uxtw x8, w25 - add x8, x8, #0x8 - subs xzr, x24, x8 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + ldr x10, [x1] + add x11, x10, x11 + ldr w0, [x11] + uxtw x12, w2 + add x11, x12, #0x8 + subs xzr, x9, x11 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldr x1, [x26] - uxtw x8, w25 - add x8, x8, #0x4 - subs xzr, x24, x8 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x11, x10, x12 + ldr x1, [x11] + uxtw x12, w2 + add x11, x12, #0x4 + subs xzr, x9, x11 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldr s0, [x26] - uxtw x8, w25 - add x8, x8, #0x8 - subs xzr, x24, x8 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x11, x10, x12 + ldr s0, [x11] + uxtw x12, w2 + add x11, x12, #0x8 + subs xzr, x9, x11 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldr d1, [x26] - uxtw x8, w25 - add x8, x8, #0x13 - subs xzr, x24, x8 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x11, x10, x12 + ldr d1, [x11] + uxtw x12, w2 + add x11, x12, #0x13 + subs xzr, x9, x11 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldr w2, [x26, #0xf] - uxtw x8, w25 - add x8, x8, #0x17 - subs xzr, x24, x8 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x11, x10, x12 + ldr w11, [x11, #0xf] + uxtw x13, w2 + add x12, x13, #0x17 + subs xzr, x9, x12 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldr x3, [x26, #0xf] - uxtw x8, w25 - add x8, x8, #0x13 - subs xzr, x24, x8 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x12, x10, x13 + ldr x3, [x12, #0xf] + uxtw x13, w2 + add x12, x13, #0x13 + subs xzr, x9, x12 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldr s2, [x26, #0xf] - uxtw x8, w25 - add x8, x8, #0x17 - subs xzr, x24, x8 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x12, x10, x13 + ldr s2, [x12, #0xf] + uxtw x13, w2 + add x12, x13, #0x17 + subs xzr, x9, x12 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldr d3, [x26, #0xf] - uxtw x8, w25 - add x8, x8, #0x1 - subs xzr, x24, x8 - b.hi #0x20 + 
str w27, [x8] + exit_sequence w8 + add x12, x10, x13 + ldr d3, [x12, #0xf] + uxtw x13, w2 + add x12, x13, #0x1 + subs xzr, x9, x12 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrb w4, [x26] - uxtw x8, w25 - add x8, x8, #0x10 - subs xzr, x24, x8 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x12, x10, x13 + ldrsb w4, [x12] + uxtw x13, w2 + add x12, x13, #0x10 + subs xzr, x9, x12 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrb w5, [x26, #0xf] - uxtw x8, w25 - add x8, x8, #0x1 - subs xzr, x24, x8 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x12, x10, x13 + ldrsb w5, [x12, #0xf] + uxtw x13, w2 + add x12, x13, #0x1 + subs xzr, x9, x12 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrb w6, [x26] - uxtw x8, w25 - add x8, x8, #0x10 - subs xzr, x24, x8 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x12, x10, x13 + ldrb w6, [x12] + uxtw x13, w2 + add x12, x13, #0x10 + subs xzr, x9, x12 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrb w7, [x26, #0xf] - uxtw x8, w25 - add x8, x8, #0x2 - subs xzr, x24, x8 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x12, x10, x13 + ldrb w7, [x12, #0xf] + uxtw x13, w2 + add x12, x13, #0x2 + subs xzr, x9, x12 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrh w8, [x26] - uxtw x9, w25 - add x9, x9, #0x11 - subs xzr, x24, x9 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x12, x10, x13 + ldrsh w12, [x12] + uxtw x14, w2 + add x13, x14, #0x11 + subs xzr, x9, x13 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrh w9, [x26, #0xf] - uxtw x10, w25 - add x10, x10, #0x2 - subs xzr, x24, x10 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x13, x10, x14 + ldrsh w13, [x13, #0xf] + uxtw x15, w2 + add x14, x15, #0x2 + subs xzr, x9, x14 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrh w10, [x26] - uxtw x11, w25 - add x11, x11, #0x11 - subs xzr, x24, x11 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x14, x10, x15 + ldrh w14, [x14] + uxtw x16, w2 + add x15, x16, #0x11 + subs xzr, x9, x15 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrh w11, [x26, #0xf] - uxtw x12, w25 - add x12, x12, #0x1 - subs xzr, x24, x12 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x15, x10, x16 + ldrh w15, [x15, #0xf] + uxtw x17, w2 + add x16, x17, #0x1 + subs xzr, x9, x16 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrb w12, [x26] - uxtw x13, w25 - add x13, x13, #0x10 - subs xzr, x24, x13 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x16, x10, x17 + ldrsb w16, [x16] + uxtw x18, w2 + add x17, x18, #0x10 + subs xzr, x9, x17 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrb w13, [x26, #0xf] - uxtw x14, w25 - add x14, x14, #0x1 - subs xzr, x24, x14 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x17, x10, x18 + ldrsb w17, [x17, #0xf] + uxtw x19, w2 + add x18, x19, #0x1 + subs xzr, x9, x18 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrb w14, [x26] - uxtw x15, w25 - add x15, x15, #0x10 - subs xzr, x24, x15 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x18, x10, x19 + ldrb w18, [x18] + uxtw x20, w2 + add x19, x20, #0x10 + subs xzr, x9, x19 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrb w15, [x26, #0xf] - uxtw x16, w25 - add x16, x16, #0x2 - subs xzr, x24, x16 - b.hi #0x20 + str w27, [x8] + 
exit_sequence w8 + add x19, x10, x20 + ldrb w19, [x19, #0xf] + uxtw x21, w2 + add x20, x21, #0x2 + subs xzr, x9, x20 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrh w16, [x26] - uxtw x17, w25 - add x17, x17, #0x11 - subs xzr, x24, x17 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x20, x10, x21 + ldrsh w20, [x20] + uxtw x22, w2 + add x21, x22, #0x11 + subs xzr, x9, x21 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrh w17, [x26, #0xf] - uxtw x18, w25 - add x18, x18, #0x2 - subs xzr, x24, x18 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x21, x10, x22 + ldrsh w21, [x21, #0xf] + uxtw x23, w2 + add x22, x23, #0x2 + subs xzr, x9, x22 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrh w18, [x26] - uxtw x19, w25 - add x19, x19, #0x11 - subs xzr, x24, x19 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x22, x10, x23 + ldrh w22, [x22] + uxtw x24, w2 + add x23, x24, #0x11 + subs xzr, x9, x23 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldrh w19, [x26, #0xf] - uxtw x20, w25 - add x20, x20, #0x4 - subs xzr, x24, x20 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x23, x10, x24 + ldrh w23, [x23, #0xf] + uxtw x25, w2 + add x24, x25, #0x4 + subs xzr, x9, x24 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldr w20, [x26] - uxtw x21, w25 - add x21, x21, #0x13 - subs xzr, x24, x21 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x24, x10, x25 + ldrs w24, [x24] + uxtw x26, w2 + add x25, x26, #0x13 + subs xzr, x9, x25 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldr w21, [x26, #0xf] - uxtw x22, w25 - add x22, x22, #0x4 - subs xzr, x24, x22 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x25, x10, x26 + ldrs w25, [x25, #0xf] + uxtw x28, w2 + add x26, x28, #0x4 + subs xzr, x9, x26 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldr w22, [x26] - uxtw x25, w25 - add x25, x25, #0x13 - subs xzr, x24, x25 - b.hi #0x20 + str w27, [x8] + exit_sequence w8 + add x26, x10, x28 + ldr w26, [x26] + uxtw x29, w2 + add x28, x29, #0x13 + subs xzr, x9, x28 + b.hs #0x20 movz x27, #0x3, LSL 0 - str w27, [x23] - trap_sequence w23 - ldr w23, [x26, #0xf] - str w8, [sp, #0xa0] - str w9, [sp, #0xa8] - str w10, [sp, #0xb0] - str w11, [sp, #0xb8] - str x12, [sp, #0xc0] - str x13, [sp, #0xc8] - str x14, [sp, #0xd0] - str x15, [sp, #0xd8] - str x16, [sp, #0xe0] - str x17, [sp, #0xe8] - str x18, [sp, #0xf0] - str x19, [sp, #0xf8] - str x20, [sp, #0x100] - str x21, [sp, #0x108] - str x22, [sp, #0x110] - str x23, [sp, #0x118] + str w27, [x8] + exit_sequence w8 + add x8, x10, x29 + ldr w8, [x8, #0xf] + str x8, [sp, #0x128] + str x26, [sp, #0x120] + str x25, [sp, #0x118] + str x24, [sp, #0x110] + str x23, [sp, #0x108] + str x22, [sp, #0x100] + str x21, [sp, #0xf8] + str x20, [sp, #0xf0] + str x19, [sp, #0xe8] + str x18, [sp, #0xe0] + str x17, [sp, #0xd8] + str x16, [sp, #0xd0] + str w15, [sp, #0xc8] + str w14, [sp, #0xc0] + str w13, [sp, #0xb8] + str w12, [sp, #0xb0] + mov x2, x11 + ldr x28, [sp], #0x10 ldr x26, [sp], #0x10 ldr x25, [sp], #0x10 ldr x24, [sp], #0x10 diff --git a/internal/engine/wazevo/backend/isa/arm64/abi.go b/internal/engine/wazevo/backend/isa/arm64/abi.go index 51d0bcd332..556c934a3b 100644 --- a/internal/engine/wazevo/backend/isa/arm64/abi.go +++ b/internal/engine/wazevo/backend/isa/arm64/abi.go @@ -196,8 +196,11 @@ func (a *abiImpl) CalleeGenFunctionArgsToVRegs(args []ssa.Value) { // 
CalleeGenVRegsToFunctionReturns implements backend.FunctionABI. func (a *abiImpl) CalleeGenVRegsToFunctionReturns(rets []ssa.Value) { - for i, ret := range rets { - r := &a.rets[i] + l := len(rets) - 1 + for i := range rets { + // Reverse order in order to avoid overwriting the stack returns existing in the return registers. + ret := rets[l-i] + r := &a.rets[l-i] reg := a.m.compiler.VRegOf(ret) if def := a.m.compiler.ValueDefinition(ret); def.IsFromInstr() { // Constant instructions are inlined. diff --git a/internal/engine/wazevo/backend/isa/arm64/abi_go_entry.go b/internal/engine/wazevo/backend/isa/arm64/abi_go_entry.go index aa4e709862..24137625a8 100644 --- a/internal/engine/wazevo/backend/isa/arm64/abi_go_entry.go +++ b/internal/engine/wazevo/backend/isa/arm64/abi_go_entry.go @@ -9,10 +9,9 @@ import ( // EmitGoEntryPreamble implements backend.FunctionABI. This assumes `entrypoint` function (in abi_go_entry_arm64.s) passes: // -// 1. execution context ptr in x0 -// 2. module context ptr in x1 -// 3. param/result slice ptr in x19; the pointer to []uint64{} which is used to pass arguments and accept return values. -// 4. Go-allocated stack slice ptr in x26. +// 1. First (execution context ptr) and Second arguments are already passed in x0, and x1. +// 2. param/result slice ptr in x19; the pointer to []uint64{} which is used to pass arguments and accept return values. +// 3. Go-allocated stack slice ptr in x26. // // also SP and FP are correct Go-runtime-based values, and LR is the return address to the Go-side caller. func (a *abiImpl) EmitGoEntryPreamble() { @@ -118,7 +117,32 @@ func (a *abiImpl) constructGoEntryPreamble() (root *instruction) { instr.asStore(rd, mode, ret.Type.Bits()) cur = linkInstr(cur, instr) case backend.ABIArgKindStack: - panic("TODO") + offset, typ := ret.Offset, ret.Type + if offset != 0 && !offsetFitsInAddressModeKindRegUnsignedImm12(typ.Bits(), ret.Offset) { + // Do we really want to support? + panic("TODO: too many parameters") + } + + tmpOperand := operandNR(tmpRegVReg) + + // First load the value from the Go-allocated stack into temporary. + mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: spVReg, imm: offset} + toTmp := m.allocateInstr() + switch ret.Type { + case ssa.TypeI32, ssa.TypeI64: + toTmp.asULoad(tmpOperand, mode, typ.Bits()) + case ssa.TypeF32, ssa.TypeF64: + toTmp.asFpuLoad(tmpOperand, mode, typ.Bits()) + default: + panic("TODO") + } + cur = linkInstr(cur, toTmp) + + // Then write it back to the paramResultSlicePtr. + mode = addressMode{kind: addressModeKindPostIndex, rn: paramResultSlicePtr, imm: 8} + storeTmp := m.allocateInstr() + storeTmp.asStore(tmpOperand, mode, ret.Type.Bits()) + cur = linkInstr(cur, storeTmp) } } // Finally, restore the FP, SP and LR, and return to the Go code. diff --git a/internal/engine/wazevo/backend/isa/arm64/abi_go_entry_test.go b/internal/engine/wazevo/backend/isa/arm64/abi_go_entry_test.go index 6d87ac73c6..66c77498ae 100644 --- a/internal/engine/wazevo/backend/isa/arm64/abi_go_entry_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/abi_go_entry_test.go @@ -38,7 +38,7 @@ func TestAbiImpl_constructGoEntryPreamble(t *testing.T) { name: "float reg params", sig: &ssa.Signature{ Params: []ssa.Type{ - i64, i64, // module context, execution context will be skipped. + i64, i64, // first and second will be skipped. 
f32, f32, f32, f32, f64, }, }, @@ -66,7 +66,7 @@ func TestAbiImpl_constructGoEntryPreamble(t *testing.T) { name: "int reg params", sig: &ssa.Signature{ Params: []ssa.Type{ - i64, i64, // module context, execution context will be skipped. + i64, i64, // first and second will be skipped. i32, i32, i32, i64, i32, }, }, @@ -94,7 +94,7 @@ func TestAbiImpl_constructGoEntryPreamble(t *testing.T) { name: "int/float reg params interleaved", sig: &ssa.Signature{ Params: []ssa.Type{ - i64, i64, // module context, execution context will be skipped. + i64, i64, // first and second will be skipped. i32, f64, i32, f32, i64, i32, i64, f64, i32, f32, }, }, @@ -127,7 +127,7 @@ func TestAbiImpl_constructGoEntryPreamble(t *testing.T) { name: "int/float reg params/results interleaved", sig: &ssa.Signature{ Params: []ssa.Type{ - i64, i64, // module context, execution context will be skipped. + i64, i64, // first and second will be skipped. i32, f64, i32, f32, i64, }, Results: []ssa.Type{f32, f64, i32, f32, i64, i32, f64}, @@ -157,6 +157,56 @@ func TestAbiImpl_constructGoEntryPreamble(t *testing.T) { mov sp, x27 ldr x30, [x18, #0x20] ret +`, + }, + { + name: "many results", + sig: &ssa.Signature{ + Results: []ssa.Type{ + f32, f64, i32, f32, i64, i32, i32, i64, i32, i64, + f32, f64, i32, f32, i64, i32, i32, i64, i32, i64, + }, + }, + exp: ` + mov x18, x0 + str x29, [x18, #0x10] + mov x27, sp + str x27, [x18, #0x18] + str x30, [x18, #0x20] + sub x26, x26, #0x30 + mov sp, x26 + bl #0x80 + str s0, [d19], #0x8 + str d1, [d19], #0x8 + str w0, [d19], #0x8 + str s2, [d19], #0x8 + str x1, [d19], #0x8 + str w2, [d19], #0x8 + str w3, [d19], #0x8 + str x4, [d19], #0x8 + str w5, [d19], #0x8 + str x6, [d19], #0x8 + str s3, [d19], #0x8 + str d4, [d19], #0x8 + str w7, [d19], #0x8 + str s5, [d19], #0x8 + ldr x27, [sp] + str x27, [d19], #0x8 + ldr w27, [sp, #0x8] + str w27, [d19], #0x8 + ldr w27, [sp, #0x10] + str w27, [d19], #0x8 + ldr x27, [sp, #0x18] + str x27, [d19], #0x8 + ldr w27, [sp, #0x20] + str w27, [d19], #0x8 + ldr x27, [sp, #0x28] + str x27, [d19], #0x8 + ldr x29, [x18, #0x10] + ldr x27, [x18, #0x18] + mov sp, x27 + ldr x30, [x18, #0x20] + ret `, }, } { diff --git a/internal/engine/wazevo/backend/isa/arm64/instr.go b/internal/engine/wazevo/backend/isa/arm64/instr.go index a4c1e55105..6cb6015578 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -898,7 +898,7 @@ func (i *instruction) String() (str string) { case loadAddr: panic("TODO") case exitSequence: - str = fmt.Sprintf("trap_sequence %s", formatVRegSized(i.rn.nr(), 32)) + str = fmt.Sprintf("exit_sequence %s", formatVRegSized(i.rn.nr(), 32)) case udf: str = "udf" default: diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go index 22ac128f1b..40a148e7a9 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go @@ -2,12 +2,12 @@ package arm64 import ( "encoding/hex" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" "math" "testing" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" "github.com/tetratelabs/wazero/internal/testing/require" ) diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go 
b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 1b1aa92e86..ae286c38c4 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -319,7 +319,6 @@ const exitWithCodeEncodingSize = exitSequenceSize + 8 // lowerExitWithCode lowers the lowerExitWithCode takes a context pointer as argument. func (m *machine) lowerExitWithCode(execCtxVReg regalloc.VReg, code wazevoapi.ExitCode) { - loadExitCodeConst := m.allocateInstr() loadExitCodeConst.asMOVZ(tmpRegVReg, uint64(code), 0, true) @@ -376,7 +375,7 @@ func (m *machine) lowerExitIfNotZeroWithCode(execCtxVReg regalloc.VReg, cond ssa // We have to skip the entire exit sequence if the condition is false. cbr := m.allocateInstr() cbr.asCondBr(cc.asCond(), invalidLabel, false /* ignored */) - cbr.condBrOffsetResolve(exitWithCodeEncodingSize + 4) + cbr.condBrOffsetResolve(exitWithCodeEncodingSize + 4 /* br offset is from the beginning of this instruction */) m.insert(cbr) m.lowerExitWithCode(execCtxVReg, code) } diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_mem.go b/internal/engine/wazevo/backend/isa/arm64/lower_mem.go index efbdca2048..c83ca8f1d1 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_mem.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_mem.go @@ -190,11 +190,11 @@ func (m *machine) lowerExtLoad(si *ssa.Instruction) { case ssa.OpcodeUload32: load.asULoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 32) case ssa.OpcodeSload8: - load.asULoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 8) + load.asSLoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 8) case ssa.OpcodeSload16: - load.asULoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 16) + load.asSLoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 16) case ssa.OpcodeSload32: - load.asULoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 32) + load.asSLoad(operandNR(m.compiler.VRegOf(si.Return())), amode, 32) default: panic("BUG") } diff --git a/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue_test.go b/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue_test.go index 19842b20c6..40b92ad24f 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue_test.go @@ -223,7 +223,7 @@ func TestMachine_insertStackBoundsCheck(t *testing.T) { str x27, [x0, #0x40] adr x27, #0x1c str x27, [x0, #0x30] - trap_sequence w0 + exit_sequence w0 ldr x1, [x0, #0x50] ldr x2, [x0, #0x60] ldr x3, [x0, #0x70] @@ -321,7 +321,7 @@ func TestMachine_insertStackBoundsCheck(t *testing.T) { str x27, [x0, #0x40] adr x27, #0x1c str x27, [x0, #0x30] - trap_sequence w0 + exit_sequence w0 ldr x1, [x0, #0x50] ldr x2, [x0, #0x60] ldr x3, [x0, #0x70] diff --git a/internal/engine/wazevo/call_engine.go b/internal/engine/wazevo/call_engine.go index 7acc50148a..dfae774d61 100644 --- a/internal/engine/wazevo/call_engine.go +++ b/internal/engine/wazevo/call_engine.go @@ -120,6 +120,8 @@ func (c *callEngine) CallWithStack(ctx context.Context, paramResultStack []uint6 afterStackGrowEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, newsp) case wazevoapi.ExitCodeUnreachable: return wasmruntime.ErrRuntimeUnreachable + case wazevoapi.ExitCodeMemoryOutOfBounds: + return wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess default: panic("BUG") } diff --git a/internal/engine/wazevo/e2e_test.go b/internal/engine/wazevo/e2e_test.go index 85d65e6130..944cce0e82 100644 --- 
a/internal/engine/wazevo/e2e_test.go +++ b/internal/engine/wazevo/e2e_test.go @@ -80,6 +80,41 @@ func TestE2E(t *testing.T) { {params: []uint64{100}, expResults: []uint64{100 * 100}}, }, }, + { + name: "memory_load_basic", + m: testcases.MemoryLoadBasic.Module, + calls: []callCase{ + {params: []uint64{0}, expResults: []uint64{0x03_02_01_00}}, + {params: []uint64{256}, expResults: []uint64{0x03_02_01_00}}, + {params: []uint64{100}, expResults: []uint64{103<<24 | 102<<16 | 101<<8 | 100}}, + // Last 4 bytes. + {params: []uint64{uint64(wasm.MemoryPageSize) - 4}, expResults: []uint64{0xfffefdfc}}, + }, + }, + { + name: "memory out of bounds", + m: testcases.MemoryLoadBasic.Module, + calls: []callCase{ + {params: []uint64{uint64(wasm.MemoryPageSize)}, expErr: "out of bounds memory access"}, + // We load I32, so we can't load from the last 3 bytes. + {params: []uint64{uint64(wasm.MemoryPageSize) - 3}, expErr: "out of bounds memory access"}, + }, + }, + { + name: "memory_loads", + m: testcases.MemoryLoads.Module, + calls: []callCase{ + // These expected results are derived by commenting out `configureWazevo(config)` below to run the old compiler, assuming that it is correct. + {params: []uint64{0}, expResults: []uint64{0x3020100, 0x706050403020100, 0x3020100, 0x706050403020100, 0x1211100f, 0x161514131211100f, 0x1211100f, 0x161514131211100f, 0x0, 0xf, 0x0, 0xf, 0x100, 0x100f, 0x100, 0x100f, 0x0, 0xf, 0x0, 0xf, 0x100, 0x100f, 0x100, 0x100f, 0x3020100, 0x1211100f, 0x3020100, 0x1211100f}}, + {params: []uint64{1}, expResults: []uint64{0x4030201, 0x807060504030201, 0x4030201, 0x807060504030201, 0x13121110, 0x1716151413121110, 0x13121110, 0x1716151413121110, 0x1, 0x10, 0x1, 0x10, 0x201, 0x1110, 0x201, 0x1110, 0x1, 0x10, 0x1, 0x10, 0x201, 0x1110, 0x201, 0x1110, 0x4030201, 0x13121110, 0x4030201, 0x13121110}}, + {params: []uint64{8}, expResults: []uint64{0xb0a0908, 0xf0e0d0c0b0a0908, 0xb0a0908, 0xf0e0d0c0b0a0908, 0x1a191817, 0x1e1d1c1b1a191817, 0x1a191817, 0x1e1d1c1b1a191817, 0x8, 0x17, 0x8, 0x17, 0x908, 0x1817, 0x908, 0x1817, 0x8, 0x17, 0x8, 0x17, 0x908, 0x1817, 0x908, 0x1817, 0xb0a0908, 0x1a191817, 0xb0a0908, 0x1a191817}}, + {params: []uint64{0xb}, expResults: []uint64{0xe0d0c0b, 0x1211100f0e0d0c0b, 0xe0d0c0b, 0x1211100f0e0d0c0b, 0x1d1c1b1a, 0x21201f1e1d1c1b1a, 0x1d1c1b1a, 0x21201f1e1d1c1b1a, 0xb, 0x1a, 0xb, 0x1a, 0xc0b, 0x1b1a, 0xc0b, 0x1b1a, 0xb, 0x1a, 0xb, 0x1a, 0xc0b, 0x1b1a, 0xc0b, 0x1b1a, 0xe0d0c0b, 0x1d1c1b1a, 0xe0d0c0b, 0x1d1c1b1a}}, + {params: []uint64{0xc}, expResults: []uint64{0xf0e0d0c, 0x131211100f0e0d0c, 0xf0e0d0c, 0x131211100f0e0d0c, 0x1e1d1c1b, 0x2221201f1e1d1c1b, 0x1e1d1c1b, 0x2221201f1e1d1c1b, 0xc, 0x1b, 0xc, 0x1b, 0xd0c, 0x1c1b, 0xd0c, 0x1c1b, 0xc, 0x1b, 0xc, 0x1b, 0xd0c, 0x1c1b, 0xd0c, 0x1c1b, 0xf0e0d0c, 0x1e1d1c1b, 0xf0e0d0c, 0x1e1d1c1b}}, + {params: []uint64{0xd}, expResults: []uint64{0x100f0e0d, 0x14131211100f0e0d, 0x100f0e0d, 0x14131211100f0e0d, 0x1f1e1d1c, 0x232221201f1e1d1c, 0x1f1e1d1c, 0x232221201f1e1d1c, 0xd, 0x1c, 0xd, 0x1c, 0xe0d, 0x1d1c, 0xe0d, 0x1d1c, 0xd, 0x1c, 0xd, 0x1c, 0xe0d, 0x1d1c, 0xe0d, 0x1d1c, 0x100f0e0d, 0x1f1e1d1c, 0x100f0e0d, 0x1f1e1d1c}}, + {params: []uint64{0xe}, expResults: []uint64{0x11100f0e, 0x1514131211100f0e, 0x11100f0e, 0x1514131211100f0e, 0x201f1e1d, 0x24232221201f1e1d, 0x201f1e1d, 0x24232221201f1e1d, 0xe, 0x1d, 0xe, 0x1d, 0xf0e, 0x1e1d, 0xf0e, 0x1e1d, 0xe, 0x1d, 0xe, 0x1d, 0xf0e, 0x1e1d, 0xf0e, 0x1e1d, 0x11100f0e, 0x201f1e1d, 0x11100f0e, 0x201f1e1d}}, + {params: []uint64{0xf}, expResults: []uint64{0x1211100f, 0x161514131211100f, 0x1211100f, 
0x161514131211100f, 0x21201f1e, 0x2524232221201f1e, 0x21201f1e, 0x2524232221201f1e, 0xf, 0x1e, 0xf, 0x1e, 0x100f, 0x1f1e, 0x100f, 0x1f1e, 0xf, 0x1e, 0xf, 0x1e, 0x100f, 0x1f1e, 0x100f, 0x1f1e, 0x1211100f, 0x21201f1e, 0x1211100f, 0x21201f1e}}, + }, + }, } { tc := tc t.Run(tc.name, func(t *testing.T) { @@ -117,7 +152,13 @@ func TestE2E(t *testing.T) { require.EqualError(t, err, cc.expErr) } else { require.NoError(t, err) + require.Equal(t, len(cc.expResults), len(result)) require.Equal(t, cc.expResults, result) + for i := range cc.expResults { + if cc.expResults[i] != result[i] { + t.Errorf("result[%d]: exp %d, got %d", i, cc.expResults[i], result[i]) + } + } } }) } diff --git a/internal/engine/wazevo/engine.go b/internal/engine/wazevo/engine.go index 2afeffa1f7..66376cdc89 100644 --- a/internal/engine/wazevo/engine.go +++ b/internal/engine/wazevo/engine.go @@ -212,6 +212,11 @@ func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm. } me.parent = compiled me.module = mi - me.setupOpaque() + + if size := compiled.offsets.TotalSize; size != 0 { + opaque := make([]byte, size) + me.opaque = opaque + me.opaquePtr = &opaque[0] + } return me, nil } diff --git a/internal/engine/wazevo/frontend/frontend_test.go b/internal/engine/wazevo/frontend/frontend_test.go index 5c8ecf4ec2..35bc9dc3bf 100644 --- a/internal/engine/wazevo/frontend/frontend_test.go +++ b/internal/engine/wazevo/frontend/frontend_test.go @@ -859,6 +859,22 @@ blk3: () <-- (blk2) v10:i32 = Call f0:sig0, exec_ctx, module_ctx, v9 v11:i32 = Iadd v7, v10 Jump blk_ret, v11 +`, + }, + { + name: "memory_load_basic", m: testcases.MemoryLoadBasic.Module, + exp: ` +blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) + v3:i64 = Iconst_64 0x4 + v4:i64 = UExtend v2, 32->64 + v5:i64 = Uload32 module_ctx, 0x8 + v6:i64 = Iadd v4, v3 + v7:i32 = Icmp ge_u, v5, v6 + ExitIfNotZero v7, exec_ctx, memory_out_of_bounds + v8:i64 = Load module_ctx, 0x0 + v9:i64 = Iadd v8, v4 + v10:i32 = Load v9, 0x0 + Jump blk_ret, v10 `, }, { @@ -883,173 +899,201 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) v4:i64 = UExtend v2, 32->64 v5:i64 = Uload32 module_ctx, 0x8 v6:i64 = Iadd v4, v3 - v7:i32 = Icmp gt_u, v5, v6 + v7:i32 = Icmp ge_u, v5, v6 ExitIfNotZero v7, exec_ctx, memory_out_of_bounds - v8:i64 = Uload32 module_ctx, 0x0 - v9:i32 = Load v8, 0x0 - v10:i64 = Iconst_64 0x8 - v11:i64 = UExtend v2, 32->64 - v12:i64 = Iadd v11, v10 - v13:i32 = Icmp gt_u, v5, v12 - ExitIfNotZero v13, exec_ctx, memory_out_of_bounds - v14:i64 = Load v8, 0x0 - v15:i64 = Iconst_64 0x4 - v16:i64 = UExtend v2, 32->64 - v17:i64 = Iadd v16, v15 - v18:i32 = Icmp gt_u, v5, v17 - ExitIfNotZero v18, exec_ctx, memory_out_of_bounds - v19:f32 = Load v8, 0x0 - v20:i64 = Iconst_64 0x8 - v21:i64 = UExtend v2, 32->64 - v22:i64 = Iadd v21, v20 - v23:i32 = Icmp gt_u, v5, v22 - ExitIfNotZero v23, exec_ctx, memory_out_of_bounds - v24:f64 = Load v8, 0x0 - v25:i64 = Iconst_64 0x13 - v26:i64 = UExtend v2, 32->64 - v27:i64 = Iadd v26, v25 - v28:i32 = Icmp gt_u, v5, v27 - ExitIfNotZero v28, exec_ctx, memory_out_of_bounds - v29:i32 = Load v8, 0xf - v30:i64 = Iconst_64 0x17 - v31:i64 = UExtend v2, 32->64 - v32:i64 = Iadd v31, v30 - v33:i32 = Icmp gt_u, v5, v32 - ExitIfNotZero v33, exec_ctx, memory_out_of_bounds - v34:i64 = Load v8, 0xf - v35:i64 = Iconst_64 0x13 + v8:i64 = Load module_ctx, 0x0 + v9:i64 = Iadd v8, v4 + v10:i32 = Load v9, 0x0 + v11:i64 = Iconst_64 0x8 + v12:i64 = UExtend v2, 32->64 + v13:i64 = Iadd v12, v11 + v14:i32 = Icmp ge_u, v5, v13 + ExitIfNotZero v14, exec_ctx, memory_out_of_bounds 
+ v15:i64 = Iadd v8, v12 + v16:i64 = Load v15, 0x0 + v17:i64 = Iconst_64 0x4 + v18:i64 = UExtend v2, 32->64 + v19:i64 = Iadd v18, v17 + v20:i32 = Icmp ge_u, v5, v19 + ExitIfNotZero v20, exec_ctx, memory_out_of_bounds + v21:i64 = Iadd v8, v18 + v22:f32 = Load v21, 0x0 + v23:i64 = Iconst_64 0x8 + v24:i64 = UExtend v2, 32->64 + v25:i64 = Iadd v24, v23 + v26:i32 = Icmp ge_u, v5, v25 + ExitIfNotZero v26, exec_ctx, memory_out_of_bounds + v27:i64 = Iadd v8, v24 + v28:f64 = Load v27, 0x0 + v29:i64 = Iconst_64 0x13 + v30:i64 = UExtend v2, 32->64 + v31:i64 = Iadd v30, v29 + v32:i32 = Icmp ge_u, v5, v31 + ExitIfNotZero v32, exec_ctx, memory_out_of_bounds + v33:i64 = Iadd v8, v30 + v34:i32 = Load v33, 0xf + v35:i64 = Iconst_64 0x17 v36:i64 = UExtend v2, 32->64 v37:i64 = Iadd v36, v35 - v38:i32 = Icmp gt_u, v5, v37 + v38:i32 = Icmp ge_u, v5, v37 ExitIfNotZero v38, exec_ctx, memory_out_of_bounds - v39:f32 = Load v8, 0xf - v40:i64 = Iconst_64 0x17 - v41:i64 = UExtend v2, 32->64 - v42:i64 = Iadd v41, v40 - v43:i32 = Icmp gt_u, v5, v42 - ExitIfNotZero v43, exec_ctx, memory_out_of_bounds - v44:f64 = Load v8, 0xf - v45:i64 = Iconst_64 0x1 - v46:i64 = UExtend v2, 32->64 - v47:i64 = Iadd v46, v45 - v48:i32 = Icmp gt_u, v5, v47 - ExitIfNotZero v48, exec_ctx, memory_out_of_bounds - v49:i32 = Sload8 v8, 0x0 - v50:i64 = Iconst_64 0x10 - v51:i64 = UExtend v2, 32->64 - v52:i64 = Iadd v51, v50 - v53:i32 = Icmp gt_u, v5, v52 - ExitIfNotZero v53, exec_ctx, memory_out_of_bounds - v54:i32 = Sload8 v8, 0xf - v55:i64 = Iconst_64 0x1 - v56:i64 = UExtend v2, 32->64 - v57:i64 = Iadd v56, v55 - v58:i32 = Icmp gt_u, v5, v57 - ExitIfNotZero v58, exec_ctx, memory_out_of_bounds - v59:i32 = Uload8 v8, 0x0 - v60:i64 = Iconst_64 0x10 - v61:i64 = UExtend v2, 32->64 - v62:i64 = Iadd v61, v60 - v63:i32 = Icmp gt_u, v5, v62 - ExitIfNotZero v63, exec_ctx, memory_out_of_bounds - v64:i32 = Uload8 v8, 0xf - v65:i64 = Iconst_64 0x2 + v39:i64 = Iadd v8, v36 + v40:i64 = Load v39, 0xf + v41:i64 = Iconst_64 0x13 + v42:i64 = UExtend v2, 32->64 + v43:i64 = Iadd v42, v41 + v44:i32 = Icmp ge_u, v5, v43 + ExitIfNotZero v44, exec_ctx, memory_out_of_bounds + v45:i64 = Iadd v8, v42 + v46:f32 = Load v45, 0xf + v47:i64 = Iconst_64 0x17 + v48:i64 = UExtend v2, 32->64 + v49:i64 = Iadd v48, v47 + v50:i32 = Icmp ge_u, v5, v49 + ExitIfNotZero v50, exec_ctx, memory_out_of_bounds + v51:i64 = Iadd v8, v48 + v52:f64 = Load v51, 0xf + v53:i64 = Iconst_64 0x1 + v54:i64 = UExtend v2, 32->64 + v55:i64 = Iadd v54, v53 + v56:i32 = Icmp ge_u, v5, v55 + ExitIfNotZero v56, exec_ctx, memory_out_of_bounds + v57:i64 = Iadd v8, v54 + v58:i32 = Sload8 v57, 0x0 + v59:i64 = Iconst_64 0x10 + v60:i64 = UExtend v2, 32->64 + v61:i64 = Iadd v60, v59 + v62:i32 = Icmp ge_u, v5, v61 + ExitIfNotZero v62, exec_ctx, memory_out_of_bounds + v63:i64 = Iadd v8, v60 + v64:i32 = Sload8 v63, 0xf + v65:i64 = Iconst_64 0x1 v66:i64 = UExtend v2, 32->64 v67:i64 = Iadd v66, v65 - v68:i32 = Icmp gt_u, v5, v67 + v68:i32 = Icmp ge_u, v5, v67 ExitIfNotZero v68, exec_ctx, memory_out_of_bounds - v69:i32 = Sload16 v8, 0x0 - v70:i64 = Iconst_64 0x11 - v71:i64 = UExtend v2, 32->64 - v72:i64 = Iadd v71, v70 - v73:i32 = Icmp gt_u, v5, v72 - ExitIfNotZero v73, exec_ctx, memory_out_of_bounds - v74:i32 = Sload16 v8, 0xf - v75:i64 = Iconst_64 0x2 - v76:i64 = UExtend v2, 32->64 - v77:i64 = Iadd v76, v75 - v78:i32 = Icmp gt_u, v5, v77 - ExitIfNotZero v78, exec_ctx, memory_out_of_bounds - v79:i32 = Uload16 v8, 0x0 - v80:i64 = Iconst_64 0x11 - v81:i64 = UExtend v2, 32->64 - v82:i64 = Iadd v81, v80 - v83:i32 = Icmp gt_u, 
v5, v82 - ExitIfNotZero v83, exec_ctx, memory_out_of_bounds - v84:i32 = Uload16 v8, 0xf - v85:i64 = Iconst_64 0x1 - v86:i64 = UExtend v2, 32->64 - v87:i64 = Iadd v86, v85 - v88:i32 = Icmp gt_u, v5, v87 - ExitIfNotZero v88, exec_ctx, memory_out_of_bounds - v89:i64 = Sload8 v8, 0x0 - v90:i64 = Iconst_64 0x10 - v91:i64 = UExtend v2, 32->64 - v92:i64 = Iadd v91, v90 - v93:i32 = Icmp gt_u, v5, v92 - ExitIfNotZero v93, exec_ctx, memory_out_of_bounds - v94:i64 = Sload8 v8, 0xf - v95:i64 = Iconst_64 0x1 + v69:i64 = Iadd v8, v66 + v70:i32 = Uload8 v69, 0x0 + v71:i64 = Iconst_64 0x10 + v72:i64 = UExtend v2, 32->64 + v73:i64 = Iadd v72, v71 + v74:i32 = Icmp ge_u, v5, v73 + ExitIfNotZero v74, exec_ctx, memory_out_of_bounds + v75:i64 = Iadd v8, v72 + v76:i32 = Uload8 v75, 0xf + v77:i64 = Iconst_64 0x2 + v78:i64 = UExtend v2, 32->64 + v79:i64 = Iadd v78, v77 + v80:i32 = Icmp ge_u, v5, v79 + ExitIfNotZero v80, exec_ctx, memory_out_of_bounds + v81:i64 = Iadd v8, v78 + v82:i32 = Sload16 v81, 0x0 + v83:i64 = Iconst_64 0x11 + v84:i64 = UExtend v2, 32->64 + v85:i64 = Iadd v84, v83 + v86:i32 = Icmp ge_u, v5, v85 + ExitIfNotZero v86, exec_ctx, memory_out_of_bounds + v87:i64 = Iadd v8, v84 + v88:i32 = Sload16 v87, 0xf + v89:i64 = Iconst_64 0x2 + v90:i64 = UExtend v2, 32->64 + v91:i64 = Iadd v90, v89 + v92:i32 = Icmp ge_u, v5, v91 + ExitIfNotZero v92, exec_ctx, memory_out_of_bounds + v93:i64 = Iadd v8, v90 + v94:i32 = Uload16 v93, 0x0 + v95:i64 = Iconst_64 0x11 v96:i64 = UExtend v2, 32->64 v97:i64 = Iadd v96, v95 - v98:i32 = Icmp gt_u, v5, v97 + v98:i32 = Icmp ge_u, v5, v97 ExitIfNotZero v98, exec_ctx, memory_out_of_bounds - v99:i64 = Uload8 v8, 0x0 - v100:i64 = Iconst_64 0x10 - v101:i64 = UExtend v2, 32->64 - v102:i64 = Iadd v101, v100 - v103:i32 = Icmp gt_u, v5, v102 - ExitIfNotZero v103, exec_ctx, memory_out_of_bounds - v104:i64 = Uload8 v8, 0xf - v105:i64 = Iconst_64 0x2 - v106:i64 = UExtend v2, 32->64 - v107:i64 = Iadd v106, v105 - v108:i32 = Icmp gt_u, v5, v107 - ExitIfNotZero v108, exec_ctx, memory_out_of_bounds - v109:i64 = Sload16 v8, 0x0 - v110:i64 = Iconst_64 0x11 - v111:i64 = UExtend v2, 32->64 - v112:i64 = Iadd v111, v110 - v113:i32 = Icmp gt_u, v5, v112 - ExitIfNotZero v113, exec_ctx, memory_out_of_bounds - v114:i64 = Sload16 v8, 0xf - v115:i64 = Iconst_64 0x2 - v116:i64 = UExtend v2, 32->64 - v117:i64 = Iadd v116, v115 - v118:i32 = Icmp gt_u, v5, v117 - ExitIfNotZero v118, exec_ctx, memory_out_of_bounds - v119:i64 = Uload16 v8, 0x0 - v120:i64 = Iconst_64 0x11 - v121:i64 = UExtend v2, 32->64 - v122:i64 = Iadd v121, v120 - v123:i32 = Icmp gt_u, v5, v122 - ExitIfNotZero v123, exec_ctx, memory_out_of_bounds - v124:i64 = Uload16 v8, 0xf - v125:i64 = Iconst_64 0x4 + v99:i64 = Iadd v8, v96 + v100:i32 = Uload16 v99, 0xf + v101:i64 = Iconst_64 0x1 + v102:i64 = UExtend v2, 32->64 + v103:i64 = Iadd v102, v101 + v104:i32 = Icmp ge_u, v5, v103 + ExitIfNotZero v104, exec_ctx, memory_out_of_bounds + v105:i64 = Iadd v8, v102 + v106:i64 = Sload8 v105, 0x0 + v107:i64 = Iconst_64 0x10 + v108:i64 = UExtend v2, 32->64 + v109:i64 = Iadd v108, v107 + v110:i32 = Icmp ge_u, v5, v109 + ExitIfNotZero v110, exec_ctx, memory_out_of_bounds + v111:i64 = Iadd v8, v108 + v112:i64 = Sload8 v111, 0xf + v113:i64 = Iconst_64 0x1 + v114:i64 = UExtend v2, 32->64 + v115:i64 = Iadd v114, v113 + v116:i32 = Icmp ge_u, v5, v115 + ExitIfNotZero v116, exec_ctx, memory_out_of_bounds + v117:i64 = Iadd v8, v114 + v118:i64 = Uload8 v117, 0x0 + v119:i64 = Iconst_64 0x10 + v120:i64 = UExtend v2, 32->64 + v121:i64 = Iadd v120, v119 + v122:i32 = Icmp 
ge_u, v5, v121 + ExitIfNotZero v122, exec_ctx, memory_out_of_bounds + v123:i64 = Iadd v8, v120 + v124:i64 = Uload8 v123, 0xf + v125:i64 = Iconst_64 0x2 v126:i64 = UExtend v2, 32->64 v127:i64 = Iadd v126, v125 - v128:i32 = Icmp gt_u, v5, v127 + v128:i32 = Icmp ge_u, v5, v127 ExitIfNotZero v128, exec_ctx, memory_out_of_bounds - v129:i64 = Sload32 v8, 0x0 - v130:i64 = Iconst_64 0x13 - v131:i64 = UExtend v2, 32->64 - v132:i64 = Iadd v131, v130 - v133:i32 = Icmp gt_u, v5, v132 - ExitIfNotZero v133, exec_ctx, memory_out_of_bounds - v134:i64 = Sload32 v8, 0xf - v135:i64 = Iconst_64 0x4 - v136:i64 = UExtend v2, 32->64 - v137:i64 = Iadd v136, v135 - v138:i32 = Icmp gt_u, v5, v137 - ExitIfNotZero v138, exec_ctx, memory_out_of_bounds - v139:i64 = Uload32 v8, 0x0 - v140:i64 = Iconst_64 0x13 - v141:i64 = UExtend v2, 32->64 - v142:i64 = Iadd v141, v140 - v143:i32 = Icmp gt_u, v5, v142 - ExitIfNotZero v143, exec_ctx, memory_out_of_bounds - v144:i64 = Uload32 v8, 0xf - Jump blk_ret, v9, v14, v19, v24, v29, v34, v39, v44, v49, v54, v59, v64, v69, v74, v79, v84, v89, v94, v99, v104, v109, v114, v119, v124, v129, v134, v139, v144 + v129:i64 = Iadd v8, v126 + v130:i64 = Sload16 v129, 0x0 + v131:i64 = Iconst_64 0x11 + v132:i64 = UExtend v2, 32->64 + v133:i64 = Iadd v132, v131 + v134:i32 = Icmp ge_u, v5, v133 + ExitIfNotZero v134, exec_ctx, memory_out_of_bounds + v135:i64 = Iadd v8, v132 + v136:i64 = Sload16 v135, 0xf + v137:i64 = Iconst_64 0x2 + v138:i64 = UExtend v2, 32->64 + v139:i64 = Iadd v138, v137 + v140:i32 = Icmp ge_u, v5, v139 + ExitIfNotZero v140, exec_ctx, memory_out_of_bounds + v141:i64 = Iadd v8, v138 + v142:i64 = Uload16 v141, 0x0 + v143:i64 = Iconst_64 0x11 + v144:i64 = UExtend v2, 32->64 + v145:i64 = Iadd v144, v143 + v146:i32 = Icmp ge_u, v5, v145 + ExitIfNotZero v146, exec_ctx, memory_out_of_bounds + v147:i64 = Iadd v8, v144 + v148:i64 = Uload16 v147, 0xf + v149:i64 = Iconst_64 0x4 + v150:i64 = UExtend v2, 32->64 + v151:i64 = Iadd v150, v149 + v152:i32 = Icmp ge_u, v5, v151 + ExitIfNotZero v152, exec_ctx, memory_out_of_bounds + v153:i64 = Iadd v8, v150 + v154:i64 = Sload32 v153, 0x0 + v155:i64 = Iconst_64 0x13 + v156:i64 = UExtend v2, 32->64 + v157:i64 = Iadd v156, v155 + v158:i32 = Icmp ge_u, v5, v157 + ExitIfNotZero v158, exec_ctx, memory_out_of_bounds + v159:i64 = Iadd v8, v156 + v160:i64 = Sload32 v159, 0xf + v161:i64 = Iconst_64 0x4 + v162:i64 = UExtend v2, 32->64 + v163:i64 = Iadd v162, v161 + v164:i32 = Icmp ge_u, v5, v163 + ExitIfNotZero v164, exec_ctx, memory_out_of_bounds + v165:i64 = Iadd v8, v162 + v166:i64 = Uload32 v165, 0x0 + v167:i64 = Iconst_64 0x13 + v168:i64 = UExtend v2, 32->64 + v169:i64 = Iadd v168, v167 + v170:i32 = Icmp ge_u, v5, v169 + ExitIfNotZero v170, exec_ctx, memory_out_of_bounds + v171:i64 = Iadd v8, v168 + v172:i64 = Uload32 v171, 0xf + Jump blk_ret, v10, v16, v22, v28, v34, v40, v46, v52, v58, v64, v70, v76, v82, v88, v94, v100, v106, v112, v118, v124, v130, v136, v142, v148, v154, v160, v166, v172 `, }, } { diff --git a/internal/engine/wazevo/frontend/lower.go b/internal/engine/wazevo/frontend/lower.go index cc0feb72c1..bcd7168c1b 100644 --- a/internal/engine/wazevo/frontend/lower.go +++ b/internal/engine/wazevo/frontend/lower.go @@ -453,7 +453,7 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) { return } - var ceil = offset + ceil := offset switch op { case wasm.OpcodeI32Load, wasm.OpcodeF32Load: ceil += 4 @@ -491,46 +491,51 @@ func (c *Compiler) lowerOpcode(op wasm.Opcode) { baseAddrPlusCeil.AsIadd(extBaseAddr.Return(), ceilConst.Return()) 
builder.InsertInstruction(baseAddrPlusCeil) - // Check for out of bounds memory access: `baseAddrPlusCeil > memLen`. + // Check for out of bounds memory access: `memLen >= baseAddrPlusCeil`. cmp := builder.AllocateInstruction() - cmp.AsIcmp(memLen, baseAddrPlusCeil.Return(), ssa.IntegerCmpCondUnsignedGreaterThan) + cmp.AsIcmp(memLen, baseAddrPlusCeil.Return(), ssa.IntegerCmpCondUnsignedGreaterThanOrEqual) builder.InsertInstruction(cmp) exitIfNZ := builder.AllocateInstruction() exitIfNZ.AsExitIfNotZeroWithCode(c.execCtxPtrValue, cmp.Return(), wazevoapi.ExitCodeMemoryOutOfBounds) builder.InsertInstruction(exitIfNZ) - // Load the value. + // Load the value from memBase + extBaseAddr. memBase := c.getMemoryBaseValue() + addrCalc := builder.AllocateInstruction() + addrCalc.AsIadd(memBase, extBaseAddr.Return()) + builder.InsertInstruction(addrCalc) + + addr := addrCalc.Return() load := builder.AllocateInstruction() switch op { case wasm.OpcodeI32Load: - load.AsLoad(memBase, offset, ssa.TypeI32) + load.AsLoad(addr, offset, ssa.TypeI32) case wasm.OpcodeI64Load: - load.AsLoad(memBase, offset, ssa.TypeI64) + load.AsLoad(addr, offset, ssa.TypeI64) case wasm.OpcodeF32Load: - load.AsLoad(memBase, offset, ssa.TypeF32) + load.AsLoad(addr, offset, ssa.TypeF32) case wasm.OpcodeF64Load: - load.AsLoad(memBase, offset, ssa.TypeF64) + load.AsLoad(addr, offset, ssa.TypeF64) case wasm.OpcodeI32Load8S: - load.AsExtLoad(ssa.OpcodeSload8, memBase, offset, false) + load.AsExtLoad(ssa.OpcodeSload8, addr, offset, false) case wasm.OpcodeI32Load8U: - load.AsExtLoad(ssa.OpcodeUload8, memBase, offset, false) + load.AsExtLoad(ssa.OpcodeUload8, addr, offset, false) case wasm.OpcodeI32Load16S: - load.AsExtLoad(ssa.OpcodeSload16, memBase, offset, false) + load.AsExtLoad(ssa.OpcodeSload16, addr, offset, false) case wasm.OpcodeI32Load16U: - load.AsExtLoad(ssa.OpcodeUload16, memBase, offset, false) + load.AsExtLoad(ssa.OpcodeUload16, addr, offset, false) case wasm.OpcodeI64Load8S: - load.AsExtLoad(ssa.OpcodeSload8, memBase, offset, true) + load.AsExtLoad(ssa.OpcodeSload8, addr, offset, true) case wasm.OpcodeI64Load8U: - load.AsExtLoad(ssa.OpcodeUload8, memBase, offset, true) + load.AsExtLoad(ssa.OpcodeUload8, addr, offset, true) case wasm.OpcodeI64Load16S: - load.AsExtLoad(ssa.OpcodeSload16, memBase, offset, true) + load.AsExtLoad(ssa.OpcodeSload16, addr, offset, true) case wasm.OpcodeI64Load16U: - load.AsExtLoad(ssa.OpcodeUload16, memBase, offset, true) + load.AsExtLoad(ssa.OpcodeUload16, addr, offset, true) case wasm.OpcodeI64Load32S: - load.AsExtLoad(ssa.OpcodeSload32, memBase, offset, true) + load.AsExtLoad(ssa.OpcodeSload32, addr, offset, true) case wasm.OpcodeI64Load32U: - load.AsExtLoad(ssa.OpcodeUload32, memBase, offset, true) + load.AsExtLoad(ssa.OpcodeUload32, addr, offset, true) default: panic("BUG") } @@ -833,23 +838,27 @@ func (c *Compiler) getMemoryBaseValue() ssa.Value { if c.offset.LocalMemoryBegin < 0 { panic("TODO: imported memory") } - return c.getModuleCtxValueI32ZeroExt(c.memoryBaseVariable, c.offset.LocalMemoryBase()) + return c.getModuleCtxValue(c.memoryBaseVariable, c.offset.LocalMemoryBase(), false) } func (c *Compiler) getMemoryLenValue() ssa.Value { if c.offset.LocalMemoryBegin < 0 { panic("TODO: imported memory") } - return c.getModuleCtxValueI32ZeroExt(c.memoryLenVariable, c.offset.LocalMemoryLen()) + return c.getModuleCtxValue(c.memoryLenVariable, c.offset.LocalMemoryLen(), true) } -func (c *Compiler) getModuleCtxValueI32ZeroExt(variable ssa.Variable, offset wazevoapi.Offset) ssa.Value { +func (c 
*Compiler) getModuleCtxValue(variable ssa.Variable, offset wazevoapi.Offset, zeroExt bool) ssa.Value { builder := c.ssaBuilder if v := builder.FindValue(variable); v.Valid() { return v } load := builder.AllocateInstruction() - load.AsExtLoad(ssa.OpcodeUload32, c.moduleCtxPtrValue, uint32(offset), true) + if zeroExt { + load.AsExtLoad(ssa.OpcodeUload32, c.moduleCtxPtrValue, uint32(offset), true) + } else { + load.AsExtLoad(ssa.OpcodeLoad, c.moduleCtxPtrValue, uint32(offset), true) + } builder.InsertInstruction(load) ret := load.Return() builder.DefineVariableInCurrentBB(variable, ret) diff --git a/internal/engine/wazevo/module_engine.go b/internal/engine/wazevo/module_engine.go index 5b25c4ceb1..d6e4b3321c 100644 --- a/internal/engine/wazevo/module_engine.go +++ b/internal/engine/wazevo/module_engine.go @@ -40,15 +40,9 @@ type ( ) func (m *moduleEngine) setupOpaque() { - offsets := &m.parent.offsets - size := offsets.TotalSize - if size == 0 { - return - } - opaque := make([]byte, size) - m.opaque = opaque - m.opaquePtr = &opaque[0] inst := m.module + offsets := &m.parent.offsets + opaque := m.opaque if lm := offsets.LocalMemoryBegin; lm >= 0 { b := uint64(uintptr(unsafe.Pointer(&inst.MemoryInstance.Buffer[0]))) @@ -105,6 +99,11 @@ func (m *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm binary.LittleEndian.PutUint64(m.opaque[moduleCtx:], uint64(uintptr(unsafe.Pointer(importedME.opaquePtr)))) } +// DoneInstantiation implements wasm.ModuleEngine. +func (m *moduleEngine) DoneInstantiation() { + m.setupOpaque() +} + // LookupFunction implements wasm.ModuleEngine. func (m *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (api.Function, error) { panic("TODO") diff --git a/internal/engine/wazevo/module_engine_test.go b/internal/engine/wazevo/module_engine_test.go index 807234151b..9a63980b7f 100644 --- a/internal/engine/wazevo/module_engine_test.go +++ b/internal/engine/wazevo/module_engine_test.go @@ -42,6 +42,7 @@ func TestModuleEngine_setupOpaque(t *testing.T) { m := &moduleEngine{ parent: &compiledModule{offsets: tc.offset}, module: tc.m, + opaque: make([]byte, tc.offset.TotalSize), } m.setupOpaque() diff --git a/internal/engine/wazevo/ssa/instructions.go b/internal/engine/wazevo/ssa/instructions.go index 5dcdeec460..38ed758a58 100644 --- a/internal/engine/wazevo/ssa/instructions.go +++ b/internal/engine/wazevo/ssa/instructions.go @@ -2,9 +2,10 @@ package ssa import ( "fmt" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" "math" "strings" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" ) // Opcode represents a SSA instruction. 
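The lower.go hunk above makes two changes: the bounds check now uses an unsigned greater-than-or-equal comparison (ge_u, lowered to b.hs) instead of greater-than (gt_u / b.hi), and the effective address is computed as the memory base plus the zero-extended dynamic index rather than being taken from the base alone. Below is a minimal, standalone Go sketch of the resulting check semantics; the function and its parameters are illustrative only, not part of the wazero API. It shows why an access that ends exactly at the memory length is now accepted while anything past it still traps, matching the new memory_load_basic e2e cases.

package main

import "fmt"

// inBounds reports whether an accessSize-byte access at linear-memory
// address index+staticOffset stays within a memory of memLen bytes.
// This mirrors the check semantics in the patch; it is not wazero code.
func inBounds(memLen uint64, index, staticOffset, accessSize uint32) bool {
	// The frontend extends the 32-bit dynamic index to 64 bits and adds the
	// constant ceiling staticOffset+accessSize, so this sum cannot wrap here.
	ceil := uint64(staticOffset) + uint64(accessSize)
	end := uint64(index) + ceil
	// In bounds iff memLen >= end (the ge_u / b.hs check); the previous
	// gt_u / b.hi check rejected an access ending exactly at memLen.
	return memLen >= end
}

func main() {
	const pageSize = 65536 // one Wasm page, as in the test module's memory
	fmt.Println(inBounds(pageSize, pageSize-4, 0, 4)) // true: i32 load of the last 4 bytes
	fmt.Println(inBounds(pageSize, pageSize-3, 0, 4)) // false: out of bounds memory access
	fmt.Println(inBounds(pageSize, pageSize, 0, 4))   // false: out of bounds memory access
}

Run as-is this prints true, false, false for a one-page memory, mirroring the pageSize-4, pageSize-3, and pageSize call cases added to e2e_test.go.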
diff --git a/internal/engine/wazevo/testcases/testcases.go b/internal/engine/wazevo/testcases/testcases.go index b788af426a..22900b9176 100644 --- a/internal/engine/wazevo/testcases/testcases.go +++ b/internal/engine/wazevo/testcases/testcases.go @@ -1,9 +1,9 @@ package testcases import ( - "github.com/tetratelabs/wazero/internal/leb128" "math" + "github.com/tetratelabs/wazero/internal/leb128" "github.com/tetratelabs/wazero/internal/wasm" ) @@ -779,6 +779,27 @@ var ( }}}, }, } + MemoryLoadBasic = TestCase{ + Name: "memory_load_basic", + Module: &wasm.Module{ + TypeSection: []wasm.FunctionType{{ + Params: []wasm.ValueType{i32}, + Results: []wasm.ValueType{i32}, + }}, + ExportSection: []wasm.Export{{Name: ExportName, Type: wasm.ExternTypeFunc, Index: 0}}, + MemorySection: &wasm.Memory{Min: 1}, + FunctionSection: []wasm.Index{0}, + CodeSection: []wasm.Code{{Body: []byte{ + // Basic loads (without extensions). + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI32Load, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 + wasm.OpcodeEnd, + }}}, + DataSection: []wasm.DataSegment{{OffsetExpression: constOffsetExpr(0), Init: maskedBuf(int(wasm.MemoryPageSize))}}, + }, + } + + // TODO: add memory loads before and after the call case. MemoryLoads = TestCase{ Name: "memory_loads", Module: &wasm.Module{ @@ -790,67 +811,70 @@ var ( i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, }, }}, + ExportSection: []wasm.Export{{Name: ExportName, Type: wasm.ExternTypeFunc, Index: 0}}, MemorySection: &wasm.Memory{Min: 1}, FunctionSection: []wasm.Index{0}, CodeSection: []wasm.Code{{Body: []byte{ // Basic loads (without extensions). - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 0 wasm.OpcodeI32Load, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 1 wasm.OpcodeI64Load, 0x3, 0x0, // alignment=3 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 2 wasm.OpcodeF32Load, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 3 wasm.OpcodeF64Load, 0x3, 0x0, // alignment=3 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 4 wasm.OpcodeI32Load, 0x2, 0xf, // alignment=2 (natural alignment) staticOffset=16 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 5 wasm.OpcodeI64Load, 0x3, 0xf, // alignment=3 (natural alignment) staticOffset=16 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 6 wasm.OpcodeF32Load, 0x2, 0xf, // alignment=2 (natural alignment) staticOffset=16 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 7 wasm.OpcodeF64Load, 0x3, 0xf, // alignment=3 (natural alignment) staticOffset=16 // Extension integer loads. 
- wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 8 wasm.OpcodeI32Load8S, 0x0, 0x0, // alignment=0 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 9 wasm.OpcodeI32Load8S, 0x0, 0xf, // alignment=0 (natural alignment) staticOffset=16 - wasm.OpcodeLocalGet, 0, + + wasm.OpcodeLocalGet, 0, // 10 wasm.OpcodeI32Load8U, 0x0, 0x0, // alignment=0 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 11 wasm.OpcodeI32Load8U, 0x0, 0xf, // alignment=0 (natural alignment) staticOffset=16 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 12 wasm.OpcodeI32Load16S, 0x1, 0x0, // alignment=1 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 13 wasm.OpcodeI32Load16S, 0x1, 0xf, // alignment=1 (natural alignment) staticOffset=16 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 14 wasm.OpcodeI32Load16U, 0x1, 0x0, // alignment=1 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 15 wasm.OpcodeI32Load16U, 0x1, 0xf, // alignment=1 (natural alignment) staticOffset=16 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 16 wasm.OpcodeI64Load8S, 0x0, 0x0, // alignment=0 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 17 wasm.OpcodeI64Load8S, 0x0, 0xf, // alignment=0 (natural alignment) staticOffset=16 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 18 wasm.OpcodeI64Load8U, 0x0, 0x0, // alignment=0 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 19 + wasm.OpcodeI64Load8U, 0x0, 0xf, // alignment=0 (natural alignment) staticOffset=16 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 20 wasm.OpcodeI64Load16S, 0x1, 0x0, // alignment=1 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 21 wasm.OpcodeI64Load16S, 0x1, 0xf, // alignment=1 (natural alignment) staticOffset=16 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 22 wasm.OpcodeI64Load16U, 0x1, 0x0, // alignment=1 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 23 wasm.OpcodeI64Load16U, 0x1, 0xf, // alignment=1 (natural alignment) staticOffset=16 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 24 wasm.OpcodeI64Load32S, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 25 wasm.OpcodeI64Load32S, 0x2, 0xf, // alignment=2 (natural alignment) staticOffset=16 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 26 wasm.OpcodeI64Load32U, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 - wasm.OpcodeLocalGet, 0, + wasm.OpcodeLocalGet, 0, // 27 wasm.OpcodeI64Load32U, 0x2, 0xf, // alignment=2 (natural alignment) staticOffset=16 wasm.OpcodeEnd, diff --git a/internal/wasm/engine.go b/internal/wasm/engine.go index 0d03243283..c0516308f6 100644 --- a/internal/wasm/engine.go +++ b/internal/wasm/engine.go @@ -36,6 +36,9 @@ type Engine interface { // ModuleEngine implements function calls for a given module. type ModuleEngine interface { + // DoneInstantiation is called at the end of the instantiation of the module. + DoneInstantiation() + // NewFunction returns an api.Function for the given function pointed by the given Index. 
NewFunction(index Index) api.Function diff --git a/internal/wasm/store.go b/internal/wasm/store.go index 5e123a603c..b27466cd5e 100644 --- a/internal/wasm/store.go +++ b/internal/wasm/store.go @@ -381,6 +381,8 @@ func (s *Store) instantiate( return nil, fmt.Errorf("start %s failed: %w", module.funcDesc(SectionIDFunction, funcIdx), err) } } + + m.Engine.DoneInstantiation() return } diff --git a/internal/wasm/store_test.go b/internal/wasm/store_test.go index cabe9cc765..60ac45a5e1 100644 --- a/internal/wasm/store_test.go +++ b/internal/wasm/store_test.go @@ -453,6 +453,9 @@ func (e *mockEngine) NewModuleEngine(_ *Module, _ *ModuleInstance) (ModuleEngine return &mockModuleEngine{callFailIndex: e.callFailIndex, resolveImportsCalled: map[Index]Index{}}, nil } +// mockModuleEngine implements the same method as documented on wasm.ModuleEngine. +func (e *mockModuleEngine) DoneInstantiation() {} + // FunctionInstanceReference implements the same method as documented on wasm.ModuleEngine. func (e *mockModuleEngine) FunctionInstanceReference(i Index) Reference { return e.functionRefs[i] From 98147b6556c815abc90275cecd922bcafb9d5dde Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Fri, 11 Aug 2023 11:13:16 +0900 Subject: [PATCH 8/8] basic Signed-off-by: Takeshi Yoneda --- .../engine/wazevo/frontend/frontend_test.go | 65 +++++++++++++++++++ internal/engine/wazevo/ssa/builder.go | 29 +++++++-- internal/engine/wazevo/testcases/testcases.go | 31 ++++++++- 3 files changed, 118 insertions(+), 7 deletions(-) diff --git a/internal/engine/wazevo/frontend/frontend_test.go b/internal/engine/wazevo/frontend/frontend_test.go index 35bc9dc3bf..76856dd817 100644 --- a/internal/engine/wazevo/frontend/frontend_test.go +++ b/internal/engine/wazevo/frontend/frontend_test.go @@ -875,6 +875,71 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) v9:i64 = Iadd v8, v4 v10:i32 = Load v9, 0x0 Jump blk_ret, v10 +`, + }, + { + name: "memory_load_basic2", m: testcases.MemoryLoadBasic2.Module, + exp: ` +signatures: + sig1: i64i64_v + +blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) + v3:i32 = Iconst_32 0x0 + v4:i32 = Icmp eq, v2, v3 + Brz v4, blk2 + Jump blk1 + +blk1: () <-- (blk0) + Store module_ctx, exec_ctx, 0x8 + Call f1:sig1, exec_ctx, module_ctx + v5:i64 = Load module_ctx, 0x0 + v6:i64 = Uload32 module_ctx, 0x8 + Jump blk3, v2 + +blk2: () <-- (blk0) + Jump blk3, v2 + +blk3: (v7:i32) <-- (blk1,blk2) + v8:i64 = Iconst_64 0x4 + v9:i64 = UExtend v7, 32->64 + v10:i64 = Uload32 module_ctx, 0x8 + v11:i64 = Iadd v9, v8 + v12:i32 = Icmp ge_u, v10, v11 + ExitIfNotZero v12, exec_ctx, memory_out_of_bounds + v13:i64 = Load module_ctx, 0x0 + v14:i64 = Iadd v13, v9 + v15:i32 = Load v14, 0x0 + Jump blk_ret, v15 +`, + expAfterOpt: ` +signatures: + sig1: i64i64_v + +blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) + v3:i32 = Iconst_32 0x0 + v4:i32 = Icmp eq, v2, v3 + Brz v4, blk2 + Jump blk1 + +blk1: () <-- (blk0) + Store module_ctx, exec_ctx, 0x8 + Call f1:sig1, exec_ctx, module_ctx + Jump blk3 + +blk2: () <-- (blk0) + Jump blk3 + +blk3: () <-- (blk1,blk2) + v8:i64 = Iconst_64 0x4 + v9:i64 = UExtend v2, 32->64 + v10:i64 = Uload32 module_ctx, 0x8 + v11:i64 = Iadd v9, v8 + v12:i32 = Icmp ge_u, v10, v11 + ExitIfNotZero v12, exec_ctx, memory_out_of_bounds + v13:i64 = Load module_ctx, 0x0 + v14:i64 = Iadd v13, v9 + v15:i32 = Load v14, 0x0 + Jump blk_ret, v15 `, }, { diff --git a/internal/engine/wazevo/ssa/builder.go b/internal/engine/wazevo/ssa/builder.go index db796c319e..282648aff1 100644 --- a/internal/engine/wazevo/ssa/builder.go +++ 
b/internal/engine/wazevo/ssa/builder.go @@ -174,6 +174,9 @@ type builder struct { donePasses bool // doneBlockLayout is true if LayoutBlocks is called. doneBlockLayout bool + + // vs is reused by builder.FindValue. + vs []Value } // ReturnBlock implements Builder.ReturnBlock. @@ -400,9 +403,27 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock, must b return ValueInvalid } + b.vs = b.vs[:0] // If this block has multiple predecessors, we have to gather the definitions, - // and treat them as an argument to this block. So the first thing we do now is - // define a new parameter to this block which may or may not be redundant, but + // and treat them as an argument to this block. + // + // First gather all the definitions. + for i := range blk.preds { + pred := &blk.preds[i] + // Find the definition in the predecessor recursively. + value := b.findValue(typ, variable, pred.blk, must) + b.vs = append(b.vs, value) + } + if !must { + // If this is not a must, the value definition might be incomplete. + for _, v := range b.vs { + if !v.Valid() { + // If one of them is invalid, the value is not defined. + return ValueInvalid + } + } + } + // So the next thing we do now is to define a new parameter to this block which may or may not be redundant, but // later we eliminate trivial params in an optimization pass. paramValue := blk.AddParam(b, typ) b.DefineVariable(variable, paramValue, blk) @@ -411,9 +432,7 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock, must b // the newly added PHI. for i := range blk.preds { pred := &blk.preds[i] - // Find the definition in the predecessor recursively. - value := b.findValue(typ, variable, pred.blk, must) - pred.branch.addArgumentBranchInst(value) + pred.branch.addArgumentBranchInst(b.vs[i]) } return paramValue } diff --git a/internal/engine/wazevo/testcases/testcases.go b/internal/engine/wazevo/testcases/testcases.go index 22900b9176..f7000d6507 100644 --- a/internal/engine/wazevo/testcases/testcases.go +++ b/internal/engine/wazevo/testcases/testcases.go @@ -790,7 +790,6 @@ var ( MemorySection: &wasm.Memory{Min: 1}, FunctionSection: []wasm.Index{0}, CodeSection: []wasm.Code{{Body: []byte{ - // Basic loads (without extensions). wasm.OpcodeLocalGet, 0, wasm.OpcodeI32Load, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 wasm.OpcodeEnd, @@ -799,7 +798,35 @@ var ( }, } - // TODO: add memory loads before and after the call case. + MemoryLoadBasic2 = TestCase{ + Name: "memory_load_basic2", + Module: &wasm.Module{ + TypeSection: []wasm.FunctionType{i32_i32, {}}, + ExportSection: []wasm.Export{{Name: ExportName, Type: wasm.ExternTypeFunc, Index: 0}}, + MemorySection: &wasm.Memory{Min: 1}, + FunctionSection: []wasm.Index{0, 1}, + CodeSection: []wasm.Code{ + {Body: []byte{ + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI32Const, 0, + wasm.OpcodeI32Eq, + wasm.OpcodeIf, blockSignature_vv, + wasm.OpcodeCall, 0x1, // After this the memory buf/size pointer reloads. + wasm.OpcodeElse, // But in Else block, we do nothing, so not reloaded. + wasm.OpcodeEnd, + + // Therefore, this block should reload the memory buf/size pointer here. + wasm.OpcodeLocalGet, 0, + wasm.OpcodeI32Load, 0x2, 0x0, // alignment=2 (natural alignment) staticOffset=0 + + wasm.OpcodeEnd, + }}, + {Body: []byte{wasm.OpcodeEnd}}, + }, + DataSection: []wasm.DataSegment{{OffsetExpression: constOffsetExpr(0), Init: maskedBuf(int(wasm.MemoryPageSize))}}, + }, + } + MemoryLoads = TestCase{ Name: "memory_loads", Module: &wasm.Module{