add movToVec, movFromVec, improve tests
Signed-off-by: Edoardo Vacchi <evacchi@users.noreply.github.com>
evacchi committed Aug 20, 2023
1 parent 9409f2c commit ad7680d
Showing 8 changed files with 167 additions and 36 deletions.
3 changes: 3 additions & 0 deletions internal/engine/compiler/compiler_numeric_test.go
@@ -844,6 +844,9 @@ func TestCompiler_compile_Clz_Ctz_Popcnt(t *testing.T) {
// Generate and run the code under test.
_, err = compiler.compile(code.NextCodeSection())
require.NoError(t, err)

fmt.Printf("--\n%x\n--", code.Bytes())

env.exec(code.Bytes())

// One value must be pushed as a result.
35 changes: 19 additions & 16 deletions internal/engine/wazevo/backend/backend_test.go
@@ -1426,17 +1426,17 @@ L1 (SSA Block: blk0):
clz w4?, w2?
rbit w27, w2?
clz w5?, w27
mov d11?.8b, x2?.8b
cnt d11?.16B, d11?.16B
uaddlv d11?.8B, d11?.8B
mov x6?.8b, d11?.8b
ins v13?.D[0], x2?
cnt v14?.16B, v13?.16B
uaddlv v15?.8B, v14?.8B
mov v6?.D[0], x6?
clz x7?, x3?
rbit x27, x3?
clz x8?, x27
mov d10?.8b, x3?.8b
cnt d10?.16B, d10?.16B
uaddlv d10?.8B, d10?.8B
mov x9?.8b, d10?.8b
ins v10?.D[0], x3?
cnt v11?.16B, v10?.16B
uaddlv v12?.8B, v11?.8B
mov v9?.D[0], x9?
mov x5, x9?
mov x4, x8?
mov x3, x7?
@@ -1452,14 +1452,17 @@ L1 (SSA Block: blk0):
clz w0, w2
rbit w27, w2
clz w1, w27
cnt x2.16B, x2.16B
uaddlv x2.8B, x2.8B
clz x2, x3
rbit x27, x3
clz x3, x27
cnt x8.16B, x8.16B
uaddlv x8.8B, x8.8B
mov x5.8b, x8.8b
ins v13.D[0], x2
cnt v14.16B, v13.16B
uaddlv v15.8B, v14.8B
mov v6.D[0], x2
clz x3, x8
rbit x27, x8
clz x4, x27
ins v10.D[0], x8
cnt v11.16B, v10.16B
uaddlv v12.8B, v11.8B
mov v9.D[0], x5
ldr x30, [sp], #0x10
ret
`,
6 changes: 6 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/cond.go
@@ -289,3 +289,9 @@ func (v vecArrangement) String() (ret string) {
}
return
}

// vecIndex is the index of an element of a vector register.
type vecIndex byte

// vecIndexNone indicates no vector index specified.
const vecIndexNone = ^vecIndex(0)
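
Note: since vecIndex has byte as its underlying type, ^vecIndex(0) evaluates to 0xFF, so vecIndexNone serves as a "no lane selected" sentinel; the formatVRegSizedVec helper added in reg.go further down only prints a [index] suffix when the index differs from it. A tiny standalone illustration (not part of the commit):

package main

import "fmt"

// vecIndex mirrors the new byte-wide lane index type in cond.go.
type vecIndex byte

// ^vecIndex(0) flips every bit of a zero byte, giving 0xFF.
const vecIndexNone = ^vecIndex(0)

func main() {
    fmt.Println(vecIndexNone)                // 255
    fmt.Println(vecIndex(1) != vecIndexNone) // true: lane 1 is a real index
}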
62 changes: 49 additions & 13 deletions internal/engine/wazevo/backend/isa/arm64/instr.go
@@ -90,6 +90,8 @@ var defKinds = [numInstructionKinds]defKind{
udf: defKindNone,
cSel: defKindRD,
fpuCSel: defKindRD,
movToVec: defKindRD,
movFromVec: defKindRD,
vecRRR: defKindRD,
vecMisc: defKindRD,
}
@@ -184,6 +186,8 @@ var useKinds = [numInstructionKinds]useKind{
loadFpuConst64: useKindNone,
cSel: useKindRNRM,
fpuCSel: useKindRNRM,
movToVec: useKindRN,
movFromVec: useKindRN,
vecMisc: useKindRN,
vecRRR: useKindRN,
}
@@ -663,6 +667,22 @@ func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) {
i.rn, i.rd = operandNR(rn), operandNR(rd)
}

func (i *instruction) asMovToVec(rd, rn regalloc.VReg, arr vecArrangement, index vecIndex) {
i.kind = movToVec
i.rd = operandNR(rd)
i.rn = operandNR(rn)
i.u1 = uint64(arr)
i.u2 = uint64(index)
}

func (i *instruction) asMovFromVec(rd, rn regalloc.VReg, arr vecArrangement, index vecIndex) {
i.kind = movFromVec
i.rd = operandNR(rd)
i.rn = operandNR(rn)
i.u1 = uint64(arr)
i.u2 = uint64(index)
}

func (i *instruction) asVecRRR(op vecOp, rd, rn regalloc.VReg, arr vecArrangement) {
i.kind = vecRRR
i.u1 = uint64(op)
@@ -829,7 +849,7 @@ func (i *instruction) String() (str string) {
case cCmpImm:
panic("TODO")
case fpuMov64:
str = fmt.Sprintf("mov %s.8b, %s.8b", formatVRegSized(i.rd.nr(), 64), formatVRegSized(i.rn.nr(), 64))
str = fmt.Sprintf("mov %s.8b, %s.8b", formatVRegSized(i.rd.nr(), 128), formatVRegSized(i.rn.nr(), 128))
case fpuMov128:
str = fmt.Sprintf("mov %s.16b, %s.16b", formatVRegSized(i.rd.nr(), 128), formatVRegSized(i.rn.nr(), 128))
case fpuMovFromVec:
@@ -883,9 +903,29 @@ func (i *instruction) String() (str string) {
case movToFpu:
panic("TODO")
case movToVec:
panic("TODO")
var size byte
arr := vecArrangement(i.u1)
switch arr {
case vecArrangementB:
size = 32
case vecArrangementD:
size = 64
default:
panic("unsupported arrangement " + arr.String())
}
str = fmt.Sprintf("ins %s, %s", formatVRegSizedVec(i.rd.nr(), 128, arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size))
case movFromVec:
panic("TODO")
var size byte
arr := vecArrangement(i.u1)
switch arr {
case vecArrangementB:
size = 32
case vecArrangementD:
size = 64
default:
panic("unsupported arrangement " + arr.String())
}
str = fmt.Sprintf("mov %s, %s", formatVRegSizedVec(i.rd.nr(), 128, arr, vecIndex(i.u2)), formatVRegSized(i.rd.nr(), size))
case movFromVecSigned:
panic("TODO")
case vecDup:
@@ -899,20 +939,16 @@
case vecMiscNarrow:
panic("TODO")
case vecRRR:
str = fmt.Sprintf("%s %s.%s, %s.%s",
str = fmt.Sprintf("%s %s, %s",
vecOp(i.u1),
formatVRegSized(i.rd.nr(), 64),
vecArrangement(i.u2),
formatVRegSized(i.rn.nr(), 64),
vecArrangement(i.u2),
formatVRegSizedVec(i.rd.nr(), 64, vecArrangement(i.u2), vecIndexNone),
formatVRegSizedVec(i.rn.nr(), 64, vecArrangement(i.u2), vecIndexNone),
)
case vecMisc:
str = fmt.Sprintf("%s %s.%s, %s.%s",
str = fmt.Sprintf("%s %s, %s",
vecOp(i.u1),
formatVRegSized(i.rd.nr(), 64),
vecArrangement(i.u2),
formatVRegSized(i.rn.nr(), 64),
vecArrangement(i.u2),
formatVRegSizedVec(i.rd.nr(), 64, vecArrangement(i.u2), vecIndexNone),
formatVRegSizedVec(i.rn.nr(), 64, vecArrangement(i.u2), vecIndexNone),
)
case vecLanes:
panic("TODO")
65 changes: 65 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
@@ -236,6 +236,20 @@ func (i *instruction) encode(c backend.Compiler) {
condFlag(i.u1),
i.u3 == 1,
))
case movToVec:
c.Emit4Bytes(encodeMoveToVec(
regNumberInEncoding[i.rd.realReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(byte(i.u1)),
vecIndex(i.u2),
))
case movFromVec:
c.Emit4Bytes(encodeMoveFromVec(
regNumberInEncoding[i.rd.realReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(byte(i.u1)),
vecIndex(i.u2),
))
case vecRRR:
c.Emit4Bytes(encodeVecRRR(
vecOp(i.u1),
@@ -265,6 +279,57 @@ func encodeFpuCSel(rd, rn, rm uint32, c condFlag, _64bit bool) uint32 {
return 0b1111<<25 | ftype<<22 | 0b1<<21 | rm<<16 | uint32(c)<<12 | 0b11<<10 | rn<<5 | rd
}

// encodeMoveToVec encodes as "Move general-purpose register to a vector element." in
// https://developer.arm.com/documentation/dui0801/g/A64-SIMD-Vector-Instructions/MOV--vector--from-general-
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--from-general---Move-general-purpose-register-to-a-vector-element--an-alias-of-INS--general--?lang=en
func encodeMoveToVec(rd, rn uint32, arr vecArrangement, index vecIndex) uint32 {
var imm5 uint32
switch arr {
// case vecArrangementB:
// imm5 |= 0b1
// imm5 |= uint32(index) << 1
// case vecArrangementH:
// imm5 |= 0b10
// imm5 |= uint32(index) << 2
case vecArrangementS:
imm5 |= 0b100
imm5 |= uint32(index) << 3
case vecArrangementD:
imm5 |= 0b1000
imm5 |= uint32(index) << 4
default:
panic("Unsupported arrangement " + arr.String())
}

return 0b01001110000<<21 | imm5<<16 | 0b000111<<10 | rn<<5 | rd
}

// encodeMoveFromVec encodes as "Move vector element to a general-purpose register." in
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/UMOV--Unsigned-Move-vector-element-to-general-purpose-register-?lang=en
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--to-general---Move-vector-element-to-general-purpose-register--an-alias-of-UMOV-?lang=en
func encodeMoveFromVec(rd, rn uint32, arr vecArrangement, index vecIndex) uint32 {
var q uint32
var imm5 uint32
switch arr {
// case vecArrangementB:
// imm5 |= 0b1
// imm5 |= uint32(index) << 1
// case vecArrangementH:
// imm5 |= 0b10
// imm5 |= uint32(index) << 2
case vecArrangementS:
imm5 |= 0b100
imm5 |= uint32(index) << 3
case vecArrangementD:
imm5 |= 0b1000
imm5 |= uint32(index) << 4
q = 0b1
default:
panic("Unsupported arrangement " + arr.String())
}
return 0b0_001110000<<21 | q<<30 | imm5<<16 | 0b001111<<10 | rn<<5 | rd
}

// encodeConditionalSelect encodes as "Conditional select" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#condsel
func encodeConditionalSelect(kind instructionKind, rd, rn, rm uint32, c condFlag, _64bit bool) uint32 {
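Note: the two new encoders can be cross-checked by hand against the ARM reference pages linked in their comments. The sketch below is a standalone reader's check, not wazero code; it rebuilds the two D-lane words for register numbers Rn=2/Rd=1 and Rn=1/Rd=2 and prints them in the little-endian byte order the compiler emits, which should match the "411c084e" and "223c084e" expectations in the encoding test hunk that follows.

package main

import (
    "encoding/binary"
    "fmt"
)

func main() {
    // imm5 selects element size and index; for a D lane at index 0 it is 0b01000.
    const imm5 = 0b01000

    // MOV (from general) / INS (general): 0100 1110 000 | imm5 | 000111 | Rn | Rd.
    // Rn=2, Rd=1, as in the first movToVec test vector.
    ins := uint32(0b01001110000)<<21 | imm5<<16 | 0b000111<<10 | 2<<5 | 1

    // MOV (to general) / UMOV: 0 Q 001110000 | imm5 | 001111 | Rn | Rd, with Q=1 for the X form.
    // Rn=1, Rd=2, as in the first movFromVec test vector.
    umov := uint32(1)<<30 | uint32(0b0001110000)<<21 | imm5<<16 | 0b001111<<10 | 1<<5 | 2

    buf := make([]byte, 4)
    binary.LittleEndian.PutUint32(buf, ins)
    fmt.Printf("%x\n", buf) // 411c084e
    binary.LittleEndian.PutUint32(buf, umov)
    fmt.Printf("%x\n", buf) // 223c084e
}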
@@ -25,6 +25,10 @@ func TestInstruction_encode(t *testing.T) {
{want: "41bc631e", setup: func(i *instruction) { i.asFpuCSel(operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), lt, true) }},
{want: "41cc231e", setup: func(i *instruction) { i.asFpuCSel(operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), gt, false) }},
{want: "41bc231e", setup: func(i *instruction) { i.asFpuCSel(operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), lt, false) }},
{want: "411c084e", setup: func(i *instruction) { i.asMovToVec(x1VReg, v2VReg, vecArrangementD, vecIndex(0)) }},
{want: "411c044e", setup: func(i *instruction) { i.asMovToVec(x1VReg, v2VReg, vecArrangementS, vecIndex(0)) }},
{want: "223c084e", setup: func(i *instruction) { i.asMovFromVec(v2VReg, x1VReg, vecArrangementD, vecIndex(0)) }},
{want: "223c040e", setup: func(i *instruction) { i.asMovFromVec(v2VReg, x1VReg, vecArrangementS, vecIndex(0)) }},
{want: "5b28030b", setup: func(i *instruction) {
i.asALU(aluOpAdd, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), false)
}},
16 changes: 9 additions & 7 deletions internal/engine/wazevo/backend/isa/arm64/lower_instr.go
@@ -344,24 +344,26 @@ func (m *machine) lowerPopcnt(x, result ssa.Value) {
// uaddlv h0, v0.8b
// fmov x3, d0

freg := m.compiler.AllocateVReg(regalloc.RegTypeFloat)
rd := m.compiler.VRegOf(result)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)

fmov := m.allocateInstr()
fmov.asFpuMov64(freg, rn.nr())
m.insert(fmov)
rf1 := m.compiler.AllocateVReg(regalloc.RegTypeFloat)
movv := m.allocateInstr()
movv.asMovToVec(rf1, rn.nr(), vecArrangementD, vecIndex(0))
m.insert(movv)

rf2 := m.compiler.AllocateVReg(regalloc.RegTypeFloat)
cnt := m.allocateInstr()
cnt.asVecMisc(vecOpCnt, freg, freg, vecArrangement16B)
cnt.asVecMisc(vecOpCnt, rf2, rf1, vecArrangement16B)
m.insert(cnt)

rf3 := m.compiler.AllocateVReg(regalloc.RegTypeFloat)
uaddlv := m.allocateInstr()
uaddlv.asVecRRR(vecOpUaddlv, freg, freg, vecArrangement8B)
uaddlv.asVecRRR(vecOpUaddlv, rf3, rf2, vecArrangement8B)
m.insert(uaddlv)

fmov2 := m.allocateInstr()
fmov2.asFpuMov64(rd, freg)
fmov2.asMovFromVec(rd, rf3, vecArrangementD, vecIndex(0))
m.insert(fmov2)
}

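Note: lowerPopcnt now makes the GP-to-vector moves explicit, following the sequence in its comment: movToVec places the 64-bit operand in lane D[0], cnt counts the set bits in each byte lane, uaddlv (8B) sums the byte counts of the low doubleword, and movFromVec copies the scalar back to a general-purpose register. A rough Go model of the arithmetic this sequence computes (an illustration only; the helper name is made up, not wazero code):

package main

import (
    "fmt"
    "math/bits"
)

// popcntViaBytes models the lowered sequence: CNT yields a per-byte bit count
// and UADDLV adds the eight byte lanes of the low doubleword together.
func popcntViaBytes(x uint64) uint64 {
    var sum uint64
    for lane := 0; lane < 8; lane++ {
        b := uint8(x >> (8 * lane)) // one byte lane of the 64-bit value
        sum += uint64(bits.OnesCount8(b))
    }
    return sum
}

func main() {
    x := uint64(0xF0F0_0000_0000_FFFF)
    fmt.Println(popcntViaBytes(x), bits.OnesCount64(x)) // 24 24
}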
12 changes: 12 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/reg.go
@@ -293,6 +293,18 @@ func formatVRegSized(r regalloc.VReg, size byte) (ret string) {
return
}

func formatVRegSizedVec(r regalloc.VReg, size byte, arr vecArrangement, index vecIndex) (ret string) {
c := "?"
if r.IsRealReg() {
c = ""
}
ret = fmt.Sprintf("v%d%s.%s", r.ID(), c, arr)
if index != vecIndexNone {
ret += fmt.Sprintf("[%d]", index)
}
return
}

func regTypeToRegisterSizeInBits(r regalloc.RegType) byte {
switch r {
case regalloc.RegTypeInt:
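Note: for reference, a self-contained sketch of the string shapes the new formatter yields, re-modelled without wazero's internal types (the helper name, signature, and the negative-index stand-in for vecIndexNone are illustrative): virtual registers keep the trailing "?", the arrangement follows a dot, and a lane suffix appears only when an index was specified, giving strings like "v13?.D[0]" and "v14.16B" as seen in backend_test.go above.

package main

import "fmt"

// formatVec is an illustrative stand-in for formatVRegSizedVec; it does not use
// wazero's regalloc.VReg or vecArrangement types.
func formatVec(id int, isReal bool, arr string, index int) string {
    suffix := "?" // virtual (not yet allocated) registers carry a trailing "?"
    if isReal {
        suffix = ""
    }
    s := fmt.Sprintf("v%d%s.%s", id, suffix, arr)
    if index >= 0 { // only print a lane suffix when an index was specified
        s += fmt.Sprintf("[%d]", index)
    }
    return s
}

func main() {
    fmt.Println(formatVec(13, false, "D", 0))   // v13?.D[0]
    fmt.Println(formatVec(14, true, "16B", -1)) // v14.16B
}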