Skip to content

Commit

Permalink
wazevo: add bit count instruction Popcnt (#1638)
Browse files Browse the repository at this point in the history
Signed-off-by: Edoardo Vacchi <evacchi@users.noreply.github.com>
  • Loading branch information
evacchi authored Aug 21, 2023
1 parent 43e03a8 commit 68d575a
Show file tree
Hide file tree
Showing 11 changed files with 481 additions and 14 deletions.
29 changes: 24 additions & 5 deletions internal/engine/wazevo/backend/backend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1420,9 +1420,19 @@ L1 (SSA Block: blk0):
clz w4?, w2?
rbit w27, w2?
clz w5?, w27
clz x6?, x3?
ins v13?.d[0], x2?
cnt v14?.16b, v13?.16b
uaddlv h15?, v14?.8b
mov x6?, v15?.d[0]
clz x7?, x3?
rbit x27, x3?
clz x7?, x27
clz x8?, x27
ins v10?.d[0], x3?
cnt v11?.16b, v10?.16b
uaddlv h12?, v11?.8b
mov x9?, v12?.d[0]
mov x5, x9?
mov x4, x8?
mov x3, x7?
mov x2, x6?
mov x1, x5?
Expand All @@ -1432,12 +1442,21 @@ L1 (SSA Block: blk0):
afterFinalizeARM64: `
L1 (SSA Block: blk0):
str x30, [sp, #-0x10]!
mov x8, x3
clz w0, w2
rbit w27, w2
clz w1, w27
clz x2, x3
rbit x27, x3
clz x3, x27
ins v8.d[0], x2
cnt v8.16b, v8.16b
uaddlv h8, v8.8b
mov x2, v8.d[0]
clz x3, x8
rbit x27, x8
clz x4, x27
ins v8.d[0], x8
cnt v8.16b, v8.16b
uaddlv h8, v8.8b
mov x5, v8.d[0]
ldr x30, [sp], #0x10
ret
`,
Expand Down
83 changes: 83 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/cond.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,86 @@ func condFlagFromSSAFloatCmpCond(c ssa.FloatCmpCond) condFlag {
panic(c)
}
}

// vecArrangement is the arrangement of data within a vector register, or,
// for instructions that address a single element, the size specifier of that
// element. Both families share this one type so that an instruction can store
// either in the same field.
type vecArrangement byte

const (
	// vecArrangementNone is an arrangement indicating no data is stored.
	vecArrangementNone vecArrangement = iota
	// vecArrangement8B is an arrangement of 8 bytes (64-bit vector).
	vecArrangement8B
	// vecArrangement16B is an arrangement of 16 bytes (128-bit vector).
	vecArrangement16B
	// vecArrangement4H is an arrangement of 4 halfwords, i.e. 16-bit lanes (64-bit vector).
	vecArrangement4H
	// vecArrangement8H is an arrangement of 8 halfwords, i.e. 16-bit lanes (128-bit vector).
	vecArrangement8H
	// vecArrangement2S is an arrangement of 2 words, i.e. 32-bit lanes (64-bit vector).
	vecArrangement2S
	// vecArrangement4S is an arrangement of 4 words, i.e. 32-bit lanes (128-bit vector).
	vecArrangement4S
	// vecArrangement1D is an arrangement of 1 doubleword, i.e. a 64-bit lane (64-bit vector).
	vecArrangement1D
	// vecArrangement2D is an arrangement of 2 doublewords, i.e. 64-bit lanes (128-bit vector).
	vecArrangement2D

	// Assign each vector size specifier to a vector arrangement ID.
	// Instructions can only have an arrangement or a size specifier, but not both, so it
	// simplifies the internal representation of vector instructions by being able to
	// store either into the same field.

	// vecArrangementB is a size specifier of a byte (8-bit).
	vecArrangementB
	// vecArrangementH is a size specifier of a halfword (16-bit).
	vecArrangementH
	// vecArrangementS is a size specifier of a word (32-bit).
	vecArrangementS
	// vecArrangementD is a size specifier of a doubleword (64-bit).
	vecArrangementD
	// vecArrangementQ is a size specifier of a quadword, i.e. the entire vector (128-bit).
	vecArrangementQ
)

// String implements fmt.Stringer.
//
// It returns the upper-case assembler spelling of the arrangement or size
// specifier ("8B", "16B", ..., "Q"), or "none" for vecArrangementNone.
// It panics when v is not one of the constants declared above.
func (v vecArrangement) String() string {
	switch v {
	case vecArrangementNone:
		return "none"
	case vecArrangement8B:
		return "8B"
	case vecArrangement16B:
		return "16B"
	case vecArrangement4H:
		return "4H"
	case vecArrangement8H:
		return "8H"
	case vecArrangement2S:
		return "2S"
	case vecArrangement4S:
		return "4S"
	case vecArrangement1D:
		return "1D"
	case vecArrangement2D:
		return "2D"
	case vecArrangementB:
		return "B"
	case vecArrangementH:
		return "H"
	case vecArrangementS:
		return "S"
	case vecArrangementD:
		return "D"
	case vecArrangementQ:
		return "Q"
	default:
		// Panic with the raw number rather than with v itself: v is a
		// fmt.Stringer, so printing a panic that carries v would call this
		// method again, panic again, and hide the offending value.
		panic(int(v))
	}
}

// vecIndex selects one element (lane) of a vector register.
type vecIndex byte

// vecIndexNone is the sentinel used when an operand carries no lane index.
// It is the all-ones value of vecIndex.
const vecIndexNone vecIndex = 0xff
105 changes: 101 additions & 4 deletions internal/engine/wazevo/backend/isa/arm64/instr.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ var defKinds = [numInstructionKinds]defKind{
udf: defKindNone,
cSel: defKindRD,
fpuCSel: defKindRD,
movToVec: defKindRD,
movFromVec: defKindRD,
vecMisc: defKindRD,
vecLanes: defKindRD,
}

// defs returns the list of regalloc.VReg that are defined by the instruction.
Expand Down Expand Up @@ -182,6 +186,10 @@ var useKinds = [numInstructionKinds]useKind{
loadFpuConst64: useKindNone,
cSel: useKindRNRM,
fpuCSel: useKindRNRM,
movToVec: useKindRN,
movFromVec: useKindRN,
vecMisc: useKindRN,
vecLanes: useKindRN,
}

// uses returns the list of regalloc.VReg that are used by the instruction.
Expand Down Expand Up @@ -659,6 +667,34 @@ func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) {
i.rn, i.rd = operandNR(rn), operandNR(rd)
}

// asMovToVec turns i into a movToVec instruction with destination rd and
// source rn. The element size specifier arr is kept in u1 and the lane index
// in u2; String renders this kind as "ins <rd lane>, <rn>".
func (i *instruction) asMovToVec(rd, rn regalloc.VReg, arr vecArrangement, index vecIndex) {
	i.kind = movToVec
	i.rd, i.rn = operandNR(rd), operandNR(rn)
	// Note the layout: u1 holds the arrangement here, unlike vecMisc/vecLanes
	// where u1 holds the operation.
	i.u1 = uint64(arr)
	i.u2 = uint64(index)
}

// asMovFromVec turns i into a movFromVec instruction with destination rd and
// source rn. The element size specifier arr is kept in u1 and the lane index
// in u2; String renders this kind as "umov" for B/H/S and as "mov" for D.
func (i *instruction) asMovFromVec(rd, rn regalloc.VReg, arr vecArrangement, index vecIndex) {
	i.kind = movFromVec
	i.rd, i.rn = operandNR(rd), operandNR(rn)
	// Same field layout as movToVec: arrangement in u1, lane index in u2.
	i.u1 = uint64(arr)
	i.u2 = uint64(index)
}

// asVecMisc turns i into a vecMisc instruction applying op to source rn and
// writing destination rd. The operation is kept in u1 and the arrangement,
// which String uses for both operands, in u2.
func (i *instruction) asVecMisc(op vecOp, rd, rn regalloc.VReg, arr vecArrangement) {
	i.kind = vecMisc
	i.rn = operandNR(rn)
	i.rd = operandNR(rd)
	i.u1, i.u2 = uint64(op), uint64(arr)
}

// asVecLanes turns i into a vecLanes instruction applying op to source rn and
// writing destination rd. The operation is kept in u1 and the source
// arrangement in u2; String derives the destination width from that
// arrangement when printing.
func (i *instruction) asVecLanes(op vecOp, rd, rn regalloc.VReg, arr vecArrangement) {
	i.kind = vecLanes
	i.rn = operandNR(rn)
	i.rd = operandNR(rd)
	i.u1, i.u2 = uint64(op), uint64(arr)
}

func (i *instruction) isCopy() bool {
op := i.kind
return op == mov64 || op == mov32 || op == fpuMov64 || op == fpuMov128
Expand Down Expand Up @@ -863,9 +899,32 @@ func (i *instruction) String() (str string) {
case movToFpu:
panic("TODO")
case movToVec:
panic("TODO")
var size byte
arr := vecArrangement(i.u1)
switch arr {
case vecArrangementB, vecArrangementH, vecArrangementS:
size = 32
case vecArrangementD:
size = 64
default:
panic("unsupported arrangement " + arr.String())
}
str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size))
case movFromVec:
panic("TODO")
var size byte
var opcode string
arr := vecArrangement(i.u1)
switch arr {
case vecArrangementB, vecArrangementH, vecArrangementS:
size = 32
opcode = "umov"
case vecArrangementD:
size = 64
opcode = "mov"
default:
panic("unsupported arrangement " + arr.String())
}
str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)))
case movFromVecSigned:
panic("TODO")
case vecDup:
Expand All @@ -881,9 +940,27 @@ func (i *instruction) String() (str string) {
case vecRRR:
panic("TODO")
case vecMisc:
panic("TODO")
str = fmt.Sprintf("%s %s, %s",
vecOp(i.u1),
formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
case vecLanes:
panic("TODO")
arr := vecArrangement(i.u2)
var destArr vecArrangement
switch arr {
case vecArrangement8B, vecArrangement16B:
destArr = vecArrangementH
case vecArrangement4H, vecArrangement8H:
destArr = vecArrangementS
case vecArrangement4S:
destArr = vecArrangementD
default:
panic("invalid arrangement " + arr.String())
}
str = fmt.Sprintf("%s %s, %s",
vecOp(i.u1),
formatVRegWidthVec(i.rd.nr(), destArr),
formatVRegVec(i.rn.nr(), arr, vecIndexNone))
case vecTbl:
panic("TODO")
case vecTbl2:
Expand Down Expand Up @@ -1236,6 +1313,26 @@ const (
aluOpMSub
)

// vecOp determines the type of vector operation. Instructions whose kind is
// vecMisc or vecLanes store a value of this type (see asVecMisc and
// asVecLanes).
type vecOp int

const (
	// vecOpCnt is the operation printed as "cnt".
	vecOpCnt vecOp = iota
	// vecOpUaddlv is the operation printed as "uaddlv".
	vecOpUaddlv
)

// String implements fmt.Stringer.
//
// It returns the lower-case mnemonic of the operation and panics with the
// numeric value when op is not one of the constants declared above.
func (op vecOp) String() string {
	switch op {
	case vecOpUaddlv:
		return "uaddlv"
	case vecOpCnt:
		return "cnt"
	default:
		panic(int(op))
	}
}

// bitOp determines the type of bitwise operation. Instructions whose kind is one of
// bitOpRbit and bitOpClz would use this type.
type bitOp int
Expand Down
Loading

0 comments on commit 68d575a

Please sign in to comment.