Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wazevo: add bit count instruction Popcnt #1638

Merged
merged 6 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions internal/engine/wazevo/backend/backend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1426,9 +1426,19 @@ L1 (SSA Block: blk0):
clz w4?, w2?
rbit w27, w2?
clz w5?, w27
clz x6?, x3?
ins v13?.d[0], x2?
cnt v14?.16b, v13?.16b
uaddlv h15?, v14?.8b
mov x6?, v15?.d[0]
clz x7?, x3?
rbit x27, x3?
clz x7?, x27
clz x8?, x27
ins v10?.d[0], x3?
cnt v11?.16b, v10?.16b
uaddlv h12?, v11?.8b
mov x9?, v12?.d[0]
mov x5, x9?
mov x4, x8?
mov x3, x7?
mov x2, x6?
mov x1, x5?
Expand All @@ -1438,12 +1448,21 @@ L1 (SSA Block: blk0):
afterFinalizeARM64: `
L1 (SSA Block: blk0):
str x30, [sp, #-0x10]!
mov x8, x3
clz w0, w2
rbit w27, w2
clz w1, w27
clz x2, x3
rbit x27, x3
clz x3, x27
ins v8.d[0], x2
cnt v8.16b, v8.16b
uaddlv h8, v8.8b
mov x2, v8.d[0]
clz x3, x8
rbit x27, x8
clz x4, x27
ins v8.d[0], x8
cnt v8.16b, v8.16b
uaddlv h8, v8.8b
mov x5, v8.d[0]
ldr x30, [sp], #0x10
ret
`,
Expand Down
83 changes: 83 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/cond.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,86 @@ func condFlagFromSSAFloatCmpCond(c ssa.FloatCmpCond) condFlag {
panic(c)
}
}

// vecArrangement describes how the bits of a vector register are divided into
// lanes, or (for the size-specifier values) the width of a single element.
type vecArrangement byte

const (
	// vecArrangementNone is an arrangement indicating no data is stored.
	vecArrangementNone vecArrangement = iota
	// vecArrangement8B is an arrangement of 8 byte lanes (64-bit vector).
	vecArrangement8B
	// vecArrangement16B is an arrangement of 16 byte lanes (128-bit vector).
	vecArrangement16B
	// vecArrangement4H is an arrangement of 4 halfword (16-bit) lanes (64-bit vector).
	vecArrangement4H
	// vecArrangement8H is an arrangement of 8 halfword (16-bit) lanes (128-bit vector).
	vecArrangement8H
	// vecArrangement2S is an arrangement of 2 word (32-bit) lanes (64-bit vector).
	vecArrangement2S
	// vecArrangement4S is an arrangement of 4 word (32-bit) lanes (128-bit vector).
	vecArrangement4S
	// vecArrangement1D is an arrangement of 1 doubleword (64-bit) lane (64-bit vector).
	vecArrangement1D
	// vecArrangement2D is an arrangement of 2 doubleword (64-bit) lanes (128-bit vector).
	vecArrangement2D

	// The values below are element size specifiers rather than arrangements.
	// An instruction has either an arrangement or a size specifier, never both,
	// so sharing one ID space lets vector instructions store either one in the
	// same field.

	// vecArrangementB is a size specifier of a byte (8-bit).
	vecArrangementB
	// vecArrangementH is a size specifier of a halfword (16-bit).
	vecArrangementH
	// vecArrangementS is a size specifier of a word (32-bit).
	vecArrangementS
	// vecArrangementD is a size specifier of a doubleword (64-bit).
	vecArrangementD
	// vecArrangementQ is a size specifier of the entire vector (128-bit).
	vecArrangementQ
)

// String implements fmt.Stringer.
//
// It returns the assembly-style suffix of the arrangement ("8B", "D", ...), or
// "none" for vecArrangementNone. An unknown value panics with its numeric value
// as an int. The conversion is deliberate: panicking with v itself would hand
// the runtime a fmt.Stringer whose String method panics again while the crash
// message is being printed, which hides the offending value.
func (v vecArrangement) String() (ret string) {
	switch v {
	case vecArrangementNone:
		return "none"
	case vecArrangement8B:
		return "8B"
	case vecArrangement16B:
		return "16B"
	case vecArrangement4H:
		return "4H"
	case vecArrangement8H:
		return "8H"
	case vecArrangement2S:
		return "2S"
	case vecArrangement4S:
		return "4S"
	case vecArrangement1D:
		return "1D"
	case vecArrangement2D:
		return "2D"
	case vecArrangementB:
		return "B"
	case vecArrangementH:
		return "H"
	case vecArrangementS:
		return "S"
	case vecArrangementD:
		return "D"
	case vecArrangementQ:
		return "Q"
	default:
		panic(int(v))
	}
}

// vecIndex is the index of an element (lane) within a vector register.
type vecIndex byte

// vecIndexNone is the sentinel meaning "no vector index specified". It is the
// all-ones byte value (0xff).
const vecIndexNone vecIndex = 0xff
105 changes: 101 additions & 4 deletions internal/engine/wazevo/backend/isa/arm64/instr.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ var defKinds = [numInstructionKinds]defKind{
udf: defKindNone,
cSel: defKindRD,
fpuCSel: defKindRD,
movToVec: defKindRD,
movFromVec: defKindRD,
vecMisc: defKindRD,
vecLanes: defKindRD,
}

// defs returns the list of regalloc.VReg that are defined by the instruction.
Expand Down Expand Up @@ -182,6 +186,10 @@ var useKinds = [numInstructionKinds]useKind{
loadFpuConst64: useKindNone,
cSel: useKindRNRM,
fpuCSel: useKindRNRM,
movToVec: useKindRN,
movFromVec: useKindRN,
vecMisc: useKindRN,
vecLanes: useKindRN,
}

// uses returns the list of regalloc.VReg that are used by the instruction.
Expand Down Expand Up @@ -659,6 +667,34 @@ func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) {
i.rn, i.rd = operandNR(rn), operandNR(rd)
}

// asMovToVec turns i into a movToVec instruction with destination rd and
// source rn. The element size specifier arr is kept in u1 and the lane index
// in u2, which is where String reads them from when formatting the instruction
// as `ins`.
func (i *instruction) asMovToVec(rd, rn regalloc.VReg, arr vecArrangement, index vecIndex) {
	i.kind = movToVec
	i.rd, i.rn = operandNR(rd), operandNR(rn)
	i.u1 = uint64(arr)
	i.u2 = uint64(index)
}

// asMovFromVec turns i into a movFromVec instruction with destination rd and
// source rn. The element size specifier arr is kept in u1 and the lane index
// in u2, which is where String reads them from when formatting the instruction
// as `umov` or `mov`.
func (i *instruction) asMovFromVec(rd, rn regalloc.VReg, arr vecArrangement, index vecIndex) {
	i.kind = movFromVec
	i.rd, i.rn = operandNR(rd), operandNR(rn)
	i.u1 = uint64(arr)
	i.u2 = uint64(index)
}

// asVecMisc turns i into a vecMisc instruction applying op with source rn and
// destination rd. The operation is kept in u1 and the arrangement arr in u2;
// String formats both registers with that same arrangement.
func (i *instruction) asVecMisc(op vecOp, rd, rn regalloc.VReg, arr vecArrangement) {
	i.kind = vecMisc
	i.rn = operandNR(rn)
	i.rd = operandNR(rd)
	i.u1, i.u2 = uint64(op), uint64(arr)
}

// asVecLanes turns i into a vecLanes instruction applying op with source rn
// and destination rd. The operation is kept in u1 and the source arrangement
// arr in u2; String derives the destination element size from arr.
func (i *instruction) asVecLanes(op vecOp, rd, rn regalloc.VReg, arr vecArrangement) {
	i.kind = vecLanes
	i.rn = operandNR(rn)
	i.rd = operandNR(rd)
	i.u1, i.u2 = uint64(op), uint64(arr)
}

func (i *instruction) isCopy() bool {
op := i.kind
return op == mov64 || op == mov32 || op == fpuMov64 || op == fpuMov128
Expand Down Expand Up @@ -865,9 +901,32 @@ func (i *instruction) String() (str string) {
case movToFpu:
panic("TODO")
case movToVec:
panic("TODO")
var size byte
arr := vecArrangement(i.u1)
switch arr {
case vecArrangementB, vecArrangementH, vecArrangementS:
size = 32
case vecArrangementD:
size = 64
default:
panic("unsupported arrangement " + arr.String())
}
str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size))
case movFromVec:
panic("TODO")
var size byte
var opcode string
arr := vecArrangement(i.u1)
switch arr {
case vecArrangementB, vecArrangementH, vecArrangementS:
size = 32
opcode = "umov"
case vecArrangementD:
size = 64
opcode = "mov"
default:
panic("unsupported arrangement " + arr.String())
}
str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)))
case movFromVecSigned:
panic("TODO")
case vecDup:
Expand All @@ -883,9 +942,27 @@ func (i *instruction) String() (str string) {
case vecRRR:
panic("TODO")
case vecMisc:
panic("TODO")
str = fmt.Sprintf("%s %s, %s",
vecOp(i.u1),
formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
case vecLanes:
panic("TODO")
arr := vecArrangement(i.u2)
var destArr vecArrangement
switch arr {
case vecArrangement8B, vecArrangement16B:
destArr = vecArrangementH
case vecArrangement4H, vecArrangement8H:
destArr = vecArrangementS
case vecArrangement4S:
destArr = vecArrangementD
default:
panic("invalid arrangement " + arr.String())
}
str = fmt.Sprintf("%s %s, %s",
vecOp(i.u1),
formatVRegWidthVec(i.rd.nr(), destArr),
formatVRegVec(i.rn.nr(), arr, vecIndexNone))
case vecTbl:
panic("TODO")
case vecTbl2:
Expand Down Expand Up @@ -1240,6 +1317,26 @@ const (
aluOpMSub
)

// vecOp determines the type of vector operation. Instructions whose kind is
// vecMisc or vecLanes carry a vecOp.
type vecOp int

const (
	// vecOpCnt is the operation printed as "cnt".
	vecOpCnt vecOp = iota
	// vecOpUaddlv is the operation printed as "uaddlv".
	vecOpUaddlv
)

// String implements fmt.Stringer. It returns the assembly mnemonic of the
// operation and panics with the numeric value (as an int) when b is not a
// known vecOp.
func (b vecOp) String() string {
	switch b {
	case vecOpCnt:
		return "cnt"
	case vecOpUaddlv:
		return "uaddlv"
	default:
		panic(int(b))
	}
}

// bitOp determines the type of bitwise operation. Instructions whose kind is one of
// bitOpRbit and bitOpClz would use this type.
type bitOp int
Expand Down
Loading