-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathasm-AndSSE2.go
123 lines (87 loc) · 2.22 KB
/
asm-AndSSE2.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
//go:build ignore
// +build ignore
package main
import (
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
)
// main emits, via avo, the amd64 assembly for
//
//	func andSSE2(r []byte, x []byte, y []byte)
//
// The generated routine stores x[i] & y[i] into r[i] for every i in
// [0, len(x)). Only len(x) is read: the generated code performs no check on
// len(r) or len(y), so the caller must pass r and y that are at least as long
// as x.
//
// The work is done in three phases:
//
//   - loop16: 16 bytes per iteration through XMM registers,
//   - loop8:  8 bytes per iteration through a 64-bit general-purpose register,
//   - tail:   one byte per iteration for whatever is left (< 8 bytes).
//
// Only SSE2 instructions are emitted for the vector phase (MOVOU, PAND), so
// the routine matches its name and does not require AVX support.
func main() {
	TEXT("andSSE2", NOSPLIT|NOPTR, "func(r []byte, x []byte, y []byte)")

	Comment("pointer of r")
	r := Mem{Base: Load(Param("r").Base(), GP64())}
	Comment("pointer of x")
	x := Mem{Base: Load(Param("x").Base(), GP64())}
	Comment("length of x")
	n := Load(Param("x").Len(), GP64())
	Comment("pointer of y")
	y := Mem{Base: Load(Param("y").Base(), GP64())}

	Comment("--------------------------------------------")
	Comment("end address of x, will not change: p + n")
	end0 := GP64()
	MOVQ(x.Base, end0)
	ADDQ(n, end0)

	Comment("end address for loop")
	end := GP64()

	// Dispatch on the total length so that each do-while style loop below is
	// only entered when at least one full iteration is possible.
	Comment("n < 8, jump to tail")
	CMPQ(n, U32(8))
	JL(LabelRef("tail"))

	Comment("n < 16, jump to loop8")
	CMPQ(n, U32(16))
	JL(LabelRef("loop8_start"))

	left := GP64()

	Comment("--------------------------------------------")
	Comment("end address for loop16")
	// end = end0 - 15, so "x < end" holds exactly while 16 more bytes remain.
	MOVQ(end0, end)
	SUBQ(U32(15), end)

	Label("loop16")
	xv := XMM() // 128 bits of x, then of x & y
	yv := XMM() // 128 bits of y

	Comment("compute x & y, and save value to r")
	MOVOU(x.Offset(0), xv)
	// Legacy-SSE PAND with a memory operand requires a 16-byte aligned
	// address; slice data carries no such guarantee, so y is first brought
	// into a register with the unaligned load MOVOU.
	MOVOU(y.Offset(0), yv)
	PAND(yv, xv)
	MOVOU(xv, r.Offset(0))

	Comment("move pointer")
	ADDQ(U32(16), x.Base)
	ADDQ(U32(16), y.Base)
	ADDQ(U32(16), r.Base)
	CMPQ(x.Base, end)
	JL(LabelRef("loop16"))

	Comment("left < 8, jump to tail")
	// left = end0 - x: number of bytes not yet processed.
	MOVQ(end0, left)
	SUBQ(x.Base, left)
	CMPQ(left, U32(8))
	JL(LabelRef("tail"))

	Comment("--------------------------------------------")
	Label("loop8_start")
	Comment("end address for loop8")
	// end = end0 - 7, so "x < end" holds exactly while 8 more bytes remain.
	MOVQ(end0, end)
	SUBQ(U32(7), end)

	Label("loop8")
	t := GP64() // 64 bits

	Comment("compute x & y, and save value to r")
	MOVQ(x.Offset(0), t)
	ANDQ(y.Offset(0), t)
	MOVQ(t, r.Offset(0))

	Comment("move pointer")
	ADDQ(U32(8), x.Base)
	ADDQ(U32(8), y.Base)
	ADDQ(U32(8), r.Base)
	CMPQ(x.Base, end)
	JL(LabelRef("loop8"))

	Comment("--------------------------------------------")
	Label("tail")
	Comment("left elements (<8)")
	o := GP8()

	// The exit test comes first so that an empty remainder (including
	// len(x) == 0) touches no memory.
	CMPQ(x.Base, end0)
	JE(LabelRef("end"))

	MOVB(x.Offset(0), o)
	ANDB(y.Offset(0), o)
	MOVB(o, r.Offset(0))

	ADDQ(U32(1), x.Base)
	ADDQ(U32(1), y.Base)
	ADDQ(U32(1), r.Base)
	JMP(LabelRef("tail"))

	Label("end")
	RET()

	Generate()
}