Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: remove use of R15 for small moduli mul #113 #114

Merged
merged 1 commit into from
Dec 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 48 additions & 48 deletions ecc/bls12-377/fr/element_mul_adx_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -72,19 +72,19 @@ TEXT ·mul(SB), NOSPLIT, $0-24

// A -> BP
// t[0] -> R14
- // t[1] -> R15
+ // t[1] -> R13
// t[2] -> CX
// t[3] -> BX
// clear the flags
XORQ AX, AX
MOVQ 0(R11), DX

// (A,t[0]) := x[0]*y[0] + A
- MULXQ DI, R14, R15
+ MULXQ DI, R14, R13

// (A,t[1]) := x[1]*y[0] + A
MULXQ R8, AX, CX
- ADOXQ AX, R15
+ ADOXQ AX, R13

// (A,t[2]) := x[2]*y[0] + A
MULXQ R9, AX, BX
Expand All @@ -111,14 +111,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24
MOVQ R12, R14

// (C,t[0]) := t[1] + m*q[1] + C
- ADCXQ R15, R14
- MULXQ q<>+8(SB), AX, R15
+ ADCXQ R13, R14
+ MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
- ADCXQ CX, R15
+ ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
- ADOXQ AX, R15
+ ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -139,9 +139,9 @@ TEXT ·mul(SB), NOSPLIT, $0-24
ADOXQ AX, R14

// (A,t[1]) := t[1] + x[1]*y[1] + A
- ADCXQ BP, R15
+ ADCXQ BP, R13
MULXQ R8, AX, BP
- ADOXQ AX, R15
+ ADOXQ AX, R13

// (A,t[2]) := t[2] + x[2]*y[1] + A
ADCXQ BP, CX
Expand Down Expand Up @@ -171,14 +171,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24
MOVQ R12, R14

// (C,t[0]) := t[1] + m*q[1] + C
- ADCXQ R15, R14
- MULXQ q<>+8(SB), AX, R15
+ ADCXQ R13, R14
+ MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
- ADCXQ CX, R15
+ ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
- ADOXQ AX, R15
+ ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -199,9 +199,9 @@ TEXT ·mul(SB), NOSPLIT, $0-24
ADOXQ AX, R14

// (A,t[1]) := t[1] + x[1]*y[2] + A
- ADCXQ BP, R15
+ ADCXQ BP, R13
MULXQ R8, AX, BP
- ADOXQ AX, R15
+ ADOXQ AX, R13

// (A,t[2]) := t[2] + x[2]*y[2] + A
ADCXQ BP, CX
Expand Down Expand Up @@ -231,14 +231,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24
MOVQ R12, R14

// (C,t[0]) := t[1] + m*q[1] + C
- ADCXQ R15, R14
- MULXQ q<>+8(SB), AX, R15
+ ADCXQ R13, R14
+ MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
- ADCXQ CX, R15
+ ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
- ADOXQ AX, R15
+ ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -259,9 +259,9 @@ TEXT ·mul(SB), NOSPLIT, $0-24
ADOXQ AX, R14

// (A,t[1]) := t[1] + x[1]*y[3] + A
- ADCXQ BP, R15
+ ADCXQ BP, R13
MULXQ R8, AX, BP
- ADOXQ AX, R15
+ ADOXQ AX, R13

// (A,t[2]) := t[2] + x[2]*y[3] + A
ADCXQ BP, CX
Expand Down Expand Up @@ -291,14 +291,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24
MOVQ R12, R14

// (C,t[0]) := t[1] + m*q[1] + C
- ADCXQ R15, R14
- MULXQ q<>+8(SB), AX, R15
+ ADCXQ R13, R14
+ MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
- ADCXQ CX, R15
+ ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
- ADOXQ AX, R15
+ ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -310,12 +310,12 @@ TEXT ·mul(SB), NOSPLIT, $0-24
ADCXQ AX, BX
ADOXQ BP, BX

- // reduce element(R14,R15,CX,BX) using temp registers (R13,SI,R12,R11)
- REDUCE(R14,R15,CX,BX,R13,SI,R12,R11)
+ // reduce element(R14,R13,CX,BX) using temp registers (SI,R12,R11,DI)
+ REDUCE(R14,R13,CX,BX,SI,R12,R11,DI)

MOVQ res+0(FP), AX
MOVQ R14, 0(AX)
- MOVQ R15, 8(AX)
+ MOVQ R13, 8(AX)
MOVQ CX, 16(AX)
MOVQ BX, 24(AX)
RET
Expand All @@ -335,7 +335,7 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8
// t[N-1] = C
MOVQ res+0(FP), DX
MOVQ 0(DX), R14
- MOVQ 8(DX), R15
+ MOVQ 8(DX), R13
MOVQ 16(DX), CX
MOVQ 24(DX), BX
XORQ DX, DX
Expand All @@ -351,14 +351,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8
MOVQ BP, R14

// (C,t[0]) := t[1] + m*q[1] + C
- ADCXQ R15, R14
- MULXQ q<>+8(SB), AX, R15
+ ADCXQ R13, R14
+ MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
- ADCXQ CX, R15
+ ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
- ADOXQ AX, R15
+ ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -380,14 +380,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8
MOVQ BP, R14

// (C,t[0]) := t[1] + m*q[1] + C
- ADCXQ R15, R14
- MULXQ q<>+8(SB), AX, R15
+ ADCXQ R13, R14
+ MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
- ADCXQ CX, R15
+ ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
- ADOXQ AX, R15
+ ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -409,14 +409,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8
MOVQ BP, R14

// (C,t[0]) := t[1] + m*q[1] + C
- ADCXQ R15, R14
- MULXQ q<>+8(SB), AX, R15
+ ADCXQ R13, R14
+ MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
- ADCXQ CX, R15
+ ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
- ADOXQ AX, R15
+ ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -438,14 +438,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8
MOVQ BP, R14

// (C,t[0]) := t[1] + m*q[1] + C
- ADCXQ R15, R14
- MULXQ q<>+8(SB), AX, R15
+ ADCXQ R13, R14
+ MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
- ADCXQ CX, R15
+ ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
- ADOXQ AX, R15
+ ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -455,12 +455,12 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8
ADCXQ AX, BX
ADOXQ AX, BX

- // reduce element(R14,R15,CX,BX) using temp registers (SI,DI,R8,R9)
- REDUCE(R14,R15,CX,BX,SI,DI,R8,R9)
+ // reduce element(R14,R13,CX,BX) using temp registers (SI,DI,R8,R9)
+ REDUCE(R14,R13,CX,BX,SI,DI,R8,R9)

MOVQ res+0(FP), AX
MOVQ R14, 0(AX)
- MOVQ R15, 8(AX)
+ MOVQ R13, 8(AX)
MOVQ CX, 16(AX)
MOVQ BX, 24(AX)
RET
Loading