Skip to content

Commit

Permalink
fix: remove use of R15 for small moduli mul #113
Browse files (browse the repository at this point in the history)
  • Loading branch information
gbotrel committed Dec 14, 2021
1 parent 57b6761 commit 0e7b520
Show file tree
Hide file tree
Showing 16 changed files with 913 additions and 914 deletions.
96 changes: 48 additions & 48 deletions ecc/bls12-377/fr/element_mul_adx_amd64.s
Original file line number | Diff line number | Diff line change
Expand Up @@ -72,19 +72,19 @@ TEXT ·mul(SB), NOSPLIT, $0-24

// A -> BP
// t[0] -> R14
// t[1] -> R15
// t[1] -> R13
// t[2] -> CX
// t[3] -> BX
// clear the flags
XORQ AX, AX
MOVQ 0(R11), DX

// (A,t[0]) := x[0]*y[0] + A
MULXQ DI, R14, R15
MULXQ DI, R14, R13

// (A,t[1]) := x[1]*y[0] + A
MULXQ R8, AX, CX
ADOXQ AX, R15
ADOXQ AX, R13

// (A,t[2]) := x[2]*y[0] + A
MULXQ R9, AX, BX
Expand All @@ -111,14 +111,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24
MOVQ R12, R14

// (C,t[0]) := t[1] + m*q[1] + C
ADCXQ R15, R14
MULXQ q<>+8(SB), AX, R15
ADCXQ R13, R14
MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
ADCXQ CX, R15
ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
ADOXQ AX, R15
ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -139,9 +139,9 @@ TEXT ·mul(SB), NOSPLIT, $0-24
ADOXQ AX, R14

// (A,t[1]) := t[1] + x[1]*y[1] + A
ADCXQ BP, R15
ADCXQ BP, R13
MULXQ R8, AX, BP
ADOXQ AX, R15
ADOXQ AX, R13

// (A,t[2]) := t[2] + x[2]*y[1] + A
ADCXQ BP, CX
Expand Down Expand Up @@ -171,14 +171,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24
MOVQ R12, R14

// (C,t[0]) := t[1] + m*q[1] + C
ADCXQ R15, R14
MULXQ q<>+8(SB), AX, R15
ADCXQ R13, R14
MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
ADCXQ CX, R15
ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
ADOXQ AX, R15
ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -199,9 +199,9 @@ TEXT ·mul(SB), NOSPLIT, $0-24
ADOXQ AX, R14

// (A,t[1]) := t[1] + x[1]*y[2] + A
ADCXQ BP, R15
ADCXQ BP, R13
MULXQ R8, AX, BP
ADOXQ AX, R15
ADOXQ AX, R13

// (A,t[2]) := t[2] + x[2]*y[2] + A
ADCXQ BP, CX
Expand Down Expand Up @@ -231,14 +231,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24
MOVQ R12, R14

// (C,t[0]) := t[1] + m*q[1] + C
ADCXQ R15, R14
MULXQ q<>+8(SB), AX, R15
ADCXQ R13, R14
MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
ADCXQ CX, R15
ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
ADOXQ AX, R15
ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -259,9 +259,9 @@ TEXT ·mul(SB), NOSPLIT, $0-24
ADOXQ AX, R14

// (A,t[1]) := t[1] + x[1]*y[3] + A
ADCXQ BP, R15
ADCXQ BP, R13
MULXQ R8, AX, BP
ADOXQ AX, R15
ADOXQ AX, R13

// (A,t[2]) := t[2] + x[2]*y[3] + A
ADCXQ BP, CX
Expand Down Expand Up @@ -291,14 +291,14 @@ TEXT ·mul(SB), NOSPLIT, $0-24
MOVQ R12, R14

// (C,t[0]) := t[1] + m*q[1] + C
ADCXQ R15, R14
MULXQ q<>+8(SB), AX, R15
ADCXQ R13, R14
MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
ADCXQ CX, R15
ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
ADOXQ AX, R15
ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -310,12 +310,12 @@ TEXT ·mul(SB), NOSPLIT, $0-24
ADCXQ AX, BX
ADOXQ BP, BX

// reduce element(R14,R15,CX,BX) using temp registers (R13,SI,R12,R11)
REDUCE(R14,R15,CX,BX,R13,SI,R12,R11)
// reduce element(R14,R13,CX,BX) using temp registers (SI,R12,R11,DI)
REDUCE(R14,R13,CX,BX,SI,R12,R11,DI)

MOVQ res+0(FP), AX
MOVQ R14, 0(AX)
MOVQ R15, 8(AX)
MOVQ R13, 8(AX)
MOVQ CX, 16(AX)
MOVQ BX, 24(AX)
RET
Expand All @@ -335,7 +335,7 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8
// t[N-1] = C
MOVQ res+0(FP), DX
MOVQ 0(DX), R14
MOVQ 8(DX), R15
MOVQ 8(DX), R13
MOVQ 16(DX), CX
MOVQ 24(DX), BX
XORQ DX, DX
Expand All @@ -351,14 +351,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8
MOVQ BP, R14

// (C,t[0]) := t[1] + m*q[1] + C
ADCXQ R15, R14
MULXQ q<>+8(SB), AX, R15
ADCXQ R13, R14
MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
ADCXQ CX, R15
ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
ADOXQ AX, R15
ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -380,14 +380,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8
MOVQ BP, R14

// (C,t[0]) := t[1] + m*q[1] + C
ADCXQ R15, R14
MULXQ q<>+8(SB), AX, R15
ADCXQ R13, R14
MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
ADCXQ CX, R15
ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
ADOXQ AX, R15
ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -409,14 +409,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8
MOVQ BP, R14

// (C,t[0]) := t[1] + m*q[1] + C
ADCXQ R15, R14
MULXQ q<>+8(SB), AX, R15
ADCXQ R13, R14
MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
ADCXQ CX, R15
ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
ADOXQ AX, R15
ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -438,14 +438,14 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8
MOVQ BP, R14

// (C,t[0]) := t[1] + m*q[1] + C
ADCXQ R15, R14
MULXQ q<>+8(SB), AX, R15
ADCXQ R13, R14
MULXQ q<>+8(SB), AX, R13
ADOXQ AX, R14

// (C,t[1]) := t[2] + m*q[2] + C
ADCXQ CX, R15
ADCXQ CX, R13
MULXQ q<>+16(SB), AX, CX
ADOXQ AX, R15
ADOXQ AX, R13

// (C,t[2]) := t[3] + m*q[3] + C
ADCXQ BX, CX
Expand All @@ -455,12 +455,12 @@ TEXT ·fromMont(SB), NOSPLIT, $0-8
ADCXQ AX, BX
ADOXQ AX, BX

// reduce element(R14,R15,CX,BX) using temp registers (SI,DI,R8,R9)
REDUCE(R14,R15,CX,BX,SI,DI,R8,R9)
// reduce element(R14,R13,CX,BX) using temp registers (SI,DI,R8,R9)
REDUCE(R14,R13,CX,BX,SI,DI,R8,R9)

MOVQ res+0(FP), AX
MOVQ R14, 0(AX)
MOVQ R15, 8(AX)
MOVQ R13, 8(AX)
MOVQ CX, 16(AX)
MOVQ BX, 24(AX)
RET
Loading

0 comments on commit 0e7b520

Please sign in to comment.