diff --git a/sha1/src/x64.asm b/sha1/src/x64.asm deleted file mode 120000 index 363c2b4..0000000 --- a/sha1/src/x64.asm +++ /dev/null @@ -1 +0,0 @@ -x64.S \ No newline at end of file diff --git a/sha1/src/x64.asm b/sha1/src/x64.asm new file mode 100644 index 0000000..d7e42c1 --- /dev/null +++ b/sha1/src/x64.asm @@ -0,0 +1,218 @@ +; SHA-1 hash in x86-64 assembly, for the ML64 assembler on MSVC +; +; Copyright (c) 2015 Project Nayuki. (MIT License) +; https://www.nayuki.io/page/fast-sha1-hash-implementation-in-x86-assembly +; +; Permission is hereby granted, free of charge, to any person obtaining a copy of +; this software and associated documentation files (the "Software"), to deal in +; the Software without restriction, including without limitation the rights to +; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +; the Software, and to permit persons to whom the Software is furnished to do so, +; subject to the following conditions: +; - The above copyright notice and this permission notice shall be included in +; all copies or substantial portions of the Software. +; - The Software is provided "as is", without warranty of any kind, express or +; implied, including but not limited to the warranties of merchantability, +; fitness for a particular purpose and noninfringement. In no event shall the +; authors or copyright holders be liable for any claim, damages or other +; liability, whether in an action of contract, tort or otherwise, arising from, +; out of or in connection with the Software or the use or other dealings in the +; Software. 
; void sha1_compress(uint32_t state[5], const uint8_t block[64])
;
; Microsoft x64 calling convention (NOT System V):
;   rcx = state (read/write), rdx = block (read-only).
;   rbx, rbp, rsi, rdi are callee-saved and must be preserved.
;
; Storage usage:
;   Bytes  Location  Description
;   4      eax       SHA-1 state variable A
;   4      ebx       SHA-1 state variable B
;   4      ecx       SHA-1 state variable C
;   4      edx       SHA-1 state variable D
;   4      ebp       SHA-1 state variable E
;   4      esi       Temporary for calculation per round
;   4      edi       (Rounds 40-59) second temporary for the majority function
;   8      r8        Base address of state array argument (read-only)
;   8      r9        Base address of block array argument (read-only)
;   8      rsp       x86-64 stack pointer
;   64     [rsp+0]   Circular buffer of most recent 16 key schedule items, 4 bytes each
;   16     xmm0      Caller's value of rbx (only low 64 bits are used)
;   16     xmm1      Caller's value of rbp (only low 64 bits are used)
;   16     xmm2      Caller's value of rsi (only low 64 bits are used)
;   16     xmm3      Caller's value of rdi (only low 64 bits are used)

; Message schedule: w[i] = rol(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1),
; kept in a 16-entry circular buffer on the stack; also adds w[i] into e.
SCHEDULE MACRO i, e
	mov	esi, [rsp+((i- 3) AND 15)*4]
	xor	esi, [rsp+((i- 8) AND 15)*4]
	xor	esi, [rsp+((i-14) AND 15)*4]
	xor	esi, [rsp+((i-16) AND 15)*4]
	rol	esi, 1
	add	e, esi
	mov	[rsp+(i AND 15)*4], esi
ENDM

; Common round tail: b = rol(b, 30); e += esi + k + rol(a, 5).
ROUNDTAIL MACRO a, b, e, i, k
	rol	b, 30
	lea	e, [e+esi+k]	; 32-bit addressing form gives the mod-2^32 add
	mov	esi, a
	rol	esi, 5
	add	e, esi
ENDM

; Rounds 0-15: load big-endian message word; f = (b AND c) OR (NOT b AND d).
ROUND0a MACRO a, b, c, d, e, i
	mov	esi, [r9+i*4]
	bswap	esi
	mov	[rsp+i*4], esi
	add	e, esi
	mov	esi, c
	xor	esi, d
	and	esi, b
	xor	esi, d
	ROUNDTAIL a, b, e, i, 5A827999h
ENDM

; Rounds 16-19: schedule + choose function, same constant as rounds 0-15.
ROUND0b MACRO a, b, c, d, e, i
	SCHEDULE i, e
	mov	esi, c
	xor	esi, d
	and	esi, b
	xor	esi, d
	ROUNDTAIL a, b, e, i, 5A827999h
ENDM

; Rounds 20-39: f = b XOR c XOR d.
ROUND1 MACRO a, b, c, d, e, i
	SCHEDULE i, e
	mov	esi, b
	xor	esi, c
	xor	esi, d
	ROUNDTAIL a, b, e, i, 6ED9EBA1h
ENDM

; Rounds 40-59: f = (b AND c) OR (b AND d) OR (c AND d) (majority).
ROUND2 MACRO a, b, c, d, e, i
	SCHEDULE i, e
	mov	esi, c
	mov	edi, c
	or	esi, d
	and	esi, b
	and	edi, d
	or	esi, edi
	ROUNDTAIL a, b, e, i, -70E44324h
ENDM

; Rounds 60-79: f = b XOR c XOR d.
ROUND3 MACRO a, b, c, d, e, i
	SCHEDULE i, e
	mov	esi, b
	xor	esi, c
	xor	esi, d
	ROUNDTAIL a, b, e, i, -359D3E2Ah
ENDM

.code
PUBLIC sha1_compress
sha1_compress PROC
	; Save callee-saved registers in volatile XMM registers
	; (cheaper than push/pop; this function calls nothing).
	; NOTE(review): no SEH unwind data is emitted, so exceptions cannot
	; unwind through this frame - confirm this matches project policy.
	movq	xmm0, rbx
	movq	xmm1, rbp
	movq	xmm2, rsi
	movq	xmm3, rdi
	sub	rsp, 64		; scratch space for the 16-word schedule buffer

	; Move the arguments out of rcx/rdx, because ecx/edx hold SHA-1 state below
	mov	r8, rcx		; r8 = state
	mov	r9, rdx		; r9 = block

	; Load previous hash state
	mov	eax, [r8+ 0]	; a
	mov	ebx, [r8+ 4]	; b
	mov	ecx, [r8+ 8]	; c
	mov	edx, [r8+12]	; d
	mov	ebp, [r8+16]	; e

	; 80 rounds of hashing
	ROUND0a	eax, ebx, ecx, edx, ebp,  0
	ROUND0a	ebp, eax, ebx, ecx, edx,  1
	ROUND0a	edx, ebp, eax, ebx, ecx,  2
	ROUND0a	ecx, edx, ebp, eax, ebx,  3
	ROUND0a	ebx, ecx, edx, ebp, eax,  4
	ROUND0a	eax, ebx, ecx, edx, ebp,  5
	ROUND0a	ebp, eax, ebx, ecx, edx,  6
	ROUND0a	edx, ebp, eax, ebx, ecx,  7
	ROUND0a	ecx, edx, ebp, eax, ebx,  8
	ROUND0a	ebx, ecx, edx, ebp, eax,  9
	ROUND0a	eax, ebx, ecx, edx, ebp, 10
	ROUND0a	ebp, eax, ebx, ecx, edx, 11
	ROUND0a	edx, ebp, eax, ebx, ecx, 12
	ROUND0a	ecx, edx, ebp, eax, ebx, 13
	ROUND0a	ebx, ecx, edx, ebp, eax, 14
	ROUND0a	eax, ebx, ecx, edx, ebp, 15
	ROUND0b	ebp, eax, ebx, ecx, edx, 16
	ROUND0b	edx, ebp, eax, ebx, ecx, 17
	ROUND0b	ecx, edx, ebp, eax, ebx, 18
	ROUND0b	ebx, ecx, edx, ebp, eax, 19
	ROUND1	eax, ebx, ecx, edx, ebp, 20
	ROUND1	ebp, eax, ebx, ecx, edx, 21
	ROUND1	edx, ebp, eax, ebx, ecx, 22
	ROUND1	ecx, edx, ebp, eax, ebx, 23
	ROUND1	ebx, ecx, edx, ebp, eax, 24
	ROUND1	eax, ebx, ecx, edx, ebp, 25
	ROUND1	ebp, eax, ebx, ecx, edx, 26
	ROUND1	edx, ebp, eax, ebx, ecx, 27
	ROUND1	ecx, edx, ebp, eax, ebx, 28
	ROUND1	ebx, ecx, edx, ebp, eax, 29
	ROUND1	eax, ebx, ecx, edx, ebp, 30
	ROUND1	ebp, eax, ebx, ecx, edx, 31
	ROUND1	edx, ebp, eax, ebx, ecx, 32
	ROUND1	ecx, edx, ebp, eax, ebx, 33
	ROUND1	ebx, ecx, edx, ebp, eax, 34
	ROUND1	eax, ebx, ecx, edx, ebp, 35
	ROUND1	ebp, eax, ebx, ecx, edx, 36
	ROUND1	edx, ebp, eax, ebx, ecx, 37
	ROUND1	ecx, edx, ebp, eax, ebx, 38
	ROUND1	ebx, ecx, edx, ebp, eax, 39
	ROUND2	eax, ebx, ecx, edx, ebp, 40
	ROUND2	ebp, eax, ebx, ecx, edx, 41
	ROUND2	edx, ebp, eax, ebx, ecx, 42
	ROUND2	ecx, edx, ebp, eax, ebx, 43
	ROUND2	ebx, ecx, edx, ebp, eax, 44
	ROUND2	eax, ebx, ecx, edx, ebp, 45
	ROUND2	ebp, eax, ebx, ecx, edx, 46
	ROUND2	edx, ebp, eax, ebx, ecx, 47
	ROUND2	ecx, edx, ebp, eax, ebx, 48
	ROUND2	ebx, ecx, edx, ebp, eax, 49
	ROUND2	eax, ebx, ecx, edx, ebp, 50
	ROUND2	ebp, eax, ebx, ecx, edx, 51
	ROUND2	edx, ebp, eax, ebx, ecx, 52
	ROUND2	ecx, edx, ebp, eax, ebx, 53
	ROUND2	ebx, ecx, edx, ebp, eax, 54
	ROUND2	eax, ebx, ecx, edx, ebp, 55
	ROUND2	ebp, eax, ebx, ecx, edx, 56
	ROUND2	edx, ebp, eax, ebx, ecx, 57
	ROUND2	ecx, edx, ebp, eax, ebx, 58
	ROUND2	ebx, ecx, edx, ebp, eax, 59
	ROUND3	eax, ebx, ecx, edx, ebp, 60
	ROUND3	ebp, eax, ebx, ecx, edx, 61
	ROUND3	edx, ebp, eax, ebx, ecx, 62
	ROUND3	ecx, edx, ebp, eax, ebx, 63
	ROUND3	ebx, ecx, edx, ebp, eax, 64
	ROUND3	eax, ebx, ecx, edx, ebp, 65
	ROUND3	ebp, eax, ebx, ecx, edx, 66
	ROUND3	edx, ebp, eax, ebx, ecx, 67
	ROUND3	ecx, edx, ebp, eax, ebx, 68
	ROUND3	ebx, ecx, edx, ebp, eax, 69
	ROUND3	eax, ebx, ecx, edx, ebp, 70
	ROUND3	ebp, eax, ebx, ecx, edx, 71
	ROUND3	edx, ebp, eax, ebx, ecx, 72
	ROUND3	ecx, edx, ebp, eax, ebx, 73
	ROUND3	ebx, ecx, edx, ebp, eax, 74
	ROUND3	eax, ebx, ecx, edx, ebp, 75
	ROUND3	ebp, eax, ebx, ecx, edx, 76
	ROUND3	edx, ebp, eax, ebx, ecx, 77
	ROUND3	ecx, edx, ebp, eax, ebx, 78
	ROUND3	ebx, ecx, edx, ebp, eax, 79

	; Add this block's hash into the caller's state
	add	[r8+ 0], eax
	add	[r8+ 4], ebx
	add	[r8+ 8], ecx
	add	[r8+12], edx
	add	[r8+16], ebp

	; Restore callee-saved registers and the stack pointer
	movq	rbx, xmm0
	movq	rbp, xmm1
	movq	rsi, xmm2
	movq	rdi, xmm3
	add	rsp, 64
	ret
sha1_compress ENDP
END