From e74ca7979e021960aabce58c8307d8b827ec97d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wojciech=20Mu=C5=82a?= Date: Wed, 19 Jan 2022 18:29:54 +0100 Subject: [PATCH] Simplify HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop Get rid of three divisions. The original expression was: opmin := min((oend0 - op0) / 10, (oend1 - op1) / 10, (oend2 - op2) / 10, (oend3 - op3) / 10) r15 := min(r15, opmin) The division by 10 can be moved outside the `min`: opmin := min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) r15 := min(r15, opmin/10) --- lib/decompress/huf_decompress_amd64.S | 41 ++++++++++----------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/lib/decompress/huf_decompress_amd64.S b/lib/decompress/huf_decompress_amd64.S index 49589cb6114..3f0e5c26c7d 100644 --- a/lib/decompress/huf_decompress_amd64.S +++ b/lib/decompress/huf_decompress_amd64.S @@ -427,41 +427,30 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop: /* r15 = (ip0 - ilimit) / 7 */ movq %rdx, %r15 - movabsq $-3689348814741910323, %rdx - movq 8(%rsp), %rax /* rax = oend0 */ - subq %op0, %rax /* rax = oend0 - op0 */ - mulq %rdx - shrq $3, %rdx /* rdx = rax / 10 */ - - /* r15 = min(%rdx, %r15) */ - cmpq %rdx, %r15 - cmova %rdx, %r15 + /* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */ + movq 8(%rsp), %rax /* rax = oend0 */ + subq %op0, %rax /* rax = oend0 - op0 */ + movq 16(%rsp), %rdx /* rdx = oend1 */ + subq %op1, %rdx /* rdx = oend1 - op1 */ - movabsq $-3689348814741910323, %rdx - movq 16(%rsp), %rax /* rax = oend1 */ - subq %op1, %rax /* rax = oend1 - op1 */ - mulq %rdx - shrq $3, %rdx /* rdx = rax / 10 */ - - /* r15 = min(%rdx, %r15) */ - cmpq %rdx, %r15 - cmova %rdx, %r15 + cmpq %rax, %rdx + cmova %rax, %rdx /* rdx = min(%rdx, %rax) */ - movabsq $-3689348814741910323, %rdx movq 24(%rsp), %rax /* rax = oend2 */ subq %op2, %rax /* rax = oend2 - op2 */ - mulq %rdx - shrq $3, %rdx /* rdx = rax / 10 */ - /* r15 = min(%rdx, %r15) */ - cmpq %rdx, %r15 - cmova %rdx, %r15 + cmpq %rax, %rdx + cmova %rax, %rdx /* rdx = min(%rdx, %rax) */ - movabsq $-3689348814741910323, %rdx movq 32(%rsp), %rax /* rax = oend3 */ subq %op3, %rax /* rax = oend3 - op3 */ + + cmpq %rax, %rdx + cmova %rax, %rdx /* rdx = min(%rdx, %rax) */ + + movabsq $-3689348814741910323, %rax mulq %rdx - shrq $3, %rdx /* rdx = rax / 10 */ + shrq $3, %rdx /* rdx = rdx / 10 */ /* r15 = min(%rdx, %r15) */ cmpq %rdx, %r15