diff --git a/include/relic_arch.h b/include/relic_arch.h
index f149d4a5d..05ff65e07 100644
--- a/include/relic_arch.h
+++ b/include/relic_arch.h
@@ -95,6 +95,11 @@ ull_t arch_cycles(void);
*/
uint_t arch_lzcnt(dig_t);
+/**
+ * Return the number of trailing zeros in an integer.
+ */
+uint_t arch_tzcnt(dig_t);
+
#if ARCH == AVR
/**
diff --git a/include/relic_bn.h b/include/relic_bn.h
index 39046483c..bbfa6e740 100644
--- a/include/relic_bn.h
+++ b/include/relic_bn.h
@@ -1541,6 +1541,18 @@ void bn_rec_glv(bn_t k0, bn_t k1, const bn_t k, const bn_t n, const bn_t v1[],
void bn_rec_frb(bn_t *ki, int sub, const bn_t k, const bn_t x, const bn_t n,
int cof);
+/**
+ * Recodes subscalars in the signed aligned column (SAC) representation.
+ *
+ * @param[out] b - the recoded subscalars.
+ * @param[in,out] len - the buffer capacity on input, recoding length on output.
+ * @param[in] k - the subscalars to recode.
+ * @param[in] m - the number of subscalars to recode.
+ * @param[in] n - the elliptic curve group order.
+ * @throw ERR_NO_BUFFER - if the buffer capacity is insufficient.
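+ *
+ * An illustrative call (mirroring ep2_mul_reg_gls()), with the output buffer
+ * assumed to hold at least m * len entries:
+ *
+ *   size_t len = RLC_FP_BITS + 1;
+ *   int8_t sac[4 * (RLC_FP_BITS + 1)];
+ *   bn_rec_sac(sac, &len, k, 4, n);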
+ */
+void bn_rec_sac(int8_t *b, size_t *len, bn_t *k, size_t m, bn_t n);
+
/**
* Computes the coefficients of the polynomial representing the Lagrange
* interpolation for a modulus and a given set of roots.
diff --git a/include/relic_core.h b/include/relic_core.h
index 13805aeb5..6941e014c 100644
--- a/include/relic_core.h
+++ b/include/relic_core.h
@@ -497,8 +497,10 @@ typedef struct _ctx_t {
/** Function pointer to underlying lznct implementation. */
#if ARCH == X86
unsigned int (*lzcnt_ptr)(dig_t);
+ unsigned int (*tzcnt_ptr)(dig_t);
#elif ARCH == X64 || ARCH == A64
unsigned int (*lzcnt_ptr)(ull_t);
+ unsigned int (*tzcnt_ptr)(ull_t);
#endif
} ctx_t;
diff --git a/src/arch/relic_arch_a64.c b/src/arch/relic_arch_a64.c
index f4dd6277a..1c4f1bd72 100644
--- a/src/arch/relic_arch_a64.c
+++ b/src/arch/relic_arch_a64.c
@@ -36,6 +36,7 @@
#include "relic_core.h"
#include "lzcnt.inc"
+#include "tzcnt.inc"
/**
* Renames the inline assembly macro to a prettier name.
@@ -177,6 +178,8 @@ void arch_init(void) {
if (ctx != NULL) {
core_get()->lzcnt_ptr =
(has_lzcnt_hard() ? lzcnt64_hard : lzcnt64_soft);
+		core_get()->tzcnt_ptr =
+				(has_tzcnt_hard() ? tzcnt64_hard : tzcnt64_soft);
}
#if TIMER == CYCLE
@@ -199,6 +202,7 @@ void arch_clean(void) {
ctx_t *ctx = core_get();
if (ctx != NULL) {
core_get()->lzcnt_ptr = NULL;
+ core_get()->tzcnt_ptr = NULL;
}
}
@@ -234,3 +238,7 @@ ull_t arch_cycles(void) {
uint_t arch_lzcnt(dig_t x) {
return core_get()->lzcnt_ptr((ull_t)x) - (8 * sizeof(ull_t) - WSIZE);
}
+
+uint_t arch_tzcnt(dig_t x) {
+ return core_get()->tzcnt_ptr(x);
+}
diff --git a/src/arch/relic_arch_arm.c b/src/arch/relic_arch_arm.c
index d6e662676..e079042b4 100644
--- a/src/arch/relic_arch_arm.c
+++ b/src/arch/relic_arch_arm.c
@@ -32,6 +32,7 @@
#include "relic_types.h"
#include "lzcnt.inc"
+#include "tzcnt.inc"
/**
* Renames the inline assembly macro to a prettier name.
@@ -111,3 +112,11 @@ uint_t arch_lzcnt(uint_t x) {
return lzcnt64_gcc_arm(x);
#endif
}
+
+uint_t arch_tzcnt(uint_t x) {
+#if WSIZE == 32
+ return tzcnt32_gcc_arm(x);
+#elif WSIZE == 64
+ return tzcnt64_gcc_arm(x);
+#endif
+}
diff --git a/src/arch/relic_arch_avr.c b/src/arch/relic_arch_avr.c
index 5ca50b16b..220045941 100644
--- a/src/arch/relic_arch_avr.c
+++ b/src/arch/relic_arch_avr.c
@@ -62,3 +62,15 @@ uint_t arch_lzcnt() {
}
return 0;
}
+
+uint_t arch_tzcnt(dig_t a) {
+	/* Number of trailing zeros of each 4-bit value (4 for the value 0). */
+	static const uint8_t table[16] = {
+		4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+	};
+	if ((a & 0xF) != 0) {
+		return table[a & 0xF];
+	}
+	return table[(a >> 4) & 0xF] + 4;
+}
diff --git a/src/arch/relic_arch_msp.c b/src/arch/relic_arch_msp.c
index db2545492..314cb68d8 100644
--- a/src/arch/relic_arch_msp.c
+++ b/src/arch/relic_arch_msp.c
@@ -119,3 +119,32 @@ uint_t arch_lzcnt() {
return 0;
#endif
}
+
+uint_t arch_tzcnt(dig_t a) {
+	/* Number of trailing zeros of each 4-bit value (4 for the value 0). */
+	static const uint8_t table[16] = {
+		4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+	};
+#if WSIZE == 8
+	if ((a & 0xF) != 0) {
+		return table[a & 0xF];
+	}
+	return table[(a >> 4) & 0xF] + 4;
+#elif WSIZE == 16
+	int offset = 0;
+
+	if ((a & 0xFF) == 0) {
+		offset = 8;
+		a >>= 8;
+	}
+	if ((a & 0xF) != 0) {
+		return table[a & 0xF] + offset;
+	}
+	return table[(a >> 4) & 0xF] + 4 + offset;
+#endif
+}
diff --git a/src/arch/relic_arch_none.c b/src/arch/relic_arch_none.c
index 9063cf574..98f57e744 100644
--- a/src/arch/relic_arch_none.c
+++ b/src/arch/relic_arch_none.c
@@ -83,7 +83,50 @@ uint_t arch_lzcnt(dig_t a) {
#ifdef _MSC_VER
return __lzcnt64(a);
#else
- return __builtin_clzll(a);
+ return __builtin_clzl(a);
+#endif
+#endif
+}
+
+uint_t arch_tzcnt(dig_t a) {
+#if WSIZE == 8 || WSIZE == 16
+ static const uint8_t table[16] = {
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+ };
+#endif
+#if WSIZE == 8
+ if (a >> 4 != 0) {
+ return table[a & 0xF];
+ } else {
+ return table[a >> 4] + 4;
+ }
+ return 0;
+#elif WSIZE == 16
+ int offset;
+
+ if (a & 0xFF == 0) {
+ offset = 8;
+ } else {
+ offset = 0;
+ }
+ a = a >> offset;
+ if (a >> 4 != 0) {
+ return table[a & 0xF] + offset;
+ } else {
+ return table[a >> 4] + 4 + offset;
+ }
+ return 0;
+#elif WSIZE == 32
+#ifdef _MSC_VER
+	return _tzcnt_u32(a);
+#else
+ return __builtin_ctz(a);
+#endif
+#elif WSIZE == 64
+#ifdef _MSC_VER
+	return _tzcnt_u64(a);
+#else
+ return __builtin_ctzl(a);
#endif
#endif
}
diff --git a/src/arch/relic_arch_x64.c b/src/arch/relic_arch_x64.c
index 5bef299d8..baebd138a 100644
--- a/src/arch/relic_arch_x64.c
+++ b/src/arch/relic_arch_x64.c
@@ -36,6 +36,7 @@
#include "relic_core.h"
#include "lzcnt.inc"
+#include "tzcnt.inc"
/**
* Renames the inline assembly macro to a prettier name.
@@ -51,6 +52,8 @@ void arch_init(void) {
if (ctx != NULL) {
core_get()->lzcnt_ptr =
(has_lzcnt_hard() ? lzcnt64_hard : lzcnt64_soft);
+ core_get()->tzcnt_ptr =
+ (has_tzcnt_hard() ? tzcnt64_hard : tzcnt64_soft);
}
}
@@ -58,6 +61,7 @@ void arch_clean(void) {
ctx_t *ctx = core_get();
if (ctx != NULL) {
core_get()->lzcnt_ptr = NULL;
+ core_get()->tzcnt_ptr = NULL;
}
}
@@ -103,3 +107,7 @@ ull_t arch_cycles(void) {
uint_t arch_lzcnt(dig_t x) {
return core_get()->lzcnt_ptr((ull_t)x) - (8 * sizeof(ull_t) - WSIZE);
}
+
+uint_t arch_tzcnt(dig_t x) {
+ return core_get()->tzcnt_ptr(x);
+}
diff --git a/src/arch/relic_arch_x86.c b/src/arch/relic_arch_x86.c
index fc32b5415..2f0bbac2f 100644
--- a/src/arch/relic_arch_x86.c
+++ b/src/arch/relic_arch_x86.c
@@ -36,6 +36,7 @@
#include "relic_core.h"
#include "lzcnt.inc"
+#include "tzcnt.inc"
/*============================================================================*/
/* Public definitions */
@@ -43,10 +44,12 @@
void arch_init(void) {
core_get()->lzcnt_ptr = (has_lzcnt_hard() ? lzcnt32_hard : lzcnt32_soft);
+ core_get()->tzcnt_ptr = (has_tzcnt_hard() ? tzcnt32_hard : tzcnt32_soft);
}
void arch_clean(void) {
core_get()->lzcnt_ptr = NULL;
+ core_get()->tzcnt_ptr = NULL;
}
ull_t arch_cycles(void) {
@@ -64,3 +67,7 @@ ull_t arch_cycles(void) {
uint_t arch_lzcnt(dig_t x) {
return core_get()->lzcnt_ptr((uint32_t)x) - (8 * sizeof(uint32_t) - WSIZE);
}
+
+uint_t arch_tzcnt(dig_t x) {
+ return core_get()->tzcnt_ptr(x);
+}
diff --git a/src/arch/tzcnt.inc b/src/arch/tzcnt.inc
new file mode 100644
index 000000000..719761e79
--- /dev/null
+++ b/src/arch/tzcnt.inc
@@ -0,0 +1,456 @@
+/*
+Count trailing zero bits. Choice of public domain or MIT-0.
+
+David Reid - mackron@gmail.com
+
+The tzcnt32 and tzcnt64 functions count the number of trailing zero bits in a given 32- or 64-bit variable. When the input variable is 0, the
+total size in bits will be returned (32 for tzcnt32 and 64 for tzcnt64).
+
+For x86/64 platforms, this will use the TZCNT instruction if available. On ARM it will be implemented in terms of the CLZ instruction. If these
+are unavailable it will fall back to compiler-specific built-ins. If these are unavailable it'll fall back to the generic implementation.
+
+
+License
+=======
+
+Public Domain (www.unlicense.org)
+-------------------------------------------------
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <https://unlicense.org>
+
+
+Functions
+---------
+tzcnt32_generic
+tzcnt64_generic
+ Generic implementation.
+
+tzcnt32_msvc_bsf
+tzcnt64_msvc_bsf
+	MSVC built-in implementation using _BitScanForward/64(). Note that tzcnt64_msvc_bsf() is only available when compiling as 64-bit.
+
+tzcnt32_gcc_builtin
+tzcnt64_gcc_builtin
+	GCC/Clang built-in implementation using __builtin_ctzl/ll(). Note that tzcnt64_gcc_builtin() is only available when compiling as 64-bit.
+
+tzcnt32_msvc_x86
+tzcnt64_msvc_x64
+	MSVC implementation in terms of the __lzcnt and _tzcnt_u64 intrinsics. Note that these are only available when targeting x86/64. tzcnt64_msvc_x64()
+ is only available when compiling as 64-bit.
+
+tzcnt32_gcc_x86
+tzcnt64_gcc_x64
+ GCC/Clang inline assembly implementation. This will emit the TZCNT instruction. Note that these are only available when targeting x86/x64
+ and when compiled using a compiler that supports GCC style inline assembly.
+
+tzcnt32_gcc_arm
+tzcnt64_gcc_arm
+ GCC/Clang inline assembly implementation. This will be implemented in terms of the CLZ instruction. Note that these are only available when
+ targeting ARM architecture version 5 and above and when compiled using a compiler that supports GCC style inline assembly.
+
+tzcnt32_hard
+tzcnt64_hard
+	High level helper for calling a hardware implementation. This will choose between tzcnt32_msvc_x86()/tzcnt64_msvc_x64(), tzcnt32_gcc_x86()/
+	tzcnt64_gcc_x64() and tzcnt32_gcc_arm()/tzcnt64_gcc_arm() depending on the environment. Note that these are only available when targeting
+	x86/64 or ARM. tzcnt64_hard() is only available when compiling as 64-bit. You should only call this if has_tzcnt_hard() returns non-zero.
+
+tzcnt32_soft
+tzcnt64_soft
+ High level helper for calling the best software implementation available for the current build environment.
+
+tzcnt32
+tzcnt64
+ High level helper for calling either a hardware or software implementation depending on the build environment. This will always favor a
+	hardware implementation. Do not call this in high performance code. The reason is that it will call has_tzcnt_hard() each time, which may
+	be too fine grained for your purposes. You may be better off calling has_tzcnt_hard() once at a higher level.
+
+has_tzcnt_hard
+ Determines whether or not a hardware implementation of tzcnt is available. Use this to know whether or not you can call tzcnt32/64_hard().
+	Note that this calls CPUID on each invocation, so you may want to cache the result. Use HAS_TZCNT32/64_HARD to check for compile-time support.
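+
+Example
+-------
+A minimal sketch (illustrative only; my_tzcnt32 is not part of this file) that
+caches the has_tzcnt_hard() result once, as suggested above:
+
+    unsigned int my_tzcnt32(unsigned int x)
+    {
+        static int hard = -1;
+        if (hard < 0) {
+            hard = has_tzcnt_hard();
+        }
+    #if defined(HAS_TZCNT32_HARD)
+        if (hard) {
+            return tzcnt32_hard(x);
+        }
+    #endif
+        return tzcnt32_soft(x);
+    }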
+*/
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+
+#if defined(__i386) || defined(_M_IX86)
+ #define ARCH_X86
+#elif defined(__x86_64__) || defined(_M_X64)
+ #define ARCH_X64
+#elif (defined(__arm__) && defined(__ARM_ARCH) && __ARM_ARCH >= 5) || (defined(_M_ARM) && _M_ARM >= 5) || defined(__ARM_FEATURE_CLZ) /* ARM (Architecture Version 5) */
+ #define ARCH_ARM
+#endif
+
+#if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
+ #define ARCH_64BIT
+#else
+ #define ARCH_32BIT
+#endif
+
+#if defined(ARCH_X86) || defined(ARCH_X64)
+ /* x86/64 */
+ #if defined(_MSC_VER) && _MSC_VER >= 1500
+ #define HAS_TZCNT32_HARD
+ #if defined(ARCH_64BIT)
+ #define HAS_TZCNT64_HARD
+ #endif
+ #elif defined(__GNUC__) || defined(__clang__)
+ #define HAS_TZCNT32_HARD
+ #if defined(ARCH_64BIT)
+ #define HAS_TZCNT64_HARD
+ #endif
+ #endif
+#elif defined(ARCH_ARM)
+ /* ARM */
+ #if defined(__GNUC__) || defined(__clang__)
+ #define HAS_TZCNT32_HARD
+ #if defined(ARCH_64BIT)
+ #define HAS_TZCNT64_HARD
+ #endif
+ #endif
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(ARCH_X86) || defined(ARCH_X64)) && !defined(__clang__)
+ #define HAS_TZCNT_INTRINSIC
+#elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
+ #define HAS_TZCNT_INTRINSIC
+#elif defined(__clang__)
+ #if defined(__has_builtin)
+ #if __has_builtin(__builtin_ctzll) || __has_builtin(__builtin_ctzl)
+ #define HAS_TZCNT_INTRINSIC
+ #endif
+ #endif
+#endif
+
+inline unsigned int tzcnt32_generic(unsigned int x)
+{
+ unsigned int n;
+
+ /* Special case for odd numbers since they should happen about half the time. */
+ if (x & 0x1) {
+ return 0;
+ }
+
+ if (x == 0) {
+ return sizeof(x) << 3;
+ }
+
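+	/* Binary search for the lowest set bit: each test strips a block of low
+	   zeros, and the final step subtracts one when the remaining low bit is set. */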
+ n = 1;
+ if ((x & 0x0000FFFF) == 0) { x >>= 16; n += 16; }
+ if ((x & 0x000000FF) == 0) { x >>= 8; n += 8; }
+ if ((x & 0x0000000F) == 0) { x >>= 4; n += 4; }
+ if ((x & 0x00000003) == 0) { x >>= 2; n += 2; }
+ n -= x & 0x00000001;
+
+ return n;
+}
+
+inline unsigned int tzcnt64_generic(unsigned long long x)
+{
+ unsigned int n;
+
+ /* Special case for odd numbers since they should happen about half the time. */
+ if (x & 0x1) {
+ return 0;
+ }
+
+ if (x == 0) {
+ return sizeof(x) << 3;
+ }
+
+ n = 1;
+ if ((x & 0xFFFFFFFF) == 0) { x >>= 32; n += 32; }
+ if ((x & 0x0000FFFF) == 0) { x >>= 16; n += 16; }
+ if ((x & 0x000000FF) == 0) { x >>= 8; n += 8; }
+ if ((x & 0x0000000F) == 0) { x >>= 4; n += 4; }
+ if ((x & 0x00000003) == 0) { x >>= 2; n += 2; }
+ n -= x & 0x00000001;
+
+ return n;
+}
+
+/* Generic compiler specific intrinsics. */
+#if defined(_MSC_VER)
+static unsigned int tzcnt32_msvc_bsf(unsigned int x)
+{
+ unsigned long n;
+
+ if (x == 0) {
+ return 32;
+ }
+
+ _BitScanForward(&n, x);
+
+ return n;
+}
+
+/* _BitScanReverse64() is only available on 64-bit builds. */
+#if defined(ARCH_64BIT)
+static unsigned int tzcnt64_msvc_bsf(unsigned long long x)
+{
+ unsigned long n;
+
+ if (x == 0) {
+ return 64;
+ }
+
+ _BitScanForward64(&n, x);
+
+ return n;
+}
+#endif /* ARCH_64BIT */
+#elif (defined(__GNUC__) || defined(__clang__)) && defined(HAS_TZCNT_INTRINSIC)
+static unsigned int tzcnt32_gcc_builtin(unsigned int x)
+{
+ if (x == 0) {
+ return 32;
+ }
+
+ return (unsigned int)__builtin_ctzl((unsigned long)x);
+}
+
+static unsigned int tzcnt64_gcc_builtin(unsigned long long x)
+{
+ if (x == 0) {
+ return 64;
+ }
+
+ return (unsigned int)__builtin_ctzll(x);
+}
+#endif
+
+static int has_tzcnt_hard()
+{
+#if defined(ARCH_X86) || defined(ARCH_X64)
+ int info[4] = {0};
+
+ #if defined(_MSC_VER)
+ __cpuid(info, 0x80000001);
+ #elif defined(__GNUC__) || defined(__clang__)
+ /*
+ It looks like the -fPIC option uses the ebx register which GCC complains about. We can work around this by just using a different register, the
+ specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for
+ supporting different assembly dialects.
+
+ What's basically happening is that we're saving and restoring the ebx register manually.
+ */
+ #if defined(ARCH_X86) && defined(__PIC__)
+ __asm__ __volatile__ (
+ "xchg{l} {%%}ebx, %k1;"
+ "cpuid;"
+ "xchg{l} {%%}ebx, %k1;"
+ : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(0x80000001), "c"(0)
+ );
+ #else
+ __asm__ __volatile__ (
+ "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(0x80000001), "c"(0)
+ );
+ #endif
+ #endif
+
+ return (info[2] & (1 << 5)) != 0;
+#elif defined(ARCH_ARM)
+ return 1; /* The CLZ instruction is available starting from ARM architecture version 5. Our ARCH_ARM #define is only defined when targeting version 5 at compile time. */
+#else
+ return 0; /* Hardware TZCNT is only supported in x86/64 and ARM for now. */
+#endif
+}
+
+/* Intrinsics and inline-assembly. x86/64 has a hardware TZCNT instruction. You can only call these if has_tzcnt_hard() returns true. */
+#if defined(HAS_TZCNT32_HARD)
+ #if defined(ARCH_X86) || defined(ARCH_X64)
+ #if defined(_MSC_VER) && !defined(__clang__)
+			/* Build the 32-bit tzcnt in terms of the __lzcnt() intrinsic on MSVC. */
+ static unsigned int tzcnt32_msvc_x86(unsigned int x)
+ {
+ if (x == 0) {
+ return sizeof(x) << 3;
+ }
+
+ return 31 - __lzcnt(x & -(int)x);
+ }
+ #elif defined(__GNUC__) || defined(__clang__)
+ static unsigned int tzcnt32_gcc_x86(unsigned int x)
+ {
+ /*
+			att:   tzcntl [in], [out]
+			intel: tzcnt  [out], [in]
+ */
+ unsigned int r;
+ __asm__ __volatile__ (
+ "tzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc"
+ );
+
+ return r;
+ }
+ #endif
+ #endif
+ #if defined(ARCH_ARM)
+ #if defined(__GNUC__) || defined(__clang__)
+ /* The ARM implementation needs to be written in terms of the CLZ instruction. This can probably be optimized by implementing the whole function in assembly. */
+ static unsigned int tzcnt32_gcc_arm(unsigned int x)
+ {
+ unsigned int r;
+
+ if (x == 0) {
+ return sizeof(x) << 3;
+			}
+
+			/* Isolate the lowest set bit so CLZ returns its position. */
+			x &= (0 - x);
+
+			__asm__ __volatile__ (
+ #if defined(ARCH_32BIT)
+ "clz %[out], %[in]" : [out]"=r"(r) : [in]"r"(x)
+ #else
+ "clz %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(x)
+ #endif
+ );
+
+ return 31 - r;
+ }
+ #endif
+ #endif
+
+ static unsigned int tzcnt32_hard(unsigned int x)
+ {
+ #if defined(ARCH_X86) || defined(ARCH_X64)
+ #if defined(_MSC_VER) && !defined(__clang__)
+ return tzcnt32_msvc_x86(x);
+ #elif defined(__GNUC__) || defined(__clang__)
+ return tzcnt32_gcc_x86(x);
+ #else
+ #error "This compiler does not support the tzcnt intrinsic."
+ #endif
+ #elif defined(ARCH_ARM)
+ #if defined(__GNUC__) || defined(__clang__)
+ return tzcnt32_gcc_arm(x);
+ #else
+ #error "This compiler does not support the clz intrinsic."
+ #endif
+ #else
+ #error "The build target does not support a native instruction."
+ #endif
+ }
+#endif
+
+#if defined(HAS_TZCNT64_HARD)
+ #if defined(ARCH_X86) || defined(ARCH_X64)
+ #if defined(_MSC_VER) && !defined(__clang__)
+ static unsigned int tzcnt64_msvc_x64(unsigned long long x)
+ {
+ return _tzcnt_u64(x);
+ }
+ #elif defined(__GNUC__) || defined(__clang__)
+ static unsigned int tzcnt64_gcc_x64(unsigned long long x)
+ {
+ /*
+			att:   tzcnt [in], [out]
+			intel: tzcnt [out], [in]
+ */
+ unsigned long long r;
+ __asm__ __volatile__ (
+ "tzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc"
+ );
+
+ return r;
+ }
+ #endif
+ #endif
+ #if defined(ARCH_ARM)
+ #if defined(__GNUC__) || defined(__clang__)
+ static unsigned int tzcnt64_gcc_arm(unsigned long long x)
+ {
+			if (x == 0) {
+				return sizeof(x) << 3;
+			}
+
+			return (unsigned int)__builtin_ctzll(x);
+ }
+ #endif
+ #endif
+
+ static unsigned int tzcnt64_hard(unsigned long long x)
+ {
+ #if defined(ARCH_X64)
+ #if defined(_MSC_VER) && !defined(__clang__)
+ return tzcnt64_msvc_x64(x);
+ #elif defined(__GNUC__) || defined(__clang__)
+ return tzcnt64_gcc_x64(x);
+ #else
+ #error "This compiler does not support the tzcnt intrinsic."
+ #endif
+ #elif defined(ARCH_ARM) && defined(ARCH_64BIT)
+ #if defined(__GNUC__) || defined(__clang__)
+ return tzcnt64_gcc_arm(x);
+ #else
+ #error "This compiler does not support the clz intrinsic."
+ #endif
+ #else
+ #error "The build target does not support a native instruction."
+ #endif
+ }
+#endif
+
+
+static unsigned int tzcnt32_soft(unsigned int x)
+{
+#if defined(_MSC_VER)
+ return tzcnt32_msvc_bsf(x);
+#elif defined(HAS_TZCNT_INTRINSIC)
+ return tzcnt32_gcc_builtin(x);
+#else
+ return tzcnt32_generic(x);
+#endif
+}
+
+static unsigned int tzcnt64_soft(unsigned long long x)
+{
+#if defined(ARCH_64BIT)
+ #if defined(_MSC_VER)
+ return tzcnt64_msvc_bsf(x);
+ #elif defined(HAS_TZCNT_INTRINSIC)
+ return tzcnt64_gcc_builtin(x);
+ #else
+ return tzcnt64_generic(x);
+ #endif
+#else
+ return tzcnt64_generic(x);
+#endif
+}
+
+
+inline static unsigned int tzcnt32(unsigned int x)
+{
+#if defined(HAS_TZCNT32_HARD)
+ if (has_tzcnt_hard()) {
+ return tzcnt32_hard(x);
+ } else
+#endif
+ {
+ return tzcnt32_soft(x);
+ }
+}
+
+inline static unsigned int tzcnt64(unsigned long long x)
+{
+#if defined(HAS_TZCNT64_HARD)
+ if (has_tzcnt_hard()) {
+ return tzcnt64_hard(x);
+ } else
+#endif
+ {
+ return tzcnt64_soft(x);
+ }
+}
diff --git a/src/bn/relic_bn_rec.c b/src/bn/relic_bn_rec.c
index d48b63660..2f8b2c210 100644
--- a/src/bn/relic_bn_rec.c
+++ b/src/bn/relic_bn_rec.c
@@ -876,16 +876,24 @@ void bn_rec_glv(bn_t k0, bn_t k1, const bn_t k, const bn_t n, const bn_t *v1,
}
}
-void bn_rec_sac(bn_t *b, bn_t *k, size_t m, bn_t n) {
+void bn_rec_sac(int8_t *b, size_t *len, bn_t *k, size_t m, bn_t n) {
/* Assume k0 is the sign-aligner. */
bn_t *t = RLC_ALLOCA(bn_t, m);
size_t l = RLC_CEIL(bn_bits(n), m) + 1;
+ int8_t bji;
if (t == NULL) {
RLC_THROW(ERR_NO_MEMORY);
return;
}
+ if (*len <= l) {
+ *len = 0;
+ RLC_FREE(t);
+ RLC_THROW(ERR_NO_BUFFER);
+ return;
+ }
+
RLC_TRY {
for (size_t i = 0; i < m; i++) {
bn_null(t[i]);
@@ -893,25 +901,32 @@ void bn_rec_sac(bn_t *b, bn_t *k, size_t m, bn_t n) {
bn_copy(t[i], k[i]);
}
- bn_set_bit(b[0], l - 1, 0);
+		/* For BN curves, the subscalars might be one bit longer, so adjust. */
+ for (size_t i = 0; i < m; i++) {
+ l = RLC_MAX(l, bn_bits(k[i]) + 1);
+ }
+
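+		/* Recode the sign-aligner k[0] first: position i stores the complement
+		 * of bit i + 1 and the top position is always zero. The remaining
+		 * subscalars are then recoded digit by digit, aligned to k[0]. */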
+ b[l - 1] = 0;
for (size_t i = 0; i < l - 1; i++) {
- bn_set_bit(b[0], i, 1 - bn_get_bit(k[0], i + 1));
+ b[i] = 1 - bn_get_bit(k[0], i + 1);
}
for (size_t j = 1; j < m; j++) {
- for (size_t i = 0; i < l; i++) {
- uint8_t bji = bn_get_bit(t[j], 0);
- bn_set_bit(b[j], i, bji);
+ for (size_t i = 0; i < l - 1; i++) {
+ bji = bn_get_bit(t[j], 0);
+ b[j * l + i] = bji;
bn_hlv(t[j], t[j]);
- bn_add_dig(t[j], t[j], bji & bn_get_bit(b[0], i));
+ bn_add_dig(t[j], t[j], bji & b[i]);
}
+ b[j * l + l - 1] = bn_get_bit(t[j], 0);
}
+ *len = l;
} RLC_CATCH_ANY {
RLC_THROW(ERR_CAUGHT);
} RLC_FINALLY {
for (size_t i = 0; i < m; i++) {
bn_free(t[i]);
- RLC_FREE(t);
}
+ RLC_FREE(t);
}
}
diff --git a/src/bn/relic_bn_smb.c b/src/bn/relic_bn_smb.c
index 98f74dd82..d1446ae0e 100644
--- a/src/bn/relic_bn_smb.c
+++ b/src/bn/relic_bn_smb.c
@@ -77,14 +77,21 @@ int bn_smb_leg(const bn_t a, const bn_t b) {
}
int bn_smb_jac(const bn_t a, const bn_t b) {
- bn_t t0, t1, r;
- int t, h, res;
+ dis_t ai, bi, ci, di;
+ dig_t n, d, t;
+ bn_t t0, t1, t2, t3;
+ uint_t z, i, s = (RLC_DIG >> 1) - 2;
+ int r;
bn_null(t0);
bn_null(t1);
- bn_null(r);
+ bn_null(t2);
+ bn_null(t3);
- /* Argument b must be odd. */
+ /* Optimized Pornin's Algorithm by Aleksei Vambol from
+	 * https://github.com/privacy-scaling-explorations/halo2curves/pull/95 */
+
+ /* Argument b must be odd for Jacobi symbol. */
if (bn_is_even(b) || bn_sign(b) == RLC_NEG) {
RLC_THROW(ERR_NO_VALID);
return 0;
@@ -93,55 +100,119 @@ int bn_smb_jac(const bn_t a, const bn_t b) {
RLC_TRY {
bn_new(t0);
bn_new(t1);
- bn_new(r);
- t = 1;
+ bn_new(t2);
+ bn_new(t3);
- if (bn_sign(a) == RLC_NEG) {
- bn_add(t0, a, b);
- } else {
- bn_copy(t0, a);
- }
+ bn_mod(t0, a, b);
bn_copy(t1, b);
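+	/* Only bit 1 of t matters: it accumulates the sign of the symbol. The
+	 * (n & d) term applies quadratic reciprocity on swaps, and d ^ (d >> 1)
+	 * has bit 1 set exactly when 2 is a non-residue modulo d. */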
+ t = 0;
while (1) {
- /* t0 = a mod b. */
- bn_mod(t0, t0, t1);
- /* If a = 0 then if n = 1 return t else return 0. */
- if (bn_is_zero(t0)) {
- if (bn_cmp_dig(t1, 1) == RLC_EQ) {
- res = 1;
- if (t == -1) {
- res = -1;
+ ai = di = 1;
+ bi = ci = 0;
+
+ i = RLC_MAX(t0->used, t1->used);
+ dv_zero(t0->dp + t0->used, i - t0->used);
+ dv_zero(t1->dp + t1->used, i - t1->used);
+ if (i == 1) {
+ n = t0->dp[0];
+ d = t1->dp[0];
+ while (n != 0) {
+ if (n & 1) {
+ if (n < d) {
+ RLC_SWAP(n, d);
+ t ^= (n & d);
+ }
+ n = (n - d) >> 1;
+ t ^= d ^ (d >> 1);
+ } else {
+ z = arch_tzcnt(n);
+ t ^= (d ^ (d >> 1)) & (z << 1);
+ n >>= z;
+ }
+ }
+ r = (d == 1 ? 1 - (t & 2) : 0);
+ break;
+ }
+
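+			/* Approximate n and d by splicing the high half-word of the
+			 * jointly-normalized top limbs with the low half-word of each
+			 * operand, as in Pornin's optimized binary GCD. */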
+ z = RLC_MIN(arch_lzcnt(t0->dp[i - 1]), arch_lzcnt(t1->dp[i - 1]));
+ n = t0->dp[i - 1] << z;
+ d = t1->dp[i - 1] << z;
+ if (z > (RLC_DIG >> 1)) {
+				n |= t0->dp[i - 2] >> (RLC_DIG - z);
+				d |= t1->dp[i - 2] >> (RLC_DIG - z);
+ }
+ n = (n & RLC_HMASK) | (t0->dp[0] & RLC_LMASK);
+ d = (d & RLC_HMASK) | (t1->dp[0] & RLC_LMASK);
+
+ i = s;
+ while (i > 0) {
+ if (n & 1) {
+ if (n < d) {
+ RLC_SWAP(ai, ci);
+ RLC_SWAP(bi, di);
+ RLC_SWAP(n, d);
+ t ^= (n & d);
}
- break;
+ n = (n - d) >> 1;
+ ai = ai - ci;
+ bi = bi - di;
+ ci += ci;
+ di += di;
+ t ^= d ^ (d >> 1);
+ i -= 1;
} else {
- res = 0;
- break;
+ z = RLC_MIN(i, arch_tzcnt(n));
+ t ^= (d ^ (d >> 1)) & (z << 1);
+ ci = (dig_t)ci << z;
+ di = (dig_t)di << z;
+ n >>= z;
+ i -= z;
}
}
- /* Write t0 as 2^h * t0. */
- h = 0;
- while (bn_is_even(t0) && !bn_is_zero(t0)) {
- h++;
- bn_rsh(t0, t0, 1);
+
+ if (ai < 0) {
+ bn_mul_dig(t2, t0, -ai);
+ bn_neg(t2, t2);
+ } else {
+ bn_mul_dig(t2, t0, ai);
}
- /* If h != 0 (mod 2) and n != +-1 (mod 8) then t = -t. */
- bn_mod_2b(r, t1, 3);
- if ((h % 2 != 0) && (bn_cmp_dig(r, 1) != RLC_EQ) &&
- (bn_cmp_dig(r, 7) != RLC_EQ)) {
- t = -t;
+ if (bi < 0) {
+ bn_mul_dig(t3, t1, -bi);
+ bn_neg(t3, t3);
+ } else {
+ bn_mul_dig(t3, t1, bi);
}
- /* If t0 != 1 (mod 4) and n != 1 (mod 4) then t = -t. */
- bn_mod_2b(r, t0, 2);
- if (bn_cmp_dig(r, 1) != RLC_EQ) {
- bn_mod_2b(r, t1, 2);
- if (bn_cmp_dig(r, 1) != RLC_EQ) {
- t = -t;
- }
+ bn_add(t3, t3, t2);
+
+ if (ci < 0) {
+ bn_mul_dig(t2, t0, -ci);
+ bn_neg(t2, t2);
+ } else {
+ bn_mul_dig(t2, t0, ci);
+ }
+ if (di < 0) {
+ bn_mul_dig(t1, t1, -di);
+ bn_neg(t1, t1);
+ } else {
+ bn_mul_dig(t1, t1, di);
+ }
+ bn_add(t1, t1, t2);
+ bn_rsh(t1, t1, s);
+ bn_rsh(t0, t3, s);
+
+ if (bn_is_zero(t0)) {
+ r = (bn_cmp_dig(t1, 1) == RLC_EQ ? 1 - (t & 2) : 0);
+ break;
+ }
+
+ if (bn_sign(t0) == RLC_NEG) {
+ t ^= t1->dp[0];
+ bn_neg(t0, t0);
+ }
+ if (bn_sign(t1) == RLC_NEG) {
+ bn_neg(t1, t1);
}
- bn_copy(r, t0);
- bn_copy(t0, t1);
- bn_copy(t1, r);
}
}
RLC_CATCH_ANY {
@@ -150,8 +221,9 @@ int bn_smb_jac(const bn_t a, const bn_t b) {
RLC_FINALLY {
bn_free(t0);
bn_free(t1);
- bn_free(r);
+ bn_free(t2);
+ bn_free(t3);
}
- return res;
+ return r;
}
diff --git a/src/ep/relic_ep_map.c b/src/ep/relic_ep_map.c
index fb923a61e..c96b78795 100644
--- a/src/ep/relic_ep_map.c
+++ b/src/ep/relic_ep_map.c
@@ -258,6 +258,7 @@ void ep_map_swift(ep_t p, const uint8_t *msg, size_t len) {
bn_null(k);
fp_null(v);
fp_null(w);
+ fp_null(y);
fp_null(t1);
fp_null(t2);
fp_null(x1);
@@ -271,6 +272,7 @@ void ep_map_swift(ep_t p, const uint8_t *msg, size_t len) {
bn_new(k);
fp_new(v);
fp_new(w);
+ fp_new(y);
fp_new(t1);
fp_new(t2);
fp_new(x1);
@@ -478,6 +480,7 @@ void ep_map_swift(ep_t p, const uint8_t *msg, size_t len) {
bn_free(k);
fp_free(v);
fp_free(w);
+ fp_free(y);
fp_free(t1);
fp_free(t2);
fp_free(x1);
@@ -488,8 +491,7 @@ void ep_map_swift(ep_t p, const uint8_t *msg, size_t len) {
fp_free(d[2]);
RLC_FREE(pseudo_random_bytes);
for (size_t i = 0; i < 8; i++) {
- fp_null(h[i]);
- fp_new(h[i]);
+ fp_free(h[i]);
}
}
}
diff --git a/src/epx/relic_ep2_map.c b/src/epx/relic_ep2_map.c
index a99998f8b..447050d53 100644
--- a/src/epx/relic_ep2_map.c
+++ b/src/epx/relic_ep2_map.c
@@ -516,6 +516,12 @@ void ep2_map_swift(ep2_t p, const uint8_t *msg, size_t len) {
}
RLC_FINALLY {
bn_free(k);
+ fp2_free(a);
+ fp2_free(b);
+ fp2_free(c);
+ fp2_free(d);
+ fp2_free(e);
+ fp2_free(f);
fp2_free(t);
fp2_free(u);
fp2_free(v);
diff --git a/src/epx/relic_ep2_mul.c b/src/epx/relic_ep2_mul.c
index 68904d8f5..2e2386cbf 100644
--- a/src/epx/relic_ep2_mul.c
+++ b/src/epx/relic_ep2_mul.c
@@ -115,100 +115,102 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) {
#if EP_MUL == LWREG || !defined(STRIP)
static void ep2_mul_reg_gls(ep2_t r, const ep2_t p, const bn_t k) {
- int8_t reg[4][RLC_FP_BITS + 1], b[4], s[4], c0, n0;
- ep2_t q, w, t[4][1 << (RLC_WIDTH - 2)];
+ size_t l;
bn_t n, _k[4], u;
- size_t l, len, _l[4];
+ int8_t even, col, sac[4 * (RLC_FP_BITS + 1)];
+ ep2_t q[4], t[1 << 3];
bn_null(n);
bn_null(u);
- ep2_null(q);
- ep2_null(w);
RLC_TRY {
bn_new(n);
bn_new(u);
- ep2_new(q);
- ep2_new(w);
- for (size_t i = 0; i < 4; i++) {
+ for (int i = 0; i < 4; i++) {
bn_null(_k[i]);
+ ep2_null(q[i]);
bn_new(_k[i]);
- for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) {
- ep2_null(t[i][j]);
- ep2_new(t[i][j]);
- }
+ ep2_new(q[i]);
+ }
+ for (int i = 0; i < (1 << 3); i++) {
+ ep2_null(t[i]);
+ ep2_new(t[i]);
}
ep2_curve_get_ord(n);
fp_prime_get_par(u);
bn_mod(_k[0], k, n);
bn_rec_frb(_k, 4, _k[0], u, n, ep_curve_is_pairf() == EP_BN);
-
- l = 0;
- /* Make some extra room for BN curves that grow subscalars by 1. */
- len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN);
- ep2_norm(t[0][0], p);
+ ep2_norm(q[0], p);
+ ep2_frb(q[1], q[0], 1);
+ ep2_frb(q[2], q[1], 1);
+ ep2_frb(q[3], q[2], 1);
for (size_t i = 0; i < 4; i++) {
- s[i] = bn_sign(_k[i]);
- bn_abs(_k[i], _k[i]);
- b[i] = bn_is_even(_k[i]);
- _k[i]->dp[0] |= b[i];
-
- _l[i] = RLC_FP_BITS + 1;
- bn_rec_reg(reg[i], &_l[i], _k[i], len, RLC_WIDTH);
- l = RLC_MAX(l, _l[i]);
-
- /* Apply Frobenius before flipping sign to build table. */
- if (i > 0) {
- ep2_frb(t[i][0], t[i - 1][0], 1);
- }
+ ep2_neg(r, q[i]);
+ fp2_copy_sec(q[i]->y, r->y, bn_sign(_k[i]) == RLC_NEG);
+ _k[i]->sign = RLC_POS;
}
+ even = bn_is_even(_k[0]);
+ bn_add_dig(_k[0], _k[0], even);
- for (size_t i = 0; i < 4; i++) {
- ep2_neg(q, t[i][0]);
- fp2_copy_sec(q->y, t[i][0]->y, s[i] == RLC_POS);
- ep2_tab(t[i], q, RLC_WIDTH);
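+	/* Build the table of subset sums: t[i] adds q[j + 1] on top of q[0] for
+	 * every set bit j of i, reusing the entry with the highest bit cleared. */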
+ ep2_copy(t[0], q[0]);
+ for (size_t i = 1; i < (1 << 3); i++) {
+ l = util_bits_dig(i);
+ ep2_add(t[i], t[i ^ (1 << (l - 1))], q[l]);
}
+ l = RLC_FP_BITS + 1;
+ bn_rec_sac(sac, &l, _k, 4, n);
+
#if defined(EP_MIXED)
- fp2_set_dig(w->z, 1);
- w->coord = BASIC;
+ ep2_norm_sim(t + 1, t + 1, (1 << 3) - 1);
+ fp2_set_dig(r->z, 1);
+ fp2_set_dig(q[1]->z, 1);
+ r->coord = q[1]->coord = BASIC;
#else
- w->coord = = EP_ADD;
+ r->coord = q[1]->coord = EP_ADD;
#endif
- ep2_set_infty(r);
- for (int j = l - 1; j >= 0; j--) {
- for (size_t i = 0; i < RLC_WIDTH - 1; i++) {
- ep2_dbl(r, r);
- }
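+	/* sac[i * l + j] holds digit j of subscalar i: digits of subscalars 1..3
+	 * select a table entry and the digit of subscalar 0 selects its sign.
+	 * Start from the most significant column. */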
+ col = 0;
+ for (int i = 3; i > 0; i--) {
+ col <<= 1;
+ col += sac[i * l + l - 1];
+ }
+ for (size_t m = 0; m < (1 << 3); m++) {
+ fp2_copy_sec(r->x, t[m]->x, m == col);
+ fp2_copy_sec(r->y, t[m]->y, m == col);
+#if !defined(EP_MIXED)
+ fp2_copy_sec(r->z, t[m]->z, m == col);
+#endif
+ }
- for (size_t i = 0; i < 4; i++) {
- n0 = reg[i][j];
- c0 = (n0 >> 7);
- n0 = ((n0 ^ c0) - c0) >> 1;
-
- for (size_t m = 0; m < (1 << (RLC_WIDTH - 2)); m++) {
- fp2_copy_sec(w->x, t[i][m]->x, m == n0);
- fp2_copy_sec(w->y, t[i][m]->y, m == n0);
- #if !defined(EP_MIXED)
- fp2_copy_sec(w->z, t[i][m]->z, m == n0);
- #endif
- }
+ ep2_neg(q[1], r);
+ fp2_copy_sec(r->y, q[1]->y, sac[l - 1] != 0);
+ for (int j = l - 2; j >= 0; j--) {
+ ep2_dbl(r, r);
- ep2_neg(q, w);
- fp2_copy_sec(q->y, w->y, c0 == 0);
- ep2_add(r, r, q);
+ col = 0;
+ for (int i = 3; i > 0; i--) {
+ col <<= 1;
+ col += sac[i * l + j];
}
+
+ for (size_t m = 0; m < (1 << 3); m++) {
+ fp2_copy_sec(q[1]->x, t[m]->x, m == col);
+ fp2_copy_sec(q[1]->y, t[m]->y, m == col);
+#if !defined(EP_MIXED)
+ fp2_copy_sec(q[1]->z, t[m]->z, m == col);
+#endif
+ }
+ ep2_neg(q[2], q[1]);
+ fp2_copy_sec(q[1]->y, q[2]->y, sac[j]);
+ ep2_add(r, r, q[1]);
}
- for (size_t i = 0; i < 4; i++) {
- /* Tables are built with points already negated, so no need here. */
- ep2_sub(q, r, t[i][0]);
- fp2_copy_sec(r->x, q->x, b[i]);
- fp2_copy_sec(r->y, q->y, b[i]);
- fp2_copy_sec(r->z, q->z, b[i]);
- }
+ ep2_sub(q[1], r, q[0]);
+ fp2_copy_sec(r->x, q[1]->x, even);
+ fp2_copy_sec(r->y, q[1]->y, even);
+ fp2_copy_sec(r->z, q[1]->z, even);
/* Convert r to affine coordinates. */
ep2_norm(r, r);
@@ -219,13 +221,12 @@ static void ep2_mul_reg_gls(ep2_t r, const ep2_t p, const bn_t k) {
RLC_FINALLY {
bn_free(n);
bn_free(u);
- ep2_free(q);
- ep2_free(w);
for (int i = 0; i < 4; i++) {
bn_free(_k[i]);
- for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) {
- ep2_free(t[i][j]);
- }
+ ep2_free(q[i]);
+ }
+ for (int i = 0; i < (1 << 3); i++) {
+ ep2_free(t[i]);
}
}
}
diff --git a/src/fp/relic_fp_smb.c b/src/fp/relic_fp_smb.c
index 0ff9d5266..f9f2602bb 100644
--- a/src/fp/relic_fp_smb.c
+++ b/src/fp/relic_fp_smb.c
@@ -164,24 +164,24 @@ static dig_t porninstep(dis_t m[4],const dig_t f[2], const dig_t g[2],
static dis_t jumpdivstep(dis_t m[4], dig_t *k, dis_t delta, dis_t y, dis_t x,
int s) {
- dig_t d0, t0, t1, t2, c0, c1, yi, ai = 1, bi = 0, ci = 0, di = 1, u = 0;
+ dig_t t0, t1, t2, c0, c1, yi, ai = 1, bi = 0, ci = 0, di = 1, u = 0;
/* Unrolling twice makes it faster. */
for (s -= 2; s >= 0; s -= 2) {
yi = y;
- d0 = (delta >= 0);
+ c0 = delta >> (RLC_DIG - 1);
c1 = -(x & 1);
- c0 = (-d0) & c1;
+ c0 &= c1;
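+		/* c0 is an all-ones mask exactly when delta is negative and x is odd. */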
- t0 = (y ^ -d0) + d0;
- t1 = (ci ^ -d0) + d0;
- t2 = (di ^ -d0) + d0;
+ t0 = (y ^ c0) - c0;
+ t1 = (ci ^ c0) - c0;
+ t2 = (di ^ c0) - c0;
x += t0 & c1;
ai += t1 & c1;
bi += t2 & c1;
- /* delta = RLC_SEL(delta + 1, -delta, c0) */
+ /* delta = RLC_SEL(2 + delta, 2 - delta, c0) */
y += x & c0;
ci += ai & c0;
di += bi & c0;
@@ -189,25 +189,25 @@ static dis_t jumpdivstep(dis_t m[4], dig_t *k, dis_t delta, dis_t y, dis_t x,
x >>= 1;
ci <<= 1;
di <<= 1;
- delta = (delta ^ c0) + 1;
+ delta = (delta ^ c0) - 1;
u += ((yi & y) ^ (y >> 1)) & 2;
u += (u & 1) ^ RLC_SIGN(ci);
yi = y;
- d0 = (delta >= 0);
+ c0 = delta >> (RLC_DIG - 1);
c1 = -(x & 1);
- c0 = (-d0) & c1;
+ c0 &= c1;
- t0 = (y ^ -d0) + d0;
- t1 = (ci ^ -d0) + d0;
- t2 = (di ^ -d0) + d0;
+ t0 = (y ^ c0) - c0;
+ t1 = (ci ^ c0) - c0;
+ t2 = (di ^ c0) - c0;
x += t0 & c1;
ai += t1 & c1;
bi += t2 & c1;
- /* delta = RLC_SEL(delta + 1, -delta, c0) */
+ /* delta = RLC_SEL(2 + delta, 2 - delta, c0) */
y += x & c0;
ci += ai & c0;
di += bi & c0;
@@ -215,7 +215,7 @@ static dis_t jumpdivstep(dis_t m[4], dig_t *k, dis_t delta, dis_t y, dis_t x,
x >>= 1;
ci <<= 1;
di <<= 1;
- delta = (delta ^ c0) + 1;
+ delta = (delta ^ c0) - 1;
u += ((yi & y) ^ (y >> 1)) & 2;
u += (u & 1) ^ RLC_SIGN(ci);
@@ -468,7 +468,7 @@ int fp_smb_divst(const fp_t a) {
#if FP_SMB == JMPDS || !defined(STRIP)
int fp_smb_jmpds(const fp_t a) {
- dis_t m[4], d = 0;
+ dis_t m[4], d = -1;
/* Iterations taken directly from https://github.com/sipa/safegcd-bounds */
const int iterations = (45907 * FP_PRIME + 26313) / 19929;
int loops, precision, i, r = 0, s = RLC_DIG - 2;
diff --git a/test/test_bn.c b/test/test_bn.c
index 4e2f7f574..5ac90f83b 100644
--- a/test/test_bn.c
+++ b/test/test_bn.c
@@ -2281,6 +2281,29 @@ static int recoding(void) {
bn_cmp(a, v2[2]) == RLC_EQ, end);
}
} TEST_END;
+
+ TEST_CASE("glv-sac recoding is correct") {
+ size_t l = RLC_BN_BITS;
+ int8_t ptr[2 * RLC_BN_BITS] = { 0 };
+ if (ep_param_set_any_endom() == RLC_OK) {
+ ep_curve_get_v1(v1);
+ ep_curve_get_v2(v2);
+ ep_curve_get_ord(b);
+ bn_rand_mod(a, b);
+			bn_rec_glv(b, c, a, b, (const bn_t *)v1, (const bn_t *)v2);
+			/* The sign-aligner subscalar must be odd before recoding. */
+			if (bn_is_even(b)) {
+				bn_add_dig(b, b, 1);
+			}
+			bn_copy(v1[0], b);
+			bn_copy(v1[1], c);
+			ep_curve_get_ord(v2[0]);
+			bn_rec_sac(ptr, &l, v1, 2, v2[0]);
+ for (size_t i = 0; i < l; i++) {
+ TEST_ASSERT(ptr[i] == 0 || ptr[i] == 1, end);
+ TEST_ASSERT(ptr[l + i] == 0 || ptr[l + i] == 1, end);
+ }
+ }
+ } TEST_END;
#endif /* WITH_EP && EP_ENDOM */
}
RLC_CATCH_ANY {