diff --git a/.travis.yml b/.travis.yml index 7276fe4a..79474e17 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,6 +13,12 @@ script: - ./ci/test_full.sh matrix: include: + - name: "Rust: stable-i686" + rust: stable-i686-unknown-linux-gnu + addons: + apt: + packages: + - gcc-multilib - name: "rustfmt" rust: 1.31.0 before_script: diff --git a/benches/bigint.rs b/benches/bigint.rs index bc0875d8..90a7e856 100644 --- a/benches/bigint.rs +++ b/benches/bigint.rs @@ -112,6 +112,11 @@ fn divide_2(b: &mut Bencher) { divide_bench(b, 1 << 16, 1 << 12); } +#[bench] +fn divide_big_little(b: &mut Bencher) { + divide_bench(b, 1 << 16, 1 << 4); +} + #[bench] fn remainder_0(b: &mut Bencher) { remainder_bench(b, 1 << 8, 1 << 6); @@ -127,6 +132,11 @@ fn remainder_2(b: &mut Bencher) { remainder_bench(b, 1 << 16, 1 << 12); } +#[bench] +fn remainder_big_little(b: &mut Bencher) { + remainder_bench(b, 1 << 16, 1 << 4); +} + #[bench] fn factorial_100(b: &mut Bencher) { b.iter(|| factorial(100)); diff --git a/build.rs b/build.rs index e483c15f..1f55eb95 100644 --- a/build.rs +++ b/build.rs @@ -1,14 +1,78 @@ extern crate autocfg; use std::env; +use std::error::Error; +use std::fs::File; +use std::io::Write; +use std::path::Path; fn main() { let ac = autocfg::new(); + if ac.probe_type("i128") { - println!("cargo:rustc-cfg=has_i128"); + autocfg::emit("has_i128"); + + let pointer_width = env::var("CARGO_CFG_TARGET_POINTER_WIDTH"); + if pointer_width.as_ref().map(String::as_str) == Ok("64") { + autocfg::emit("u64_digit"); + } } else if env::var_os("CARGO_FEATURE_I128").is_some() { panic!("i128 support was not detected!"); } autocfg::rerun_path("build.rs"); + + write_radix_bases().unwrap(); +} + +/// Write tables of the greatest power of each radix for the given bit size. These are returned +/// from `biguint::get_radix_base` to batch the multiplication/division of radix conversions on +/// full `BigUint` values, operating on primitive integers as much as possible. +/// +/// e.g. BASES_16[3] = (59049, 10) // 3¹⁰ fits in u16, but 3¹¹ is too big +/// BASES_32[3] = (3486784401, 20) +/// BASES_64[3] = (12157665459056928801, 40) +/// +/// Powers of two are not included, just zeroed, as they're implemented with shifts. +#[allow(unknown_lints, bare_trait_objects)] +fn write_radix_bases() -> Result<(), Box> { + let out_dir = env::var("OUT_DIR")?; + let dest_path = Path::new(&out_dir).join("radix_bases.rs"); + let mut f = File::create(&dest_path)?; + + for &bits in &[16, 32, 64] { + let max = if bits < 64 { + (1 << bits) - 1 + } else { + std::u64::MAX + }; + + writeln!(f, "#[deny(overflowing_literals)]")?; + writeln!( + f, + "pub static BASES_{bits}: [(u{bits}, usize); 257] = [", + bits = bits + )?; + for radix in 0u64..257 { + let (base, power) = if radix == 0 || radix.is_power_of_two() { + (0, 0) + } else { + let mut power = 1; + let mut base = radix; + + while let Some(b) = base.checked_mul(radix) { + if b > max { + break; + } + base = b; + power += 1; + } + (base, power) + }; + writeln!(f, " ({}, {}), // {}", base, power, radix)?; + } + writeln!(f, "];")?; + } + + Ok(()) } diff --git a/ci/test_full.sh b/ci/test_full.sh index 4e1b60e9..4915e880 100755 --- a/ci/test_full.sh +++ b/ci/test_full.sh @@ -5,13 +5,13 @@ set -ex echo Testing num-bigint on rustc ${TRAVIS_RUST_VERSION} FEATURES="serde" -if [[ "$TRAVIS_RUST_VERSION" =~ ^(nightly|beta|stable|1.31.0|1.26.0|1.22.0)$ ]]; then +if [[ "$TRAVIS_RUST_VERSION" =~ ^(nightly|beta|stable.*|1.31.0|1.26.0|1.22.0)$ ]]; then FEATURES="$FEATURES rand" fi -if [[ "$TRAVIS_RUST_VERSION" =~ ^(nightly|beta|stable|1.31.0|1.26.0)$ ]]; then +if [[ "$TRAVIS_RUST_VERSION" =~ ^(nightly|beta|stable.*|1.31.0|1.26.0)$ ]]; then FEATURES="$FEATURES i128" fi -if [[ "$TRAVIS_RUST_VERSION" =~ ^(nightly|beta|stable|1.31.0)$ ]]; then +if [[ "$TRAVIS_RUST_VERSION" =~ ^(nightly|beta|stable.*|1.31.0)$ ]]; then FEATURES="$FEATURES quickcheck quickcheck_macros" fi diff --git a/src/algorithms.rs b/src/algorithms.rs index 223f051a..aa42c6c8 100644 --- a/src/algorithms.rs +++ b/src/algorithms.rs @@ -6,6 +6,7 @@ use std::mem; use traits; use traits::{One, Zero}; +use biguint::biguint_from_vec; use biguint::BigUint; use bigint::BigInt; @@ -68,29 +69,65 @@ fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigi ((lhs / rhs) as BigDigit, (lhs % rhs) as BigDigit) } +/// For small divisors, we can divide without promoting to `DoubleBigDigit` by +/// using half-size pieces of digit, like long-division. +#[inline] +fn div_half(rem: BigDigit, digit: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) { + use big_digit::{HALF, HALF_BITS}; + use integer::Integer; + + debug_assert!(rem < divisor && divisor <= HALF); + let (hi, rem) = ((rem << HALF_BITS) | (digit >> HALF_BITS)).div_rem(&divisor); + let (lo, rem) = ((rem << HALF_BITS) | (digit & HALF)).div_rem(&divisor); + ((hi << HALF_BITS) | lo, rem) +} + +#[inline] pub fn div_rem_digit(mut a: BigUint, b: BigDigit) -> (BigUint, BigDigit) { let mut rem = 0; - for d in a.data.iter_mut().rev() { - let (q, r) = div_wide(rem, *d, b); - *d = q; - rem = r; + if b <= big_digit::HALF { + for d in a.data.iter_mut().rev() { + let (q, r) = div_half(rem, *d, b); + *d = q; + rem = r; + } + } else { + for d in a.data.iter_mut().rev() { + let (q, r) = div_wide(rem, *d, b); + *d = q; + rem = r; + } } (a.normalized(), rem) } +#[inline] pub fn rem_digit(a: &BigUint, b: BigDigit) -> BigDigit { - let mut rem: DoubleBigDigit = 0; - for &digit in a.data.iter().rev() { - rem = (rem << big_digit::BITS) + DoubleBigDigit::from(digit); - rem %= DoubleBigDigit::from(b); + let mut rem = 0; + + if b <= big_digit::HALF { + for &digit in a.data.iter().rev() { + let (_, r) = div_half(rem, digit, b); + rem = r; + } + } else { + for &digit in a.data.iter().rev() { + let (_, r) = div_wide(rem, digit, b); + rem = r; + } } - rem as BigDigit + rem } -// Only for the Add impl: +/// Two argument addition of raw slices, `a += b`, returning the carry. +/// +/// This is used when the data `Vec` might need to resize to push a non-zero carry, so we perform +/// the addition first hoping that it will fit. +/// +/// The caller _must_ ensure that `a` is at least as long as `b`. #[inline] pub fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit { debug_assert!(a.len() >= b.len()); @@ -193,12 +230,12 @@ pub fn sub_sign(a: &[BigDigit], b: &[BigDigit]) -> (Sign, BigUint) { Greater => { let mut a = a.to_vec(); sub2(&mut a, b); - (Plus, BigUint::new(a)) + (Plus, biguint_from_vec(a)) } Less => { let mut b = b.to_vec(); sub2(&mut b, a); - (Minus, BigUint::new(b)) + (Minus, biguint_from_vec(b)) } _ => (NoSign, Zero::zero()), } @@ -225,6 +262,10 @@ pub fn mac_digit(acc: &mut [BigDigit], b: &[BigDigit], c: BigDigit) { } } +fn bigint_from_slice(slice: &[BigDigit]) -> BigInt { + BigInt::from(biguint_from_vec(slice.to_vec())) +} + /// Three argument multiply accumulate: /// acc += b * c fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) { @@ -387,14 +428,14 @@ fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) { // in place of multiplications. // // x(t) = x2*t^2 + x1*t + x0 - let x0 = BigInt::from_slice(Plus, &x[..x0_len]); - let x1 = BigInt::from_slice(Plus, &x[x0_len..x0_len + x1_len]); - let x2 = BigInt::from_slice(Plus, &x[x0_len + x1_len..]); + let x0 = bigint_from_slice(&x[..x0_len]); + let x1 = bigint_from_slice(&x[x0_len..x0_len + x1_len]); + let x2 = bigint_from_slice(&x[x0_len + x1_len..]); // y(t) = y2*t^2 + y1*t + y0 - let y0 = BigInt::from_slice(Plus, &y[..y0_len]); - let y1 = BigInt::from_slice(Plus, &y[y0_len..y0_len + y1_len]); - let y2 = BigInt::from_slice(Plus, &y[y0_len + y1_len..]); + let y0 = bigint_from_slice(&y[..y0_len]); + let y1 = bigint_from_slice(&y[y0_len..y0_len + y1_len]); + let y2 = bigint_from_slice(&y[y0_len + y1_len..]); // Let w(t) = x(t) * y(t) // @@ -538,6 +579,7 @@ pub fn div_rem(mut u: BigUint, mut d: BigUint) -> (BigUint, BigUint) { // want it to be the largest number we can efficiently divide by. // let shift = d.data.last().unwrap().leading_zeros() as usize; + let (q, r) = if shift == 0 { // no need to clone d div_rem_core(u, &d) @@ -655,8 +697,7 @@ fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) { let mut prod = b * &q0; while cmp_slice(&prod.data[..], &a.data[j..]) == Greater { - let one: BigUint = One::one(); - q0 -= one; + q0 -= 1u32; prod -= b; } @@ -709,7 +750,7 @@ pub fn biguint_shl(n: Cow, bits: usize) -> BigUint { } } - BigUint::new(data) + biguint_from_vec(data) } #[inline] @@ -736,7 +777,7 @@ pub fn biguint_shr(n: Cow, bits: usize) -> BigUint { } } - BigUint::new(data) + biguint_from_vec(data) } pub fn cmp_slice(a: &[BigDigit], b: &[BigDigit]) -> Ordering { @@ -766,7 +807,6 @@ pub fn cmp_slice(a: &[BigDigit], b: &[BigDigit]) -> Ordering { mod algorithm_tests { use big_digit::BigDigit; use traits::Num; - use Sign::Plus; use {BigInt, BigUint}; #[test] @@ -780,8 +820,8 @@ mod algorithm_tests { let a = BigUint::from_str_radix("265252859812191058636308480000000", 10).unwrap(); let b = BigUint::from_str_radix("26525285981219105863630848000000", 10).unwrap(); - let a_i = BigInt::from_biguint(Plus, a.clone()); - let b_i = BigInt::from_biguint(Plus, b.clone()); + let a_i = BigInt::from(a.clone()); + let b_i = BigInt::from(b.clone()); assert_eq!(sub_sign_i(&a.data[..], &b.data[..]), &a_i - &b_i); assert_eq!(sub_sign_i(&b.data[..], &a.data[..]), &b_i - &a_i); diff --git a/src/bigint.rs b/src/bigint.rs index bd74e7d3..d62767c2 100644 --- a/src/bigint.rs +++ b/src/bigint.rs @@ -111,12 +111,30 @@ impl<'de> serde::Deserialize<'de> for Sign { } /// A big signed integer type. -#[derive(Clone, Debug, Hash)] +#[derive(Debug, Hash)] pub struct BigInt { sign: Sign, data: BigUint, } +// Note: derived `Clone` doesn't specialize `clone_from`, +// but we want to keep the allocation in `data`. +impl Clone for BigInt { + #[inline] + fn clone(&self) -> Self { + BigInt { + sign: self.sign, + data: self.data.clone(), + } + } + + #[inline] + fn clone_from(&mut self, other: &Self) { + self.sign = other.sign; + self.data.clone_from(&other.data); + } +} + #[cfg(feature = "quickcheck")] impl Arbitrary for BigInt { fn arbitrary(g: &mut G) -> Self { @@ -271,8 +289,9 @@ impl<'a> Not for &'a BigInt { fn not(self) -> BigInt { match self.sign { - NoSign | Plus => BigInt::from_biguint(Minus, &self.data + 1u32), - Minus => BigInt::from_biguint(Plus, &self.data - 1u32), + NoSign => -BigInt::one(), + Plus => -BigInt::from(&self.data + 1u32), + Minus => BigInt::from(&self.data - 1u32), } } } @@ -352,8 +371,8 @@ impl<'a, 'b> BitAnd<&'b BigInt> for &'a BigInt { #[inline] fn bitand(self, other: &BigInt) -> BigInt { match (self.sign, other.sign) { - (NoSign, _) | (_, NoSign) => BigInt::from_slice(NoSign, &[]), - (Plus, Plus) => BigInt::from_biguint(Plus, &self.data & &other.data), + (NoSign, _) | (_, NoSign) => BigInt::zero(), + (Plus, Plus) => BigInt::from(&self.data & &other.data), (Plus, Minus) => self.clone() & other, (Minus, Plus) => other.clone() & self, (Minus, Minus) => { @@ -384,7 +403,7 @@ impl<'a> BitAndAssign<&'a BigInt> for BigInt { fn bitand_assign(&mut self, other: &BigInt) { match (self.sign, other.sign) { (NoSign, _) => {} - (_, NoSign) => self.assign_from_slice(NoSign, &[]), + (_, NoSign) => self.set_zero(), (Plus, Plus) => { self.data &= &other.data; if self.data.is_zero() { @@ -489,7 +508,7 @@ impl<'a, 'b> BitOr<&'b BigInt> for &'a BigInt { match (self.sign, other.sign) { (NoSign, _) => other.clone(), (_, NoSign) => self.clone(), - (Plus, Plus) => BigInt::from_biguint(Plus, &self.data | &other.data), + (Plus, Plus) => BigInt::from(&self.data | &other.data), (Plus, Minus) => other.clone() | self, (Minus, Plus) => self.clone() | other, (Minus, Minus) => { @@ -520,7 +539,7 @@ impl<'a> BitOrAssign<&'a BigInt> for BigInt { fn bitor_assign(&mut self, other: &BigInt) { match (self.sign, other.sign) { (_, NoSign) => {} - (NoSign, _) => self.assign_from_slice(other.sign, other.digits()), + (NoSign, _) => self.clone_from(other), (Plus, Plus) => self.data |= &other.data, (Plus, Minus) => { bitor_pos_neg(self.digits_mut(), other.digits()); @@ -646,7 +665,7 @@ impl<'a> BitXorAssign<&'a BigInt> for BigInt { fn bitxor_assign(&mut self, other: &BigInt) { match (self.sign, other.sign) { (_, NoSign) => {} - (NoSign, _) => self.assign_from_slice(other.sign, other.digits()), + (NoSign, _) => self.clone_from(other), (Plus, Plus) => { self.data ^= &other.data; if self.data.is_zero() { @@ -729,10 +748,7 @@ impl ShlAssign for BigInt { // Negative values need a rounding adjustment if there are any ones in the // bits that are getting shifted out. fn shr_round_down(i: &BigInt, rhs: usize) -> bool { - i.is_negative() - && biguint::trailing_zeros(&i.data) - .map(|n| n < rhs) - .unwrap_or(false) + i.is_negative() && i.trailing_zeros().map(|n| n < rhs).unwrap_or(false) } impl Shr for BigInt { @@ -772,7 +788,10 @@ impl ShrAssign for BigInt { impl Zero for BigInt { #[inline] fn zero() -> BigInt { - BigInt::from_biguint(NoSign, Zero::zero()) + BigInt { + sign: NoSign, + data: BigUint::zero(), + } } #[inline] @@ -790,7 +809,10 @@ impl Zero for BigInt { impl One for BigInt { #[inline] fn one() -> BigInt { - BigInt::from_biguint(Plus, One::one()) + BigInt { + sign: Plus, + data: BigUint::one(), + } } #[inline] @@ -810,7 +832,7 @@ impl Signed for BigInt { fn abs(&self) -> BigInt { match self.sign { Plus | NoSign => self.clone(), - Minus => BigInt::from_biguint(Plus, self.data.clone()), + Minus => BigInt::from(self.data.clone()), } } @@ -826,9 +848,9 @@ impl Signed for BigInt { #[inline] fn signum(&self) -> BigInt { match self.sign { - Plus => BigInt::from_biguint(Plus, One::one()), - Minus => BigInt::from_biguint(Minus, One::one()), - NoSign => Zero::zero(), + Plus => BigInt::one(), + Minus => -BigInt::one(), + NoSign => BigInt::zero(), } } @@ -1007,15 +1029,16 @@ impl Add for BigInt { fn add(self, other: u32) -> BigInt { match self.sign { NoSign => From::from(other), - Plus => BigInt::from_biguint(Plus, self.data + other), + Plus => BigInt::from(self.data + other), Minus => match self.data.cmp(&From::from(other)) { Equal => Zero::zero(), - Less => BigInt::from_biguint(Plus, other - self.data), - Greater => BigInt::from_biguint(Minus, self.data - other), + Less => BigInt::from(other - self.data), + Greater => -BigInt::from(self.data - other), }, } } } + impl AddAssign for BigInt { #[inline] fn add_assign(&mut self, other: u32) { @@ -1031,15 +1054,16 @@ impl Add for BigInt { fn add(self, other: u64) -> BigInt { match self.sign { NoSign => From::from(other), - Plus => BigInt::from_biguint(Plus, self.data + other), + Plus => BigInt::from(self.data + other), Minus => match self.data.cmp(&From::from(other)) { Equal => Zero::zero(), - Less => BigInt::from_biguint(Plus, other - self.data), - Greater => BigInt::from_biguint(Minus, self.data - other), + Less => BigInt::from(other - self.data), + Greater => -BigInt::from(self.data - other), }, } } } + impl AddAssign for BigInt { #[inline] fn add_assign(&mut self, other: u64) { @@ -1055,12 +1079,12 @@ impl Add for BigInt { #[inline] fn add(self, other: u128) -> BigInt { match self.sign { - NoSign => From::from(other), - Plus => BigInt::from_biguint(Plus, self.data + other), + NoSign => BigInt::from(other), + Plus => BigInt::from(self.data + other), Minus => match self.data.cmp(&From::from(other)) { - Equal => Zero::zero(), - Less => BigInt::from_biguint(Plus, other - self.data), - Greater => BigInt::from_biguint(Minus, self.data - other), + Equal => BigInt::zero(), + Less => BigInt::from(other - self.data), + Greater => -BigInt::from(self.data - other), }, } } @@ -1235,12 +1259,12 @@ impl Sub for BigInt { #[inline] fn sub(self, other: u32) -> BigInt { match self.sign { - NoSign => BigInt::from_biguint(Minus, From::from(other)), - Minus => BigInt::from_biguint(Minus, self.data + other), + NoSign => -BigInt::from(other), + Minus => -BigInt::from(self.data + other), Plus => match self.data.cmp(&From::from(other)) { Equal => Zero::zero(), - Greater => BigInt::from_biguint(Plus, self.data - other), - Less => BigInt::from_biguint(Minus, other - self.data), + Greater => BigInt::from(self.data - other), + Less => -BigInt::from(other - self.data), }, } } @@ -1286,12 +1310,12 @@ impl Sub for BigInt { #[inline] fn sub(self, other: u64) -> BigInt { match self.sign { - NoSign => BigInt::from_biguint(Minus, From::from(other)), - Minus => BigInt::from_biguint(Minus, self.data + other), + NoSign => -BigInt::from(other), + Minus => -BigInt::from(self.data + other), Plus => match self.data.cmp(&From::from(other)) { Equal => Zero::zero(), - Greater => BigInt::from_biguint(Plus, self.data - other), - Less => BigInt::from_biguint(Minus, other - self.data), + Greater => BigInt::from(self.data - other), + Less => -BigInt::from(other - self.data), }, } } @@ -1311,12 +1335,12 @@ impl Sub for BigInt { #[inline] fn sub(self, other: u128) -> BigInt { match self.sign { - NoSign => BigInt::from_biguint(Minus, From::from(other)), - Minus => BigInt::from_biguint(Minus, self.data + other), + NoSign => -BigInt::from(other), + Minus => -BigInt::from(self.data + other), Plus => match self.data.cmp(&From::from(other)) { Equal => Zero::zero(), - Greater => BigInt::from_biguint(Plus, self.data - other), - Less => BigInt::from_biguint(Minus, other - self.data), + Greater => BigInt::from(self.data - other), + Less => -BigInt::from(other - self.data), }, } } @@ -1851,8 +1875,14 @@ impl<'a, 'b> Rem<&'b BigInt> for &'a BigInt { #[inline] fn rem(self, other: &BigInt) -> BigInt { - let (_, r) = self.div_rem(other); - r + if let Some(other) = other.to_u32() { + self % other + } else if let Some(other) = other.to_i32() { + self % other + } else { + let (_, r) = self.div_rem(other); + r + } } } @@ -1895,7 +1925,7 @@ impl Rem for u32 { #[inline] fn rem(self, other: BigInt) -> BigInt { - BigInt::from_biguint(Plus, self % other.data) + BigInt::from(self % other.data) } } @@ -1923,7 +1953,7 @@ impl Rem for u64 { #[inline] fn rem(self, other: BigInt) -> BigInt { - BigInt::from_biguint(Plus, self % other.data) + BigInt::from(self % other.data) } } @@ -1954,7 +1984,7 @@ impl Rem for u128 { #[inline] fn rem(self, other: BigInt) -> BigInt { - BigInt::from_biguint(Plus, self % other.data) + BigInt::from(self % other.data) } } @@ -2154,8 +2184,8 @@ impl Integer for BigInt { fn div_mod_floor(&self, other: &BigInt) -> (BigInt, BigInt) { // m.sign == other.sign let (d_ui, m_ui) = self.data.div_rem(&other.data); - let d = BigInt::from_biguint(Plus, d_ui); - let m = BigInt::from_biguint(Plus, m_ui); + let d = BigInt::from(d_ui); + let m = BigInt::from(m_ui); let one: BigInt = One::one(); match (self.sign, other.sign) { (_, NoSign) => panic!(), @@ -2183,13 +2213,13 @@ impl Integer for BigInt { /// The result is always positive. #[inline] fn gcd(&self, other: &BigInt) -> BigInt { - BigInt::from_biguint(Plus, self.data.gcd(&other.data)) + BigInt::from(self.data.gcd(&other.data)) } /// Calculates the Lowest Common Multiple (LCM) of the number and `other`. #[inline] fn lcm(&self, other: &BigInt) -> BigInt { - BigInt::from_biguint(Plus, self.data.lcm(&other.data)) + BigInt::from(self.data.lcm(&other.data)) } /// Deprecated, use `is_multiple_of` instead. @@ -2337,9 +2367,9 @@ impl FromPrimitive for BigInt { #[inline] fn from_f64(n: f64) -> Option { if n >= 0.0 { - BigUint::from_f64(n).map(|x| BigInt::from_biguint(Plus, x)) + BigUint::from_f64(n).map(BigInt::from) } else { - BigUint::from_f64(-n).map(|x| BigInt::from_biguint(Minus, x)) + BigUint::from_f64(-n).map(|x| -BigInt::from(x)) } } } @@ -2609,8 +2639,7 @@ impl BigInt { #[inline] pub fn assign_from_slice(&mut self, sign: Sign, slice: &[u32]) { if sign == NoSign { - self.data.assign_from_slice(&[]); - self.sign = NoSign; + self.set_zero(); } else { self.data.assign_from_slice(slice); self.sign = match self.data.is_zero() { @@ -3024,6 +3053,12 @@ impl BigInt { pub fn nth_root(&self, n: u32) -> Self { Roots::nth_root(self, n) } + + /// Returns the number of least-significant bits that are zero, + /// or `None` if the entire number is zero. + pub fn trailing_zeros(&self) -> Option { + self.data.trailing_zeros() + } } impl_sum_iter_type!(BigInt); diff --git a/src/bigrand.rs b/src/bigrand.rs index 69564dd1..ab0e0ed8 100644 --- a/src/bigrand.rs +++ b/src/bigrand.rs @@ -2,14 +2,13 @@ use rand::distributions::uniform::{SampleUniform, UniformSampler}; use rand::prelude::*; -use rand::AsByteSliceMut; use BigInt; use BigUint; use Sign::*; -use big_digit::BigDigit; use bigint::{into_magnitude, magnitude}; +use biguint::biguint_from_vec; use integer::Integer; use traits::Zero; @@ -39,21 +38,45 @@ pub trait RandBigInt { fn gen_bigint_range(&mut self, lbound: &BigInt, ubound: &BigInt) -> BigInt; } +fn gen_bits(rng: &mut R, data: &mut [u32], rem: usize) { + // `fill` is faster than many `gen::` calls + rng.fill(data); + if rem > 0 { + let last = data.len() - 1; + data[last] >>= 32 - rem; + } +} + impl RandBigInt for R { + #[cfg(not(u64_digit))] fn gen_biguint(&mut self, bit_size: usize) -> BigUint { - use super::big_digit::BITS; - let (digits, rem) = bit_size.div_rem(&BITS); - let mut data = vec![BigDigit::default(); digits + (rem > 0) as usize]; - // `fill_bytes` is faster than many `gen::` calls - self.fill_bytes(data[..].as_byte_slice_mut()); - // Swap bytes per the `Rng::fill` source. This might be - // unnecessary if reproducibility across architectures is not - // desired. - data.to_le(); - if rem > 0 { - data[digits] >>= BITS - rem; + let (digits, rem) = bit_size.div_rem(&32); + let mut data = vec![0u32; digits + (rem > 0) as usize]; + gen_bits(self, &mut data, rem); + biguint_from_vec(data) + } + + #[cfg(u64_digit)] + fn gen_biguint(&mut self, bit_size: usize) -> BigUint { + use std::slice; + + let (digits, rem) = bit_size.div_rem(&32); + let native_digits = bit_size.div_ceil(&64); + let mut data = vec![0u64; native_digits]; + unsafe { + // Generate bits in a `&mut [u32]` slice for value stability + let ptr = data.as_mut_ptr() as *mut u32; + let len = digits + (rem > 0) as usize; + debug_assert!(native_digits * 2 >= len); + let data = slice::from_raw_parts_mut(ptr, len); + gen_bits(self, data, rem); + } + #[cfg(target_endian = "big")] + for digit in &mut data { + // swap u32 digits into u64 endianness + *digit = (*digit << 32) | (*digit >> 32); } - BigUint::new(data) + biguint_from_vec(data) } fn gen_bigint(&mut self, bit_size: usize) -> BigInt { diff --git a/src/biguint.rs b/src/biguint.rs index 68363424..ff30ac24 100644 --- a/src/biguint.rs +++ b/src/biguint.rs @@ -13,7 +13,7 @@ use std::ops::{ }; use std::str::{self, FromStr}; use std::{f32, f64}; -use std::{u64, u8}; +use std::{u32, u64, u8}; #[cfg(feature = "serde")] use serde; @@ -46,22 +46,38 @@ use ParseBigIntError; use quickcheck::{Arbitrary, Gen}; /// A big unsigned integer type. -#[derive(Clone, Debug, Hash)] +#[derive(Debug, Hash)] pub struct BigUint { data: Vec, } +// Note: derived `Clone` doesn't specialize `clone_from`, +// but we want to keep the allocation in `data`. +impl Clone for BigUint { + #[inline] + fn clone(&self) -> Self { + BigUint { + data: self.data.clone(), + } + } + + #[inline] + fn clone_from(&mut self, other: &Self) { + self.data.clone_from(&other.data); + } +} + #[cfg(feature = "quickcheck")] impl Arbitrary for BigUint { fn arbitrary(g: &mut G) -> Self { // Use arbitrary from Vec - Self::new(Vec::::arbitrary(g)) + biguint_from_vec(Vec::::arbitrary(g)) } #[allow(bare_trait_objects)] // `dyn` needs Rust 1.27 to parse, even when cfg-disabled fn shrink(&self) -> Box> { // Use shrinker from Vec - Box::new(self.data.shrink().map(BigUint::new)) + Box::new(self.data.shrink().map(biguint_from_vec)) } } @@ -156,7 +172,7 @@ fn from_bitwise_digits_le(v: &[u8], bits: usize) -> BigUint { }) .collect(); - BigUint::new(data) + biguint_from_vec(data) } // Convert from a power of two radix (bits == ilog2(radix)) where bits doesn't evenly divide @@ -191,7 +207,7 @@ fn from_inexact_bitwise_digits_le(v: &[u8], bits: usize) -> BigUint { data.push(d as BigDigit); } - BigUint::new(data) + biguint_from_vec(data) } // Read little-endian radix digits @@ -204,7 +220,7 @@ fn from_radix_digits_be(v: &[u8], radix: u32) -> BigUint { let big_digits = (bits / big_digit::BITS as f64).ceil(); let mut data = Vec::with_capacity(big_digits as usize); - let (base, power) = get_radix_base(radix); + let (base, power) = get_radix_base(radix, big_digit::BITS); let radix = radix as BigDigit; let r = v.len() % power; @@ -234,7 +250,7 @@ fn from_radix_digits_be(v: &[u8], radix: u32) -> BigUint { add2(&mut data, &[n]); } - BigUint::new(data) + biguint_from_vec(data) } impl Num for BigUint { @@ -437,7 +453,7 @@ impl ShrAssign for BigUint { impl Zero for BigUint { #[inline] fn zero() -> BigUint { - BigUint::new(Vec::new()) + BigUint { data: Vec::new() } } #[inline] @@ -454,7 +470,7 @@ impl Zero for BigUint { impl One for BigUint { #[inline] fn one() -> BigUint { - BigUint::new(vec![1]) + BigUint { data: vec![1] } } #[inline] @@ -623,6 +639,7 @@ impl Add for BigUint { } impl AddAssign for BigUint { + #[cfg(not(u64_digit))] #[inline] fn add_assign(&mut self, other: u64) { let (hi, lo) = big_digit::from_doublebigdigit(other); @@ -639,6 +656,21 @@ impl AddAssign for BigUint { } } } + + #[cfg(u64_digit)] + #[inline] + fn add_assign(&mut self, other: u64) { + if other != 0 { + if self.data.is_empty() { + self.data.push(0); + } + + let carry = __add2(&mut self.data, &[other as BigDigit]); + if carry != 0 { + self.data.push(carry); + } + } + } } #[cfg(has_i128)] @@ -654,6 +686,7 @@ impl Add for BigUint { #[cfg(has_i128)] impl AddAssign for BigUint { + #[cfg(not(u64_digit))] #[inline] fn add_assign(&mut self, other: u128) { if other <= u128::from(u64::max_value()) { @@ -678,6 +711,24 @@ impl AddAssign for BigUint { } } } + + #[cfg(u64_digit)] + #[inline] + fn add_assign(&mut self, other: u128) { + let (hi, lo) = big_digit::from_doublebigdigit(other); + if hi == 0 { + *self += lo; + } else { + while self.data.len() < 2 { + self.data.push(0); + } + + let carry = __add2(&mut self.data, &[lo, hi]); + if carry != 0 { + self.data.push(carry); + } + } + } } forward_val_val_binop!(impl Sub for BigUint, sub); @@ -743,6 +794,18 @@ impl SubAssign for BigUint { impl Sub for u32 { type Output = BigUint; + #[cfg(not(u64_digit))] + #[inline] + fn sub(self, mut other: BigUint) -> BigUint { + if other.data.len() == 0 { + other.data.push(self); + } else { + sub2rev(&[self], &mut other.data[..]); + } + other.normalized() + } + + #[cfg(u64_digit)] #[inline] fn sub(self, mut other: BigUint) -> BigUint { if other.data.is_empty() { @@ -765,17 +828,26 @@ impl Sub for BigUint { } impl SubAssign for BigUint { + #[cfg(not(u64_digit))] #[inline] fn sub_assign(&mut self, other: u64) { let (hi, lo) = big_digit::from_doublebigdigit(other); sub2(&mut self.data[..], &[lo, hi]); self.normalize(); } + + #[cfg(u64_digit)] + #[inline] + fn sub_assign(&mut self, other: u64) { + sub2(&mut self.data[..], &[other as BigDigit]); + self.normalize(); + } } impl Sub for u64 { type Output = BigUint; + #[cfg(not(u64_digit))] #[inline] fn sub(self, mut other: BigUint) -> BigUint { while other.data.len() < 2 { @@ -786,6 +858,17 @@ impl Sub for u64 { sub2rev(&[lo, hi], &mut other.data[..]); other.normalized() } + + #[cfg(u64_digit)] + #[inline] + fn sub(self, mut other: BigUint) -> BigUint { + if other.data.is_empty() { + other.data.push(self); + } else { + sub2rev(&[self], &mut other.data[..]); + } + other.normalized() + } } #[cfg(has_i128)] @@ -800,17 +883,28 @@ impl Sub for BigUint { } #[cfg(has_i128)] impl SubAssign for BigUint { + #[cfg(not(u64_digit))] + #[inline] fn sub_assign(&mut self, other: u128) { let (a, b, c, d) = u32_from_u128(other); sub2(&mut self.data[..], &[d, c, b, a]); self.normalize(); } + + #[cfg(u64_digit)] + #[inline] + fn sub_assign(&mut self, other: u128) { + let (hi, lo) = big_digit::from_doublebigdigit(other); + sub2(&mut self.data[..], &[lo, hi]); + self.normalize(); + } } #[cfg(has_i128)] impl Sub for u128 { type Output = BigUint; + #[cfg(not(u64_digit))] #[inline] fn sub(self, mut other: BigUint) -> BigUint { while other.data.len() < 4 { @@ -821,6 +915,18 @@ impl Sub for u128 { sub2rev(&[d, c, b, a], &mut other.data[..]); other.normalized() } + + #[cfg(u64_digit)] + #[inline] + fn sub(self, mut other: BigUint) -> BigUint { + while other.data.len() < 2 { + other.data.push(0); + } + + let (hi, lo) = big_digit::from_doublebigdigit(self); + sub2rev(&[lo, hi], &mut other.data[..]); + other.normalized() + } } forward_all_binop_to_ref_ref!(impl Mul for BigUint, mul); @@ -881,6 +987,7 @@ impl Mul for BigUint { } } impl MulAssign for BigUint { + #[cfg(not(u64_digit))] #[inline] fn mul_assign(&mut self, other: u64) { if other == 0 { @@ -892,6 +999,19 @@ impl MulAssign for BigUint { *self = mul3(&self.data[..], &[lo, hi]) } } + + #[cfg(u64_digit)] + #[inline] + fn mul_assign(&mut self, other: u64) { + if other == 0 { + self.data.clear(); + } else { + let carry = scalar_mul(&mut self.data[..], other as BigDigit); + if carry != 0 { + self.data.push(carry); + } + } + } } #[cfg(has_i128)] @@ -906,6 +1026,7 @@ impl Mul for BigUint { } #[cfg(has_i128)] impl MulAssign for BigUint { + #[cfg(not(u64_digit))] #[inline] fn mul_assign(&mut self, other: u128) { if other == 0 { @@ -917,6 +1038,19 @@ impl MulAssign for BigUint { *self = mul3(&self.data[..], &[d, c, b, a]) } } + + #[cfg(u64_digit)] + #[inline] + fn mul_assign(&mut self, other: u128) { + if other == 0 { + self.data.clear(); + } else if other <= BigDigit::max_value() as u128 { + *self *= other as BigDigit + } else { + let (hi, lo) = big_digit::from_doublebigdigit(other); + *self = mul3(&self.data[..], &[lo, hi]) + } + } } forward_val_ref_binop!(impl Div for BigUint, div); @@ -1006,6 +1140,7 @@ impl DivAssign for BigUint { impl Div for u64 { type Output = BigUint; + #[cfg(not(u64_digit))] #[inline] fn div(self, other: BigUint) -> BigUint { match other.data.len() { @@ -1015,6 +1150,16 @@ impl Div for u64 { _ => Zero::zero(), } } + + #[cfg(u64_digit)] + #[inline] + fn div(self, other: BigUint) -> BigUint { + match other.data.len() { + 0 => panic!(), + 1 => From::from(self / other.data[0]), + _ => Zero::zero(), + } + } } #[cfg(has_i128)] @@ -1027,6 +1172,7 @@ impl Div for BigUint { q } } + #[cfg(has_i128)] impl DivAssign for BigUint { #[inline] @@ -1039,6 +1185,7 @@ impl DivAssign for BigUint { impl Div for u128 { type Output = BigUint; + #[cfg(not(u64_digit))] #[inline] fn div(self, other: BigUint) -> BigUint { match other.data.len() { @@ -1054,6 +1201,17 @@ impl Div for u128 { _ => Zero::zero(), } } + + #[cfg(u64_digit)] + #[inline] + fn div(self, other: BigUint) -> BigUint { + match other.data.len() { + 0 => panic!(), + 1 => From::from(self / other.data[0] as u128), + 2 => From::from(self / big_digit::to_doublebigdigit(other.data[1], other.data[0])), + _ => Zero::zero(), + } + } } forward_val_ref_binop!(impl Rem for BigUint, rem); @@ -1065,8 +1223,12 @@ impl Rem for BigUint { #[inline] fn rem(self, other: BigUint) -> BigUint { - let (_, r) = div_rem(self, other); - r + if let Some(other) = other.to_u32() { + &self % other + } else { + let (_, r) = div_rem(self, other); + r + } } } @@ -1075,8 +1237,12 @@ impl<'a, 'b> Rem<&'b BigUint> for &'a BigUint { #[inline] fn rem(self, other: &BigUint) -> BigUint { - let (_, r) = self.div_rem(other); - r + if let Some(other) = other.to_u32() { + self % other + } else { + let (_, r) = self.div_rem(other); + r + } } } impl<'a> RemAssign<&'a BigUint> for BigUint { @@ -1098,7 +1264,7 @@ impl<'a> Rem for &'a BigUint { #[inline] fn rem(self, other: u32) -> BigUint { - From::from(rem_digit(self, other as BigDigit)) + rem_digit(self, other as BigDigit).into() } } impl RemAssign for BigUint { @@ -1287,7 +1453,7 @@ impl Integer for BigUint { fn gcd(&self, other: &Self) -> Self { #[inline] fn twos(x: &BigUint) -> usize { - trailing_zeros(x).unwrap_or(0) + x.trailing_zeros().unwrap_or(0) } // Stein's algorithm @@ -1850,7 +2016,7 @@ fn to_radix_digits_le(u: &BigUint, radix: u32) -> Vec { let mut res = Vec::with_capacity(radix_digits as usize); let mut digits = u.clone(); - let (base, power) = get_radix_base(radix); + let (base, power) = get_radix_base(radix, big_digit::HALF_BITS); let radix = radix as BigDigit; while digits.data.len() > 1 { @@ -1912,13 +2078,34 @@ pub fn to_str_radix_reversed(u: &BigUint, radix: u32) -> Vec { res } +/// Creates and initializes a `BigUint`. +/// +/// The digits are in little-endian base matching `BigDigit`. +/// +/// This is an internal `pub(crate)`-ish API only! +#[inline] +pub fn biguint_from_vec(digits: Vec) -> BigUint { + BigUint { data: digits }.normalized() +} + impl BigUint { /// Creates and initializes a `BigUint`. /// /// The base 232 digits are ordered least significant digit first. #[inline] pub fn new(digits: Vec) -> BigUint { - BigUint { data: digits }.normalized() + let mut big = BigUint::zero(); + + #[cfg(not(u64_digit))] + { + big.data = digits; + big.normalize(); + } + + #[cfg(u64_digit)] + big.assign_from_slice(&digits); + + big } /// Creates and initializes a `BigUint`. @@ -1926,7 +2113,9 @@ impl BigUint { /// The base 232 digits are ordered least significant digit first. #[inline] pub fn from_slice(slice: &[u32]) -> BigUint { - BigUint::new(slice.to_vec()) + let mut big = BigUint::zero(); + big.assign_from_slice(slice); + big } /// Assign a value to a `BigUint`. @@ -1934,8 +2123,21 @@ impl BigUint { /// The base 232 digits are ordered least significant digit first. #[inline] pub fn assign_from_slice(&mut self, slice: &[u32]) { - self.data.resize(slice.len(), 0); - self.data.clone_from_slice(slice); + self.data.clear(); + + #[cfg(not(u64_digit))] + self.data.extend_from_slice(slice); + + #[cfg(u64_digit)] + self.data.extend(slice.chunks(2).map(|chunk| { + // raw could have odd length + let mut digit = BigDigit::from(chunk[0]); + if let Some(&hi) = chunk.get(1) { + digit |= BigDigit::from(hi) << 32; + } + digit + })); + self.normalize(); } @@ -2140,7 +2342,30 @@ impl BigUint { /// ``` #[inline] pub fn to_u32_digits(&self) -> Vec { - self.data.clone() + let mut digits = Vec::new(); + + #[cfg(not(u64_digit))] + digits.clone_from(&self.data); + + #[cfg(u64_digit)] + { + if let Some((&last, data)) = self.data.split_last() { + let last_lo = last as u32; + let last_hi = (last >> 32) as u32; + let u32_len = data.len() * 2 + 1 + (last_hi != 0) as usize; + digits.reserve_exact(u32_len); + for &x in data { + digits.push(x as u32); + digits.push((x >> 32) as u32); + } + digits.push(last_lo); + if last_hi != 0 { + digits.push(last_hi); + } + } + } + + digits } /// Returns the integer formatted as a string in the given radix. @@ -2259,6 +2484,16 @@ impl BigUint { pub fn nth_root(&self, n: u32) -> Self { Roots::nth_root(self, n) } + + /// Returns the number of least-significant bits that are zero, + /// or `None` if the entire number is zero. + pub fn trailing_zeros(&self) -> Option { + self.data + .iter() + .enumerate() + .find(|&(_, &digit)| digit != 0) + .map(|(i, digit)| i * big_digit::BITS + digit.trailing_zeros() as usize) + } } fn plain_modpow(base: &BigUint, exp_data: &[BigDigit], modulus: &BigUint) -> BigUint { @@ -2360,16 +2595,6 @@ fn test_plain_modpow() { ); } -/// Returns the number of least-significant bits that are zero, -/// or `None` if the entire number is zero. -pub fn trailing_zeros(u: &BigUint) -> Option { - u.data - .iter() - .enumerate() - .find(|&(_, &digit)| digit != 0) - .map(|(i, digit)| i * big_digit::BITS + digit.trailing_zeros() as usize) -} - impl_sum_iter_type!(BigUint); impl_product_iter_type!(BigUint); @@ -2406,6 +2631,7 @@ impl IntDigits for BigUint { /// Combine four `u32`s into a single `u128`. #[cfg(has_i128)] +#[cfg(any(test, not(u64_digit)))] #[inline] fn u32_to_u128(a: u32, b: u32, c: u32, d: u32) -> u128 { u128::from(d) | (u128::from(c) << 32) | (u128::from(b) << 64) | (u128::from(a) << 96) @@ -2413,6 +2639,7 @@ fn u32_to_u128(a: u32, b: u32, c: u32, d: u32) -> u128 { /// Split a single `u128` into four `u32`. #[cfg(has_i128)] +#[cfg(any(test, not(u64_digit)))] #[inline] fn u32_from_u128(n: u128) -> (u32, u32, u32, u32) { ( @@ -2424,6 +2651,7 @@ fn u32_from_u128(n: u128) -> (u32, u32, u32, u32) { } #[cfg(feature = "serde")] +#[cfg(not(u64_digit))] impl serde::Serialize for BigUint { fn serialize(&self, serializer: S) -> Result where @@ -2438,611 +2666,152 @@ impl serde::Serialize for BigUint { } #[cfg(feature = "serde")] +#[cfg(u64_digit)] +impl serde::Serialize for BigUint { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeSeq; + if let Some((&last, data)) = self.data.split_last() { + let last_lo = last as u32; + let last_hi = (last >> 32) as u32; + let u32_len = data.len() * 2 + 1 + (last_hi != 0) as usize; + let mut seq = serializer.serialize_seq(Some(u32_len))?; + for &x in data { + seq.serialize_element(&(x as u32))?; + seq.serialize_element(&((x >> 32) as u32))?; + } + seq.serialize_element(&last_lo)?; + if last_hi != 0 { + seq.serialize_element(&last_hi)?; + } + seq.end() + } else { + let data: &[u32] = &[]; + data.serialize(serializer) + } + } +} + +#[cfg(feature = "serde")] +#[cfg(not(u64_digit))] impl<'de> serde::Deserialize<'de> for BigUint { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { let data: Vec = Vec::deserialize(deserializer)?; - Ok(BigUint::new(data)) + Ok(biguint_from_vec(data)) } } -/// Returns the greatest power of the radix <= big_digit::BASE +#[cfg(feature = "serde")] +#[cfg(u64_digit)] +impl<'de> serde::Deserialize<'de> for BigUint { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + use serde::de::{SeqAccess, Visitor}; + + struct U32Visitor; + + impl<'de> Visitor<'de> for U32Visitor { + type Value = BigUint; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a sequence of unsigned 32-bit numbers") + } + + fn visit_seq(self, mut seq: S) -> Result + where + S: SeqAccess<'de>, + { + let u32_len = seq.size_hint().unwrap_or(0); + let len = u32_len.div_ceil(&2); + let mut data = Vec::with_capacity(len); + + while let Some(lo) = seq.next_element::()? { + let mut value = BigDigit::from(lo); + if let Some(hi) = seq.next_element::()? { + value |= BigDigit::from(hi) << 32; + } + data.push(value); + } + + Ok(biguint_from_vec(data)) + } + } + + deserializer.deserialize_seq(U32Visitor) + } +} + +/// Returns the greatest power of the radix for the given bit size #[inline] -fn get_radix_base(radix: u32) -> (BigDigit, usize) { +fn get_radix_base(radix: u32, bits: usize) -> (BigDigit, usize) { + mod gen { + include! { concat!(env!("OUT_DIR"), "/radix_bases.rs") } + } + debug_assert!( 2 <= radix && radix <= 256, "The radix must be within 2...256" ); debug_assert!(!radix.is_power_of_two()); + debug_assert!(bits <= big_digit::BITS); - // To generate this table: - // for radix in 2u64..257 { - // let mut power = big_digit::BITS / fls(radix as u64); - // let mut base = radix.pow(power as u32); - // - // while let Some(b) = base.checked_mul(radix) { - // if b > big_digit::MAX { - // break; - // } - // base = b; - // power += 1; - // } - // - // println!("({:10}, {:2}), // {:2}", base, power, radix); - // } - // and - // for radix in 2u64..257 { - // let mut power = 64 / fls(radix as u64); - // let mut base = radix.pow(power as u32); - // - // while let Some(b) = base.checked_mul(radix) { - // base = b; - // power += 1; - // } - // - // println!("({:20}, {:2}), // {:2}", base, power, radix); - // } - match big_digit::BITS { + match bits { + 16 => { + let (base, power) = gen::BASES_16[radix as usize]; + (base as BigDigit, power) + } 32 => { - const BASES: [(u32, usize); 257] = [ - (0, 0), - (0, 0), - (0, 0), // 2 - (3486784401, 20), // 3 - (0, 0), // 4 - (1220703125, 13), // 5 - (2176782336, 12), // 6 - (1977326743, 11), // 7 - (0, 0), // 8 - (3486784401, 10), // 9 - (1000000000, 9), // 10 - (2357947691, 9), // 11 - (429981696, 8), // 12 - (815730721, 8), // 13 - (1475789056, 8), // 14 - (2562890625, 8), // 15 - (0, 0), // 16 - (410338673, 7), // 17 - (612220032, 7), // 18 - (893871739, 7), // 19 - (1280000000, 7), // 20 - (1801088541, 7), // 21 - (2494357888, 7), // 22 - (3404825447, 7), // 23 - (191102976, 6), // 24 - (244140625, 6), // 25 - (308915776, 6), // 26 - (387420489, 6), // 27 - (481890304, 6), // 28 - (594823321, 6), // 29 - (729000000, 6), // 30 - (887503681, 6), // 31 - (0, 0), // 32 - (1291467969, 6), // 33 - (1544804416, 6), // 34 - (1838265625, 6), // 35 - (2176782336, 6), // 36 - (2565726409, 6), // 37 - (3010936384, 6), // 38 - (3518743761, 6), // 39 - (4096000000, 6), // 40 - (115856201, 5), // 41 - (130691232, 5), // 42 - (147008443, 5), // 43 - (164916224, 5), // 44 - (184528125, 5), // 45 - (205962976, 5), // 46 - (229345007, 5), // 47 - (254803968, 5), // 48 - (282475249, 5), // 49 - (312500000, 5), // 50 - (345025251, 5), // 51 - (380204032, 5), // 52 - (418195493, 5), // 53 - (459165024, 5), // 54 - (503284375, 5), // 55 - (550731776, 5), // 56 - (601692057, 5), // 57 - (656356768, 5), // 58 - (714924299, 5), // 59 - (777600000, 5), // 60 - (844596301, 5), // 61 - (916132832, 5), // 62 - (992436543, 5), // 63 - (0, 0), // 64 - (1160290625, 5), // 65 - (1252332576, 5), // 66 - (1350125107, 5), // 67 - (1453933568, 5), // 68 - (1564031349, 5), // 69 - (1680700000, 5), // 70 - (1804229351, 5), // 71 - (1934917632, 5), // 72 - (2073071593, 5), // 73 - (2219006624, 5), // 74 - (2373046875, 5), // 75 - (2535525376, 5), // 76 - (2706784157, 5), // 77 - (2887174368, 5), // 78 - (3077056399, 5), // 79 - (3276800000, 5), // 80 - (3486784401, 5), // 81 - (3707398432, 5), // 82 - (3939040643, 5), // 83 - (4182119424, 5), // 84 - (52200625, 4), // 85 - (54700816, 4), // 86 - (57289761, 4), // 87 - (59969536, 4), // 88 - (62742241, 4), // 89 - (65610000, 4), // 90 - (68574961, 4), // 91 - (71639296, 4), // 92 - (74805201, 4), // 93 - (78074896, 4), // 94 - (81450625, 4), // 95 - (84934656, 4), // 96 - (88529281, 4), // 97 - (92236816, 4), // 98 - (96059601, 4), // 99 - (100000000, 4), // 100 - (104060401, 4), // 101 - (108243216, 4), // 102 - (112550881, 4), // 103 - (116985856, 4), // 104 - (121550625, 4), // 105 - (126247696, 4), // 106 - (131079601, 4), // 107 - (136048896, 4), // 108 - (141158161, 4), // 109 - (146410000, 4), // 110 - (151807041, 4), // 111 - (157351936, 4), // 112 - (163047361, 4), // 113 - (168896016, 4), // 114 - (174900625, 4), // 115 - (181063936, 4), // 116 - (187388721, 4), // 117 - (193877776, 4), // 118 - (200533921, 4), // 119 - (207360000, 4), // 120 - (214358881, 4), // 121 - (221533456, 4), // 122 - (228886641, 4), // 123 - (236421376, 4), // 124 - (244140625, 4), // 125 - (252047376, 4), // 126 - (260144641, 4), // 127 - (0, 0), // 128 - (276922881, 4), // 129 - (285610000, 4), // 130 - (294499921, 4), // 131 - (303595776, 4), // 132 - (312900721, 4), // 133 - (322417936, 4), // 134 - (332150625, 4), // 135 - (342102016, 4), // 136 - (352275361, 4), // 137 - (362673936, 4), // 138 - (373301041, 4), // 139 - (384160000, 4), // 140 - (395254161, 4), // 141 - (406586896, 4), // 142 - (418161601, 4), // 143 - (429981696, 4), // 144 - (442050625, 4), // 145 - (454371856, 4), // 146 - (466948881, 4), // 147 - (479785216, 4), // 148 - (492884401, 4), // 149 - (506250000, 4), // 150 - (519885601, 4), // 151 - (533794816, 4), // 152 - (547981281, 4), // 153 - (562448656, 4), // 154 - (577200625, 4), // 155 - (592240896, 4), // 156 - (607573201, 4), // 157 - (623201296, 4), // 158 - (639128961, 4), // 159 - (655360000, 4), // 160 - (671898241, 4), // 161 - (688747536, 4), // 162 - (705911761, 4), // 163 - (723394816, 4), // 164 - (741200625, 4), // 165 - (759333136, 4), // 166 - (777796321, 4), // 167 - (796594176, 4), // 168 - (815730721, 4), // 169 - (835210000, 4), // 170 - (855036081, 4), // 171 - (875213056, 4), // 172 - (895745041, 4), // 173 - (916636176, 4), // 174 - (937890625, 4), // 175 - (959512576, 4), // 176 - (981506241, 4), // 177 - (1003875856, 4), // 178 - (1026625681, 4), // 179 - (1049760000, 4), // 180 - (1073283121, 4), // 181 - (1097199376, 4), // 182 - (1121513121, 4), // 183 - (1146228736, 4), // 184 - (1171350625, 4), // 185 - (1196883216, 4), // 186 - (1222830961, 4), // 187 - (1249198336, 4), // 188 - (1275989841, 4), // 189 - (1303210000, 4), // 190 - (1330863361, 4), // 191 - (1358954496, 4), // 192 - (1387488001, 4), // 193 - (1416468496, 4), // 194 - (1445900625, 4), // 195 - (1475789056, 4), // 196 - (1506138481, 4), // 197 - (1536953616, 4), // 198 - (1568239201, 4), // 199 - (1600000000, 4), // 200 - (1632240801, 4), // 201 - (1664966416, 4), // 202 - (1698181681, 4), // 203 - (1731891456, 4), // 204 - (1766100625, 4), // 205 - (1800814096, 4), // 206 - (1836036801, 4), // 207 - (1871773696, 4), // 208 - (1908029761, 4), // 209 - (1944810000, 4), // 210 - (1982119441, 4), // 211 - (2019963136, 4), // 212 - (2058346161, 4), // 213 - (2097273616, 4), // 214 - (2136750625, 4), // 215 - (2176782336, 4), // 216 - (2217373921, 4), // 217 - (2258530576, 4), // 218 - (2300257521, 4), // 219 - (2342560000, 4), // 220 - (2385443281, 4), // 221 - (2428912656, 4), // 222 - (2472973441, 4), // 223 - (2517630976, 4), // 224 - (2562890625, 4), // 225 - (2608757776, 4), // 226 - (2655237841, 4), // 227 - (2702336256, 4), // 228 - (2750058481, 4), // 229 - (2798410000, 4), // 230 - (2847396321, 4), // 231 - (2897022976, 4), // 232 - (2947295521, 4), // 233 - (2998219536, 4), // 234 - (3049800625, 4), // 235 - (3102044416, 4), // 236 - (3154956561, 4), // 237 - (3208542736, 4), // 238 - (3262808641, 4), // 239 - (3317760000, 4), // 240 - (3373402561, 4), // 241 - (3429742096, 4), // 242 - (3486784401, 4), // 243 - (3544535296, 4), // 244 - (3603000625, 4), // 245 - (3662186256, 4), // 246 - (3722098081, 4), // 247 - (3782742016, 4), // 248 - (3844124001, 4), // 249 - (3906250000, 4), // 250 - (3969126001, 4), // 251 - (4032758016, 4), // 252 - (4097152081, 4), // 253 - (4162314256, 4), // 254 - (4228250625, 4), // 255 - (0, 0), // 256 - ]; - - let (base, power) = BASES[radix as usize]; + let (base, power) = gen::BASES_32[radix as usize]; (base as BigDigit, power) } 64 => { - const BASES: [(u64, usize); 257] = [ - (0, 0), - (0, 0), - (9223372036854775808, 63), // 2 - (12157665459056928801, 40), // 3 - (4611686018427387904, 31), // 4 - (7450580596923828125, 27), // 5 - (4738381338321616896, 24), // 6 - (3909821048582988049, 22), // 7 - (9223372036854775808, 21), // 8 - (12157665459056928801, 20), // 9 - (10000000000000000000, 19), // 10 - (5559917313492231481, 18), // 11 - (2218611106740436992, 17), // 12 - (8650415919381337933, 17), // 13 - (2177953337809371136, 16), // 14 - (6568408355712890625, 16), // 15 - (1152921504606846976, 15), // 16 - (2862423051509815793, 15), // 17 - (6746640616477458432, 15), // 18 - (15181127029874798299, 15), // 19 - (1638400000000000000, 14), // 20 - (3243919932521508681, 14), // 21 - (6221821273427820544, 14), // 22 - (11592836324538749809, 14), // 23 - (876488338465357824, 13), // 24 - (1490116119384765625, 13), // 25 - (2481152873203736576, 13), // 26 - (4052555153018976267, 13), // 27 - (6502111422497947648, 13), // 28 - (10260628712958602189, 13), // 29 - (15943230000000000000, 13), // 30 - (787662783788549761, 12), // 31 - (1152921504606846976, 12), // 32 - (1667889514952984961, 12), // 33 - (2386420683693101056, 12), // 34 - (3379220508056640625, 12), // 35 - (4738381338321616896, 12), // 36 - (6582952005840035281, 12), // 37 - (9065737908494995456, 12), // 38 - (12381557655576425121, 12), // 39 - (16777216000000000000, 12), // 40 - (550329031716248441, 11), // 41 - (717368321110468608, 11), // 42 - (929293739471222707, 11), // 43 - (1196683881290399744, 11), // 44 - (1532278301220703125, 11), // 45 - (1951354384207722496, 11), // 46 - (2472159215084012303, 11), // 47 - (3116402981210161152, 11), // 48 - (3909821048582988049, 11), // 49 - (4882812500000000000, 11), // 50 - (6071163615208263051, 11), // 51 - (7516865509350965248, 11), // 52 - (9269035929372191597, 11), // 53 - (11384956040305711104, 11), // 54 - (13931233916552734375, 11), // 55 - (16985107389382393856, 11), // 56 - (362033331456891249, 10), // 57 - (430804206899405824, 10), // 58 - (511116753300641401, 10), // 59 - (604661760000000000, 10), // 60 - (713342911662882601, 10), // 61 - (839299365868340224, 10), // 62 - (984930291881790849, 10), // 63 - (1152921504606846976, 10), // 64 - (1346274334462890625, 10), // 65 - (1568336880910795776, 10), // 66 - (1822837804551761449, 10), // 67 - (2113922820157210624, 10), // 68 - (2446194060654759801, 10), // 69 - (2824752490000000000, 10), // 70 - (3255243551009881201, 10), // 71 - (3743906242624487424, 10), // 72 - (4297625829703557649, 10), // 73 - (4923990397355877376, 10), // 74 - (5631351470947265625, 10), // 75 - (6428888932339941376, 10), // 76 - (7326680472586200649, 10), // 77 - (8335775831236199424, 10), // 78 - (9468276082626847201, 10), // 79 - (10737418240000000000, 10), // 80 - (12157665459056928801, 10), // 81 - (13744803133596058624, 10), // 82 - (15516041187205853449, 10), // 83 - (17490122876598091776, 10), // 84 - (231616946283203125, 9), // 85 - (257327417311663616, 9), // 86 - (285544154243029527, 9), // 87 - (316478381828866048, 9), // 88 - (350356403707485209, 9), // 89 - (387420489000000000, 9), // 90 - (427929800129788411, 9), // 91 - (472161363286556672, 9), // 92 - (520411082988487293, 9), // 93 - (572994802228616704, 9), // 94 - (630249409724609375, 9), // 95 - (692533995824480256, 9), // 96 - (760231058654565217, 9), // 97 - (833747762130149888, 9), // 98 - (913517247483640899, 9), // 99 - (1000000000000000000, 9), // 100 - (1093685272684360901, 9), // 101 - (1195092568622310912, 9), // 102 - (1304773183829244583, 9), // 103 - (1423311812421484544, 9), // 104 - (1551328215978515625, 9), // 105 - (1689478959002692096, 9), // 106 - (1838459212420154507, 9), // 107 - (1999004627104432128, 9), // 108 - (2171893279442309389, 9), // 109 - (2357947691000000000, 9), // 110 - (2558036924386500591, 9), // 111 - (2773078757450186752, 9), // 112 - (3004041937984268273, 9), // 113 - (3251948521156637184, 9), // 114 - (3517876291919921875, 9), // 115 - (3802961274698203136, 9), // 116 - (4108400332687853397, 9), // 117 - (4435453859151328768, 9), // 118 - (4785448563124474679, 9), // 119 - (5159780352000000000, 9), // 120 - (5559917313492231481, 9), // 121 - (5987402799531080192, 9), // 122 - (6443858614676334363, 9), // 123 - (6930988311686938624, 9), // 124 - (7450580596923828125, 9), // 125 - (8004512848309157376, 9), // 126 - (8594754748609397887, 9), // 127 - (9223372036854775808, 9), // 128 - (9892530380752880769, 9), // 129 - (10604499373000000000, 9), // 130 - (11361656654439817571, 9), // 131 - (12166492167065567232, 9), // 132 - (13021612539908538853, 9), // 133 - (13929745610903012864, 9), // 134 - (14893745087865234375, 9), // 135 - (15916595351771938816, 9), // 136 - (17001416405572203977, 9), // 137 - (18151468971815029248, 9), // 138 - (139353667211683681, 8), // 139 - (147578905600000000, 8), // 140 - (156225851787813921, 8), // 141 - (165312903998914816, 8), // 142 - (174859124550883201, 8), // 143 - (184884258895036416, 8), // 144 - (195408755062890625, 8), // 145 - (206453783524884736, 8), // 146 - (218041257467152161, 8), // 147 - (230193853492166656, 8), // 148 - (242935032749128801, 8), // 149 - (256289062500000000, 8), // 150 - (270281038127131201, 8), // 151 - (284936905588473856, 8), // 152 - (300283484326400961, 8), // 153 - (316348490636206336, 8), // 154 - (333160561500390625, 8), // 155 - (350749278894882816, 8), // 156 - (369145194573386401, 8), // 157 - (388379855336079616, 8), // 158 - (408485828788939521, 8), // 159 - (429496729600000000, 8), // 160 - (451447246258894081, 8), // 161 - (474373168346071296, 8), // 162 - (498311414318121121, 8), // 163 - (523300059815673856, 8), // 164 - (549378366500390625, 8), // 165 - (576586811427594496, 8), // 166 - (604967116961135041, 8), // 167 - (634562281237118976, 8), // 168 - (665416609183179841, 8), // 169 - (697575744100000000, 8), // 170 - (731086699811838561, 8), // 171 - (765997893392859136, 8), // 172 - (802359178476091681, 8), // 173 - (840221879151902976, 8), // 174 - (879638824462890625, 8), // 175 - (920664383502155776, 8), // 176 - (963354501121950081, 8), // 177 - (1007766734259732736, 8), // 178 - (1053960288888713761, 8), // 179 - (1101996057600000000, 8), // 180 - (1151936657823500641, 8), // 181 - (1203846470694789376, 8), // 182 - (1257791680575160641, 8), // 183 - (1313840315232157696, 8), // 184 - (1372062286687890625, 8), // 185 - (1432529432742502656, 8), // 186 - (1495315559180183521, 8), // 187 - (1560496482665168896, 8), // 188 - (1628150074335205281, 8), // 189 - (1698356304100000000, 8), // 190 - (1771197285652216321, 8), // 191 - (1846757322198614016, 8), // 192 - (1925122952918976001, 8), // 193 - (2006383000160502016, 8), // 194 - (2090628617375390625, 8), // 195 - (2177953337809371136, 8), // 196 - (2268453123948987361, 8), // 197 - (2362226417735475456, 8), // 198 - (2459374191553118401, 8), // 199 - (2560000000000000000, 8), // 200 - (2664210032449121601, 8), // 201 - (2772113166407885056, 8), // 202 - (2883821021683985761, 8), // 203 - (2999448015365799936, 8), // 204 - (3119111417625390625, 8), // 205 - (3242931408352297216, 8), // 206 - (3371031134626313601, 8), // 207 - (3503536769037500416, 8), // 208 - (3640577568861717121, 8), // 209 - (3782285936100000000, 8), // 210 - (3928797478390152481, 8), // 211 - (4080251070798954496, 8), // 212 - (4236788918503437921, 8), // 213 - (4398556620369715456, 8), // 214 - (4565703233437890625, 8), // 215 - (4738381338321616896, 8), // 216 - (4916747105530914241, 8), // 217 - (5100960362726891776, 8), // 218 - (5291184662917065441, 8), // 219 - (5487587353600000000, 8), // 220 - (5690339646868044961, 8), // 221 - (5899616690476974336, 8), // 222 - (6115597639891380481, 8), // 223 - (6338465731314712576, 8), // 224 - (6568408355712890625, 8), // 225 - (6805617133840466176, 8), // 226 - (7050287992278341281, 8), // 227 - (7302621240492097536, 8), // 228 - (7562821648920027361, 8), // 229 - (7831098528100000000, 8), // 230 - (8107665808844335041, 8), // 231 - (8392742123471896576, 8), // 232 - (8686550888106661441, 8), // 233 - (8989320386052055296, 8), // 234 - (9301283852250390625, 8), // 235 - (9622679558836781056, 8), // 236 - (9953750901796946721, 8), // 237 - (10294746488738365696, 8), // 238 - (10645920227784266881, 8), // 239 - (11007531417600000000, 8), // 240 - (11379844838561358721, 8), // 241 - (11763130845074473216, 8), // 242 - (12157665459056928801, 8), // 243 - (12563730464589807616, 8), // 244 - (12981613503750390625, 8), // 245 - (13411608173635297536, 8), // 246 - (13854014124583882561, 8), // 247 - (14309137159611744256, 8), // 248 - (14777289335064248001, 8), // 249 - (15258789062500000000, 8), // 250 - (15753961211814252001, 8), // 251 - (16263137215612256256, 8), // 252 - (16786655174842630561, 8), // 253 - (17324859965700833536, 8), // 254 - (17878103347812890625, 8), // 255 - (72057594037927936, 7), // 256 - ]; - - let (base, power) = BASES[radix as usize]; + let (base, power) = gen::BASES_64[radix as usize]; (base as BigDigit, power) } _ => panic!("Invalid bigdigit size"), } } +#[cfg(not(u64_digit))] #[test] fn test_from_slice() { - fn check(slice: &[BigDigit], data: &[BigDigit]) { - assert!(BigUint::from_slice(slice).data == data); + fn check(slice: &[u32], data: &[BigDigit]) { + assert_eq!(BigUint::from_slice(slice).data, data); } check(&[1], &[1]); check(&[0, 0, 0], &[]); check(&[1, 2, 0, 0], &[1, 2]); check(&[0, 0, 1, 2], &[0, 0, 1, 2]); check(&[0, 0, 1, 2, 0, 0], &[0, 0, 1, 2]); - check(&[-1i32 as BigDigit], &[-1i32 as BigDigit]); + check(&[-1i32 as u32], &[-1i32 as BigDigit]); } +#[cfg(u64_digit)] #[test] -fn test_assign_from_slice() { - fn check(slice: &[BigDigit], data: &[BigDigit]) { - let mut p = BigUint::from_slice(&[2627_u32, 0_u32, 9182_u32, 42_u32]); - p.assign_from_slice(slice); - assert!(p.data == data); +fn test_from_slice() { + fn check(slice: &[u32], data: &[BigDigit]) { + assert_eq!( + BigUint::from_slice(slice).data, + data, + "from {:?}, to {:?}", + slice, + data + ); } check(&[1], &[1]); check(&[0, 0, 0], &[]); - check(&[1, 2, 0, 0], &[1, 2]); - check(&[0, 0, 1, 2], &[0, 0, 1, 2]); - check(&[0, 0, 1, 2, 0, 0], &[0, 0, 1, 2]); - check(&[-1i32 as BigDigit], &[-1i32 as BigDigit]); + check(&[1, 2], &[8_589_934_593]); + check(&[1, 2, 0, 0], &[8_589_934_593]); + check(&[0, 0, 1, 2], &[0, 8_589_934_593]); + check(&[0, 0, 1, 2, 0, 0], &[0, 8_589_934_593]); + check(&[-1i32 as u32], &[(-1i32 as u32) as BigDigit]); } #[cfg(has_i128)] diff --git a/src/lib.rs b/src/lib.rs index 837870a6..236ff7a3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -196,19 +196,34 @@ pub use bigrand::{RandBigInt, RandomBits, UniformBigInt, UniformBigUint}; mod big_digit { /// A `BigDigit` is a `BigUint`'s composing element. + #[cfg(not(u64_digit))] pub type BigDigit = u32; + #[cfg(u64_digit)] + pub type BigDigit = u64; /// A `DoubleBigDigit` is the internal type used to do the computations. Its /// size is the double of the size of `BigDigit`. + #[cfg(not(u64_digit))] pub type DoubleBigDigit = u64; + #[cfg(u64_digit)] + pub type DoubleBigDigit = u128; /// A `SignedDoubleBigDigit` is the signed version of `DoubleBigDigit`. + #[cfg(not(u64_digit))] pub type SignedDoubleBigDigit = i64; + #[cfg(u64_digit)] + pub type SignedDoubleBigDigit = i128; // `DoubleBigDigit` size dependent + #[cfg(not(u64_digit))] pub const BITS: usize = 32; + #[cfg(u64_digit)] + pub const BITS: usize = 64; - const LO_MASK: DoubleBigDigit = (-1i32 as DoubleBigDigit) >> BITS; + pub const HALF_BITS: usize = BITS / 2; + pub const HALF: BigDigit = (1 << HALF_BITS) - 1; + + const LO_MASK: DoubleBigDigit = (1 << BITS) - 1; #[inline] fn get_hi(n: DoubleBigDigit) -> BigDigit { diff --git a/src/monty.rs b/src/monty.rs index 62f59b38..88b417aa 100644 --- a/src/monty.rs +++ b/src/monty.rs @@ -1,129 +1,222 @@ -use integer::Integer; -use traits::Zero; +use std::mem; +use std::ops::Shl; +use traits::{One, Zero}; +use big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit}; use biguint::BigUint; -struct MontyReducer<'a> { - n: &'a BigUint, - n0inv: u32, +struct MontyReducer { + n0inv: BigDigit, } -// Calculate the modular inverse of `num`, using Extended GCD. -// -// Reference: -// Brent & Zimmermann, Modern Computer Arithmetic, v0.5.9, Algorithm 1.20 -fn inv_mod_u32(num: u32) -> u32 { - // num needs to be relatively prime to 2**32 -- i.e. it must be odd. - assert!(num % 2 != 0); - - let mut a: i64 = i64::from(num); - let mut b: i64 = i64::from(u32::max_value()) + 1; - - // ExtendedGcd - // Input: positive integers a and b - // Output: integers (g, u, v) such that g = gcd(a, b) = ua + vb - // As we don't need v for modular inverse, we don't calculate it. - - // 1: (u, w) <- (1, 0) - let mut u = 1; - let mut w = 0; - // 3: while b != 0 - while b != 0 { - // 4: (q, r) <- DivRem(a, b) - let q = a / b; - let r = a % b; - // 5: (a, b) <- (b, r) - a = b; - b = r; - // 6: (u, w) <- (w, u - qw) - let m = u - w * q; - u = w; - w = m; +// k0 = -m**-1 mod 2**BITS. Algorithm from: Dumas, J.G. "On Newton–Raphson +// Iteration for Multiplicative Inverses Modulo Prime Powers". +fn inv_mod_alt(b: BigDigit) -> BigDigit { + assert_ne!(b & 1, 0); + + let mut k0 = 2 - b as SignedDoubleBigDigit; + let mut t = (b - 1) as SignedDoubleBigDigit; + let mut i = 1; + while i < big_digit::BITS { + t = t.wrapping_mul(t); + k0 = k0.wrapping_mul(t + 1); + + i <<= 1; } + -k0 as BigDigit +} - assert!(a == 1); - // Downcasting acts like a mod 2^32 too. - u as u32 +impl MontyReducer { + fn new(n: &BigUint) -> Self { + let n0inv = inv_mod_alt(n.data[0]); + MontyReducer { n0inv: n0inv } + } } -impl<'a> MontyReducer<'a> { - fn new(n: &'a BigUint) -> Self { - let n0inv = inv_mod_u32(n.data[0]); - MontyReducer { n: n, n0inv: n0inv } +/// Computes z mod m = x * y * 2 ** (-n*_W) mod m +/// assuming k = -1/m mod 2**_W +/// See Gueron, "Efficient Software Implementations of Modular Exponentiation". +/// https://eprint.iacr.org/2011/239.pdf +/// In the terminology of that paper, this is an "Almost Montgomery Multiplication": +/// x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result +/// z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m. +fn montgomery(x: &BigUint, y: &BigUint, m: &BigUint, k: BigDigit, n: usize) -> BigUint { + // This code assumes x, y, m are all the same length, n. + // (required by addMulVVW and the for loop). + // It also assumes that x, y are already reduced mod m, + // or else the result will not be properly reduced. + assert!( + x.data.len() == n && y.data.len() == n && m.data.len() == n, + "{:?} {:?} {:?} {}", + x, + y, + m, + n + ); + + let mut z = BigUint::zero(); + z.data.resize(n * 2, 0); + + let mut c: BigDigit = 0; + for i in 0..n { + let c2 = add_mul_vvw(&mut z.data[i..n + i], &x.data, y.data[i]); + let t = z.data[i].wrapping_mul(k); + let c3 = add_mul_vvw(&mut z.data[i..n + i], &m.data, t); + let cx = c.wrapping_add(c2); + let cy = cx.wrapping_add(c3); + z.data[n + i] = cy; + if cx < c2 || cy < c3 { + c = 1; + } else { + c = 0; + } } + + if c == 0 { + z.data = z.data[n..].to_vec(); + } else { + { + let (mut first, second) = z.data.split_at_mut(n); + sub_vv(&mut first, &second, &m.data); + } + z.data = z.data[..n].to_vec(); + } + + z } -// Montgomery Reduction -// -// Reference: -// Brent & Zimmermann, Modern Computer Arithmetic, v0.5.9, Algorithm 2.6 -fn monty_redc(a: BigUint, mr: &MontyReducer) -> BigUint { - let mut c = a.data; - let n = &mr.n.data; - let n_size = n.len(); - - // Allocate sufficient work space - c.resize(2 * n_size + 2, 0); - - // β is the size of a word, in this case 32 bits. So "a mod β" is - // equivalent to masking a to 32 bits. - // mu <- -N^(-1) mod β - let mu = 0u32.wrapping_sub(mr.n0inv); - - // 1: for i = 0 to (n-1) - for i in 0..n_size { - // 2: q_i <- mu*c_i mod β - let q_i = c[i].wrapping_mul(mu); - - // 3: C <- C + q_i * N * β^i - super::algorithms::mac_digit(&mut c[i..], n, q_i); +#[inline(always)] +fn add_mul_vvw(z: &mut [BigDigit], x: &[BigDigit], y: BigDigit) -> BigDigit { + let mut c = 0; + for (zi, xi) in z.iter_mut().zip(x.iter()) { + let (z1, z0) = mul_add_www(*xi, y, *zi); + let (c_, zi_) = add_ww(z0, c, 0); + *zi = zi_; + c = c_ + z1; } - // 4: R <- C * β^(-n) - // This is an n-word bitshift, equivalent to skipping n words. - let ret = BigUint::new(c[n_size..].to_vec()); + c +} - // 5: if R >= β^n then return R-N else return R. - if ret < *mr.n { - ret - } else { - ret - mr.n +/// The resulting carry c is either 0 or 1. +#[inline(always)] +fn sub_vv(z: &mut [BigDigit], x: &[BigDigit], y: &[BigDigit]) -> BigDigit { + let mut c = 0; + for (i, (xi, yi)) in x.iter().zip(y.iter()).enumerate().take(z.len()) { + let zi = xi.wrapping_sub(*yi).wrapping_sub(c); + z[i] = zi; + // see "Hacker's Delight", section 2-12 (overflow detection) + c = ((yi & !xi) | ((yi | !xi) & zi)) >> (big_digit::BITS - 1) } + + c } -// Montgomery Multiplication -fn monty_mult(a: BigUint, b: &BigUint, mr: &MontyReducer) -> BigUint { - monty_redc(a * b, mr) +/// z1<<_W + z0 = x+y+c, with c == 0 or 1 +#[inline(always)] +fn add_ww(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) { + let yc = y.wrapping_add(c); + let z0 = x.wrapping_add(yc); + let z1 = if z0 < x || yc < y { 1 } else { 0 }; + + (z1, z0) } -// Montgomery Squaring -fn monty_sqr(a: BigUint, mr: &MontyReducer) -> BigUint { - // TODO: Replace with an optimised squaring function - monty_redc(&a * &a, mr) +/// z1 << _W + z0 = x * y + c +#[inline(always)] +fn mul_add_www(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) { + let z = x as DoubleBigDigit * y as DoubleBigDigit + c as DoubleBigDigit; + ((z >> big_digit::BITS) as BigDigit, z as BigDigit) } -pub fn monty_modpow(a: &BigUint, exp: &BigUint, modulus: &BigUint) -> BigUint { - let mr = MontyReducer::new(modulus); +/// Calculates x ** y mod m using a fixed, 4-bit window. +pub fn monty_modpow(x: &BigUint, y: &BigUint, m: &BigUint) -> BigUint { + assert!(m.data[0] & 1 == 1); + let mr = MontyReducer::new(m); + let num_words = m.data.len(); - // Calculate the Montgomery parameter - let mut v = vec![0; modulus.data.len()]; - v.push(1); - let r = BigUint::new(v); + let mut x = x.clone(); + + // We want the lengths of x and m to be equal. + // It is OK if x >= m as long as len(x) == len(m). + if x.data.len() > num_words { + x %= m; + // Note: now len(x) <= numWords, not guaranteed ==. + } + if x.data.len() < num_words { + x.data.resize(num_words, 0); + } - // Map the base to the Montgomery domain - let mut apri = a * &r % modulus; + // rr = 2**(2*_W*len(m)) mod m + let mut rr = BigUint::one(); + rr = (rr.shl(2 * num_words * big_digit::BITS)) % m; + if rr.data.len() < num_words { + rr.data.resize(num_words, 0); + } + // one = 1, with equal length to that of m + let mut one = BigUint::one(); + one.data.resize(num_words, 0); + + let n = 4; + // powers[i] contains x^i + let mut powers = Vec::with_capacity(1 << n); + powers.push(montgomery(&one, &rr, m, mr.n0inv, num_words)); + powers.push(montgomery(&x, &rr, m, mr.n0inv, num_words)); + for i in 2..1 << n { + let r = montgomery(&powers[i - 1], &powers[1], m, mr.n0inv, num_words); + powers.push(r); + } + + // initialize z = 1 (Montgomery 1) + let mut z = powers[0].clone(); + z.data.resize(num_words, 0); + let mut zz = BigUint::zero(); + zz.data.resize(num_words, 0); + + // same windowed exponent, but with Montgomery multiplications + for i in (0..y.data.len()).rev() { + let mut yi = y.data[i]; + let mut j = 0; + while j < big_digit::BITS { + if i != y.data.len() - 1 || j != 0 { + zz = montgomery(&z, &z, m, mr.n0inv, num_words); + z = montgomery(&zz, &zz, m, mr.n0inv, num_words); + zz = montgomery(&z, &z, m, mr.n0inv, num_words); + z = montgomery(&zz, &zz, m, mr.n0inv, num_words); + } + zz = montgomery( + &z, + &powers[(yi >> (big_digit::BITS - n)) as usize], + m, + mr.n0inv, + num_words, + ); + mem::swap(&mut z, &mut zz); + yi <<= n; + j += n; + } + } - // Binary exponentiation - let mut ans = &r % modulus; - let mut e = exp.clone(); - while !e.is_zero() { - if e.is_odd() { - ans = monty_mult(ans, &apri, &mr); + // convert to regular number + zz = montgomery(&z, &one, m, mr.n0inv, num_words); + + zz.normalize(); + // One last reduction, just in case. + // See golang.org/issue/13907. + if zz >= *m { + // Common case is m has high bit set; in that case, + // since zz is the same length as m, there can be just + // one multiple of m to remove. Just subtract. + // We think that the subtract should be sufficient in general, + // so do that unconditionally, but double-check, + // in case our beliefs are wrong. + // The div is not expected to be reached. + zz -= m; + if zz >= *m { + zz %= m; } - apri = monty_sqr(apri, &mr); - e >>= 1; } - // Map the result back to the residues domain - monty_redc(ans, &mr) + zz.normalize(); + zz } diff --git a/tests/bigint_scalar.rs b/tests/bigint_scalar.rs index ae9a6d7a..8b199726 100644 --- a/tests/bigint_scalar.rs +++ b/tests/bigint_scalar.rs @@ -98,15 +98,14 @@ fn test_scalar_div_rem() { assert!(q == *ans_q); assert!(r == *ans_r); + let b = BigInt::from(b); let (a, b, ans_q, ans_r) = (a.clone(), b.clone(), ans_q.clone(), ans_r.clone()); - assert_op!(a / b == ans_q); - assert_op!(a % b == ans_r); + assert_signed_scalar_op!(a / b == ans_q); + assert_signed_scalar_op!(a % b == ans_r); - if b <= i32::max_value() as u32 { - let nb = -(b as i32); - assert_op!(a / nb == -ans_q.clone()); - assert_op!(a % nb == ans_r); - } + let nb = -b; + assert_signed_scalar_op!(a / nb == -ans_q.clone()); + assert_signed_scalar_op!(a % nb == ans_r); } fn check(a: &BigInt, b: u32, q: &BigInt, r: &BigInt) { diff --git a/tests/modpow.rs b/tests/modpow.rs index a5f8b928..e6784d84 100644 --- a/tests/modpow.rs +++ b/tests/modpow.rs @@ -76,7 +76,7 @@ mod biguint { } #[test] - fn test_modpow() { + fn test_modpow_single() { check_modpow::(1, 0, 11, 1); check_modpow::(0, 15, 11, 0); check_modpow::(3, 7, 11, 9); @@ -119,7 +119,7 @@ mod bigint { fn check_modpow>(b: T, e: T, m: T, r: T) { fn check(b: &BigInt, e: &BigInt, m: &BigInt, r: &BigInt) { - assert_eq!(&b.modpow(e, m), r); + assert_eq!(&b.modpow(e, m), r, "{} ** {} (mod {}) != {}", b, e, m, r); let even_m = m << 1; let even_modpow = b.modpow(e, m); diff --git a/tests/serde.rs b/tests/serde.rs index 0f3d4868..a0251b9c 100644 --- a/tests/serde.rs +++ b/tests/serde.rs @@ -101,3 +101,30 @@ fn bigint_factorial_100() { assert_tokens(&n, &tokens); } + +#[test] +fn big_digits() { + // Try a few different lengths for u32/u64 digit coverage + for len in 1..10 { + let digits = 1u32..len + 1; + let n = BigUint::new(digits.clone().collect()); + + let mut tokens = vec![]; + tokens.push(Token::Seq { + len: Some(len as usize), + }); + tokens.extend(digits.map(Token::U32)); + tokens.push(Token::SeqEnd); + + assert_tokens(&n, &tokens); + + let n = BigInt::from(n); + tokens.insert(0, Token::Tuple { len: 2 }); + tokens.insert(1, Token::I8(1)); + tokens.push(Token::TupleEnd); + assert_tokens(&n, &tokens); + + tokens[1] = Token::I8(-1); + assert_tokens(&-n, &tokens); + } +}