From 57fdf6a5eadcb5312aae0dfc3148aad225feba72 Mon Sep 17 00:00:00 2001 From: Elliott Mahler Date: Mon, 23 Mar 2020 22:03:22 -0700 Subject: [PATCH 1/7] Use the addcarry intrinsic when available --- src/algorithms.rs | 101 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 96 insertions(+), 5 deletions(-) diff --git a/src/algorithms.rs b/src/algorithms.rs index c65f3b4f..dd335eaf 100644 --- a/src/algorithms.rs +++ b/src/algorithms.rs @@ -17,6 +17,7 @@ use crate::big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit}; // Generic functions for add/subtract/multiply with carry/borrow: // Add with carry: +#[allow(unused)] #[inline] fn adc(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit { *acc += DoubleBigDigit::from(a); @@ -27,6 +28,7 @@ fn adc(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit { } // Subtract with borrow: +#[allow(unused)] #[inline] fn sbb(a: BigDigit, b: BigDigit, acc: &mut SignedDoubleBigDigit) -> BigDigit { *acc += SignedDoubleBigDigit::from(a); @@ -132,6 +134,41 @@ pub(crate) fn rem_digit(a: &BigUint, b: BigDigit) -> BigDigit { /// the addition first hoping that it will fit. /// /// The caller _must_ ensure that `a` is at least as long as `b`. 
+#[cfg(all(u64_digit, target_arch = "x86_64"))] // only run on x86_64, when we have u64 digits +#[inline] +pub(crate) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit { + debug_assert!(a.len() >= b.len()); + + use std::arch::x86_64::_addcarry_u64; + + let mut carry = 0; + let (a_lo, a_hi) = a.split_at_mut(b.len()); + + for (a, b) in a_lo.iter_mut().zip(b) { + // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics + carry = unsafe { _addcarry_u64(carry, *a, *b, a) }; + } + + if carry != 0 { + for a in a_hi { + // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics + carry = unsafe { _addcarry_u64(carry, *a, 0, a) }; + if carry == 0 { + break; + } + } + } + + carry as BigDigit +} + +/// Two argument addition of raw slices, `a += b`, returning the carry. +/// +/// This is used when the data `Vec` might need to resize to push a non-zero carry, so we perform +/// the addition first hoping that it will fit. +/// +/// The caller _must_ ensure that `a` is at least as long as `b`. 
+#[cfg(not(all(u64_digit, target_arch = "x86_64")))] // run if we aren't using 64-bit digits, or if we're not running on x86_64 #[inline] pub(crate) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit { debug_assert!(a.len() >= b.len()); @@ -166,6 +203,39 @@ pub(crate) fn add2(a: &mut [BigDigit], b: &[BigDigit]) { debug_assert!(carry == 0); } +#[cfg(all(u64_digit, target_arch = "x86_64"))] // only run on x86_64, when we have u64 digits +pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { + use std::arch::x86_64::_subborrow_u64; + + let mut borrow = 0; + + let len = cmp::min(a.len(), b.len()); + let (a_lo, a_hi) = a.split_at_mut(len); + let (b_lo, b_hi) = b.split_at(len); + + for (a, b) in a_lo.iter_mut().zip(b_lo) { + // Safety: There are absolutely no safety concerns with calling _subborrow_u64, it's just unsafe for API consistency with other intrinsics + borrow = unsafe { _subborrow_u64(borrow, *a, *b, a) }; + } + + if borrow != 0 { + for a in a_hi { + // Safety: There are absolutely no safety concerns with calling _subborrow_u64, it's just unsafe for API consistency with other intrinsics + borrow = unsafe { _subborrow_u64(borrow, *a, 0, a) }; + if borrow == 0 { + break; + } + } + } + + // note: we're _required_ to fail on underflow + assert!( + borrow == 0 && b_hi.iter().all(|x| *x == 0), + "Cannot subtract b from a because b is larger than a." + ); +} + +#[cfg(not(all(u64_digit, target_arch = "x86_64")))] // run if we aren't using 64-bit digits, or if we're not running on x86_64 pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { let mut borrow = 0; @@ -194,6 +264,24 @@ pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { } // Only for the Sub impl. `a` and `b` must have same length. 
+#[cfg(all(u64_digit, target_arch = "x86_64"))] // only run on x86_64, when we have u64 digits +#[inline] +pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit { + use std::arch::x86_64::_subborrow_u64; + debug_assert!(b.len() == a.len()); + + let mut borrow = 0; + + for (ai, bi) in a.iter().zip(b) { + // Safety: There are absolutely no safety concerns with calling _subborrow_u64, it's just unsafe for API consistency with other intrinsics + borrow = unsafe { _subborrow_u64(borrow, *ai, *bi, bi) }; + } + + borrow as BigDigit +} + +// Only for the Sub impl. `a` and `b` must have same length. +#[cfg(not(all(u64_digit, target_arch = "x86_64")))] // run if we aren't using 64-bit digits, or if we're not running on x86_64 #[inline] pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit { debug_assert!(b.len() == a.len()); @@ -259,11 +347,14 @@ pub(crate) fn mac_digit(acc: &mut [BigDigit], b: &[BigDigit], c: BigDigit) { *a = mac_with_carry(*a, b, c, &mut carry); } - let mut a = a_hi.iter_mut(); - while carry != 0 { - let a = a.next().expect("carry overflow during multiplication!"); - *a = adc(*a, 0, &mut carry); - } + let (carry_hi, carry_lo) = big_digit::from_doublebigdigit(carry); + + let final_carry = if carry_hi == 0 { + __add2(a_hi, &[carry_lo]) + } else { + __add2(a_hi, &[carry_hi, carry_lo]) + }; + assert_eq!(final_carry, 0, "carry overflow during multiplication!"); } fn bigint_from_slice(slice: &[BigDigit]) -> BigInt { From 4e20fc3a13eedb9d3654b9f38c19050de3b7d098 Mon Sep 17 00:00:00 2001 From: Elliott Mahler Date: Mon, 23 Mar 2020 22:42:33 -0700 Subject: [PATCH 2/7] Include intrinsics from core::arch, not std::arch --- src/algorithms.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/algorithms.rs b/src/algorithms.rs index dd335eaf..6b8f8a6d 100644 --- a/src/algorithms.rs +++ b/src/algorithms.rs @@ -139,7 +139,7 @@ pub(crate) fn rem_digit(a: &BigUint, b: BigDigit) -> BigDigit { pub(crate) fn __add2(a: 
&mut [BigDigit], b: &[BigDigit]) -> BigDigit { debug_assert!(a.len() >= b.len()); - use std::arch::x86_64::_addcarry_u64; + use core::arch::x86_64::_addcarry_u64; let mut carry = 0; let (a_lo, a_hi) = a.split_at_mut(b.len()); @@ -205,7 +205,7 @@ pub(crate) fn add2(a: &mut [BigDigit], b: &[BigDigit]) { #[cfg(all(u64_digit, target_arch = "x86_64"))] // only run on x86_64, when we have u64 digits pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { - use std::arch::x86_64::_subborrow_u64; + use core::arch::x86_64::_subborrow_u64; let mut borrow = 0; @@ -267,7 +267,7 @@ pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { #[cfg(all(u64_digit, target_arch = "x86_64"))] // only run on x86_64, when we have u64 digits #[inline] pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit { - use std::arch::x86_64::_subborrow_u64; + use core::arch::x86_64::_subborrow_u64; debug_assert!(b.len() == a.len()); let mut borrow = 0; From 0be00d81474127a0c0e801a995e12b6cf799b5f7 Mon Sep 17 00:00:00 2001 From: Elliott Mahler Date: Wed, 25 Mar 2020 22:32:15 -0700 Subject: [PATCH 3/7] Moved the platform-specific code to adc and sbb, added a build.rs entry --- build.rs | 7 ++- src/algorithms.rs | 138 +++++++++++----------------------------------- 2 files changed, 38 insertions(+), 107 deletions(-) diff --git a/build.rs b/build.rs index 93ee0998..4952abd8 100644 --- a/build.rs +++ b/build.rs @@ -6,7 +6,8 @@ use std::path::Path; fn main() { let pointer_width = env::var("CARGO_CFG_TARGET_POINTER_WIDTH"); - if pointer_width.as_ref().map(String::as_str) == Ok("64") { + let u64_digit = pointer_width.as_ref().map(String::as_str) == Ok("64"); + if u64_digit { autocfg::emit("u64_digit"); } let ac = autocfg::new(); @@ -14,6 +15,10 @@ fn main() { autocfg::emit("has_try_from"); } + if u64_digit && (ac.probe_path("core::arch::x86_64::_addcarry_u64") || ac.probe_path("std::arch::x86_64::_addcarry_u64")) { + autocfg::emit("use_addcarry_u64"); + } + autocfg::rerun_path("build.rs"); 
write_radix_bases().unwrap(); diff --git a/src/algorithms.rs b/src/algorithms.rs index 6b8f8a6d..38196549 100644 --- a/src/algorithms.rs +++ b/src/algorithms.rs @@ -12,30 +12,42 @@ use crate::bigint::BigInt; use crate::bigint::Sign; use crate::bigint::Sign::{Minus, NoSign, Plus}; -use crate::big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit}; +use crate::big_digit::{self, BigDigit, DoubleBigDigit}; + +#[cfg(not(use_addcarry_u64))] // only needed for the fallback implementation of `sbb` +use crate::big_digit::SignedDoubleBigDigit; // Generic functions for add/subtract/multiply with carry/borrow: // Add with carry: -#[allow(unused)] +#[cfg(use_addcarry_u64)] #[inline] -fn adc(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit { - *acc += DoubleBigDigit::from(a); - *acc += DoubleBigDigit::from(b); - let lo = *acc as BigDigit; - *acc >>= big_digit::BITS; - lo +fn adc(carry: u8, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> u8 { + unsafe { core::arch::x86_64::_addcarry_u64(carry, a, b, out) } +} + +#[cfg(not(use_addcarry_u64))] // fallback for environments where we don't have an addcarry intrinsic +#[inline] +fn adc(mut carry: DoubleBigDigit, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> DoubleBigDigit { + carry += DoubleBigDigit::from(a); + carry += DoubleBigDigit::from(b); + *out = carry as BigDigit; + carry >> big_digit::BITS } // Subtract with borrow: -#[allow(unused)] +#[cfg(use_addcarry_u64)] #[inline] -fn sbb(a: BigDigit, b: BigDigit, acc: &mut SignedDoubleBigDigit) -> BigDigit { - *acc += SignedDoubleBigDigit::from(a); - *acc -= SignedDoubleBigDigit::from(b); - let lo = *acc as BigDigit; - *acc >>= big_digit::BITS; - lo +fn sbb(carry: u8, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> u8 { + unsafe { core::arch::x86_64::_subborrow_u64(carry, a, b, out) } +} +#[cfg(not(use_addcarry_u64))] // fallback for environments where we don't have an addcarry intrinsic +#[inline] +fn sbb(mut carry: SignedDoubleBigDigit, a: BigDigit, b: 
BigDigit, out: &mut BigDigit) -> SignedDoubleBigDigit { + carry += SignedDoubleBigDigit::from(a); + carry -= SignedDoubleBigDigit::from(b); + *out = carry as BigDigit; + carry >> big_digit::BITS } #[inline] @@ -134,41 +146,6 @@ pub(crate) fn rem_digit(a: &BigUint, b: BigDigit) -> BigDigit { /// the addition first hoping that it will fit. /// /// The caller _must_ ensure that `a` is at least as long as `b`. -#[cfg(all(u64_digit, target_arch = "x86_64"))] // only run on x86_64, when we have u64 digits -#[inline] -pub(crate) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit { - debug_assert!(a.len() >= b.len()); - - use core::arch::x86_64::_addcarry_u64; - - let mut carry = 0; - let (a_lo, a_hi) = a.split_at_mut(b.len()); - - for (a, b) in a_lo.iter_mut().zip(b) { - // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics - carry = unsafe { _addcarry_u64(carry, *a, *b, a) }; - } - - if carry != 0 { - for a in a_hi { - // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics - carry = unsafe { _addcarry_u64(carry, *a, 0, a) }; - if carry == 0 { - break; - } - } - } - - carry as BigDigit -} - -/// Two argument addition of raw slices, `a += b`, returning the carry. -/// -/// This is used when the data `Vec` might need to resize to push a non-zero carry, so we perform -/// the addition first hoping that it will fit. -/// -/// The caller _must_ ensure that `a` is at least as long as `b`. 
-#[cfg(not(all(u64_digit, target_arch = "x86_64")))] // run if we aren't using 64-bit digits, or if we're not running on x86_64 #[inline] pub(crate) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit { debug_assert!(a.len() >= b.len()); @@ -177,12 +154,12 @@ pub(crate) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit { let (a_lo, a_hi) = a.split_at_mut(b.len()); for (a, b) in a_lo.iter_mut().zip(b) { - *a = adc(*a, *b, &mut carry); + carry = adc(carry, *a, *b, a); } if carry != 0 { for a in a_hi { - *a = adc(*a, 0, &mut carry); + carry = adc(carry, *a, 0, a); if carry == 0 { break; } @@ -203,39 +180,6 @@ pub(crate) fn add2(a: &mut [BigDigit], b: &[BigDigit]) { debug_assert!(carry == 0); } -#[cfg(all(u64_digit, target_arch = "x86_64"))] // only run on x86_64, when we have u64 digits -pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { - use core::arch::x86_64::_subborrow_u64; - - let mut borrow = 0; - - let len = cmp::min(a.len(), b.len()); - let (a_lo, a_hi) = a.split_at_mut(len); - let (b_lo, b_hi) = b.split_at(len); - - for (a, b) in a_lo.iter_mut().zip(b_lo) { - // Safety: There are absolutely no safety concerns with calling _subborrow_u64, it's just unsafe for API consistency with other intrinsics - borrow = unsafe { _subborrow_u64(borrow, *a, *b, a) }; - } - - if borrow != 0 { - for a in a_hi { - // Safety: There are absolutely no safety concerns with calling _subborrow_u64, it's just unsafe for API consistency with other intrinsics - borrow = unsafe { _subborrow_u64(borrow, *a, 0, a) }; - if borrow == 0 { - break; - } - } - } - - // note: we're _required_ to fail on underflow - assert!( - borrow == 0 && b_hi.iter().all(|x| *x == 0), - "Cannot subtract b from a because b is larger than a." 
- ); -} - -#[cfg(not(all(u64_digit, target_arch = "x86_64")))] // run if we aren't using 64-bit digits, or if we're not running on x86_64 pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { let mut borrow = 0; @@ -244,12 +188,12 @@ pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { let (b_lo, b_hi) = b.split_at(len); for (a, b) in a_lo.iter_mut().zip(b_lo) { - *a = sbb(*a, *b, &mut borrow); + borrow = sbb(borrow, *a, *b, a); } if borrow != 0 { for a in a_hi { - *a = sbb(*a, 0, &mut borrow); + borrow = sbb(borrow, *a, 0, a); if borrow == 0 { break; } @@ -264,24 +208,6 @@ pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { } // Only for the Sub impl. `a` and `b` must have same length. -#[cfg(all(u64_digit, target_arch = "x86_64"))] // only run on x86_64, when we have u64 digits -#[inline] -pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit { - use core::arch::x86_64::_subborrow_u64; - debug_assert!(b.len() == a.len()); - - let mut borrow = 0; - - for (ai, bi) in a.iter().zip(b) { - // Safety: There are absolutely no safety concerns with calling _subborrow_u64, it's just unsafe for API consistency with other intrinsics - borrow = unsafe { _subborrow_u64(borrow, *ai, *bi, bi) }; - } - - borrow as BigDigit -} - -// Only for the Sub impl. `a` and `b` must have same length. 
-#[cfg(not(all(u64_digit, target_arch = "x86_64")))] // run if we aren't using 64-bit digits, or if we're not running on x86_64 #[inline] pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit { debug_assert!(b.len() == a.len()); @@ -289,7 +215,7 @@ pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit { let mut borrow = 0; for (ai, bi) in a.iter().zip(b) { - *bi = sbb(*ai, *bi, &mut borrow); + borrow = sbb(borrow, *ai, *bi, bi); } borrow as BigDigit From 49ff7b745c22d3c973876ba8ea6a211b724acc5b Mon Sep 17 00:00:00 2001 From: Elliott Mahler Date: Wed, 25 Mar 2020 23:16:08 -0700 Subject: [PATCH 4/7] Backed out adc and sbb parameter rearrangement, implemented u32 for x64, applied rustfmt --- build.rs | 10 ++++++- src/algorithms.rs | 73 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 59 insertions(+), 24 deletions(-) diff --git a/build.rs b/build.rs index 4952abd8..0aea8629 100644 --- a/build.rs +++ b/build.rs @@ -15,8 +15,16 @@ fn main() { autocfg::emit("has_try_from"); } - if u64_digit && (ac.probe_path("core::arch::x86_64::_addcarry_u64") || ac.probe_path("std::arch::x86_64::_addcarry_u64")) { + if u64_digit + && (ac.probe_path("core::arch::x86_64::_addcarry_u64") + || ac.probe_path("std::arch::x86_64::_addcarry_u64")) + { autocfg::emit("use_addcarry_u64"); + } else if !u64_digit + && (ac.probe_path("core::arch::x86_64::_addcarry_u32") + || ac.probe_path("core::arch::x86::_addcarry_u32")) + { + autocfg::emit("use_addcarry_u32"); } autocfg::rerun_path("build.rs"); diff --git a/src/algorithms.rs b/src/algorithms.rs index 38196549..66f13d9e 100644 --- a/src/algorithms.rs +++ b/src/algorithms.rs @@ -14,40 +14,67 @@ use crate::bigint::Sign::{Minus, NoSign, Plus}; use crate::big_digit::{self, BigDigit, DoubleBigDigit}; -#[cfg(not(use_addcarry_u64))] // only needed for the fallback implementation of `sbb` +// only needed for the fallback implementation of `sbb` +#[cfg(not(any(use_addcarry_u64, use_addcarry_u32)))] use 
crate::big_digit::SignedDoubleBigDigit; -// Generic functions for add/subtract/multiply with carry/borrow: +// Generic functions for add/subtract/multiply with carry/borrow. These are specialized for some platforms to take advantage of intrinsics etc // Add with carry: #[cfg(use_addcarry_u64)] #[inline] -fn adc(carry: u8, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> u8 { - unsafe { core::arch::x86_64::_addcarry_u64(carry, a, b, out) } +fn adc(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { + let mut out = 0; + // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics + *acc = unsafe { core::arch::x86_64::_addcarry_u64(*acc, a, b, &mut out) }; + out } -#[cfg(not(use_addcarry_u64))] // fallback for environments where we don't have an addcarry intrinsic +#[cfg(use_addcarry_u32)] #[inline] -fn adc(mut carry: DoubleBigDigit, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> DoubleBigDigit { - carry += DoubleBigDigit::from(a); - carry += DoubleBigDigit::from(b); - *out = carry as BigDigit; - carry >> big_digit::BITS +fn adc(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { + let mut out = 0; + // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics + *acc = unsafe { core::arch::x86_64::_addcarry_u32(*acc, a, b, &mut out) }; + out +} + +#[cfg(not(any(use_addcarry_u64, use_addcarry_u32)))] // fallback for environments where we don't have an addcarry intrinsic +#[inline] +fn adc(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit { + *acc += DoubleBigDigit::from(a); + *acc += DoubleBigDigit::from(b); + let lo = *acc as BigDigit; + *acc >>= big_digit::BITS; + lo } // Subtract with borrow: #[cfg(use_addcarry_u64)] #[inline] -fn sbb(carry: u8, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> u8 { - unsafe { core::arch::x86_64::_subborrow_u64(carry, a, b, out) } +fn sbb(a: BigDigit, b: 
BigDigit, acc: &mut u8) -> BigDigit { + let mut out = 0; + // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics + *acc = unsafe { core::arch::x86_64::_subborrow_u64(*acc, a, b, &mut out) }; + out } -#[cfg(not(use_addcarry_u64))] // fallback for environments where we don't have an addcarry intrinsic +#[cfg(use_addcarry_u32)] #[inline] -fn sbb(mut carry: SignedDoubleBigDigit, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> SignedDoubleBigDigit { - carry += SignedDoubleBigDigit::from(a); - carry -= SignedDoubleBigDigit::from(b); - *out = carry as BigDigit; - carry >> big_digit::BITS +fn sbb(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { + let mut out = 0; + // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics + *acc = unsafe { core::arch::x86_64::_subborrow_u32(*acc, a, b, &mut out) }; + out +} + +#[cfg(not(any(use_addcarry_u64, use_addcarry_u32)))] // fallback for environments where we don't have an addcarry intrinsic +#[inline] +fn sbb(a: BigDigit, b: BigDigit, acc: &mut SignedDoubleBigDigit) -> BigDigit { + *acc += SignedDoubleBigDigit::from(a); + *acc -= SignedDoubleBigDigit::from(b); + let lo = *acc as BigDigit; + *acc >>= big_digit::BITS; + lo } #[inline] @@ -154,12 +181,12 @@ pub(crate) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit { let (a_lo, a_hi) = a.split_at_mut(b.len()); for (a, b) in a_lo.iter_mut().zip(b) { - carry = adc(carry, *a, *b, a); + *a = adc(*a, *b, &mut carry); } if carry != 0 { for a in a_hi { - carry = adc(carry, *a, 0, a); + *a = adc(*a, 0, &mut carry); if carry == 0 { break; } @@ -188,12 +215,12 @@ pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { let (b_lo, b_hi) = b.split_at(len); for (a, b) in a_lo.iter_mut().zip(b_lo) { - borrow = sbb(borrow, *a, *b, a); + *a = sbb(*a, *b, &mut borrow); } if borrow != 0 { for a in a_hi { - borrow = 
sbb(borrow, *a, 0, a); + *a = sbb(*a, 0, &mut borrow); if borrow == 0 { break; } @@ -215,7 +242,7 @@ pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit { let mut borrow = 0; for (ai, bi) in a.iter().zip(b) { - borrow = sbb(borrow, *ai, *bi, bi); + *bi = sbb(*ai, *bi, &mut borrow); } borrow as BigDigit From 0cc50c9b18dcc429f3b666d04b2093835909da55 Mon Sep 17 00:00:00 2001 From: Elliott Mahler Date: Wed, 25 Mar 2020 23:16:36 -0700 Subject: [PATCH 5/7] fixed copy/paste error --- src/algorithms.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/algorithms.rs b/src/algorithms.rs index 66f13d9e..d441fb55 100644 --- a/src/algorithms.rs +++ b/src/algorithms.rs @@ -34,7 +34,7 @@ fn adc(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { #[inline] fn adc(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { let mut out = 0; - // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics + // Safety: There are absolutely no safety concerns with calling _addcarry_u32, it's just unsafe for API consistency with other intrinsics *acc = unsafe { core::arch::x86_64::_addcarry_u32(*acc, a, b, &mut out) }; out } @@ -54,7 +54,7 @@ fn adc(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit { #[inline] fn sbb(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { let mut out = 0; - // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics + // Safety: There are absolutely no safety concerns with calling _subborrow_u64, it's just unsafe for API consistency with other intrinsics *acc = unsafe { core::arch::x86_64::_subborrow_u64(*acc, a, b, &mut out) }; out } @@ -62,7 +62,7 @@ fn sbb(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { #[inline] fn sbb(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { let mut out = 0; - // Safety: There are absolutely no safety concerns 
with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics + // Safety: There are absolutely no safety concerns with calling _subborrow_u32, it's just unsafe for API consistency with other intrinsics *acc = unsafe { core::arch::x86_64::_subborrow_u32(*acc, a, b, &mut out) }; out } From e3971e6558c2140ea09da13b1726e573969c7950 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Fri, 30 Oct 2020 13:04:50 -0700 Subject: [PATCH 6/7] Unify addcarry probing for x86_64/x86 --- build.rs | 26 +++++++++++++++----------- src/algorithms.rs | 46 ++++++++++++++++++++++++++++++---------------- 2 files changed, 45 insertions(+), 27 deletions(-) diff --git a/build.rs b/build.rs index 0aea8629..3daed5e8 100644 --- a/build.rs +++ b/build.rs @@ -11,20 +11,24 @@ fn main() { autocfg::emit("u64_digit"); } let ac = autocfg::new(); - if ac.probe_path("std::convert::TryFrom") || ac.probe_path("core::convert::TryFrom") { + let std = if ac.probe_sysroot_crate("std") { + "std" + } else { + "core" + }; + if ac.probe_path(&format!("{}::convert::TryFrom", std)) { autocfg::emit("has_try_from"); } - if u64_digit - && (ac.probe_path("core::arch::x86_64::_addcarry_u64") - || ac.probe_path("std::arch::x86_64::_addcarry_u64")) - { - autocfg::emit("use_addcarry_u64"); - } else if !u64_digit - && (ac.probe_path("core::arch::x86_64::_addcarry_u32") - || ac.probe_path("core::arch::x86::_addcarry_u32")) - { - autocfg::emit("use_addcarry_u32"); + if let Ok(target_arch) = env::var("CARGO_CFG_TARGET_ARCH") { + if target_arch == "x86_64" || target_arch == "x86" { + let digit = if u64_digit { "u64" } else { "u32" }; + + let addcarry = format!("{}::arch::{}::_addcarry_{}", std, target_arch, digit); + if ac.probe_path(&addcarry) { + autocfg::emit("use_addcarry"); + } + } } autocfg::rerun_path("build.rs"); diff --git a/src/algorithms.rs b/src/algorithms.rs index d441fb55..89fc35fe 100644 --- a/src/algorithms.rs +++ b/src/algorithms.rs @@ -5,6 +5,12 @@ use core::iter::repeat; use core::mem; 
use num_traits::{One, PrimInt, Zero}; +#[cfg(all(use_addcarry, target_arch = "x86_64"))] +use core::arch::x86_64 as arch; + +#[cfg(all(use_addcarry, target_arch = "x86"))] +use core::arch::x86 as arch; + use crate::biguint::biguint_from_vec; use crate::biguint::BigUint; @@ -15,31 +21,35 @@ use crate::bigint::Sign::{Minus, NoSign, Plus}; use crate::big_digit::{self, BigDigit, DoubleBigDigit}; // only needed for the fallback implementation of `sbb` -#[cfg(not(any(use_addcarry_u64, use_addcarry_u32)))] +#[cfg(not(use_addcarry))] use crate::big_digit::SignedDoubleBigDigit; -// Generic functions for add/subtract/multiply with carry/borrow. These are specialized for some platforms to take advantage of intrinsics etc +// Generic functions for add/subtract/multiply with carry/borrow. These are specialized +// for some platforms to take advantage of intrinsics, etc. // Add with carry: -#[cfg(use_addcarry_u64)] +#[cfg(all(use_addcarry, u64_digit))] #[inline] fn adc(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { let mut out = 0; - // Safety: There are absolutely no safety concerns with calling _addcarry_u64, it's just unsafe for API consistency with other intrinsics - *acc = unsafe { core::arch::x86_64::_addcarry_u64(*acc, a, b, &mut out) }; + // Safety: There are absolutely no safety concerns with calling `_addcarry_u64`. + // It's just unsafe for API consistency with other intrinsics. + *acc = unsafe { arch::_addcarry_u64(*acc, a, b, &mut out) }; out } -#[cfg(use_addcarry_u32)] +#[cfg(all(use_addcarry, not(u64_digit)))] #[inline] fn adc(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { let mut out = 0; - // Safety: There are absolutely no safety concerns with calling _addcarry_u32, it's just unsafe for API consistency with other intrinsics - *acc = unsafe { core::arch::x86_64::_addcarry_u32(*acc, a, b, &mut out) }; + // Safety: There are absolutely no safety concerns with calling `_addcarry_u32`. + // It's just unsafe for API consistency with other intrinsics. 
+ *acc = unsafe { arch::_addcarry_u32(*acc, a, b, &mut out) }; out } -#[cfg(not(any(use_addcarry_u64, use_addcarry_u32)))] // fallback for environments where we don't have an addcarry intrinsic +// fallback for environments where we don't have an addcarry intrinsic +#[cfg(not(use_addcarry))] #[inline] fn adc(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit { *acc += DoubleBigDigit::from(a); @@ -50,24 +60,28 @@ fn adc(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit { } // Subtract with borrow: -#[cfg(use_addcarry_u64)] +#[cfg(all(use_addcarry, u64_digit))] #[inline] fn sbb(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { let mut out = 0; - // Safety: There are absolutely no safety concerns with calling _subborrow_u64, it's just unsafe for API consistency with other intrinsics - *acc = unsafe { core::arch::x86_64::_subborrow_u64(*acc, a, b, &mut out) }; + // Safety: There are absolutely no safety concerns with calling `_subborrow_u64`. + // It's just unsafe for API consistency with other intrinsics. + *acc = unsafe { arch::_subborrow_u64(*acc, a, b, &mut out) }; out } -#[cfg(use_addcarry_u32)] + +#[cfg(all(use_addcarry, not(u64_digit)))] #[inline] fn sbb(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { let mut out = 0; - // Safety: There are absolutely no safety concerns with calling _subborrow_u32, it's just unsafe for API consistency with other intrinsics - *acc = unsafe { core::arch::x86_64::_subborrow_u32(*acc, a, b, &mut out) }; + // Safety: There are absolutely no safety concerns with calling `_subborrow_u32`. + // It's just unsafe for API consistency with other intrinsics. 
+ *acc = unsafe { arch::_subborrow_u32(*acc, a, b, &mut out) }; out } -#[cfg(not(any(use_addcarry_u64, use_addcarry_u32)))] // fallback for environments where we don't have an addcarry intrinsic +// fallback for environments where we don't have an addcarry intrinsic +#[cfg(not(use_addcarry))] #[inline] fn sbb(a: BigDigit, b: BigDigit, acc: &mut SignedDoubleBigDigit) -> BigDigit { *acc += SignedDoubleBigDigit::from(a); From e03bbc14681407bc9818eb059963b7489c6fdd37 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Fri, 30 Oct 2020 14:48:13 -0700 Subject: [PATCH 7/7] Restructure adc/sbb to match addcarry/subborrow --- src/algorithms.rs | 62 ++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/src/algorithms.rs b/src/algorithms.rs index 89fc35fe..a2037086 100644 --- a/src/algorithms.rs +++ b/src/algorithms.rs @@ -30,65 +30,55 @@ use crate::big_digit::SignedDoubleBigDigit; // Add with carry: #[cfg(all(use_addcarry, u64_digit))] #[inline] -fn adc(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { - let mut out = 0; +fn adc(carry: u8, a: u64, b: u64, out: &mut u64) -> u8 { // Safety: There are absolutely no safety concerns with calling `_addcarry_u64`. // It's just unsafe for API consistency with other intrinsics. - *acc = unsafe { arch::_addcarry_u64(*acc, a, b, &mut out) }; - out + unsafe { arch::_addcarry_u64(carry, a, b, out) } } #[cfg(all(use_addcarry, not(u64_digit)))] #[inline] -fn adc(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { - let mut out = 0; +fn adc(carry: u8, a: u32, b: u32, out: &mut u32) -> u8 { // Safety: There are absolutely no safety concerns with calling `_addcarry_u32`. // It's just unsafe for API consistency with other intrinsics. 
- *acc = unsafe { arch::_addcarry_u32(*acc, a, b, &mut out) }; - out + unsafe { arch::_addcarry_u32(carry, a, b, out) } } // fallback for environments where we don't have an addcarry intrinsic #[cfg(not(use_addcarry))] #[inline] -fn adc(a: BigDigit, b: BigDigit, acc: &mut DoubleBigDigit) -> BigDigit { - *acc += DoubleBigDigit::from(a); - *acc += DoubleBigDigit::from(b); - let lo = *acc as BigDigit; - *acc >>= big_digit::BITS; - lo +fn adc(carry: u8, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> u8 { + let sum = DoubleBigDigit::from(a) + DoubleBigDigit::from(b) + DoubleBigDigit::from(carry); + *out = sum as BigDigit; + (sum >> big_digit::BITS) as u8 } // Subtract with borrow: #[cfg(all(use_addcarry, u64_digit))] #[inline] -fn sbb(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { - let mut out = 0; +fn sbb(borrow: u8, a: u64, b: u64, out: &mut u64) -> u8 { // Safety: There are absolutely no safety concerns with calling `_subborrow_u64`. // It's just unsafe for API consistency with other intrinsics. - *acc = unsafe { arch::_subborrow_u64(*acc, a, b, &mut out) }; - out + unsafe { arch::_subborrow_u64(borrow, a, b, out) } } #[cfg(all(use_addcarry, not(u64_digit)))] #[inline] -fn sbb(a: BigDigit, b: BigDigit, acc: &mut u8) -> BigDigit { - let mut out = 0; +fn sbb(borrow: u8, a: u32, b: u32, out: &mut u32) -> u8 { // Safety: There are absolutely no safety concerns with calling `_subborrow_u32`. // It's just unsafe for API consistency with other intrinsics. 
- *acc = unsafe { arch::_subborrow_u32(*acc, a, b, &mut out) }; - out + unsafe { arch::_subborrow_u32(borrow, a, b, out) } } -// fallback for environments where we don't have an addcarry intrinsic +// fallback for environments where we don't have a subborrow intrinsic #[cfg(not(use_addcarry))] #[inline] -fn sbb(a: BigDigit, b: BigDigit, acc: &mut SignedDoubleBigDigit) -> BigDigit { - *acc += SignedDoubleBigDigit::from(a); - *acc -= SignedDoubleBigDigit::from(b); - let lo = *acc as BigDigit; - *acc >>= big_digit::BITS; - lo +fn sbb(borrow: u8, a: BigDigit, b: BigDigit, out: &mut BigDigit) -> u8 { + let difference = SignedDoubleBigDigit::from(a) + - SignedDoubleBigDigit::from(b) + - SignedDoubleBigDigit::from(borrow); + *out = difference as BigDigit; + u8::from(difference < 0) } #[inline] @@ -195,12 +185,12 @@ pub(crate) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit { let (a_lo, a_hi) = a.split_at_mut(b.len()); for (a, b) in a_lo.iter_mut().zip(b) { - *a = adc(*a, *b, &mut carry); + carry = adc(carry, *a, *b, a); } if carry != 0 { for a in a_hi { - *a = adc(*a, 0, &mut carry); + carry = adc(carry, *a, 0, a); if carry == 0 { break; } @@ -229,12 +219,12 @@ pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { let (b_lo, b_hi) = b.split_at(len); for (a, b) in a_lo.iter_mut().zip(b_lo) { - *a = sbb(*a, *b, &mut borrow); + borrow = sbb(borrow, *a, *b, a); } if borrow != 0 { for a in a_hi { - *a = sbb(*a, 0, &mut borrow); + borrow = sbb(borrow, *a, 0, a); if borrow == 0 { break; } @@ -250,16 +240,16 @@ pub(crate) fn sub2(a: &mut [BigDigit], b: &[BigDigit]) { // Only for the Sub impl. `a` and `b` must have same length. 
#[inline] -pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> BigDigit { +pub(crate) fn __sub2rev(a: &[BigDigit], b: &mut [BigDigit]) -> u8 { debug_assert!(b.len() == a.len()); let mut borrow = 0; for (ai, bi) in a.iter().zip(b) { - *bi = sbb(*ai, *bi, &mut borrow); + borrow = sbb(borrow, *ai, *bi, bi); } - borrow as BigDigit + borrow } pub(crate) fn sub2rev(a: &[BigDigit], b: &mut [BigDigit]) {