Skip to content

Commit

Permalink
MAINT: special: performance optimization for simple autodifferentiati…
Browse files Browse the repository at this point in the history
…on (scipy#21568)

* MAINT: special: performance optimization for simple autodifferentiation
  • Loading branch information
steppi authored Oct 4, 2024
1 parent 13703c7 commit bfa0caa
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 24 deletions.
4 changes: 2 additions & 2 deletions scipy/special/xsf/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,6 @@ using cuda::std::uint64_t;
#define XSF_ASSERT(a)
#endif

#endif

namespace xsf {

// basic
Expand Down Expand Up @@ -302,3 +300,5 @@ template <typename T>
using complex = complex_type_t<T>;

} // namespace xsf

#endif
41 changes: 27 additions & 14 deletions scipy/special/xsf/dual.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,29 @@
#pragma once

#include "binom.h"
#include "config.h"
#include "numbers.h"

namespace xsf {

namespace detail {
template <typename T>
constexpr T small_binom_coefs[3][3] = {
{T(1.0), T(0.0), T(0.0)}, {T(1.0), T(1.0), T(0.0)}, {T(1.0), T(2.0), T(1.0)}
};

/* Since we only compute derivatives up to order 2, we only need
* Binomial coefficients with n <= 2 for use in the General
* Leibniz rule. Get these from a lookup table. */
template <typename T>
T fast_binom(size_t n, size_t k) {
if ((n <= 2) && (k <= 2)) {
return small_binom_coefs<T>[n][k];
}
return T(xsf::binom(static_cast<double>(n), static_cast<double>(k)));
}
} // namespace detail

template <typename T, size_t Order0, size_t... Orders>
class dual;

Expand Down Expand Up @@ -82,11 +101,9 @@ class dual<T, Order> {
dual &operator*=(const dual &other) {
for (size_t i = Order + 1; i-- > 0;) {
data[i] *= other.data[0];

T coef = 1; // binomial coefficient
// General Leibniz Rule
for (size_t j = 0; j < i; ++j) {
data[i] += coef * data[j] * other.data[i - j];
coef *= T(i - j) / T(j + 1);
data[i] += detail::fast_binom<T>(i, j) * data[j] * other.data[i - j];
}
}

Expand All @@ -103,10 +120,9 @@ class dual<T, Order> {

dual &operator/=(const dual &other) {
for (size_t i = 0; i <= Order; ++i) {
T coef = 1; // binomial coefficient
// General Leibniz Rule
for (size_t j = 1; j <= i; ++j) {
data[i] -= coef * other.data[j] * data[i - j];
coef *= T(i - j) / T(j + 1);
data[i] -= detail::fast_binom<T>(i, j) * other.data[j] * data[i - j];
}

data[i] /= other.data[0];
Expand Down Expand Up @@ -215,11 +231,9 @@ class dual<T, Order0, Order1, Orders...> {
dual &operator*=(const dual &other) {
for (size_t i = Order0 + 1; i-- > 0;) {
data[i] *= other.data[0];

T coef = 1; // binomial coefficient
// General Leibniz Rule
for (size_t j = 0; j < i; ++j) {
data[i] += coef * data[j] * other.data[i - j];
coef *= T(i - j) / T(j + 1);
data[i] += detail::fast_binom<T>(i, j) * data[j] * other.data[i - j];
}
}

Expand All @@ -236,10 +250,9 @@ class dual<T, Order0, Order1, Orders...> {

dual &operator/=(const dual &other) {
for (size_t i = 0; i <= Order0; ++i) {
T coef = 1; // binomial coefficient
// General Leibniz Rule
for (size_t j = 1; j <= i; ++j) {
data[i] -= coef * other.data[j] * data[i - j];
coef *= T(i - j) / T(j + 1);
data[i] -= detail::fast_binom<T>(i, j) * other.data[j] * data[i - j];
}

data[i] /= other.data[0];
Expand Down
24 changes: 16 additions & 8 deletions scipy/special/xsf/legendre.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ struct legendre_p_recurrence_n {
T z;

void operator()(int n, T (&res)[2]) const {
T fac0 = -T(n - 1) / T(n);
T fac1 = T(2 * n - 1) / T(n);
using value_type = remove_dual_t<T>;
value_type fac0 = -value_type(n - 1) / value_type(n);
value_type fac1 = value_type(2 * n - 1) / value_type(n);

res[0] = fac0;
res[1] = fac1 * z;
Expand Down Expand Up @@ -270,8 +271,9 @@ struct assoc_legendre_p_recurrence_n<T, assoc_legendre_unnorm_policy> {
int type;

void operator()(int n, T (&res)[2]) const {
T fac0 = -T(n + m - 1) / T(n - m);
T fac1 = T(2 * n - 1) / T(n - m);
using value_type = remove_dual_t<T>;
value_type fac0 = -value_type(n + m - 1) / value_type(n - m);
value_type fac1 = value_type(2 * n - 1) / value_type(n - m);

res[0] = fac0;
res[1] = fac1 * z;
Expand All @@ -285,8 +287,11 @@ struct assoc_legendre_p_recurrence_n<T, assoc_legendre_norm_policy> {
int type;

void operator()(int n, T (&res)[2]) const {
T fac0 = -sqrt(T((2 * n + 1) * ((n - 1) * (n - 1) - m * m)) / T((2 * n - 3) * (n * n - m * m)));
T fac1 = sqrt(T((2 * n + 1) * (4 * (n - 1) * (n - 1) - 1)) / T((2 * n - 3) * (n * n - m * m)));
using value_type = remove_dual_t<T>;
value_type fac0 =
-sqrt(value_type((2 * n + 1) * ((n - 1) * (n - 1) - m * m)) / value_type((2 * n - 3) * (n * n - m * m)));
value_type fac1 =
sqrt(value_type((2 * n + 1) * (4 * (n - 1) * (n - 1) - 1)) / value_type((2 * n - 3) * (n * n - m * m)));

res[0] = fac0;
res[1] = fac1 * z;
Expand Down Expand Up @@ -566,8 +571,11 @@ struct sph_legendre_p_recurrence_n {
sph_legendre_p_recurrence_n(int m, T theta) : m(m), theta(theta), theta_cos(cos(theta)) {}

void operator()(int n, T (&res)[2]) const {
T fac0 = -sqrt(T((2 * n + 1) * ((n - 1) * (n - 1) - m * m)) / T((2 * n - 3) * (n * n - m * m)));
T fac1 = sqrt(T((2 * n + 1) * (4 * (n - 1) * (n - 1) - 1)) / T((2 * n - 3) * (n * n - m * m)));
using value_type = remove_dual_t<T>;
value_type fac0 =
-sqrt(value_type((2 * n + 1) * ((n - 1) * (n - 1) - m * m)) / value_type((2 * n - 3) * (n * n - m * m)));
value_type fac1 =
sqrt(value_type((2 * n + 1) * (4 * (n - 1) * (n - 1) - 1)) / value_type((2 * n - 3) * (n * n - m * m)));

res[0] = fac0;
res[1] = fac1 * theta_cos;
Expand Down

0 comments on commit bfa0caa

Please sign in to comment.