Skip to content

Commit

Permalink
Add parallelization option to VertexErrorFunction (#227)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #227

Added a thread-count option (`maxThreads`) to VertexErrorFunction that parallelizes getGradient and getJacobian for speed improvements. The default value of 0 runs the serial version.

Benchmark results:
Enabling multi-threading has only a minor impact on performance at low constraint counts, and gives a 3-5x speedup at higher constraint counts (10k and above).

BM_VertexErrorFunction_getJacobian/0/10            252 us          250 us         3072 items_per_second=39.9765k/s max_threads=0 num_const=10
BM_VertexErrorFunction_getJacobian/4/10            241 us          238 us         2726 items_per_second=42.046k/s max_threads=4 num_const=10
BM_VertexErrorFunction_getJacobian/16/10           254 us          252 us         2520 items_per_second=39.6628k/s max_threads=16 num_const=10
BM_VertexErrorFunction_getJacobian/64/10           300 us          298 us         2342 items_per_second=33.5753k/s max_threads=64 num_const=10
BM_VertexErrorFunction_getJacobian/0/100           293 us          291 us         2331 items_per_second=343.775k/s max_threads=0 num_const=100
BM_VertexErrorFunction_getJacobian/4/100           290 us          288 us         2422 items_per_second=347.618k/s max_threads=4 num_const=100
BM_VertexErrorFunction_getJacobian/16/100          317 us          314 us         1949 items_per_second=318.226k/s max_threads=16 num_const=100
BM_VertexErrorFunction_getJacobian/64/100          447 us          444 us         1529 items_per_second=225.36k/s max_threads=64 num_const=100
BM_VertexErrorFunction_getJacobian/0/1000          895 us          888 us          734 items_per_second=1.12596M/s max_threads=0 num_const=1000
BM_VertexErrorFunction_getJacobian/4/1000          569 us          565 us         1210 items_per_second=1.77072M/s max_threads=4 num_const=1000
BM_VertexErrorFunction_getJacobian/16/1000         488 us          484 us         1430 items_per_second=2.06509M/s max_threads=16 num_const=1000
BM_VertexErrorFunction_getJacobian/64/1000         598 us          593 us         1264 items_per_second=1.68774M/s max_threads=64 num_const=1000
BM_VertexErrorFunction_getJacobian/0/10000        9153 us         9060 us           68 items_per_second=1.10379M/s max_threads=0 num_const=10k
BM_VertexErrorFunction_getJacobian/4/10000        3776 us         3743 us          185 items_per_second=2.6717M/s max_threads=4 num_const=10k
BM_VertexErrorFunction_getJacobian/16/10000       1989 us         1974 us          358 items_per_second=5.06662M/s max_threads=16 num_const=10k
BM_VertexErrorFunction_getJacobian/64/10000       1856 us         1840 us          383 items_per_second=5.43382M/s max_threads=64 num_const=10k
BM_VertexErrorFunction_getJacobian/0/50000       30138 us        29918 us           26 items_per_second=1.67121M/s max_threads=0 num_const=50k
BM_VertexErrorFunction_getJacobian/4/50000       17568 us        17366 us           39 items_per_second=2.87913M/s max_threads=4 num_const=50k
BM_VertexErrorFunction_getJacobian/16/50000      11080 us        10995 us           71 items_per_second=4.54767M/s max_threads=16 num_const=50k
BM_VertexErrorFunction_getJacobian/64/50000       9585 us         9402 us           67 items_per_second=5.31824M/s max_threads=64 num_const=50k

Reviewed By: jeongseok-meta

Differential Revision: D70518166

fbshipit-source-id: 415d78fdbfb5f8153476cc4f46c60bfc041c5ad7
  • Loading branch information
cstollmeta authored and facebook-github-bot committed Mar 5, 2025
1 parent 57ff9aa commit c30d169
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 56 deletions.
140 changes: 106 additions & 34 deletions momentum/character_solver/vertex_error_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
#include "momentum/math/mesh.h"
#include "momentum/math/utility.h"

#include <dispenso/parallel_for.h>

#include <numeric>

namespace momentum {

std::string_view toString(VertexConstraintType type) {
Expand All @@ -30,10 +34,12 @@ std::string_view toString(VertexConstraintType type) {
template <typename T>
VertexErrorFunctionT<T>::VertexErrorFunctionT(
const Character& character_in,
VertexConstraintType type)
VertexConstraintType type,
size_t maxThreads)
: SkeletonErrorFunctionT<T>(character_in.skeleton, character_in.parameterTransform),
character_(character_in),
constraintType_(type) {
constraintType_(type),
maxThreads_(maxThreads) {
MT_CHECK(static_cast<bool>(character_in.mesh));
MT_CHECK(static_cast<bool>(character_in.skinWeights));
MT_THROW_IF(
Expand Down Expand Up @@ -753,24 +759,66 @@ double VertexErrorFunctionT<T>::getGradient(
updateMeshes(modelParameters, state);

double error = 0;
std::vector<std::tuple<double, VectorX<T>>> errorGradThread;

auto dispensoOptions = dispenso::ParForOptions();
dispensoOptions.maxThreads = maxThreads_;

if (constraintType_ == VertexConstraintType::Position) {
for (size_t iCons = 0; iCons < constraints_.size(); ++iCons) {
error += calculatePositionGradient(modelParameters, state, constraints_[iCons], gradient);
}
dispenso::parallel_for(
errorGradThread,
[&]() -> std::tuple<double, VectorX<T>> {
return {0.0, VectorX<T>::Zero(modelParameters.size())};
},
0,
constraints_.size(),
[&](std::tuple<double, VectorX<T>>& errorGradLocal, const size_t iCons) {
double& errorLocal = std::get<0>(errorGradLocal);
auto& gradLocal = std::get<1>(errorGradLocal);
errorLocal +=
calculatePositionGradient(modelParameters, state, constraints_[iCons], gradLocal);
},
dispensoOptions);
} else {
const auto [sourceNormalWeight, targetNormalWeight] = computeNormalWeights();
T sourceNormalWeight;
T targetNormalWeight;
std::tie(sourceNormalWeight, targetNormalWeight) = computeNormalWeights();

dispenso::parallel_for(
errorGradThread,
[&]() -> std::tuple<double, VectorX<T>> {
return {0.0, VectorX<T>::Zero(modelParameters.size())};
},
0,
constraints_.size(),
[&](std::tuple<double, VectorX<T>>& errorGradLocal, const size_t iCons) {
double& errorLocal = std::get<0>(errorGradLocal);
auto& gradLocal = std::get<1>(errorGradLocal);
errorLocal += calculateNormalGradient(
modelParameters,
state,
constraints_[iCons],
sourceNormalWeight,
targetNormalWeight,
gradLocal);
},
dispensoOptions);
}

for (size_t iCons = 0; iCons < constraints_.size(); ++iCons) {
error += calculateNormalGradient(
modelParameters,
state,
constraints_[iCons],
sourceNormalWeight,
targetNormalWeight,
gradient);
}
if (!errorGradThread.empty()) {
errorGradThread[0] = std::accumulate(
errorGradThread.begin() + 1,
errorGradThread.end(),
errorGradThread[0],
[](const auto& a, const auto& b) -> std::tuple<double, VectorX<T>> {
return {std::get<0>(a) + std::get<0>(b), std::get<1>(a) + std::get<1>(b)};
});

// finalize the gradient
gradient += std::get<1>(errorGradThread[0]);
error = std::get<0>(errorGradThread[0]);
}

return this->weight_ * error;
}

Expand All @@ -789,35 +837,59 @@ double VertexErrorFunctionT<T>::getJacobian(
updateMeshes(modelParameters, state);

double error = 0;
std::vector<double> errorThread;

auto dispensoOptions = dispenso::ParForOptions();
dispensoOptions.maxThreads = maxThreads_;

if (constraintType_ == VertexConstraintType::Position) {
MT_PROFILE_EVENT("VertexErrorFunction - position jacobians");

for (size_t iCons = 0; iCons < constraints_.size(); ++iCons) {
error += calculatePositionJacobian(
modelParameters,
state,
constraints_[iCons],
jacobian.block(3 * iCons, 0, 3, modelParameters.size()),
residual.middleRows(3 * iCons, 3));
}
dispenso::parallel_for(
errorThread,
[&]() -> double { return 0.0; },
0,
constraints_.size(),
[&](double& errorLocal, const size_t iCons) {
errorLocal += calculatePositionJacobian(
modelParameters,
state,
constraints_[iCons],
jacobian.block(3 * iCons, 0, 3, modelParameters.size()),
residual.middleRows(3 * iCons, 3));
},
dispensoOptions);
usedRows = 3 * constraints_.size();
} else {
MT_PROFILE_EVENT("VertexErrorFunction - normal jacobians");
const auto [sourceNormalWeight, targetNormalWeight] = computeNormalWeights();
T sourceNormalWeight;
T targetNormalWeight;
std::tie(sourceNormalWeight, targetNormalWeight) = computeNormalWeights();

dispenso::parallel_for(
errorThread,
[&]() -> double { return 0.0; },
0,
constraints_.size(),
[&](double& errorLocal, const size_t iCons) {
errorLocal += calculateNormalJacobian(
modelParameters,
state,
constraints_[iCons],
sourceNormalWeight,
targetNormalWeight,
jacobian.block(iCons, 0, 1, modelParameters.size()),
residual(iCons));
},
dispensoOptions);

for (size_t iCons = 0; iCons < constraints_.size(); ++iCons) {
error += calculateNormalJacobian(
modelParameters,
state,
constraints_[iCons],
sourceNormalWeight,
targetNormalWeight,
jacobian.block(iCons, 0, 1, modelParameters.size()),
residual(iCons));
}
usedRows = constraints_.size();
}

if (!errorThread.empty()) {
error = std::accumulate(errorThread.begin() + 1, errorThread.end(), errorThread[0]);
}

return error;
}

Expand Down
5 changes: 4 additions & 1 deletion momentum/character_solver/vertex_error_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ class VertexErrorFunctionT : public SkeletonErrorFunctionT<T> {
public:
explicit VertexErrorFunctionT(
const Character& character,
VertexConstraintType type = VertexConstraintType::Position);
VertexConstraintType type = VertexConstraintType::Position,
size_t maxThreads = 0);
virtual ~VertexErrorFunctionT() override;

[[nodiscard]] double getError(const ModelParametersT<T>& params, const SkeletonStateT<T>& state)
Expand Down Expand Up @@ -136,6 +137,8 @@ class VertexErrorFunctionT : public SkeletonErrorFunctionT<T> {

const VertexConstraintType constraintType_;

size_t maxThreads_;

void updateMeshes(const ModelParametersT<T>& modelParameters, const SkeletonStateT<T>& state);
};

Expand Down
67 changes: 46 additions & 21 deletions momentum/test/character_solver/error_functions_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ TYPED_TEST(Momentum_ErrorFunctionsTest, VertexErrorFunction) {

// create skeleton and reference values

const size_t nConstraints = 10;
const size_t nConstraints = 1000;

// Test WITHOUT blend shapes:
{
Expand Down Expand Up @@ -604,25 +604,50 @@ TYPED_TEST(Momentum_ErrorFunctionsTest, VertexErrorFunction) {
}
}();

VertexErrorFunctionT<T> errorFunction(character_orig, type);
for (size_t iCons = 0; iCons < nConstraints; ++iCons) {
errorFunction.addConstraint(
uniform<int>(0, character_orig.mesh->vertices.size() - 1),
uniform<float>(0, 1),
uniform<Vector3<T>>(0, 1),
uniform<Vector3<T>>(0, 1).normalized());
// Test SERIAL error function
{
VertexErrorFunctionT<T> errorFunction(character_orig, type, 0);
for (size_t iCons = 0; iCons < nConstraints; ++iCons) {
errorFunction.addConstraint(
uniform<int>(0, character_orig.mesh->vertices.size() - 1),
uniform<float>(0, 1),
uniform<Vector3<T>>(0, 1),
uniform<Vector3<T>>(0, 1).normalized());
}

TEST_GRADIENT_AND_JACOBIAN(
T,
&errorFunction,
modelParams,
character_orig.skeleton,
character_orig.parameterTransform.cast<T>(),
errorTol,
Eps<T>(1e-6f, 1e-15),
true,
false);
}
// Test PARALLEL error function
{
VertexErrorFunctionT<T> errorFunction(character_orig, type, 100000);
for (size_t iCons = 0; iCons < nConstraints; ++iCons) {
errorFunction.addConstraint(
uniform<int>(0, character_orig.mesh->vertices.size() - 1),
uniform<float>(0, 1),
uniform<Vector3<T>>(0, 1),
uniform<Vector3<T>>(0, 1).normalized());
}

TEST_GRADIENT_AND_JACOBIAN(
T,
&errorFunction,
modelParams,
character_orig.skeleton,
character_orig.parameterTransform.cast<T>(),
errorTol,
Eps<T>(1e-6f, 1e-15),
true,
false);
TEST_GRADIENT_AND_JACOBIAN(
T,
&errorFunction,
modelParams,
character_orig.skeleton,
character_orig.parameterTransform.cast<T>(),
errorTol,
Eps<T>(1e-6f, 1e-15),
true,
false);
}
}
}

Expand Down Expand Up @@ -654,7 +679,7 @@ TYPED_TEST(Momentum_ErrorFunctionsTest, VertexErrorFunction) {
character_blend.skeleton,
character_blend.parameterTransform.cast<T>(),
Eps<T>(1e-2f, 1e-5),
Eps<T>(1e-6f, 5e-16),
Eps<T>(1e-6f, 1e-15),
true,
false);
}
Expand Down Expand Up @@ -1266,7 +1291,7 @@ TYPED_TEST(Momentum_ErrorFunctionsTest, NormalError_GradientsAndJacobians) {
ModelParametersT<T>::Zero(transform.numAllModelParameters()),
skeleton,
transform,
1e-2f);
2e-2f);
else if constexpr (std::is_same_v<T, double>)
TEST_GRADIENT_AND_JACOBIAN(
T,
Expand Down Expand Up @@ -1401,7 +1426,7 @@ TYPED_TEST(Momentum_ErrorFunctionsTest, AimDirError_GradientsAndJacobians) {
parameters,
skeleton,
transform,
Eps<T>(1e-1f, 2e-5),
Eps<T>(1e-1f, 3e-5),
Eps<T>(1e-6f, 1e-7));
}
}
Expand Down

0 comments on commit c30d169

Please sign in to comment.