Skip to content

Commit

Permalink
bugfix: proper shifts for cov-matrix in dPCA+
Browse files Browse the repository at this point in the history
  • Loading branch information
lettis committed Jul 12, 2017
1 parent 381a7e1 commit f127d6f
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 75 deletions.
35 changes: 25 additions & 10 deletions src/covariance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,14 @@ namespace FastPCA {

namespace {

_CovAccumulation::_CovAccumulation(SymmetricMatrix<double> m, std::size_t n)
_CovAccumulation::_CovAccumulation(SymmetricMatrix<double> m
, std::size_t n)
: m(m),
n_observations(n) {
}

_CovAccumulation::_CovAccumulation(std::size_t n_observations, std::size_t n_observables)
_CovAccumulation::_CovAccumulation(std::size_t n_observations
, std::size_t n_observables)
: m(SymmetricMatrix<double>(n_observables)),
n_observations(n_observations) {
}
Expand Down Expand Up @@ -86,7 +88,8 @@ namespace FastPCA {
}

_CovAccumulation
_join_accumulations(const _CovAccumulation& c1, const _CovAccumulation& c2) {
_join_accumulations(const _CovAccumulation& c1
, const _CovAccumulation& c2) {
assert(c1.m.n_cols() == c2.m.n_cols());
return {c1.m+c2.m, c1.n_observations+c2.n_observations};
}
Expand All @@ -111,10 +114,12 @@ namespace FastPCA {
, Matrix<double> stats) {
std::size_t n_variables = stats.n_rows();
std::vector<double> means(n_variables);
std::vector<double> shifts(n_variables);
std::vector<double> inverse_sigmas(n_variables);
for (std::size_t i=0; i < n_variables; ++i) {
means[i] = stats(i,0);
inverse_sigmas[i] = 1.0 / stats(i,1);
shifts[i] = stats(i,2);
}
// take only half size because of intermediate results.
std::size_t chunk_size = max_chunk_size / 2;
Expand All @@ -126,16 +131,18 @@ namespace FastPCA {
_CovAccumulation acc(0, n_cols);
Matrix<double> m = std::move(input_file.next_block());
while (m.n_rows() > 0) {
// center data
// periodic correction
if (periodic) {
FastPCA::deg2rad_inplace(m);
FastPCA::Periodic::shift_matrix_columns_inplace(m, means);
} else {
FastPCA::shift_matrix_columns_inplace(m, means);
FastPCA::Periodic::shift_matrix_columns_inplace(m
, shifts);
}
// center data
FastPCA::shift_matrix_columns_inplace(m, means);
// normalize columns by dividing by sigma
if (use_correlation) {
FastPCA::scale_matrix_columns_inplace(m, inverse_sigmas);
FastPCA::scale_matrix_columns_inplace(m
, inverse_sigmas);
}
acc = _join_accumulations(acc, _accumulate_covariance(m));
m = std::move(input_file.next_block());
Expand All @@ -154,7 +161,11 @@ namespace FastPCA {
, const std::size_t max_chunk_size
, bool use_correlation
, Matrix<double> stats) {
return _covariance_matrix(filename, max_chunk_size, use_correlation, true, stats);
return _covariance_matrix(filename
, max_chunk_size
, use_correlation
, true
, stats);
}
} // end namespace FastPCA::Periodic

Expand All @@ -163,7 +174,11 @@ namespace FastPCA {
, const std::size_t max_chunk_size
, bool use_correlation
, Matrix<double> stats) {
return _covariance_matrix(filename, max_chunk_size, use_correlation, false, stats);
return _covariance_matrix(filename
, max_chunk_size
, use_correlation
, false
, stats);
}
} // end namespace FastPCA

17 changes: 3 additions & 14 deletions src/file_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,11 @@ namespace FastPCA {
std::size_t n_variables = stats.n_rows();
std::vector<double> means(n_variables);
std::vector<double> inverse_sigmas(n_variables);
std::vector<double> dih_shifts(n_variables);
std::vector<double> scaled_periodicities(n_variables);
std::vector<double> shifts(n_variables);
for (std::size_t i=0; i < n_variables; ++i) {
means[i] = stats(i,0);
inverse_sigmas[i] = 1.0 / stats(i,1);
if (use_correlation) {
dih_shifts[i] = (stats(i,2) - means[i]) * inverse_sigmas[i];
scaled_periodicities[i] = 2*M_PI * inverse_sigmas[i];
} else {
dih_shifts[i] = stats(i,2);
scaled_periodicities[i] = 2*M_PI;
}
shifts[i] = stats(i,2);
}
// projections
bool append_to_file = false;
Expand All @@ -138,15 +131,11 @@ namespace FastPCA {
read_blockwise(fh_file_in, [&](Matrix<double>& m) {
// convert degrees to radians
FastPCA::deg2rad_inplace(m);
FastPCA::Periodic::shift_matrix_columns_inplace(m, shifts);
if (use_correlation) {
// shift by periodic means (necessary for scaling)
FastPCA::Periodic::shift_matrix_columns_inplace(m, means);
// scale data by sigmas for correlated projections
FastPCA::scale_matrix_columns_inplace(m, inverse_sigmas);
}
// shift dihedrals to minimize boundary jumps
// and correct for periodic boundary condition
FastPCA::Periodic::shift_matrix_columns_inplace(m, dih_shifts, scaled_periodicities);
// output
fh_file_out.write(m*eigenvecs, append_to_file);
append_to_file = true;
Expand Down
113 changes: 62 additions & 51 deletions src/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,20 @@ namespace FastPCA {
namespace {
std::tuple<std::size_t, std::size_t, std::vector<double>>
_means(const std::string filename
, const std::size_t max_chunk_size) {
, const std::size_t max_chunk_size
, std::vector<double> shifts = {}) {
DataFileReader<double> input_file(filename, max_chunk_size);
std::size_t n_cols = input_file.n_cols();
std::size_t n_rows = 0;
std::vector<double> means(n_cols);
Matrix<double> m = std::move(input_file.next_block());
while (m.n_rows() > 0) {
n_rows += m.n_rows();
// correction for periodic data
if (shifts.size() > 0) {
FastPCA::deg2rad_inplace(m);
FastPCA::Periodic::shift_matrix_columns_inplace(m, shifts);
}
for (std::size_t i=0; i < m.n_rows(); ++i) {
for (std::size_t j=0; j < n_cols; ++j) {
means[j] += m(i,j);
Expand All @@ -56,60 +62,60 @@ namespace FastPCA {
}
return std::make_tuple(n_rows, n_cols, means);
}

// compute circular means by averaging sines and cosines
// and resolving the mean angle with the atan2 function.
// additionally, return number of observations.
std::tuple<std::size_t, std::size_t, std::vector<double>>
_circular_means(const std::string filename
, const std::size_t max_chunk_size) {
DataFileReader<double> input_file(filename, max_chunk_size);
Matrix<double> m = std::move(input_file.next_block());
FastPCA::deg2rad_inplace(m);
std::size_t i, j;
std::size_t nr = m.n_rows();
std::size_t nc = m.n_cols();
std::vector<double> means(nc, 0.0);
std::vector<double> means_sin(nc, 0.0);
std::vector<double> means_cos(nc, 0.0);
std::size_t n_rows_total = 0;
while (nr > 0) {
for (j=0; j < nc; ++j) {
for (i=0; i < nr; ++i) {
means_sin[j] += sin(m(i,j));
means_cos[j] += cos(m(i,j));
}
}
n_rows_total += nr;
m = std::move(input_file.next_block());
FastPCA::deg2rad_inplace(m);
nr = m.n_rows();
}
for (j=0; j < nc; ++j) {
means[j] = std::atan2(means_sin[j]/n_rows_total, means_cos[j]/n_rows_total);
}
return std::make_tuple(n_rows_total, nc, means);
}
// std::tuple<std::size_t, std::size_t, std::vector<double>>
// _circular_means(const std::string filename
// , const std::size_t max_chunk_size) {
// DataFileReader<double> input_file(filename, max_chunk_size);
// Matrix<double> m = std::move(input_file.next_block());
// FastPCA::deg2rad_inplace(m);
// std::size_t i, j;
// std::size_t nr = m.n_rows();
// std::size_t nc = m.n_cols();
// std::vector<double> means(nc, 0.0);
// std::vector<double> means_sin(nc, 0.0);
// std::vector<double> means_cos(nc, 0.0);
// std::size_t n_rows_total = 0;
// while (nr > 0) {
// for (j=0; j < nc; ++j) {
// for (i=0; i < nr; ++i) {
// means_sin[j] += sin(m(i,j));
// means_cos[j] += cos(m(i,j));
// }
// }
// n_rows_total += nr;
// m = std::move(input_file.next_block());
// FastPCA::deg2rad_inplace(m);
// nr = m.n_rows();
// }
// for (j=0; j < nc; ++j) {
// means[j] = std::atan2(means_sin[j]/n_rows_total
// , means_cos[j]/n_rows_total);
// }
// return std::make_tuple(n_rows_total, nc, means);
// }

std::vector<double>
_sigmas(const std::string filename
, const std::size_t max_chunk_size
, std::vector<double> means
, bool periodic) {
, std::vector<double> shifts = {}) {
DataFileReader<double> input_file(filename, max_chunk_size);
std::size_t n_cols = means.size();
std::size_t n_rows = 0;
std::vector<double> sigmas(n_cols, 0.0);
Matrix<double> m = std::move(input_file.next_block());
while (m.n_rows() > 0) {
n_rows += m.n_rows();
// subtract means
if (periodic) {
// periodic correction
if (shifts.size() > 0) {
FastPCA::deg2rad_inplace(m);
FastPCA::Periodic::shift_matrix_columns_inplace(m, means);
} else {
FastPCA::shift_matrix_columns_inplace(m, means);
FastPCA::Periodic::shift_matrix_columns_inplace(m, shifts);
}
FastPCA::shift_matrix_columns_inplace(m, means);
// compute variances
for (std::size_t i=0; i < m.n_rows(); ++i) {
for (std::size_t j=0; j < n_cols; ++j) {
Expand Down Expand Up @@ -368,20 +374,8 @@ namespace FastPCA {
, bool periodic
, bool dynamic_shift) {
std::vector<double> means;
std::vector<double> sigmas;
std::size_t n_rows, n_cols;
// means
if (periodic) {
std::tie(n_rows, n_cols, means) = _circular_means(filename
, max_chunk_size);
} else {
std::tie(n_rows, n_cols, means) = _means(filename
, max_chunk_size);
}
// sigmas
std::vector<double> sigmas = _sigmas(filename
, max_chunk_size
, means
, periodic);
// shifts
std::vector<double> shifts;
if (periodic && dynamic_shift) {
Expand All @@ -391,6 +385,23 @@ namespace FastPCA {
shifts = _dih_shifts_static(filename
, max_chunk_size);
}
// means & sigmas
if (periodic) {
std::tie(n_rows, n_cols, means) = _means(filename
, max_chunk_size
, shifts);
sigmas = _sigmas(filename
, max_chunk_size
, means
, shifts);
} else {
std::tie(n_rows, n_cols, means) = _means(filename
, max_chunk_size);
sigmas = _sigmas(filename
, max_chunk_size
, means);
}
// output
int n_cols_out;
if (periodic) {
n_cols_out = 3;
Expand Down

0 comments on commit f127d6f

Please sign in to comment.