Skip to content

Commit

Permalink
writing stats, finished new menu
Browse files Browse the repository at this point in the history
  • Loading branch information
lettis committed Jan 29, 2016
1 parent c3e4642 commit 22588f7
Show file tree
Hide file tree
Showing 14 changed files with 431 additions and 344 deletions.
3 changes: 2 additions & 1 deletion src/AsciiParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ class AsciiParser {
// addressing in ROW_MAJOR format: elem(i,j) = block[i*n_cols+j]
// addressing in COL_MAJOR format: elem(i,j) = block[j*n+i]
// if less then n lines can be read (e.g. in case of EOF), n will
// be reset to the actually number of lines read.
// be reset to the actual number of lines read.
// If n is initialized to 0, read all lines until EOF.
std::vector<T> next_n_lines_continuous(std::size_t& n, Flags mode=AsciiParser::ROW_MAJOR);
// EOF reached?
bool eof();
Expand Down
40 changes: 31 additions & 9 deletions src/AsciiParser.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -96,33 +96,55 @@ std::vector< std::vector<T> > AsciiParser<T>::next_n_lines(std::size_t n) {

template <typename T>
std::vector<T> AsciiParser<T>::next_n_lines_continuous(std::size_t& n, AsciiParser::Flags mode) {
std::vector<T> res(n * this->_n_cols);
// use result-vector directly if n is known
std::vector<T> res(n * _n_cols);
// use this, if n == 0, i.e. if number of rows is unknown
std::vector<std::vector<T>> res_infty_n(_n_cols, std::vector<T>());
std::size_t i=0;
while (this->_comments_ignored()) {
if (i < n) {
for (std::size_t j=0; j < this->_n_cols; ++j) {
if (n == 0) {
// read complete file
for (std::size_t j=0; j < _n_cols; ++j) {
T buf;
_fh_in >> buf;
res_infty_n[j].push_back(buf);
}
} else if (i < n) {
// read until max number of rows is reached
for (std::size_t j=0; j < _n_cols; ++j) {
if (mode == ROW_MAJOR) {
this->_fh_in >> res[i*this->_n_cols+j];
this->_fh_in >> res[i*_n_cols+j];
} else if (mode == COL_MAJOR) {
this->_fh_in >> res[j*n+i];
}
}
++i;
} else {
break;
}
++i;
}
// handle case that less than n lines have been read
if (i != n) {
if (mode == ROW_MAJOR) {
// trivial, just shorten vector
res.resize(i * this->_n_cols);
res.resize(i * _n_cols);
if (n == 0) {
for (std::size_t ii=0; ii < i; ++ii) {
for (std::size_t jj=0; jj < _n_cols; ++jj) {
res[ii*_n_cols+jj] = res_infty_n[jj][ii];
}
}
}
} else if (mode == COL_MAJOR) {
// recopy to new, smaller vector
std::vector<T> buf(i * this->_n_cols);
std::vector<T> buf(i * _n_cols);
for (std::size_t ii=0; ii < i; ++ii) {
for (std::size_t jj=0; jj < this->_n_cols; ++jj) {
buf[jj*i+ii] = res[jj*n+ii];
for (std::size_t jj=0; jj < _n_cols; ++jj) {
if (n == 0) {
buf[jj*i+ii] = res_infty_n[jj][ii];
} else {
buf[jj*i+ii] = res[jj*n+ii];
}
}
}
res = buf;
Expand Down
38 changes: 15 additions & 23 deletions src/covariance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,24 +107,14 @@ namespace FastPCA {
_covariance_matrix(const std::string filename
, const std::size_t max_chunk_size
, bool use_correlation
, bool periodic) {
// get means and sigmas if needed
std::vector<double> means;
std::vector<double> sigmas;
if (periodic) {
std::tie(std::ignore, std::ignore, means) = FastPCA::Periodic::means(filename, max_chunk_size);
} else {
std::tie(std::ignore, std::ignore, means) = FastPCA::means(filename, max_chunk_size);
}
if (use_correlation) {
if (periodic) {
sigmas = FastPCA::Periodic::sigmas(filename, max_chunk_size, means);
} else {
sigmas = FastPCA::sigmas(filename, max_chunk_size, means);
}
for (double& s: sigmas) {
s = 1.0 / s;
}
, bool periodic
, Matrix<double> stats) {
std::size_t n_variables = stats.n_rows();
std::vector<double> means(n_variables);
std::vector<double> inverse_sigmas(n_variables);
for (std::size_t i=0; i < n_variables; ++i) {
means[i] = stats(i,0);
inverse_sigmas[i] = 1.0 / stats(i,1);
}
// take only half size because of intermediate results.
std::size_t chunk_size = max_chunk_size / 2;
Expand All @@ -145,7 +135,7 @@ namespace FastPCA {
}
// normalize columns by dividing by sigma
if (use_correlation) {
FastPCA::scale_matrix_columns_inplace(m, sigmas);
FastPCA::scale_matrix_columns_inplace(m, inverse_sigmas);
}
acc = _join_accumulations(acc, _accumulate_covariance(m));
m = std::move(input_file.next_block());
Expand All @@ -162,16 +152,18 @@ namespace FastPCA {
SymmetricMatrix<double>
covariance_matrix(const std::string filename
, const std::size_t max_chunk_size
, bool use_correlation) {
return _covariance_matrix(filename, max_chunk_size, use_correlation, true);
, bool use_correlation
, Matrix<double> stats) {
return _covariance_matrix(filename, max_chunk_size, use_correlation, true, stats);
}
} // end namespace FastPCA::Periodic

SymmetricMatrix<double>
covariance_matrix(const std::string filename
, const std::size_t max_chunk_size
, bool use_correlation) {
return _covariance_matrix(filename, max_chunk_size, use_correlation, false);
, bool use_correlation
, Matrix<double> stats) {
return _covariance_matrix(filename, max_chunk_size, use_correlation, false, stats);
}
} // end namespace FastPCA

6 changes: 4 additions & 2 deletions src/covariance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,15 @@ namespace FastPCA {
SymmetricMatrix<double>
covariance_matrix(const std::string filename
, const std::size_t max_chunk_size
, bool use_correlation);
, bool use_correlation
, Matrix<double> stats);

namespace Periodic {
SymmetricMatrix<double>
covariance_matrix(const std::string filename
, const std::size_t max_chunk_size
, bool use_correlation);
, bool use_correlation
, Matrix<double> stats);
}; // end namespace FastPCA::Periodic

} // end namespace FastPCA
Expand Down
92 changes: 36 additions & 56 deletions src/file_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,33 +71,29 @@ namespace FastPCA {
const std::string file_out,
Matrix<double> eigenvecs,
std::size_t mem_buf_size,
bool use_correlation) {
bool use_correlation,
Matrix<double> stats) {
// calculating the projection, we need twice the space
// (original data + result)
mem_buf_size /= 4;
bool append_to_file = false;
std::vector<double> means;
std::vector<double> sigmas;
std::tie(std::ignore, std::ignore, means) = FastPCA::means(file_in, mem_buf_size);
if (use_correlation) {
sigmas = FastPCA::sigmas(file_in, mem_buf_size, means);
for (double& s: sigmas) {
s = 1.0 / s;
}
std::size_t n_variables = stats.n_rows();
std::vector<double> means(n_variables);
std::vector<double> inverse_sigmas(n_variables);
for (std::size_t i=0; i < n_variables; ++i) {
means[i] = stats(i,0);
inverse_sigmas[i] = 1.0 / stats(i,1);
}
bool append_to_file = false;
DataFileReader<double> fh_file_in(file_in, mem_buf_size);
DataFileWriter<double> fh_file_out(file_out);
while ( ! fh_file_in.eof()) {
Matrix<double> m = std::move(fh_file_in.next_block());
if (m.n_rows() > 0) {
FastPCA::shift_matrix_columns_inplace(m, means);
if (use_correlation) {
FastPCA::scale_matrix_columns_inplace(m, sigmas);
}
fh_file_out.write(std::move(m*eigenvecs), append_to_file);
append_to_file = true;
read_blockwise(fh_file_in, [&](Matrix<double>& m) {
FastPCA::shift_matrix_columns_inplace(m, means);
if (use_correlation) {
FastPCA::scale_matrix_columns_inplace(m, inverse_sigmas);
}
}
fh_file_out.write(std::move(m*eigenvecs), append_to_file);
append_to_file = true;
});
}

void
Expand All @@ -117,33 +113,22 @@ namespace FastPCA {
Matrix<double> eigenvecs,
std::size_t mem_buf_size,
bool use_correlation,
bool use_dih_shifts) {
Matrix<double> stats) {
mem_buf_size /= 4;
std::vector<double> means;
std::vector<double> sigmas;
std::vector<double> dih_shifts;
std::vector<double> scaled_periodicities;
// compute means
if ((!use_dih_shifts) || use_correlation) {
std::tie(std::ignore, std::ignore, means) = FastPCA::Periodic::means(file_in, mem_buf_size);
}
// compute sigmas
if (use_correlation) {
sigmas = FastPCA::Periodic::sigmas(file_in, mem_buf_size, means);
scaled_periodicities.resize(sigmas.size(), 2*M_PI);
for (std::size_t j=0; j < sigmas.size(); ++j) {
// invert sigmas for easier rescaling
sigmas[j] = 1.0 / sigmas[j];
scaled_periodicities[j] *= sigmas[j];
}
}
// compute dih-shifts
if (use_dih_shifts) {
dih_shifts = FastPCA::Periodic::dih_shifts(file_in, mem_buf_size);
std::size_t n_variables = stats.n_rows();
std::vector<double> means(n_variables);
std::vector<double> inverse_sigmas(n_variables);
std::vector<double> dih_shifts(n_variables);
std::vector<double> scaled_periodicities(n_variables);
for (std::size_t i=0; i < n_variables; ++i) {
means[i] = stats(i,0);
inverse_sigmas[i] = 1.0 / stats(i,1);
if (use_correlation) {
for (std::size_t i=0; i < dih_shifts.size(); ++i) {
dih_shifts[i] = (dih_shifts[i] - means[i]) * sigmas[i];
}
dih_shifts[i] = (stats(i,2) - means[i]) * inverse_sigmas[i];
scaled_periodicities[i] = 2*M_PI * inverse_sigmas[i];
} else {
dih_shifts[i] = stats(i,2);
scaled_periodicities[i] = 2*M_PI;
}
}
// projections
Expand All @@ -153,20 +138,15 @@ namespace FastPCA {
read_blockwise(fh_file_in, [&](Matrix<double>& m) {
// convert degrees to radians
FastPCA::deg2rad_inplace(m);
if (use_correlation || (! use_dih_shifts)) {
// shift by periodic means
FastPCA::Periodic::shift_matrix_columns_inplace(m, means);
} else if (use_dih_shifts) {
// shift by optimal dih-shifts
FastPCA::Periodic::shift_matrix_columns_inplace(m, dih_shifts);
}
if (use_correlation) {
// shift by periodic means (necessary for scaling)
FastPCA::Periodic::shift_matrix_columns_inplace(m, means);
// scale data by sigmas for correlated projections
FastPCA::scale_matrix_columns_inplace(m, sigmas);
if (use_dih_shifts) {
FastPCA::Periodic::shift_matrix_columns_inplace(m, dih_shifts, scaled_periodicities);
}
FastPCA::scale_matrix_columns_inplace(m, inverse_sigmas);
}
// shift dihedrals to minimize boundary jumps
// and correct for periodic boundary condition
FastPCA::Periodic::shift_matrix_columns_inplace(m, dih_shifts, scaled_periodicities);
// output
fh_file_out.write(m*eigenvecs, append_to_file);
append_to_file = true;
Expand Down
5 changes: 3 additions & 2 deletions src/file_io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ namespace FastPCA {
const std::string file_out,
Matrix<double> eigenvecs,
std::size_t mem_buf_size,
bool use_correlation);
bool use_correlation,
Matrix<double> stats);

/**
* read input file blockwise and pass reference of current matrix
Expand All @@ -151,7 +152,7 @@ namespace FastPCA {
Matrix<double> eigenvecs,
std::size_t mem_buf_size,
bool use_correlation,
bool use_dih_shifts);
Matrix<double> stats);
} // end namespace FastPCA::Periodic
} // end namespace FastPCA

Expand Down
8 changes: 2 additions & 6 deletions src/file_io.hxx
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

/*
Copyright (c) 2014, Florian Sittel (www.lettis.net)
All rights reserved.
Expand Down Expand Up @@ -85,7 +84,6 @@ namespace FastPCA {
fh.open(file_out, std::ios::out);
}
if (fh.is_open()) {
//int n_digits = std::numeric_limits<T>::digits10;
int n_digits = 6;
fh.precision(n_digits);
fh.setf(std::ios::scientific);
Expand Down Expand Up @@ -221,10 +219,8 @@ namespace FastPCA {
_n_cols(0),
_eof(false) {
this->_ftype = filename_suffix(filename);

if (this->_ftype == XTC) {
// use XTC file
//this->_fh_xtc = UseXTC::XTCFile(filename, "r");
this->_fh_xtc = std::shared_ptr<XDRFILE>(xdrfile_open(filename.c_str(), "r"),
// deleter function called when pointer goes out of scope
[](XDRFILE* f) {
Expand All @@ -239,7 +235,8 @@ namespace FastPCA {

template <class T>
DataFileReader<T>::DataFileReader(std::string filename)
: DataFileReader(filename, FastPCA::gigabytes_to_bytes(1)) {
// zero byte buffer: read everything at once
: DataFileReader(filename, 0) {
}

template <class T>
Expand Down Expand Up @@ -270,7 +267,6 @@ namespace FastPCA {
if (this->_n_cols == 0) {
throw FileFormatError();
}
//return UseCSV::read_chunk<T>(this->_fh, this->_n_cols, n_possible_rows);
return UseCSV::read_chunk<T>(this->_parser, n_possible_rows);
}
}
Expand Down
11 changes: 7 additions & 4 deletions src/matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,10 @@ void SymmetricMatrix<float>::_solve_eigensystem() {
_prepareSyevData<float>(dat, (*this), dat.work[0]);
ssyev_(&dat.jobz, &dat.uplo, &dat.n, &dat.a[0], &dat.lda, &dat.w[0], &dat.work[0], &dat.lwork, &dat.info);
// extract data
this->_eigenvalues = _extractEigenvalues<float>(dat);
this->_eigenvectors = _extractEigenvectors<float>(dat);
_eigenvalues = _extractEigenvalues<float>(dat);
_eigenvectors = _extractEigenvectors<float>(dat);
// enforce ordering from highest to lowest eigenvalue;
_enforce_eigval_order();
}

template <>
Expand All @@ -67,8 +69,9 @@ void SymmetricMatrix<double>::_solve_eigensystem() {
_prepareSyevData<double>(dat, (*this), dat.work[0]);
dsyev_(&dat.jobz, &dat.uplo, &dat.n, &dat.a[0], &dat.lda, &dat.w[0], &dat.work[0], &dat.lwork, &dat.info);
// extract data
this->_eigenvalues = _extractEigenvalues<double>(dat);
this->_eigenvectors = _extractEigenvectors<double>(dat);
_eigenvalues = _extractEigenvalues<double>(dat);
_eigenvectors = _extractEigenvectors<double>(dat);
_enforce_eigval_order();
}

} // end namespace FastPCA
Expand Down
7 changes: 7 additions & 0 deletions src/matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ class SymmetricMatrix {

bool _eigensystem_is_solved;
void _solve_eigensystem();
void _enforce_eigval_order();

std::vector<T> _eigenvalues;
Matrix<T> _eigenvectors;
Expand Down Expand Up @@ -130,6 +131,12 @@ namespace {
std::vector<T> _extractEigenvalues(const SyevData<T>& dat);
} // end local namespace



template <class T>
void
copy_column(const Matrix<T>& m_from, std::size_t i_col_from, Matrix<T>& m_to, std::size_t i_col_to);

} // end namespace FastPCA

// template implementations
Expand Down
Loading

0 comments on commit 22588f7

Please sign in to comment.