Skip to content

Commit

Permalink
Astropy lombscargle spec (#20)
Browse files Browse the repository at this point in the history
* Create class GACF.
Follows Astropy LombScargle implementation.
* bump version to 1.0.0
  • Loading branch information
joshbriegal authored Jun 15, 2021
1 parent f8d9f26 commit 8904fab
Show file tree
Hide file tree
Showing 6 changed files with 424 additions and 268 deletions.
6 changes: 4 additions & 2 deletions gacf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
""" Root module of package """

from .gacf import find_correlation_from_file, find_correlation_from_lists, \
find_correlation_from_lists_cpp, GACF_LOG_MESSAGE
from .gacf import (
GACF_LOG_MESSAGE,
GACF,
)
from .datastructure import EmptyDataStructureException, BadDataFileReadException
40 changes: 8 additions & 32 deletions gacf/correlator/Correlator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,9 @@ Correlator::Correlator(DataStructure* ds_in){
}
max_lag = ds->rnormalised_timeseries()->back();
min_lag = -max_lag;
// lag_resolution = max_lag / M_datapoints; // Naive implementation, should use smallest difference
lag_resolution = findMinDiff(ds->rnormalised_timeseries());
alpha = ds->median_time();
num_lag_steps = (int) std::floor((max_lag - min_lag) / lag_resolution) + 3; // 2 additional elements at 0, max
num_lag_steps = (int) std::floor((max_lag - min_lag) / lag_resolution) + 3; // 3 additional elements at min, 0, max
correlation_data._correlations = std::vector<double>(N_datasets * num_lag_steps);
correlation_data._timeseries = std::vector<double>(num_lag_steps);
};
Expand All @@ -61,9 +60,6 @@ Correlator::Correlator(DataStructure *, DataStructure *) {
*/
}

//std::vector<double>* Correlator::rnormalised_timeseries(){ return ds->rnormalised_timeseries(); }
//std::vector< std::vector<double> >* Correlator::rvalues(){ return ds->rdata(); }

std::vector<double> Correlator::normalised_timeseries(){ return ds->normalised_timeseries(); }
std::vector< std::vector<double> > Correlator::values(){ return ds->data_2d(); }
std::vector<double> Correlator::lag_timeseries() { return correlation_data._timeseries; };
Expand Down Expand Up @@ -98,17 +94,12 @@ void Correlator::naturalSelectionFunctionIdx(CorrelationIterator* cor_it){
lower_bound(ds->rnormalised_timeseries()->begin(),
ds->rnormalised_timeseries()->end(),
(double) cor_it->shifted_timeseries[i]));
// std::cout << "Index for shift = " << cor_it->shifted_timeseries[i] << ": " << idx;
if(idx < 0){ idx = 0L; } //to prevent negative }
// if(idx < cor_it->shifted_timeseries.size()-1) { // i.e. we haven't found the last value
// if (abs(cor_it->shifted_timeseries[i] - ds->rnormalised_timeseries()->at(idx + 1))
// < abs(cor_it->shifted_timeseries[i] - ds->rnormalised_timeseries()->at(idx))) { idx = idx + 1; }
// }
if(idx < 0){ idx = 0L; } //to prevent negative

if(idx != 0){ //i.e. not the first index
if (abs(cor_it->shifted_timeseries[i] - ds->rnormalised_timeseries()->at(idx - 1))
< abs(cor_it->shifted_timeseries[i] - ds->rnormalised_timeseries()->at(idx))) { idx = idx - 1; }
}
// std::cout << " (" << idx << " adjusted)" << std::endl;
cor_it->selection_indices.push_back(idx);
}
}
Expand All @@ -121,7 +112,7 @@ void Correlator::fastSelectionFunctionIdx(CorrelationIterator* cor_it){
* by finding the closest point for the first data point and filling in the rest by index.
*/
for(auto const& time_point: *ds->rnormalised_timeseries()){
if((time_point + cor_it->k) > ds->rnormalised_timeseries()->back()){
if((time_point + cor_it->k) >= ds->rnormalised_timeseries()->back()){
break;
}
else { cor_it->shifted_timeseries.push_back(time_point + cor_it->k); }
Expand Down Expand Up @@ -234,14 +225,12 @@ void Correlator::addCorrelationData(CorrelationIterator* col_it, int timestep_nu
for(int j = 0; j < N_datasets; j++){
correlation_data._correlations[timestep_number + num_lag_steps * j] = col_it->correlation[j];
}
// this->correlation_data._correlations.push_back(col_it->correlation);
}

void Correlator::cleanCorrelationData(int i){
// clean up extra elements at end of vector not used.
correlation_data._timeseries.resize(i);
std::vector<double> copy_correlations = std::vector<double>(N_datasets * i);
// std::vector< std::vector<double> > tcorrs = correlations();
for(int j = 0; j < N_datasets; j++){
auto start = correlation_data._correlations.begin() + (j * num_lag_steps);
std::copy(start, start + i, copy_correlations.begin() + (j * i));
Expand All @@ -259,8 +248,11 @@ void Correlator::calculateStandardCorrelation(){
if(k==0){
is_positive = true;
}
if(k > 0 && !is_positive){
if((k > 0) && (!is_positive)){
// we have gone beyond 0, so calculate for correlation 0 and increment counter.
_calculateStandardCorrelation(0, i);
is_positive = true;
i++;
}
_calculateStandardCorrelation(k, i);
k += lag_resolution;
Expand All @@ -281,19 +273,3 @@ void Correlator::_calculateStandardCorrelation(double k, int i){
addCorrelationData(col_it, i);
delete col_it;
}

//void Correlator::standardCorrelation(double k, MemberPointerType weight_function, double alpha){
// // define weight function & alpha (scale length of weight function)
//
// auto const& col_it = new CorrelationIterator(k);
// this->naturalSelectionFunctionIdx(col_it);
//// this->fastSelectionFunctionIdx(col_it);
// this->deltaT(col_it);
// this->getWeights(col_it, weight_function, alpha);
// this->findCorrelation(col_it);
// this->correlation_data.t.push_back(k);
// this->correlation_data.X.push_back(col_it->correlation);
// delete col_it;
//}


262 changes: 136 additions & 126 deletions gacf/gacf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,135 +2,145 @@
from .correlator import CorrelationIterator, Correlator

SELECTION_FUNCTIONS = {
'fast': Correlator.fastSelectionFunctionIdx,
'natural': Correlator.naturalSelectionFunctionIdx
"fast": Correlator.fastSelectionFunctionIdx,
"natural": Correlator.naturalSelectionFunctionIdx,
}

WEIGHT_FUNCTIONS = {
'gaussian': Correlator.getGaussianWeights,
'fractional': Correlator.getFractionWeights
"gaussian": Correlator.getGaussianWeights,
"fractional": Correlator.getFractionWeights,
}

GACF_LOG_MESSAGE = ' ###### ### ###### ######## \n' \
'## ## ## ## ## ## ## \n' \
'## ## ## ## ## \n' \
'## #### ## ## ## ###### \n' \
'## ## ######### ## ## \n' \
'## ## ## ## ## ## ## \n' \
' ###### ## ## ###### ## \n' \
'------------------------------\n' \
'Number of Data Points: {no_data}\n' \
'Number of Lag Timesteps: {no_lag_points}\n' \
'Lag Resolution: {lag_resolution}\n' \
'------------------------------\n'


def set_up_correlation(corr, min_lag=None, max_lag=None, lag_resolution=None, alpha=None):
""" No return type. Applies non-default values to correlator """
if max_lag is not None:
corr.max_lag = max_lag
if min_lag is not None:
corr.min_lag = min_lag
if lag_resolution is not None:
corr.lag_resolution = lag_resolution
if alpha is not None:
corr.alpha = alpha


def find_correlation(corr, selection_function='natural', weight_function='gaussian'):
def lag_generator(min_lag, max_lag, lag_resolution):
k = min_lag
is_positive = False
while k <= max_lag:
if k == 0:
is_positive = True
if k > 0 and not is_positive:
is_positive = True
yield 0
yield k
k += lag_resolution

num_steps = int(round((corr.max_lag - corr.min_lag) / corr.lag_resolution))

for i, k in enumerate(lag_generator(corr.min_lag, corr.max_lag, corr.lag_resolution)):
col_it = CorrelationIterator(k, corr.N_datasets)
SELECTION_FUNCTIONS[selection_function](corr, col_it)
corr.deltaT(col_it)
WEIGHT_FUNCTIONS[weight_function](corr, col_it)
corr.findCorrelation(col_it)
corr.addCorrelationData(col_it, i)

corr.cleanCorrelationData(i + 1)


def find_correlation_from_file(filename, min_lag=None, max_lag=None, lag_resolution=None, selection_function='natural',
weight_function='gaussian', alpha=None):
"""
:param filename: string filename containing data in columns X, t, X_err
:param max_lag: max lag in days
:param lag_resolution: lag resolution in days
:param selection_function: 'fast' or 'natural' - see paper for more details
:param weight_function: 'gaussian' or 'fractional' see paper for more details
:param alpha: weight function characteristic length scale, default is t.median_time
:return: { 'lag_timeseries':[], 'correlations':[] }
"""
ds = DataStructure(filename)
corr = Correlator(ds)

set_up_correlation(corr, min_lag, max_lag, lag_resolution, alpha)
find_correlation(corr, selection_function, weight_function)
# return a 3 tuple of lag timeseries, correlations, corr object
return (corr.lag_timeseries(),
corr.correlations() if len(corr.correlations()) > 1. else corr.correlations()[0],
corr)


def find_correlation_from_lists(timeseries, values, errors=None, min_lag=None, max_lag=None, lag_resolution=None,
selection_function='natural', weight_function='gaussian', alpha=None):
"""
:param timeseries: list of time values
:param values: list of X values
:param errors: (optional) list of errors on X values
:param max_lag: max lag in days
:param lag_resolution: lag resolution in days
:param selection_function: 'fast' or 'natural' - see paper for more details
:param weight_function: 'gaussian' or 'fractional' see paper for more details
:param alpha: weight function characteristic length scale, default is t.median_time
:return: { 'lag_timeseries':[], 'correlations':[] }
"""
if errors is None:
ds = DataStructure(timeseries, values)
else:
ds = DataStructure(timeseries, values, errors)
corr = Correlator(ds)

set_up_correlation(corr, min_lag, max_lag, lag_resolution, alpha)
find_correlation(corr, selection_function, weight_function)
return (corr.lag_timeseries(),
corr.correlations() if len(corr.correlations()) > 1. else corr.correlations()[0],
corr)


def find_correlation_from_lists_cpp(timeseries, values, errors=None, min_lag=None, max_lag=None, lag_resolution=None,
alpha=None):
"""
Using a fixed set up of natural selection function and linear weight function,
GACF_LOG_MESSAGE = (
" ###### ### ###### ######## \n"
"## ## ## ## ## ## ## \n"
"## ## ## ## ## \n"
"## #### ## ## ## ###### \n"
"## ## ######### ## ## \n"
"## ## ## ## ## ## ## \n"
" ###### ## ## ###### ## \n"
"------------------------------\n"
"Number of Data Points: {no_data}\n"
"Number of Lag Timesteps: {no_lag_points}\n"
"Lag Resolution: {lag_resolution}\n"
"------------------------------\n"
)


class GACF:
"""Compute the Generalised Autocorrelation Function (G-ACF)"""

def __init__(self, timeseries=None, values=None, errors=None, filename=None):

if filename:
self.data = DataStructure(filename)
else:
if errors is None:
self.data = DataStructure(timeseries, values)
else:
self.data = DataStructure(timeseries, values, errors)

@staticmethod
def set_up_correlation(
corr, min_lag=None, max_lag=None, lag_resolution=None, alpha=None
):
""" No return type. Applies non-default values to correlator """
if max_lag is not None:
corr.max_lag = max_lag
if min_lag is not None:
corr.min_lag = min_lag
if lag_resolution is not None:
corr.lag_resolution = lag_resolution
if alpha is not None:
corr.alpha = alpha

@staticmethod
def find_correlation(
corr, selection_function="natural", weight_function="fractional"
):
"""If user specifies a different weight or selection function this method is invoked.
Will be considerably slower than the C++ implementation.
Args:
corr (Correlator): Correlator object to iterate over
selection_function (str, optional): Selection Function. Defaults to "natural".
weight_function (str, optional): Weight Function. Defaults to "fractional".
"""

def lag_generator(min_lag, max_lag, lag_resolution):
k = min_lag
is_positive = False
while k <= max_lag:
if k == 0:
is_positive = True
if k > 0 and not is_positive:
is_positive = True
yield 0
yield k
k += lag_resolution

for i, k in enumerate(
lag_generator(corr.min_lag, corr.max_lag, corr.lag_resolution)
):
col_it = CorrelationIterator(k, corr.N_datasets)
SELECTION_FUNCTIONS[selection_function](corr, col_it)
corr.deltaT(col_it)
WEIGHT_FUNCTIONS[weight_function](corr, col_it)
corr.findCorrelation(col_it)
corr.addCorrelationData(col_it, i)

corr.cleanCorrelationData(i + 1)

def autocorrelation(
self,
min_lag=None,
max_lag=None,
lag_resolution=None,
alpha=None,
selection_function="natural",
weight_function="fractional",
return_correlator=False,
):
"""Compute G-ACF
Using a fixed set up of natural selection function and linear weight function
will be faster than the python implementation in general.
:param timeseries: list of time values
:param values: list of X values
:param errors: (optional) list of errors on X values
:param max_lag: max lag in days
:param lag_resolution: lag resolution in days
:param alpha: weight function characteristic length scale, default is t.median_time
:return: { 'lag_timeseries':[], 'correlations':[] }
"""
if errors is None:
ds = DataStructure(timeseries, values)
else:
ds = DataStructure(timeseries, values, errors)
corr = Correlator(ds)
set_up_correlation(corr, min_lag, max_lag, lag_resolution, alpha)
corr.calculateStandardCorrelation()
return (corr.lag_timeseries(),
corr.correlations() if len(corr.correlations()) > 1. else corr.correlations()[0],
corr)
It is reccomended to leave selection_function and weight_function as default for speed.
Args:
min_lag (float, optional): min lag in units of time. Defaults to None.
max_lag (float, optional): max lag in units of time. Defaults to None.
lag_resolution (float, optional): lag resolution in units of time. Defaults to None.
alpha (float, optional): weight function characteristic length scale, default is t.median_time. Defaults to None.
selection_function (str, optional): 'fast' or 'natural' - see paper for more details. Defaults to "natural".
weight_function: (str, optional) 'gaussian' or 'fractional' see paper for more details. Defaults to "fractional".
return_correlator (bool, optional): return correlator object. Defaults to False.
Returns:
Tuple: (lag_timeseries, correlations, [Optional: correlator])
"""
corr = Correlator(self.data)
self.set_up_correlation(corr, min_lag, max_lag, lag_resolution, alpha)

if selection_function == "natural" and weight_function == "fractional":
# fast c++ method
corr.calculateStandardCorrelation()
else:
# slow python method with user specified function
self.find_correlation(corr, selection_function, weight_function)

if return_correlator:
return (
corr.lag_timeseries(),
corr.correlations()
if len(corr.correlations()) > 1.0
else corr.correlations()[0],
corr,
)
else:
return (
corr.lag_timeseries(),
corr.correlations()
if len(corr.correlations()) > 1.0
else corr.correlations()[0],
)
Loading

0 comments on commit 8904fab

Please sign in to comment.