Skip to content

Commit

Permalink
Move mesmer_m functionalities into stats module (MESMER-group#484)
Browse files Browse the repository at this point in the history
* move harmonic model and power transformer to stats

* renaming of functions

* privatizing some functions

* adjust inits

* adjust tests

* add mesmer_m funcs to documentation

* documentation

* changelog

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mathias Hauser <mathause@users.noreply.github.com>
  • Loading branch information
3 people authored Aug 7, 2024
1 parent 188881d commit a90cd89
Show file tree
Hide file tree
Showing 11 changed files with 226 additions and 165 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ that this led to some numerical changes compared to the MESMER-M publication
- move MESMER-M scripts into mesmer (
`#419 <https://github.com/MESMER-group/mesmer/pull/419>`_, and
`#421 <https://github.com/MESMER-group/mesmer/pull/421>`_).
- move the harmonic model and power transformer functionalities to the stats module (
`#484 <https://github.com/MESMER-group/mesmer/pull/484>`_).

**Auto-Regression**

Expand Down
24 changes: 24 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,31 @@ Auto regression
~stats._fit_auto_regression_scen_ens
~stats.select_ar_order
~stats.fit_auto_regression
~stats.fit_auto_regression_monthly
~stats.draw_auto_regression_uncorrelated
~stats.draw_auto_regression_correlated
~stats.draw_auto_regression_monthly

Harmonic Model
--------------

.. autosummary::
:toctree: generated/

~stats.generate_fourier_series
~stats.fit_harmonic_model

Power Transformer
-----------------

.. autosummary::
:toctree: generated/

~stats.lambda_function
~stats.get_lambdas_from_covariates
~stats.fit_yeo_johnson_transform
~stats.yeo_johnson_transform
~stats.inverse_yeo_johnson_transform

Localized covariance
--------------------
Expand All @@ -43,6 +66,7 @@ Localized covariance

~stats.adjust_covariance_ar1
~stats.find_localized_empirical_covariance
~stats.find_localized_empirical_covariance_monthly

Smoothing
---------
Expand Down
12 changes: 1 addition & 11 deletions mesmer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,7 @@

from importlib.metadata import version as _get_version

from . import (
calibrate_mesmer,
core,
create_emulations,
io,
mesmer_m,
stats,
testing,
utils,
)
from . import calibrate_mesmer, core, create_emulations, io, stats, testing, utils
from .core import _data as data
from .core import geospatial, grid, mask, volc, weighted

Expand All @@ -37,7 +28,6 @@
"geospatial",
"grid",
"mask",
"mesmer_m",
"stats",
"testing",
"volc",
Expand Down
11 changes: 0 additions & 11 deletions mesmer/mesmer_m/__init__.py

This file was deleted.

17 changes: 17 additions & 0 deletions mesmer/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,20 @@
select_ar_order,
)
from mesmer.stats._gaspari_cohn import gaspari_cohn, gaspari_cohn_correlation_matrices
from mesmer.stats._harmonic_model import fit_harmonic_model, generate_fourier_series
from mesmer.stats._linear_regression import LinearRegression
from mesmer.stats._localized_covariance import (
adjust_covariance_ar1,
find_localized_empirical_covariance,
find_localized_empirical_covariance_monthly,
)
from mesmer.stats._power_transformer import (
fit_yeo_johnson_transform,
get_lambdas_from_covariates,
inverse_yeo_johnson_transform,
lambda_function,
yeo_johnson_transform,
)
from mesmer.stats._smoothing import lowess

__all__ = [
Expand All @@ -38,4 +46,13 @@
"find_localized_empirical_covariance_monthly",
# smoothing
"lowess",
# harmonic model
"fit_harmonic_model",
"generate_fourier_series",
# power transformer
"lambda_function",
"get_lambdas_from_covariates",
"fit_yeo_johnson_transform",
"yeo_johnson_transform",
"inverse_yeo_johnson_transform",
]
23 changes: 16 additions & 7 deletions mesmer/stats/_auto_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,10 @@ def fit_auto_regression_monthly(monthly_data, time_dim="time"):
where :math:`\\tau \\in \\{1, \\ldots, N\\}` counts the seasons of some seasonal cycle, here the
months of a year :math:`(N=12)` and :math:`t` counts the repetitions of this seasonal cycle,
here the years. Here :math:`\\epsilon` is a white noise process, i.e. :math:`\\epsilon \\sim N(0, \\sigma^2)`.
The covariance matrix of the driving white noise process should be estimated on the residuals of the AR(1)
process. The residuals are returned here and should be passed to
:func:`find_localized_empirical_covariance_monthly <mesmer.stats.find_localized_empirical_covariance_monthly>`.
For more information refer to Storch and Zwiers (1999) Chapter 10.3.8 [1].
[1] Storch H von, Zwiers FW. Statistical Analysis in Climate Research.
Expand All @@ -635,7 +639,8 @@ def fit_auto_regression_monthly(monthly_data, time_dim="time"):
-------
obj : ``xr.Dataset``
Dataset containing the estimated parameters of the AR(1) process, the ``intercept`` and the
``slope`` for each month and gridpoint. Additionally, the ``residuals`` are returned.
``slope`` for each month and gridpoint. Additionally, the ``residuals`` are returned. These
are needed for the estimation of the covariance matrix.
"""
_check_dataarray_form(monthly_data, "monthly_data", ndim=2, required_dims=time_dim)
monthly_data = monthly_data.groupby(f"{time_dim}.month")
Expand Down Expand Up @@ -676,9 +681,10 @@ def fit_auto_regression_monthly(monthly_data, time_dim="time"):


def _fit_auto_regression_monthly_np(data_month, data_prev_month):
"""fit an auto regression of lag one (AR(1)) on monthly data - numpy wrapper
We use a linear function to relate the previous month's
data (predictor/independent variable) to the current month's data (target/ dependent variable).
"""fit an auto regression of lag one (AR(1)) on monthly data
We use a linear function to relate the previous month's data
(predictor/independent variable) to the current month's data
(target/ dependent variable).
Parameters
----------
Expand Down Expand Up @@ -720,8 +726,10 @@ def draw_auto_regression_monthly(
time_dim="time",
realisation_dim="realisation",
):
"""draw time series of an auto regression process with lag one
"""draw time series of a cyclo-stationary auto-regressive process of lag one (AR(1))
using individual parameters for each month including spatially-correlated innovations.
For more information on the cyclo-stationary AR(1) process please refer to
:func:`fit_auto_regression_monthly <mesmer.stats.fit_auto_regression_monthly>`.
Parameters
----------
Expand All @@ -734,8 +742,9 @@ def draw_auto_regression_monthly(
both of shape (12, n_gridpoints).
covariance : xr.DataArray of shape (12, n_gridpoints, n_gridpoints)
The covariance matrix representing spatial correlation between gridpoints for
each month. Must be symmetric and at least positive-semidefinite.
The covariance matrix representing the spatially correlated driving
white noise process for each month. Must be symmetric and at least
positive-semidefinite.
Used to draw spatially-correlated innovations using a multivariate normal.
time : xr.DataArray
The time coordinates that determines the length of the predicted timeseries and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@


def _generate_fourier_series_np(yearly_predictor, coeffs, months):
"""construct a Fourier Series
"""construct a Fourier Series from the yearly predictor with given coeffs.
The order of the Fourier series is inferred from the size of the coeffs array.
Parameters
----------
yearly_predictor : array-like of shape (n_years*12,)
yearly temperature values.
yearly predictor values.
coeffs : array-like of shape (4*order)
coefficients of Fourier Series.
months : array-like of shape (n_years*12,)
Expand All @@ -34,6 +36,8 @@ def _generate_fourier_series_np(yearly_predictor, coeffs, months):
"""
order = int(coeffs.size / 4)
# TODO: can also generate the month array here, that would be cleaner,
# we assume that the data starts in January anyways

# fix these parameters, according to paper
# we could also fit them and give an inital guess of 0 and 1 in the coeffs array as before
Expand All @@ -54,13 +58,12 @@ def _generate_fourier_series_np(yearly_predictor, coeffs, months):


def generate_fourier_series(yearly_predictor, coeffs, time, time_dim="time"):
"""construct a Fourier Series from yearly predictors with fitted coeffs
"""construct a Fourier Series from yearly predictors with fitted coeffs - numpy wrapper.
Parameters
----------
yearly_predictor : xr.DataArray of shape (n_years, n_gridcells)
Yearly temperature values used as predictors
Containing one value per year.
Predictor containing one value per year.
coeffs : xr.DataArray of shape (n_gridcells, n_coeffs)
coefficients of Fourier Series for each gridcell. Note that coeffs
may contain nans (for higher orders, that have not been fit).
Expand All @@ -73,7 +76,7 @@ def generate_fourier_series(yearly_predictor, coeffs, time, time_dim="time"):
Returns
-------
predictions: xr.DataArray of shape (n_years * 12, n_gridcells)
Fourier Series calculated over yearly_predictor with coeffs.
Fourier Series calculated over `yearly_predictor` with `coeffs`.
"""
_, n_gridcells = yearly_predictor.shape
Expand Down Expand Up @@ -103,17 +106,18 @@ def generate_fourier_series(yearly_predictor, coeffs, time, time_dim="time"):
return predictions.transpose(time_dim, ...)


def fit_fourier_series_np(yearly_predictor, monthly_target, first_guess):
"""execute fitting of the harmonic model/fourier series
def _fit_fourier_coeffs_np(yearly_predictor, monthly_target, first_guess):
"""fit the coefficients of a Fourier Series to the data using least squares for the
given order, which is inferred from the size of the `first_guess` array.
Parameters
----------
yearly_predictor : array-like of shape (n_years*12,)
Repeated yearly temperature values to predict with.
Repeated yearly predictor.
monthly_target : array-like of shape (n_years*12,)
Target monthly temperature values.
Target monthly values.
first_guess : array-like of shape (4*order)
Initial guess for the coefficients of the Fourier Series.
Expand All @@ -134,7 +138,7 @@ def fit_fourier_series_np(yearly_predictor, monthly_target, first_guess):
Fitted coefficients of Fourier series.
preds : array-like of shape (n_years*12,)
Predicted monthly temperature values.
Predicted monthly values.
"""

Expand Down Expand Up @@ -163,7 +167,7 @@ def func(coeffs, yearly_predictor, mon_train, mon_target):
return coeffs, preds, mse


def calculate_bic(n_samples, order, mse):
def _calculate_bic(n_samples, order, mse):
"""calculate Bayesian Information Criterion (BIC)
Parameters
Expand All @@ -182,21 +186,19 @@ def calculate_bic(n_samples, order, mse):
"""

n_params = order * 4
# NOTE: removed -2 now because for order=0 the first two coefficients dissapear as sin(0) == 0
# removed this now because we no longer fit beta0 and beta1

return n_samples * np.log(mse) + n_params * np.log(n_samples)


def fit_to_bic_np(yearly_predictor, monthly_target, max_order):
"""choose order of Fourier Series to fit for by minimising BIC score
def _fit_fourier_order_np(yearly_predictor, monthly_target, max_order):
"""determine order of Fourier Series for by minimizing BIC score.
For each order, the coefficients are fit using least squares.
Parameters
----------
yearly_predictor : array-like of shape (n_years*12,)
Repeated yearly temperature to predict with. Containing the repeated yearly value for every month.
Repeated yearly values, i.e. containing the repeated yearly value for every month.
monthly_target : array-like of shape (n_years*12,)
Target monthly temperature values.
Target monthly values.
max_order : Integer
Maximum considered order of Fourier Series for which to fit.
Expand All @@ -217,13 +219,13 @@ def fit_to_bic_np(yearly_predictor, monthly_target, max_order):

for i_order in range(1, max_order + 1):

coeffs, predictions, mse = fit_fourier_series_np(
coeffs, predictions, mse = _fit_fourier_coeffs_np(
yearly_predictor,
monthly_target,
# use coeffs from last iteration as first guess
first_guess=np.append(last_coeffs, np.zeros(4)),
)
bic_score = calculate_bic(len(monthly_target), i_order, mse)
bic_score = _calculate_bic(len(monthly_target), i_order, mse)

if bic_score < current_min_score:
current_min_score = bic_score
Expand All @@ -239,28 +241,27 @@ def fit_to_bic_np(yearly_predictor, monthly_target, max_order):
return selected_order, coeffs, predictions


def fit_to_bic_xr(yearly_predictor, monthly_target, max_order=6, time_dim="time"):
"""fit Fourier Series to every gridcell using BIC score to select order - xarray wrapper
Repeats yearly values for every month before passing to :func:`fit_to_bic_np`
def fit_harmonic_model(yearly_predictor, monthly_target, max_order=6, time_dim="time"):
"""fit harmonic model i.e. a Fourier Series to every gridcell using BIC score to
select the order and least squares to fit the coefficients for each order.
Parameters
----------
yearly_predictor : xr.DataArray of shape (n_years, n_gridcells)
Yearly temperature values used as predictors
Containing one value per year.
Yearly values used as predictors, containing one value per year.
monthly_target : xr.DataArray of shape (n_months, n_gridcells)
Monthly temperature values to fit for, containing one value per month, for every year in yearly_predictor.
So n_months = 12*n_years
Monthly values to fit to, containing one value per month, for every year in ´yearly_predictor´.
So `n_months` = 12 :math:`\\cdot` `n_years`
max_order : Integer, default 6
Maximum order of Fourier Series to fit for. Default is 6 since highest meaningful
maximum order is sample_frequency/2, i.e. 12/2 to fit for monthly data.
Returns
-------
data_vars : `xr.Dataset`
Dataset containing the selected order of Fourier Series (n_sel), estimated
Dataset containing the selected order of Fourier Series (n_sel), the estimated
coefficients of the Fourier Series (coeffs) and the resulting predictions for
monthly temperatures (predictions).
monthly values (predictions).
"""

Expand All @@ -275,7 +276,7 @@ def fit_to_bic_xr(yearly_predictor, monthly_target, max_order=6, time_dim="time"
)

n_sel, coeffs, preds = xr.apply_ufunc(
fit_to_bic_np,
_fit_fourier_order_np,
yearly_predictor,
monthly_target,
input_core_dims=[["time"], ["time"]],
Expand Down
8 changes: 6 additions & 2 deletions mesmer/stats/_localized_covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,16 @@ def find_localized_empirical_covariance(
def find_localized_empirical_covariance_monthly(
data, weights, localizer, dim, k_folds, equal_dim_suffixes=("_i", "_j")
):
"""determine localized empirical covariance by cross validation for each month
"""determine localized empirical covariance by cross validation for each month. `data`
should be the residuals of the cyclo-stationary AR(1) process, see
:func:`fit_auto_regression_monthly <mesmer.stats.fit_auto_regression_monthly>`. Note that here,
no additional adjustment is necessary.
Parameters
----------
data : xr.DataArray
2D DataArray with monthly data to calculate the covariance for.
2D DataArray with monthly data to calculate the covariance for (residuals of the AR(1)
process).
weights : xr.DataArray
Weights for the individual samples.
localizer : dict of DataArray
Expand Down
Loading

0 comments on commit a90cd89

Please sign in to comment.