Skip to content

Commit

Permalink
Expression: split out evaluate_params (#539)
Browse files Browse the repository at this point in the history
* `Expression` split out `evaluate_params`

* fix reference to self.parameters_values

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update tests

* docstring

* Apply suggestions from code review

Co-authored-by: Victoria <112418493+veni-vidi-vici-dormivi@users.noreply.github.com>

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Victoria <112418493+veni-vidi-vici-dormivi@users.noreply.github.com>
  • Loading branch information
3 people authored Oct 10, 2024
1 parent 685a5fd commit 4c48344
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 34 deletions.
24 changes: 12 additions & 12 deletions mesmer/mesmer_x/train_l_distrib_mesmerx.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,10 +684,11 @@ def _test_evol_params(self, distrib, data):
for param in self.boundaries_params:
bottom, top = self.boundaries_params[param]

# TODO: avoid using implementation detail of frozen distr of sp.stats
param_values = distrib.kwds[param]

# out of boundaries
if np.any(self.expr_fit.parameters_values[param] < bottom) or np.any(
top < self.expr_fit.parameters_values[param]
):
if np.any(param_values < bottom) or np.any(param_values >= top):
test = False

# test of the support of the distribution: is there any data out of the
Expand Down Expand Up @@ -1064,10 +1065,10 @@ def smooth_data(data, nn=10):
return np.convolve(data, np.ones(nn) / nn, mode="same")

def fg_fun_deriv01(self, x):
self.expr_fit.evaluate(x, self.fg_info_derivatives["pred_low"])
loc_low = self.expr_fit.parameters_values["loc"]
self.expr_fit.evaluate(x, self.fg_info_derivatives["pred_high"])
loc_high = self.expr_fit.parameters_values["loc"]
params = self.expr_fit.evaluate_params(x, self.fg_info_derivatives["pred_low"])
loc_low = params["loc"]
params = self.expr_fit.evaluate_params(x, self.fg_info_derivatives["pred_high"])
loc_high = params["loc"]

deriv = {
p: (loc_high - loc_low)
Expand All @@ -1091,16 +1092,15 @@ def fg_fun_deriv01(self, x):
def fg_fun_loc(self, x_loc):
x = np.copy(self.fg_coeffs)
x[self.fg_ind_loc] = x_loc
self.expr_fit.evaluate(x, self.data_pred)
loc = self.expr_fit.parameters_values["loc"]
params = self.expr_fit.evaluate_params(x, self.data_pred)
loc = params["loc"]
return np.sum((loc - self.smooth_data_targ) ** 2)

def fg_fun_sca(self, x_sca):
x = np.copy(self.fg_coeffs)
x[self.fg_ind_sca] = x_sca
self.expr_fit.evaluate(x, self.data_pred)
loc = self.expr_fit.parameters_values["loc"]
sca = self.expr_fit.parameters_values["scale"]
params = self.expr_fit.evaluate_params(x, self.data_pred)
loc, sca = params["loc"], params["scale"]
# ^ better to use that one instead of deviation, which is affected by the scale
dev = np.abs(self.data_targ - loc)
return np.sum((dev - sca) ** 2)
Expand Down
60 changes: 49 additions & 11 deletions mesmer/mesmer_x/train_utils_mesmerx.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,9 +296,9 @@ def _correct_expr_parameters(self):
param
].replace(f"__{i}__", i)

def evaluate(self, coefficients_values, inputs_values, forced_shape=None):
def evaluate_params(self, coefficients_values, inputs_values, forced_shape=None):
"""
Evaluates the distribution with the provided inputs and coefficients
Evaluates the parameters for the provided inputs and coefficients
Parameters
----------
Expand All @@ -314,6 +314,12 @@ def evaluate(self, coefficients_values, inputs_values, forced_shape=None):
forced_shape : None | tuple or list of dimensions
coefficients_values and inputs_values for transposition of the shape
Returns
-------
params: dict
Realized parameters for the given expression, coefficients and covariates;
to pass to ``self.distrib(**params)`` or its methods.
Warnings
--------
with xarrays for coefficients_values and inputs_values, the outputs will have
Expand Down Expand Up @@ -357,17 +363,17 @@ def evaluate(self, coefficients_values, inputs_values, forced_shape=None):
# gather coefficients and covariates (can't use d1 | d2, does not work for dataset)
locals = {**coefficients_values, **inputs_values}

# Evaluation 3: parameters
self.parameters_values = {}
# evaluate parameters
parameters_values = {}
for param, expr in self.parameters_expressions.items():
# may need to silence warnings here, to avoid spamming
self.parameters_values[param] = eval(expr, None, locals)
parameters_values[param] = eval(expr, None, locals)

# Correcting shapes 1: scalar parameters must have the shape of the inputs
if len(self.inputs_list) > 0:

for param in self.parameters_list:
param_value = self.parameters_values[param]
param_value = parameters_values[param]

# TODO: use np.ndim(param_value) == 0? (i.e. isscalar)
if isinstance(param_value, int | float) or param_value.ndim == 0:
Expand All @@ -382,21 +388,53 @@ def evaluate(self, coefficients_values, inputs_values, forced_shape=None):
inputs_values[self.inputs_list[0]].shape
)

self.parameters_values[param] = param_value
parameters_values[param] = param_value

# Correcting shapes 2: possibly forcing shape
if len(self.inputs_list) > 0 and forced_shape is not None:

for param in self.parameters_list:
dims_param = [
d for d in forced_shape if d in self.parameters_values[param].dims
d for d in forced_shape if d in parameters_values[param].dims
]
self.parameters_values[param] = self.parameters_values[param].transpose(
parameters_values[param] = parameters_values[param].transpose(
*dims_param
)

# evaluation of the distribution
return self.distrib(**self.parameters_values)
return parameters_values

def evaluate(self, coefficients_values, inputs_values, forced_shape=None):
"""
Evaluates the distribution with the provided inputs and coefficients
Parameters
----------
coefficients_values : dict | xr.Dataset(c_i) | list of values
Coefficient arrays or scalars. Can have the following form
- dict(c_i = values or np.array())
- xr.Dataset(c_i)
- list of values
inputs_values : dict | xr.Dataset
Input arrays or scalars. Can be passed as
- dict(inp_i = values or np.array())
- xr.Dataset(inp_i)
forced_shape : None | tuple or list of dimensions
coefficients_values and inputs_values for transposition of the shape
Returns
-------
distr: scipy stats frozen distribution
Frozen distribution with the realized parameters applied.
Warnings
--------
with xarrays for coefficients_values and inputs_values, the outputs will have
for shape first the one of the coefficient, then the one of the inputs
--> trying to avoid this issue with 'forced_shape'
"""

params = self.evaluate_params(coefficients_values, inputs_values, forced_shape)
return self.distrib(**params)


def probability_integral_transform(
Expand Down
52 changes: 41 additions & 11 deletions tests/unit/test_mesmer_x_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def test_expression_coefficients_two_digits():
def test_expression_covariate_c_digit():

expr = Expression("norm(loc=c1, scale=c2 * __Tc3__)", "name")
expr.coefficients_list == ["c1", "c2"]
assert expr.coefficients_list == ["c1", "c2"]


@pytest.mark.xfail(
Expand Down Expand Up @@ -300,6 +300,46 @@ def test_evaluate_covariates_wrong_shape():
expr.evaluate([1, 1], xr.Dataset(data_vars=data_vars))


def test_evaluate_params_norm():

expr = Expression("norm(loc=c1 * __T__, scale=c2)", expr_name="name")
params = expr.evaluate_params([1, 2], {"T": np.array([1, 2])})

assert isinstance(params, dict)

expected = {"loc": np.array([1, 2]), "scale": np.array([2.0, 2.0])}

# assert evaluated params are equal
mesmer.testing.assert_dict_allclose(params, expected)

# a second set of values
params = expr.evaluate_params([2, 1], {"T": np.array([2, 5])})

expected = {"loc": np.array([4, 10]), "scale": np.array([1.0, 1.0])}

# assert evaluated params are equal
mesmer.testing.assert_dict_allclose(params, expected)


def test_evaluate_params_norm_dataset():
# NOTE: not sure if passing DataArray to scipy distribution is a good idea

expr = Expression("norm(loc=c1 * __T__, scale=c2)", expr_name="name")

coefficients_values = xr.Dataset(data_vars={"c1": 1, "c2": 2})
inputs_values = xr.Dataset(data_vars={"T": ("x", np.array([1, 2]))})

params = expr.evaluate_params(coefficients_values, inputs_values)

loc = xr.DataArray([1, 2], dims="x")
scale = xr.DataArray([2, 2], dims="x")

expected = {"loc": loc, "scale": scale}

# assert evaluated params are equal
mesmer.testing.assert_dict_allclose(params, expected)


def test_evaluate_norm():

expr = Expression("norm(loc=c1 * __T__, scale=c2)", expr_name="name")
Expand All @@ -312,19 +352,12 @@ def test_evaluate_norm():
# assert frozen params are equal
mesmer.testing.assert_dict_allclose(dist.kwds, expected)

# NOTE: will write own function to return param values
mesmer.testing.assert_dict_allclose(dist.kwds, expr.parameters_values)

# a second set of values
dist = expr.evaluate([2, 1], {"T": np.array([2, 5])})

expected = {"loc": np.array([4, 10]), "scale": np.array([1.0, 1.0])}

# assert frozen params are equal
mesmer.testing.assert_dict_allclose(dist.kwds, expected)

mesmer.testing.assert_dict_allclose(dist.kwds, expr.parameters_values)


def test_evaluate_norm_dataset():
# NOTE: not sure if passing DataArray to scipy distribution is a good idea
Expand All @@ -345,6 +378,3 @@ def test_evaluate_norm_dataset():

# assert frozen params are equal
mesmer.testing.assert_dict_allclose(dist.kwds, expected)

# NOTE: will write own function to return param values
mesmer.testing.assert_dict_allclose(dist.kwds, expr.parameters_values)

0 comments on commit 4c48344

Please sign in to comment.