From 7eb90b0630261f548c0e803dbe98e325a5c10c21 Mon Sep 17 00:00:00 2001
From: TJ Lane <thomas.joseph.lane@gmail.com>
Date: Fri, 25 Oct 2024 15:43:53 +0100
Subject: [PATCH] uncertainty scaling update (#41)

* variance weighting bugfix, needs test

* new test, not the strongest, but ok
---
 meteor/scale.py         | 33 +++++++++++++--------------------
 test/unit/test_scale.py | 39 ++++++++++++++++++++++++---------------
 2 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/meteor/scale.py b/meteor/scale.py
index 35db57d..7359884 100644
--- a/meteor/scale.py
+++ b/meteor/scale.py
@@ -41,12 +41,6 @@ def _compute_anisotropic_scale_factors(
     return anisotropic_scale_parameters[0] * np.exp(exponential_argument)
 
 
-def _check_index_consistency(series1: rs.DataSeries, series2: rs.DataSeries) -> None:
-    if not series1.index.equals(series2.index):
-        msg = f"indices of `{series1.name}`, `{series2.name}` differ"
-        raise IndexError(msg)
-
-
 def compute_scale_factors(
     *,
     reference_values: rs.DataSeries,
@@ -94,22 +88,21 @@
     [1] SCALEIT https://www.ccp4.ac.uk/html/scaleit.html
     """
     common_miller_indices: pd.Index = reference_values.index.intersection(values_to_scale.index)
-
     common_reference_values: np.ndarray = reference_values.loc[common_miller_indices].to_numpy()
     common_values_to_scale: np.ndarray = values_to_scale.loc[common_miller_indices].to_numpy()
 
-    # if we are going to weight the scaling using the uncertainty values, then the weights will be
-    #    inverse_sigma = 1 / sqrt{ sigmaA ** 2 + sigmaB ** 2 }
-    uncertainty_weights: np.ndarray
-    if (reference_uncertainties is not None) and (to_scale_uncertainties is not None):
-        _check_index_consistency(reference_values, reference_uncertainties)
-        _check_index_consistency(values_to_scale, to_scale_uncertainties)
-        uncertainty_weights = np.sqrt(
-            np.square(reference_uncertainties.loc[common_miller_indices].to_numpy())
-            + np.square(to_scale_uncertainties.loc[common_miller_indices].to_numpy()),
-        )
-    else:
-        uncertainty_weights = np.array(1.0)
+    half_root_two = np.array(np.sqrt(2) / 2.0)  # weights are one if no uncertainties provided
+    ref_variance: np.ndarray = (
+        np.square(reference_uncertainties.loc[common_miller_indices].to_numpy())
+        if reference_uncertainties is not None
+        else half_root_two
+    )
+    to_scale_variance: np.ndarray = (
+        np.square(to_scale_uncertainties.loc[common_miller_indices].to_numpy())
+        if to_scale_uncertainties is not None
+        else half_root_two
+    )
+    inverse_variance = 1.0 / (ref_variance + to_scale_variance)
 
     def compute_residuals(scaling_parameters: ScaleParameters) -> np.ndarray:
         scale_factors = _compute_anisotropic_scale_factors(
@@ -118,7 +111,7 @@ def compute_residuals(scaling_parameters: ScaleParameters) -> np.ndarray:
         )
 
         difference_after_scaling = scale_factors * common_values_to_scale - common_reference_values
-        residuals = uncertainty_weights * difference_after_scaling
+        residuals = inverse_variance * difference_after_scaling
 
         if not isinstance(residuals, np.ndarray):
             msg = "scipy optimizers' behavior is unstable unless `np.ndarray`s are used"
diff --git a/test/unit/test_scale.py b/test/unit/test_scale.py
index 3b375a6..2a56f0c 100644
--- a/test/unit/test_scale.py
+++ b/test/unit/test_scale.py
@@ -76,21 +76,6 @@ def test_compute_scale_factors_anisotropic(miller_dataseries: rs.DataSeries) ->
     np.testing.assert_array_almost_equal(scale_factors, miller_dataseries.values)
 
 
-def test_check_index_consistency(miller_dataseries: rs.DataSeries) -> None:
-    scale._check_index_consistency(miller_dataseries, miller_dataseries)
-
-    modified_series = miller_dataseries.copy()
-    modified_series.index = pd.MultiIndex.from_tuples(
-        [
-            (-100, -100, -100),
-        ]
-        * 5,
-        names=["H", "K", "L"],
-    )
-    with pytest.raises(IndexError):
-        scale._check_index_consistency(modified_series, miller_dataseries)
-
-
 @pytest.mark.parametrize("use_uncertainties", [False, True])
 def test_scale_maps(random_difference_map: Map, use_uncertainties: bool) -> None:
     multiple = 2.0
@@ -116,6 +101,30 @@ def test_scale_maps(random_difference_map: Map, use_uncertainties: bool) -> None
     )
 
 
+def test_scale_maps_uncertainty_weighting() -> None:
+    x = np.array([1, 2, 3])
+    y = np.array([4, 8, 2])
+    phi = np.array([0, 0, 0])
+    weights = np.array([1, 1, 1e6])
+
+    miller_indices = [(0, 0, 0), (0, 0, 1), (0, 0, 2)]
+    index = pd.MultiIndex.from_tuples(miller_indices, names=["H", "K", "L"])
+
+    reference_map = Map({"F": x, "PHI": phi, "SIGF": weights})
+    reference_map.index = index
+    map_to_scale = Map({"F": y, "PHI": phi, "SIGF": weights})
+    map_to_scale.index = index
+
+    scaled = scale.scale_maps(
+        reference_map=reference_map,
+        map_to_scale=map_to_scale,
+        weight_using_uncertainties=True,
+    )
+
+    assert np.isclose(scaled["F"][(0, 0, 2)], 0.5)
+    assert np.isclose(scaled["SIGF"][(0, 0, 2)], 250000.0)
+
+
 def test_scale_maps_no_uncertainties_error(random_difference_map: Map) -> None:
     no_uncertainties: Map = random_difference_map.copy()
     del no_uncertainties[no_uncertainties._uncertainty_column]
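
Note on the expected values in test_scale_maps_uncertainty_weighting: a minimal standalone sketch (plain numpy, not meteor code) of the arithmetic the test asserts, assuming a single multiplicative scale factor fit by inverse-variance weighted least squares in place of the full anisotropic model. The variable names below are illustrative only.

# Minimal sketch: inverse-variance weighting makes the noisy third reflection
# irrelevant to the fit, so the scale factor comes from the first two points.
import numpy as np

reference = np.array([1.0, 2.0, 3.0])  # reference amplitudes ("x" in the test)
to_scale = np.array([4.0, 8.0, 2.0])   # amplitudes to scale ("y" in the test)
sigma = np.array([1.0, 1.0, 1e6])      # SIGF on both maps; third reflection is junk

# weight = 1 / (sigma_ref**2 + sigma_to_scale**2)
weights = 1.0 / (np.square(sigma) + np.square(sigma))

# closed-form weighted least squares for k minimizing sum(w * (k * to_scale - reference)**2)
k = np.sum(weights * to_scale * reference) / np.sum(weights * np.square(to_scale))

print(k)                # ~0.25, determined by the first two reflections only
print(k * to_scale[2])  # ~0.5, matches the asserted F at (0, 0, 2)
print(k * sigma[2])     # ~250000.0, matches the asserted SIGF at (0, 0, 2)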