From 7eb90b0630261f548c0e803dbe98e325a5c10c21 Mon Sep 17 00:00:00 2001
From: TJ Lane <thomas.joseph.lane@gmail.com>
Date: Fri, 25 Oct 2024 15:43:53 +0100
Subject: [PATCH] uncertainty scaling update (#41)

* variance weighting bugfix; needs a test

* new test; not the strongest, but OK
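
In short: scaling residuals are now weighted by the inverse of the summed
variances, 1 / (sigma_ref**2 + sigma_to_scale**2), rather than multiplied by
the combined sigma. Below is a minimal, self-contained sketch of the idea --
not the library code -- assuming scipy's least_squares, with a single
isotropic scale factor standing in for the full anisotropic model in
meteor/scale.py:

    import numpy as np
    from scipy.optimize import least_squares

    reference = np.array([1.0, 2.0, 3.0])
    to_scale = np.array([4.0, 8.0, 2.0])
    sigma_ref = np.array([1.0, 1.0, 1e6])  # third reflection is poorly measured
    sigma_to_scale = np.array([1.0, 1.0, 1e6])

    # the fix: weight each residual by the inverse of the summed variances
    inverse_variance = 1.0 / (np.square(sigma_ref) + np.square(sigma_to_scale))

    def residuals(scale: np.ndarray) -> np.ndarray:
        return inverse_variance * (scale * to_scale - reference)

    fit = least_squares(residuals, x0=np.array([1.0]))
    print(fit.x)  # ~[0.25], set by the two well-measured reflections

The poorly measured third reflection is down-weighted to (near) zero, so the
fit recovers the scale factor from the first two points alone.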
---
 meteor/scale.py         | 33 +++++++++++++--------------------
 test/unit/test_scale.py | 39 ++++++++++++++++++++++++---------------
 2 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/meteor/scale.py b/meteor/scale.py
index 35db57d..7359884 100644
--- a/meteor/scale.py
+++ b/meteor/scale.py
@@ -41,12 +41,6 @@ def _compute_anisotropic_scale_factors(
     return anisotropic_scale_parameters[0] * np.exp(exponential_argument)
 
 
-def _check_index_consistency(series1: rs.DataSeries, series2: rs.DataSeries) -> None:
-    if not series1.index.equals(series2.index):
-        msg = f"indices of `{series1.name}`, `{series2.name}` differ"
-        raise IndexError(msg)
-
-
 def compute_scale_factors(
     *,
     reference_values: rs.DataSeries,
@@ -94,22 +88,21 @@ def compute_scale_factors(
     [1] SCALEIT https://www.ccp4.ac.uk/html/scaleit.html
     """
     common_miller_indices: pd.Index = reference_values.index.intersection(values_to_scale.index)
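+    # scale factors are fit using only reflections observed in both datasets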
-
     common_reference_values: np.ndarray = reference_values.loc[common_miller_indices].to_numpy()
     common_values_to_scale: np.ndarray = values_to_scale.loc[common_miller_indices].to_numpy()
 
-    # if we are going to weight the scaling using the uncertainty values, then the weights will be
-    #    inverse_sigma = 1 / sqrt{ sigmaA ** 2 + sigmaB ** 2 }
-    uncertainty_weights: np.ndarray
-    if (reference_uncertainties is not None) and (to_scale_uncertainties is not None):
-        _check_index_consistency(reference_values, reference_uncertainties)
-        _check_index_consistency(values_to_scale, to_scale_uncertainties)
-        uncertainty_weights = np.sqrt(
-            np.square(reference_uncertainties.loc[common_miller_indices].to_numpy())
-            + np.square(to_scale_uncertainties.loc[common_miller_indices].to_numpy()),
-        )
-    else:
-        uncertainty_weights = np.array(1.0)
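+    # weight residuals by inverse variance: 1 / (sigma_ref**2 + sigma_to_scale**2)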
+    half_root_two = np.array(np.sqrt(2) / 2.0)  # default sigma; squared, each term is 0.5, so weights are one
+    ref_variance: np.ndarray = (
+        np.square(reference_uncertainties.loc[common_miller_indices].to_numpy())
+        if reference_uncertainties is not None
+        else np.square(half_root_two)
+    )
+    to_scale_variance: np.ndarray = (
+        np.square(to_scale_uncertainties.loc[common_miller_indices].to_numpy())
+        if to_scale_uncertainties is not None
+        else np.square(half_root_two)
+    )
+    inverse_variance = 1.0 / (ref_variance + to_scale_variance)
 
     def compute_residuals(scaling_parameters: ScaleParameters) -> np.ndarray:
         scale_factors = _compute_anisotropic_scale_factors(
@@ -118,7 +111,7 @@ def compute_residuals(scaling_parameters: ScaleParameters) -> np.ndarray:
         )
 
         difference_after_scaling = scale_factors * common_values_to_scale - common_reference_values
-        residuals = uncertainty_weights * difference_after_scaling
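+        # reflections with large combined uncertainty contribute little to the fit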
+        residuals = inverse_variance * difference_after_scaling
 
         if not isinstance(residuals, np.ndarray):
             msg = "scipy optimizers' behavior is unstable unless `np.ndarray`s are used"
diff --git a/test/unit/test_scale.py b/test/unit/test_scale.py
index 3b375a6..2a56f0c 100644
--- a/test/unit/test_scale.py
+++ b/test/unit/test_scale.py
@@ -76,21 +76,6 @@ def test_compute_scale_factors_anisotropic(miller_dataseries: rs.DataSeries) ->
     np.testing.assert_array_almost_equal(scale_factors, miller_dataseries.values)
 
 
-def test_check_index_consistency(miller_dataseries: rs.DataSeries) -> None:
-    scale._check_index_consistency(miller_dataseries, miller_dataseries)
-
-    modified_series = miller_dataseries.copy()
-    modified_series.index = pd.MultiIndex.from_tuples(
-        [
-            (-100, -100, -100),
-        ]
-        * 5,
-        names=["H", "K", "L"],
-    )
-    with pytest.raises(IndexError):
-        scale._check_index_consistency(modified_series, miller_dataseries)
-
-
 @pytest.mark.parametrize("use_uncertainties", [False, True])
 def test_scale_maps(random_difference_map: Map, use_uncertainties: bool) -> None:
     multiple = 2.0
@@ -116,6 +101,30 @@ def test_scale_maps(random_difference_map: Map, use_uncertainties: bool) -> None
     )
 
 
+def test_scale_maps_uncertainty_weighting() -> None:
+    x = np.array([1, 2, 3])
+    y = np.array([4, 8, 2])
+    phi = np.array([0, 0, 0])
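+    # the third reflection carries an enormous uncertainty, so the fit should ignore it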
+    sigmas = np.array([1, 1, 1e6])
+
+    miller_indices = [(0, 0, 0), (0, 0, 1), (0, 0, 2)]
+    index = pd.MultiIndex.from_tuples(miller_indices, names=["H", "K", "L"])
+
+    reference_map = Map({"F": x, "PHI": phi, "SIGF": sigmas})
+    reference_map.index = index
+    map_to_scale = Map({"F": y, "PHI": phi, "SIGF": sigmas})
+    map_to_scale.index = index
+
+    scaled = scale.scale_maps(
+        reference_map=reference_map,
+        map_to_scale=map_to_scale,
+        weight_using_uncertainties=True,
+    )
+
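+    # the two well-measured reflections imply a scale factor of 0.25 (1/4 and 2/8), so the
+    # down-weighted third reflection scales to F = 0.25 * 2 = 0.5 and SIGF = 0.25 * 1e6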
+    assert np.isclose(scaled["F"][(0, 0, 2)], 0.5)
+    assert np.isclose(scaled["SIGF"][(0, 0, 2)], 250000.0)
+
+
 def test_scale_maps_no_uncertainties_error(random_difference_map: Map) -> None:
     no_uncertainties: Map = random_difference_map.copy()
     del no_uncertainties[no_uncertainties._uncertainty_column]