just various

rs-station · Oct 26, 2024 · e0e3166 · e0e3166
1 parent 2310d66
commit e0e3166
Show file tree

Hide file tree

Showing 12 changed files with 78 additions and 35 deletions.
diff --git a/.gitignore b/.gitignore
@@ -9,6 +9,7 @@ __pycache__/
 *.map
 *.mtz
 *.pdb
+*.cif
 
 # C extensions
 *.so

diff --git a/meteor/scripts/common.py b/meteor/scripts/common.py
@@ -12,7 +12,7 @@
 from meteor.rsmap import Map
 from meteor.scale import scale_maps
 from meteor.settings import COMPUTED_MAP_RESOLUTION_LIMIT, KWEIGHT_PARAMETER_DEFAULT
-from meteor.sfcalc import pdb_to_calculated_map
+from meteor.sfcalc import structure_file_to_calculated_map
 
 log = structlog.get_logger()
 
@@ -68,7 +68,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
             "derivative",
             description=(
                 "The 'derivative' diffraction data, typically: light-triggered, ligand-bound, etc. "
-                "We compute derivative-minus-native maps."
             ),
         )
         derivative_group.add_argument("derivative_mtz", type=Path)
@@ -89,11 +88,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
 
         native_group = self.add_argument_group(
             "native",
-            description=(
-                "The 'native' diffraction data, typically: dark, apo, etc. We compute derivative-"
-                "minus-native maps. The single set of known phases are typically assumed to "
-                "correspond to the native dataset."
-            ),
+            description=("The 'native' diffraction data, typically: dark, apo, etc."),
         )
         native_group.add_argument("native_mtz", type=Path)
         native_group.add_argument(
@@ -112,11 +107,11 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         )
 
         self.add_argument(
-            "-p",
-            "--pdb",
+            "-s",
+            "--structure",
             type=Path,
             required=True,
-            help="Specify PDB file path, model should correspond to the native MTZ.",
+            help="Specify CIF or PDB file path to compute phases from (usually a native model).",
         )
 
         self.add_argument(
@@ -211,9 +206,9 @@ def _construct_map(
     def load_difference_maps(args: argparse.Namespace) -> DiffMapSet:
         # note: method accepts `args`, in case the passed arguments are mutable
 
-        log.info("Loading PDB & computing FC/PHIC", file=str(args.pdb))
-        calculated_map = pdb_to_calculated_map(
-            args.pdb, high_resolution_limit=COMPUTED_MAP_RESOLUTION_LIMIT
+        log.info("Loading PDB & computing FC/PHIC", file=str(args.structure))
+        calculated_map = structure_file_to_calculated_map(
+            args.structure, high_resolution_limit=COMPUTED_MAP_RESOLUTION_LIMIT
         )
 
         derivative_map = DiffmapArgParser._construct_map(

diff --git a/meteor/scripts/compute_difference_map.py b/meteor/scripts/compute_difference_map.py
@@ -58,6 +58,9 @@ def make_requested_diffmap(
             mapset.derivative, mapset.native
         )
         log.info("  using negentropy optimized", kparameter=kweight_parameter)
+        if kweight_parameter is np.nan:
+            msg = "determined `k-parameter` is NaN, something went wrong..."
+            raise RuntimeError(msg)
 
     elif kweight_mode == WeightMode.fixed:
         if not isinstance(kweight_parameter, float):
@@ -149,7 +152,13 @@ def denoise_diffmap(
 
 def main(command_line_arguments: list[str] | None = None) -> None:
     parser = TvDiffmapArgParser(
-        description="Compute a difference map using native, derivative, and calculated MTZ files."
+        description=(
+            "Compute an isomorphous difference map, optionally applying k-weighting and/or "
+            "TV-denoising if desired. \n\n In the terminology adopted, this script computes a "
+            "`derivative` minus a `native` map, using a constant phase approximation. Phases, "
+            "typically from a model of the `native` data, are computed from a CIF/PDB model you "
+            "must provide."
+        )
     )
     args = parser.parse_args(command_line_arguments)
     parser.check_output_filepaths(args)

diff --git a/meteor/sfcalc.py b/meteor/sfcalc.py
@@ -21,9 +21,9 @@ def gemmi_structure_to_calculated_map(
     return Map.from_ccp4_map(ccp4_map, high_resolution_limit=high_resolution_limit)
 
 
-def pdb_to_calculated_map(pdb_file: Path, *, high_resolution_limit: float) -> Map:
-    if not pdb_file.exists():
-        msg = f"could not find file: {pdb_file}"
+def structure_file_to_calculated_map(cif_or_pdb_file: Path, *, high_resolution_limit: float) -> Map:
+    if not cif_or_pdb_file.exists():
+        msg = f"could not find file: {cif_or_pdb_file}"
         raise OSError(msg)
-    structure = gemmi.read_structure(str(pdb_file))
+    structure = gemmi.read_structure(str(cif_or_pdb_file))
     return gemmi_structure_to_calculated_map(structure, high_resolution_limit=high_resolution_limit)
diff --git a/pyproject.toml b/pyproject.toml
@@ -26,6 +26,9 @@ dev = [
 requires = ["setuptools >= 61.0"]
 build-backend = "setuptools.build_meta"
 
+[project.scripts]
+"meteor.diffmap" = "meteor.scripts.compute_difference_map:main"
+
 [tool.mypy]
 disallow_untyped_calls = true
 disallow_untyped_defs = true

diff --git a/test/conftest.py b/test/conftest.py
@@ -10,6 +10,11 @@ def data_dir() -> Path:
     return Path(__file__).parent.resolve() / "data"
 
 
+@pytest.fixture(scope="session")
+def testing_cif_file(data_dir: Path) -> Path:
+    return data_dir / "8a6g-chromophore-removed.cif"
+
+
 @pytest.fixture(scope="session")
 def testing_pdb_file(data_dir: Path) -> Path:
     return data_dir / "8a6g-chromophore-removed.pdb"

diff --git a/test/functional/test_compute_difference_map.py b/test/functional/test_compute_difference_map.py
@@ -4,15 +4,18 @@
 import pytest
 import reciprocalspaceship as rs
 
+from unittest import mock
+
 from meteor.rsmap import Map
 from meteor.scripts import compute_difference_map
 from meteor.scripts.common import WeightMode
 from meteor.tv import TvDenoiseResult
+from meteor.utils import filter_common_indices
 
-# previous tests show 0.09 is max-negentropy
-compute_difference_map.TV_WEIGHTS_TO_SCAN = np.array([0.005, 0.01, 0.025, 0.5])
 
+TV_WEIGHTS_TO_SCAN = np.array([0.005, 0.01, 0.025, 0.5])
 
+@mock.patch("meteor.scripts.compute_difference_map.TV_WEIGHTS_TO_SCAN", TV_WEIGHTS_TO_SCAN)
 @pytest.mark.parametrize("kweight_mode", list(WeightMode))
 @pytest.mark.parametrize("tv_weight_mode", list(WeightMode))
 def test_script_produces_consistent_results(
@@ -22,6 +25,7 @@ def test_script_produces_consistent_results(
     testing_mtz_file: Path,
     tmp_path: Path,
 ) -> None:
+
     # for when WeightMode.fixed; these maximize negentropy in manual testing
     kweight_parameter = 0.05
     tv_weight = 0.01
@@ -40,7 +44,7 @@ def test_script_produces_consistent_results(
         "F_off",
         "--native-uncertainty-column",
         "SIGF_off",
-        "--pdb",
+        "--structure",
         str(testing_pdb_file),
         "-o",
         str(output_mtz),
@@ -98,9 +102,13 @@ def test_script_produces_consistent_results(
     reference_dataset = rs.read_mtz(str(testing_mtz_file))
     reference_amplitudes = reference_dataset["F_TV"]
 
-    # TODO: find intersection of miller indices, scale to one another, then test
+    result_amplitudes, reference_amplitudes = filter_common_indices(
+        result_map.amplitudes, reference_amplitudes
+    )
+    rho = np.corrcoef(result_amplitudes.to_numpy(), reference_amplitudes.to_numpy())[0, 1]
 
-    # np.testing.assert_approx_equal(
-    #     result_map.amplitudes.to_numpy(),
-    #     reference_amplitudes.to_numpy()
-    # )
+    # comparing a correlation coefficienct allows for a global scale factor change, but nothing else
+    if (kweight_mode == WeightMode.none) or (tv_weight_mode == WeightMode.none):  # noqa: PLR1714
+        assert rho > 0.50
+    else:
+        assert rho > 0.98
diff --git a/test/unit/scripts/conftest.py b/test/unit/scripts/conftest.py
@@ -29,7 +29,7 @@ def base_cli_arguments(fixed_kparameter: float) -> list[str]:
         "--derivative-uncertainty-column",
         "SIGF",
         "fake-native.mtz",
-        "--pdb",
+        "--structure",
         "fake.pdb",
         "-o",
         "fake-output.mtz",

diff --git a/test/unit/scripts/test_common.py b/test/unit/scripts/test_common.py
@@ -53,7 +53,7 @@ def test_diffmap_argparser_parse_args(base_cli_arguments: list[str]) -> None:
     assert args.native_mtz == Path("fake-native.mtz")
     assert args.native_amplitude_column == "infer"
     assert args.native_uncertainty_column == "infer"
-    assert args.pdb == Path("fake.pdb")
+    assert args.structure == Path("fake.pdb")
     assert args.mtzout == Path("fake-output.mtz")
     assert args.metadataout == Path("fake-output-metadata.csv")
     assert args.kweight_mode == WeightMode.fixed
@@ -121,7 +121,7 @@ def test_load_difference_maps(random_difference_map: Map, base_cli_arguments: li
     def return_a_map(*args: Any, **kwargs: Any) -> Map:
         return random_difference_map
 
-    mocked_fxn_1 = "meteor.scripts.common.pdb_to_calculated_map"
+    mocked_fxn_1 = "meteor.scripts.common.structure_file_to_calculated_map"
     mocked_fxn_2 = "meteor.scripts.common.DiffmapArgParser._construct_map"
 
     with mock.patch(mocked_fxn_1, return_a_map), mock.patch(mocked_fxn_2, return_a_map):

diff --git a/test/unit/scripts/test_compute_difference_map.py b/test/unit/scripts/test_compute_difference_map.py
@@ -51,7 +51,10 @@ def test_tv_diffmap_parser(parsed_tv_cli_args: argparse.Namespace) -> None:
 def test_make_requested_diffmap(
     mode: WeightMode, diffmap_set: DiffMapSet, fixed_kparameter: float
 ) -> None:
-    diffmap = make_requested_diffmap(
+    # ensure the two maps aren't exactly the same to prevent numerical issues
+    diffmap_set.derivative.amplitudes.iloc[0] += 1.0
+
+    diffmap, _ = make_requested_diffmap(
         mapset=diffmap_set, kweight_mode=mode, kweight_parameter=fixed_kparameter
     )
     assert len(diffmap) > 0
@@ -100,7 +103,7 @@ def mock_load_difference_maps(self: Any, args: argparse.Namespace) -> DiffMapSet
     cli_arguments = [
         "fake-derivative.mtz",
         "fake-native.mtz",
-        "--pdb",
+        "--structure",
         "fake.pdb",
         "-o",
         str(output_mtz_path),

diff --git a/test/unit/test_sfcalc.py b/test/unit/test_sfcalc.py
@@ -32,8 +32,26 @@ def test_sf_calc(structure: gemmi.Structure) -> None:
     assert np.any(calc_map.phases != 0.0)
 
 
-def test_pdb_to_calculated_map(testing_pdb_file: Path) -> None:
-    calc_map: Map = sfcalc.pdb_to_calculated_map(testing_pdb_file, high_resolution_limit=RESOLUTION)
+def test_cif_to_calculated_map(testing_cif_file: Path) -> None:
+    calc_map: Map = sfcalc.structure_file_to_calculated_map(
+        testing_cif_file, high_resolution_limit=RESOLUTION
+    )
+    npt.assert_allclose(calc_map.resolution_limits[1], RESOLUTION, atol=1e-5)
+
+    # CRYST1   51.990   62.910   72.030  90.00  90.00  90.00 P 21 21 21
+    assert calc_map.spacegroup == gemmi.find_spacegroup_by_name("P 21 21 21")
+    assert calc_map.cell == gemmi.UnitCell(
+        a=51.990, b=62.910, c=72.030, alpha=90, beta=90, gamma=90
+    )
+
+    assert np.any(calc_map.amplitudes != 0.0)
+    assert np.any(calc_map.phases != 0.0)
+
+
+def test_structure_file_to_calculated_map(testing_pdb_file: Path) -> None:
+    calc_map: Map = sfcalc.structure_file_to_calculated_map(
+        testing_pdb_file, high_resolution_limit=RESOLUTION
+    )
     npt.assert_allclose(calc_map.resolution_limits[1], RESOLUTION, atol=1e-5)
 
     # CRYST1   51.990   62.910   72.030  90.00  90.00  90.00 P 21 21 21
@@ -46,7 +64,7 @@ def test_pdb_to_calculated_map(testing_pdb_file: Path) -> None:
     assert np.any(calc_map.phases != 0.0)
 
 
-def test_pdb_to_calculated_map_fails_if_pdb_path_wrong() -> None:
+def test_structure_file_to_calculated_map_fails_if_pdb_path_wrong() -> None:
     not_a_valid_path = Path("not-a-valid-path")
     with pytest.raises(OSError, match="could not find file"):
-        sfcalc.pdb_to_calculated_map(not_a_valid_path, high_resolution_limit=RESOLUTION)
+        sfcalc.structure_file_to_calculated_map(not_a_valid_path, high_resolution_limit=RESOLUTION)
diff --git a/test/unit/test_tv.py b/test/unit/test_tv.py
@@ -35,6 +35,7 @@ def tv_denoise_result_source_data() -> dict:
         "map_sampling_used_for_tv": 5,
         "weights_scanned": [0.0, 1.0],
         "negentropy_at_weights": [0.0, 5.0],
+        "k_parameter_used": 0.0,
     }
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,6 +9,7 @@ __pycache__/ @@
     *.map
     *.mtz
     *.pdb
+    *.cif
     # C extensions
     *.so
@@ Expand Down @@