Skip to content

Commit

Permalink
Merge branch 'master' into tjlane/diffmap_script-refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
tjlane committed Oct 24, 2024
2 parents ecdf676 + d71405a commit 25d96e9
Show file tree
Hide file tree
Showing 27 changed files with 3,214 additions and 98 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
test/data/scaled-test-data.mtz filter=lfs diff=lfs merge=lfs -text
4 changes: 3 additions & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ jobs:
python-version: ["3.11"]

steps:
- uses: actions/checkout@v4
- name: Checkout
uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/mypy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ jobs:
python-version: ["3.11"]

steps:
- uses: actions/checkout@v4
- name: Checkout
uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
Expand Down
8 changes: 6 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@ jobs:
python-version: ["3.11"]

steps:
- uses: actions/checkout@v4
- name: Checkout
uses: actions/checkout@v4
with:
lfs: true

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@
__pycache__/
*.py[cod]
*$py.class

# large crystallography files -- some of which are used in tests
# large files are accessed via github LFS
*.ccp4
*.map
*.mtz
*.pdb

# C extensions
*.so
Expand Down
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@ Map Enhancement Tools for Ephemeral Occupancy Refinement
[![Pytest](https://github.com/alisiafadini/meteor/actions/workflows/tests.yml/badge.svg)](https://github.com/your_username/your_repo/actions/workflows/tests.yml)
[![Mypy](https://github.com/alisiafadini/meteor/actions/workflows/mypy.yml/badge.svg)](https://github.com/your_username/your_repo/actions/workflows/mypy.yml)
[![Ruff](https://github.com/alisiafadini/meteor/actions/workflows/lint.yml/badge.svg)](https://github.com/your_username/your_repo/actions/workflows/lint.yml)
[![codecov](https://codecov.io/gh/your_username/your_repo/branch/main/graph/badge.svg)](https://codecov.io/gh/your_username/your_repo)

[![codecov](https://codecov.io/gh/alisiafadini/meteor/graph/badge.svg?token=NCYMP06MNS)](https://codecov.io/gh/alisiafadini/meteor)
Set of tools for crystallographic electron density maps containing low ligand/species populations.
76 changes: 76 additions & 0 deletions meteor/io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""https://www.ccp4.ac.uk/html/mtzformat.html
https://www.globalphasing.com/buster/wiki/index.cgi?MTZcolumns
"""

from __future__ import annotations

import re
from typing import Final

OBSERVED_INTENSITY_LABELS: Final[list[str]] = [
"I", # generic
"IMEAN", # CCP4
"I-obs", # phenix
]

OBSERVED_AMPLITUDE_LABELS: Final[list[str]] = [
"F", # generic
"FP", # CCP4 & GLPh native
r"FPH\d", # CCP4 derivative
"F-obs", # phenix
]

OBSERVED_UNCERTAINTY_LABELS: Final[list[str]] = [
"SIGF", # generic
"SIGFP", # CCP4 & GLPh native
r"SIGFPH\d", # CCP4
]

COMPUTED_AMPLITUDE_LABELS: Final[list[str]] = ["FC"]

COMPUTED_PHASE_LABELS: Final[list[str]] = ["PHIC"]


class AmbiguousMtzLabelError(ValueError): ...


def _infer_mtz_label(labels_to_search: list[str], labels_to_look_for: list[str]) -> str:
# the next line consumes ["FOO", "BAR", "BAZ"] and produces regex strings like "^(FOO|BAR|BAZ)$"
regex = re.compile(f"^({'|'.join(labels_to_look_for)})$")
matches = [regex.match(label) for label in labels_to_search if regex.match(label) is not None]

if len(matches) == 0:
msg = "cannot infer MTZ column name; "
msg += f"cannot find any of {labels_to_look_for} in {labels_to_search}"
raise AmbiguousMtzLabelError(msg)
if len(matches) > 1:
msg = "cannot infer MTZ column name; "
msg += f">1 instance of {labels_to_look_for} in {labels_to_search}"
raise AmbiguousMtzLabelError(msg)

[match] = matches
if match is None:
msg = "`None` not filtered during regex matching"
raise RuntimeError(msg)

return match.group(0)


def find_observed_intensity_label(mtz_column_labels: list[str]) -> str:
return _infer_mtz_label(mtz_column_labels, OBSERVED_INTENSITY_LABELS)


def find_observed_amplitude_label(mtz_column_labels: list[str]) -> str:
return _infer_mtz_label(mtz_column_labels, OBSERVED_AMPLITUDE_LABELS)


def find_observed_uncertainty_label(mtz_column_labels: list[str]) -> str:
return _infer_mtz_label(mtz_column_labels, OBSERVED_UNCERTAINTY_LABELS)


def find_computed_amplitude_label(mtz_column_labels: list[str]) -> str:
return _infer_mtz_label(mtz_column_labels, COMPUTED_AMPLITUDE_LABELS)


def find_computed_phase_label(mtz_column_labels: list[str]) -> str:
return _infer_mtz_label(mtz_column_labels, COMPUTED_PHASE_LABELS)
27 changes: 15 additions & 12 deletions meteor/rsmap.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from pathlib import Path
from typing import Any, ClassVar
from typing import Any, Callable, ClassVar, Literal, overload

import gemmi
import numpy as np
Expand Down Expand Up @@ -69,7 +69,7 @@ def __init__(
amplitude_column: str = "F",
phase_column: str = "PHI",
uncertainty_column: str | None = "SIGF",
**kwargs,
**kwargs: Any,
) -> None:
super().__init__(data=data, **kwargs)

Expand Down Expand Up @@ -99,8 +99,8 @@ def __init__(
self.uncertainties = self._verify_uncertainty_type(self.uncertainties, fix=True)

@property
def _constructor(self):
def constructor_fxn(*args, **kwargs):
def _constructor(self) -> Callable[[Any], Map]:
def constructor_fxn(*args: Any, **kwargs: Any) -> Map:
return Map(
*args,
amplitude_column=self._amplitude_column,
Expand All @@ -112,7 +112,7 @@ def constructor_fxn(*args, **kwargs):
return constructor_fxn

@property
def _constructor_sliced(self):
def _constructor_sliced(self) -> Callable[[Any], rs.DataSeries]:
return rs.DataSeries

def _verify_type(
Expand Down Expand Up @@ -175,7 +175,7 @@ def _verify_uncertainty_type(
name, uncertainty_dtypes, dataseries, fix=fix, cast_fix_to=rs.StandardDeviationDtype()
)

def __setitem__(self, key: str, value) -> None:
def __setitem__(self, key: str, value: Any) -> None:
allowed = list(self.columns) + self._allowed_columns
if key not in allowed:
msg = "column assignment not allowed for Map objects"
Expand All @@ -191,11 +191,14 @@ def insert(self, loc: int, column: str, value: Any, *, allow_duplicates: bool =
msg += "see also Map.set_uncertainties(...)"
raise MapMutabilityError(msg)

def drop(self, labels=None, *, axis=0, columns=None, inplace=False, **kwargs):
if (axis == 1) or (columns is not None):
msg = "columns are fixed for Map objects"
raise MapMutabilityError(msg)
return super().drop(labels=labels, axis=axis, columns=columns, inplace=inplace, **kwargs)
@overload
def drop(self, labels: Any = None, *, inplace: Literal[True]) -> None: ...

@overload
def drop(self, labels: Any = None, *, inplace: Literal[False]) -> Map: ...

def drop(self, labels: Any = None, *, inplace: bool = False) -> None | Map:
return super().drop(labels=labels, axis="index", columns=None, inplace=inplace)

def get_hkls(self) -> np.ndarray:
# overwrite rs implt'n, return w/o modifying self -> same behavior, under testing - @tjlane
Expand Down Expand Up @@ -272,7 +275,7 @@ def set_uncertainties(self, values: rs.DataSeries, column_name: str = "SIGF") ->
def complex_amplitudes(self) -> np.ndarray:
return self.amplitudes.to_numpy() * np.exp(1j * np.deg2rad(self.phases.to_numpy()))

def canonicalize_amplitudes(self):
def canonicalize_amplitudes(self) -> None:
canonicalize_amplitudes(
self,
amplitude_label=self._amplitude_column,
Expand Down
45 changes: 26 additions & 19 deletions meteor/scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ def _compute_anisotropic_scale_factors(
return anisotropic_scale_parameters[0] * np.exp(exponential_argument)


def _check_index_consistency(series1: rs.DataSeries, series2: rs.DataSeries) -> None:
if not series1.index.equals(series2.index):
msg = f"indices of `{series1.name}`, `{series2.name}` differ"
raise IndexError(msg)


def compute_scale_factors(
*,
reference_values: rs.DataSeries,
Expand Down Expand Up @@ -87,37 +93,38 @@ def compute_scale_factors(
----------
[1] SCALEIT https://www.ccp4.ac.uk/html/scaleit.html
"""
common_miller_indices = reference_values.index.intersection(values_to_scale.index)
common_miller_indices: pd.Index = reference_values.index.intersection(values_to_scale.index)

common_reference_values: np.ndarray = reference_values.loc[common_miller_indices].to_numpy()
common_values_to_scale: np.ndarray = values_to_scale.loc[common_miller_indices].to_numpy()

# if we are going to weight the scaling using the uncertainty values, then the weights will be
# inverse_sigma = 1 / sqrt{ sigmaA ** 2 + sigmaB ** 2 }
if reference_uncertainties is not None and to_scale_uncertainties is not None:
if not reference_uncertainties.index.equals(reference_values.index):
msg = "indices of `reference_uncertainties`, `reference_values` differ, cannot combine"
raise IndexError(msg)
if not to_scale_uncertainties.index.equals(values_to_scale.index):
msg = "indices of `to_scale_uncertainties`, `values_to_scale` differ, cannot combine"
raise IndexError(msg)

uncertainty_weights: np.ndarray
if (reference_uncertainties is not None) and (to_scale_uncertainties is not None):
_check_index_consistency(reference_values, reference_uncertainties)
_check_index_consistency(values_to_scale, to_scale_uncertainties)
uncertainty_weights = np.sqrt(
np.square(reference_uncertainties.loc[common_miller_indices])
+ np.square(to_scale_uncertainties.loc[common_miller_indices]),
np.square(reference_uncertainties.loc[common_miller_indices].to_numpy())
+ np.square(to_scale_uncertainties.loc[common_miller_indices].to_numpy()),
)

else:
uncertainty_weights = 1.0

common_reference_values = reference_values.loc[common_miller_indices]
common_values_to_scale = values_to_scale.loc[common_miller_indices]
uncertainty_weights = np.array(1.0)

def compute_residuals(scaling_parameters: ScaleParameters) -> np.ndarray:
scale_factors = _compute_anisotropic_scale_factors(
common_miller_indices,
scaling_parameters,
)
return uncertainty_weights * (
scale_factors * common_values_to_scale - common_reference_values
)

difference_after_scaling = scale_factors * common_values_to_scale - common_reference_values
residuals = uncertainty_weights * difference_after_scaling

if not isinstance(residuals, np.ndarray):
msg = "scipy optimizers' behavior is unstable unless `np.ndarray`s are used"
raise TypeError(msg)

return residuals

initial_scaling_parameters: ScaleParameters = (1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
optimization_result = opt.least_squares(compute_residuals, initial_scaling_parameters)
Expand Down
27 changes: 27 additions & 0 deletions meteor/sfcalc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from pathlib import Path

import gemmi

from .rsmap import Map


def structure_to_calculated_map(structure: gemmi.Structure, *, high_resolution_limit: float) -> Map:
density_map = gemmi.DensityCalculatorX()
density_map.d_min = high_resolution_limit
density_map.grid.setup_from(structure)
for i, _ in enumerate(structure):
density_map.put_model_density_on_grid(structure[i])

ccp4_map = gemmi.Ccp4Map()
ccp4_map.grid = density_map.grid
ccp4_map.update_ccp4_header()

return Map.from_ccp4_map(ccp4_map, high_resolution_limit=high_resolution_limit)


def pdb_to_calculated_map(pdb_file: Path, *, high_resolution_limit: float) -> Map:
if not pdb_file.exists():
msg = f"could not find file: {pdb_file}"
raise OSError(msg)
structure = gemmi.read_structure(str(pdb_file))
return structure_to_calculated_map(structure, high_resolution_limit=high_resolution_limit)
52 changes: 51 additions & 1 deletion meteor/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from dataclasses import dataclass

import gemmi
import numpy as np


Expand All @@ -12,10 +13,59 @@ class MapColumns:
uncertainty: str | None = None


def assert_phases_allclose(array1: np.ndarray, array2: np.ndarray, atol=1e-3):
def assert_phases_allclose(array1: np.ndarray, array2: np.ndarray, atol: float = 1e-3) -> None:
diff = array2 - array1
diff = (diff + 180) % 360 - 180
absolute_difference = np.sum(np.abs(diff)) / float(np.prod(diff.shape))
if not absolute_difference < atol:
msg = f"per element diff {absolute_difference} > tolerance {atol}"
raise AssertionError(msg)


def single_carbon_structure(
carbon_position: tuple[float, float, float],
space_group: gemmi.SpaceGroup,
unit_cell: gemmi.UnitCell,
) -> gemmi.Structure:
model = gemmi.Model("single_atom")
chain = gemmi.Chain("A")

residue = gemmi.Residue()
residue.name = "X"
residue.seqid = gemmi.SeqId("1")

atom = gemmi.Atom()
atom.name = "C"
atom.element = gemmi.Element("C")
atom.pos = gemmi.Position(*carbon_position)

residue.add_atom(atom)
chain.add_residue(residue)
model.add_chain(chain)

structure = gemmi.Structure()
structure.add_model(model)
structure.cell = unit_cell
structure.spacegroup_hm = space_group.hm

return structure


def single_carbon_density(
carbon_position: tuple[float, float, float],
space_group: gemmi.SpaceGroup,
unit_cell: gemmi.UnitCell,
high_resolution_limit: float,
) -> gemmi.Ccp4Map:
structure = single_carbon_structure(carbon_position, space_group, unit_cell)

density_map = gemmi.DensityCalculatorX()
density_map.d_min = high_resolution_limit
density_map.grid.setup_from(structure)
density_map.put_model_density_on_grid(structure[0])

ccp4_map = gemmi.Ccp4Map()
ccp4_map.grid = density_map.grid
ccp4_map.update_ccp4_header()

return ccp4_map
Loading

0 comments on commit 25d96e9

Please sign in to comment.