diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 31ba0b1..0aac474 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -2,10 +2,8 @@ name: Publish Documentation on: push: - branches: - - main - tags: - - '*' + branches: ["main"] + tags: ["*"] jobs: deploy-docs: @@ -43,11 +41,12 @@ jobs: git config --global --add safe.directory "$PWD" git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} + git pull origin gh-pages git fetch --all --prune make env sed -i 's/# extensions/extensions/' mkdocs.yml + make docs-insiders INSIDER_DOCS_TOKEN="${INSIDER_DOCS_TOKEN}" - make docs-insiders INSIDER_DOCS_TOKEN="${INSIDER_DOCS_TOKEN}" make docs-deploy VERSION="$VERSION" diff --git a/descent/optim/__init__.py b/descent/optim/__init__.py index 8ec8e9a..5953cf0 100644 --- a/descent/optim/__init__.py +++ b/descent/optim/__init__.py @@ -1,5 +1,5 @@ """Custom parameter optimizers.""" -from descent.optim._lm import LevenbergMarquardtConfig, levenberg_marquardt +from descent.optim._lm import ClosureFn, LevenbergMarquardtConfig, levenberg_marquardt -__all__ = ["LevenbergMarquardtConfig", "levenberg_marquardt"] +__all__ = ["ClosureFn", "LevenbergMarquardtConfig", "levenberg_marquardt"] diff --git a/descent/targets/dimers.py b/descent/targets/dimers.py index 0a20639..9cc06cb 100644 --- a/descent/targets/dimers.py +++ b/descent/targets/dimers.py @@ -12,12 +12,15 @@ import tqdm import descent.utils.dataset +import descent.utils.loss import descent.utils.molecule import descent.utils.reporting if typing.TYPE_CHECKING: import pandas + import descent.train + EnergyFn = typing.Callable[ ["pandas.DataFrame", tuple[str, ...], torch.Tensor], torch.Tensor @@ -277,6 +280,32 @@ def predict( return torch.cat(reference), torch.cat(predicted) +def default_closure( + trainable: "descent.train.Trainable", + topologies: dict[str, smee.TensorTopology], + dataset: datasets.Dataset, +): + """Return a default closure function for training against dimer energies. + + Args: + trainable: The wrapper around trainable parameters. + topologies: The topologies of the molecules present in the dataset, with keys + of mapped SMILES patterns. + dataset: The dataset to train against. + + Returns: + The default closure function. 
+ """ + + def loss_fn(_x: torch.Tensor) -> torch.Tensor: + y_ref, y_pred = descent.targets.dimers.predict( + dataset, trainable.to_force_field(_x), topologies + ) + return ((y_pred - y_ref) ** 2).sum() + + return descent.utils.loss.to_closure(loss_fn) + + def _plot_energies(energies: dict[str, torch.Tensor]) -> str: from matplotlib import pyplot diff --git a/descent/targets/thermo.py b/descent/targets/thermo.py index 57f93d0..462c1a7 100644 --- a/descent/targets/thermo.py +++ b/descent/targets/thermo.py @@ -16,9 +16,15 @@ import smee.mm import smee.utils import torch -from rdkit import Chem +import descent.optim import descent.utils.dataset +import descent.utils.loss +import descent.utils.molecule + +if typing.TYPE_CHECKING: + import descent.train + _LOGGER = logging.getLogger(__name__) @@ -138,24 +144,6 @@ class _Observables(typing.NamedTuple): _SystemDict = dict[SimulationKey, smee.TensorSystem] -def _map_smiles(smiles: str) -> str: - """Add atom mapping to a SMILES string if it is not already present.""" - params = Chem.SmilesParserParams() - params.removeHs = False - - mol = Chem.AddHs(Chem.MolFromSmiles(smiles, params)) - - map_idxs = sorted(atom.GetAtomMapNum() for atom in mol.GetAtoms()) - - if map_idxs == list(range(1, len(map_idxs) + 1)): - return smiles - - for i, atom in enumerate(mol.GetAtoms()): - atom.SetAtomMapNum(i + 1) - - return Chem.MolToSmiles(mol) - - def create_dataset(*rows: DataEntry) -> datasets.Dataset: """Create a dataset from a list of existing data points. @@ -167,12 +155,12 @@ def create_dataset(*rows: DataEntry) -> datasets.Dataset: """ for row in rows: - row["smiles_a"] = _map_smiles(row["smiles_a"]) + row["smiles_a"] = descent.utils.molecule.map_smiles(row["smiles_a"]) if row["smiles_b"] is None: continue - row["smiles_b"] = _map_smiles(row["smiles_b"]) + row["smiles_b"] = descent.utils.molecule.map_smiles(row["smiles_b"]) # TODO: validate rows table = pyarrow.Table.from_pylist([*rows], schema=DATA_SCHEMA) @@ -582,6 +570,7 @@ def predict( output_dir: pathlib.Path, cached_dir: pathlib.Path | None = None, per_type_scales: dict[DataType, float] | None = None, + verbose: bool = False, ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """Predict the properties in a dataset using molecular simulation, or by reweighting previous simulation data. @@ -596,6 +585,7 @@ def predict( from. per_type_scales: The scale factor to apply to each data type. A default of 1.0 will be used for any data type not specified. + verbose: Whether to log additional information. 
""" entries: list[DataEntry] = [*descent.utils.dataset.iter_dataset(dataset)] @@ -616,6 +606,8 @@ def predict( reference = [] reference_std = [] + verbose_rows = [] + per_type_scales = per_type_scales if per_type_scales is not None else {} for entry, keys in zip(entries, entry_to_simulation): @@ -631,6 +623,29 @@ def predict( torch.nan if entry["std"] is None else entry["std"] * abs(type_scale) ) + if verbose: + std_ref = "" if entry["std"] is None else f" ± {float(entry['std']):.3f}" + + verbose_rows.append( + { + "type": f'{entry["type"]} [{entry["units"]}]', + "smiles_a": descent.utils.molecule.unmap_smiles(entry["smiles_a"]), + "smiles_b": ( + "" + if entry["smiles_b"] is None + else descent.utils.molecule.unmap_smiles(entry["smiles_b"]) + ), + "pred": f"{float(value):.3f} ± {float(std):.3f}", + "ref": f"{float(entry['value']):.3f}{std_ref}", + } + ) + + if verbose: + import pandas + + _LOGGER.info(f"predicted {len(entries)} properties") + _LOGGER.info("\n" + pandas.DataFrame(verbose_rows).to_string(index=False)) + predicted = torch.stack(predicted) predicted_std = torch.stack(predicted_std) @@ -638,3 +653,53 @@ def predict( reference_std = smee.utils.tensor_like(reference_std, predicted_std) return reference, reference_std, predicted, predicted_std + + +def default_closure( + trainable: "descent.train.Trainable", + topologies: dict[str, smee.TensorTopology], + dataset: datasets.Dataset, + per_type_scales: dict[DataType, float] | None = None, + verbose: bool = False, +) -> descent.optim.ClosureFn: + """Return a default closure function for training against thermodynamic + properties. + + Args: + trainable: The wrapper around trainable parameters. + topologies: The topologies of the molecules present in the dataset, with keys + of mapped SMILES patterns. + dataset: The dataset to train against. + per_type_scales: The scale factor to apply to each data type. + verbose: Whether to log additional information about predictions. + + Returns: + The default closure function. 
+ """ + + def closure_fn( + x: torch.Tensor, + compute_gradient: bool, + compute_hessian: bool, + ): + force_field = trainable.to_force_field(x) + + y_ref, _, y_pred, _ = descent.targets.thermo.predict( + dataset, + force_field, + topologies, + pathlib.Path.cwd(), + None, + per_type_scales, + verbose, + ) + loss, gradient, hessian = ((y_pred - y_ref) ** 2).sum(), None, None + + if compute_hessian: + hessian = descent.utils.loss.approximate_hessian(x, y_pred) + if compute_gradient: + gradient = torch.autograd.grad(loss, x, retain_graph=True)[0].detach() + + return loss.detach(), gradient, hessian + + return closure_fn diff --git a/descent/tests/targets/test_dimers.py b/descent/tests/targets/test_dimers.py index 45fa9d9..879d742 100644 --- a/descent/tests/targets/test_dimers.py +++ b/descent/tests/targets/test_dimers.py @@ -11,6 +11,7 @@ compute_dimer_energy, create_dataset, create_from_des, + default_closure, extract_smiles, predict, report, @@ -184,6 +185,36 @@ def test_predict(mock_dimer, mocker): ) +def test_default_closure(mock_dimer, mocker): + dataset = create_dataset([mock_dimer]) + + mock_x = torch.tensor([2.0], requires_grad=True) + + mock_y_pred = torch.tensor([3.0, 4.0]) * mock_x + mock_y_ref = torch.Tensor([-1.23, 4.56]) + + mocker.patch( + "descent.targets.dimers.predict", + autospec=True, + return_value=(mock_y_ref, mock_y_pred), + ) + mock_topologies = { + mock_dimer["smiles_a"]: mocker.MagicMock(), + mock_dimer["smiles_b"]: mocker.MagicMock(), + } + mock_trainable = mocker.MagicMock() + + closure_fn = default_closure(mock_trainable, mock_topologies, dataset) + + expected_loss = (mock_y_pred - mock_y_ref).pow(2).sum() + + loss, grad, hess = closure_fn(mock_x, compute_gradient=True, compute_hessian=True) + + assert torch.isclose(loss, expected_loss) + assert grad.shape == mock_x.shape + assert hess.shape == (1, 1) + + def test_report(tmp_cwd, mock_dimer, mocker): dataset = create_dataset([mock_dimer]) diff --git a/descent/tests/targets/test_thermo.py b/descent/tests/targets/test_thermo.py index 89473b9..6abc9b4 100644 --- a/descent/tests/targets/test_thermo.py +++ b/descent/tests/targets/test_thermo.py @@ -11,12 +11,12 @@ SimulationKey, _compute_observables, _convert_entry_to_system, - _map_smiles, _Observables, _plan_simulations, _predict, _simulate, create_dataset, + default_closure, default_config, extract_smiles, predict, @@ -91,21 +91,6 @@ def mock_hmix() -> DataEntry: } -@pytest.mark.parametrize( - "smiles, expected", - [ - ("C", "[C:1]([H:2])([H:3])([H:4])[H:5]"), - ("[CH4:1]", "[C:1]([H:2])([H:3])([H:4])[H:5]"), - ("[Cl:1][H:2]", "[Cl:1][H:2]"), - ("[Cl:2][H:1]", "[Cl:2][H:1]"), - ("[Cl:2][H:2]", "[Cl:1][H:2]"), - ("[Cl:1][H]", "[Cl:1][H:2]"), - ], -) -def test_map_smiles(smiles, expected): - assert _map_smiles(smiles) == expected - - def test_create_dataset(mock_density_pure, mock_density_binary): expected_entries = [mock_density_pure, mock_density_binary] @@ -533,7 +518,13 @@ def test_predict(tmp_cwd, mock_density_pure, mocker): mock_scale = 3.0 y_ref, y_ref_std, y_pred, y_pred_std = predict( - dataset, mock_ff, mock_topologies, tmp_cwd, None, {"density": mock_scale} + dataset, + mock_ff, + mock_topologies, + tmp_cwd, + None, + {"density": mock_scale}, + verbose=True, ) mock_compute.assert_called_once_with( @@ -565,3 +556,30 @@ def test_predict(tmp_cwd, mock_density_pure, mocker): assert torch.allclose(y_pred, expected_y_pred) assert y_pred_std.shape == expected_y_pred_std.shape assert torch.allclose(y_pred_std, expected_y_pred_std) + + +def 
test_default_closure(tmp_cwd, mock_density_pure, mocker): + dataset = create_dataset(mock_density_pure) + + mock_x = torch.tensor([2.0], requires_grad=True) + + mock_y_pred = torch.tensor([3.0, 4.0]) * mock_x + mock_y_ref = torch.Tensor([-1.23, 4.56]) + + mocker.patch( + "descent.targets.thermo.predict", + autospec=True, + return_value=(mock_y_ref, None, mock_y_pred, None), + ) + mock_topologies = {mock_density_pure["smiles_a"]: mocker.MagicMock()} + mock_trainable = mocker.MagicMock() + + closure_fn = default_closure(mock_trainable, mock_topologies, dataset, None) + + expected_loss = (mock_y_pred - mock_y_ref).pow(2).sum() + + loss, grad, hess = closure_fn(mock_x, compute_gradient=True, compute_hessian=True) + + assert torch.isclose(loss, expected_loss) + assert grad.shape == mock_x.shape + assert hess.shape == (1, 1) diff --git a/descent/tests/utils/test_loss.py b/descent/tests/utils/test_loss.py index 61f2451..88535f0 100644 --- a/descent/tests/utils/test_loss.py +++ b/descent/tests/utils/test_loss.py @@ -1,6 +1,7 @@ +import pytest import torch -from descent.utils.loss import approximate_hessian, to_closure +from descent.utils.loss import approximate_hessian, combine_closures, to_closure def test_to_closure(): @@ -37,6 +38,39 @@ def mock_loss_fn(x: torch.Tensor, a: float, b: float) -> torch.Tensor: assert hess is None +def test_combine_closures(): + + def mock_closure_a(x_, compute_gradient, compute_hessian): + loss = x_[0] ** 2 + grad = 2 * x_[0] if compute_gradient else None + hess = 2.0 if compute_hessian else None + + return loss, grad, hess + + def mock_closure_b(x_, compute_gradient, compute_hessian): + loss = x_[0] ** 3 + grad = 3 * x_[0] ** 2 if compute_gradient else None + hess = 6 * x_[0] if compute_hessian else None + + return loss, grad, hess + + closures = {"a": mock_closure_a, "b": mock_closure_b} + + weights = {"a": 1.0, "b": 2.0} + + combined_closure_fn = combine_closures(closures, weights, verbose=True) + + x = torch.tensor([2.0], requires_grad=True) + + loss, grad, hess = combined_closure_fn( + x, compute_gradient=True, compute_hessian=True + ) + + assert loss == pytest.approx(2.0**2 + 2.0 * 2.0**3) + assert grad == pytest.approx(2 * 2.0 + 2.0 * 3 * 2.0**2) + assert hess == pytest.approx(2.0 + 2.0 * 6 * 2.0) + + def test_approximate_hessian(): x = torch.tensor([1.0, 2.0, 3.0, 4.0], requires_grad=True) y_pred = 5.0 * x**2 + 3.0 * x + 2.0 diff --git a/descent/tests/utils/test_molecule.py b/descent/tests/utils/test_molecule.py index 844f677..d78063a 100644 --- a/descent/tests/utils/test_molecule.py +++ b/descent/tests/utils/test_molecule.py @@ -1,7 +1,7 @@ import pytest from rdkit import Chem -from descent.utils.molecule import mol_to_smiles +from descent.utils.molecule import map_smiles, mol_to_smiles, unmap_smiles @pytest.mark.parametrize( @@ -16,3 +16,27 @@ def test_mol_to_smiles(input_smiles, expected_smiles, canonical): actual_smiles = mol_to_smiles(mol, canonical) assert actual_smiles == expected_smiles + + +def test_unmap_smiles(): + smiles = "[H:1][C:4]([H:2])([O:3][H:5])[H:6]" + unmapped_smiles = unmap_smiles(smiles) + + assert unmapped_smiles == "CO" + + +@pytest.mark.parametrize( + "smiles, expected", + [ + ("C", "[C:1]([H:2])([H:3])([H:4])[H:5]"), + ("[CH4:1]", "[C:1]([H:2])([H:3])([H:4])[H:5]"), + ("[Cl:1][H:2]", "[Cl:1][H:2]"), + ("[Cl:2][H:1]", "[Cl:2][H:1]"), + ("[Cl:2][H:2]", "[Cl:1][H:2]"), + ("[Cl:1][H]", "[Cl:1][H:2]"), + ("[H:1][C:4]([H:2])([O:3][H:5])[H:6]", "[H:1][C:4]([H:2])([O:3][H:5])[H:6]"), + ("CO", "[C:1]([O:2][H:6])([H:3])([H:4])[H:5]"), + ], 
+) +def test_map_smiles(smiles, expected): + assert map_smiles(smiles) == expected diff --git a/descent/utils/loss.py b/descent/utils/loss.py index 240037d..79df64b 100644 --- a/descent/utils/loss.py +++ b/descent/utils/loss.py @@ -1,6 +1,7 @@ """Utilities for defining loss functions.""" import functools +import logging import typing import torch @@ -11,6 +12,8 @@ ] P = typing.ParamSpec("P") +_LOGGER = logging.getLogger(__name__) + def to_closure( loss_fn: typing.Callable[typing.Concatenate[torch.Tensor, P], torch.Tensor], @@ -54,6 +57,78 @@ def closure_fn( return closure_fn +def combine_closures( + closures: dict[str, ClosureFn], + weights: dict[str, float] | None = None, + verbose: bool = False, +) -> ClosureFn: + """Combine multiple closures into a single closure. + + Args: + closures: A dictionary of closure functions. + weights: Optional dictionary of weights for each closure function. + verbose: Whether to log the loss of each closure function. + + Returns: + A combined closure function. + """ + + weights = weights if weights is not None else {name: 1.0 for name in closures} + + if len(closures) == 0: + raise NotImplementedError("At least one closure function is required.") + + if {*closures} != {*weights}: + raise ValueError("The closures and weights must have the same keys.") + + def combined_closure_fn( + x: torch.Tensor, compute_gradient: bool, compute_hessian: bool + ) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]: + + loss = [] + grad = None if not compute_gradient else [] + hess = None if not compute_hessian else [] + + verbose_rows = [] + + for name, closure_fn in closures.items(): + + local_loss, local_grad, local_hess = closure_fn( + x, compute_gradient, compute_hessian + ) + + loss.append(weights[name] * local_loss) + + if compute_gradient: + grad.append(weights[name] * local_grad) + if compute_hessian: + hess.append(weights[name] * local_hess) + + if verbose: + verbose_rows.append( + {"target": name, "loss": float(f"{local_loss:.5f}")} + ) + + loss = sum(loss[1:], loss[0]) + + if compute_gradient: + grad = sum(grad[1:], grad[0]).detach() + if compute_hessian: + hess = sum(hess[1:], hess[0]).detach() + + if verbose: + import pandas + + _LOGGER.info( + "loss breakdown:\n" + + pandas.DataFrame(verbose_rows).to_string(index=False) + ) + + return loss.detach(), grad, hess + + return combined_closure_fn + + def approximate_hessian(x: torch.Tensor, y_pred: torch.Tensor): """Compute the outer product approximation of the hessian of a least squares loss function of the sum ``sum((y_pred - y_ref)**2)``. diff --git a/descent/utils/molecule.py b/descent/utils/molecule.py index fce1191..5d40c40 100644 --- a/descent/utils/molecule.py +++ b/descent/utils/molecule.py @@ -27,3 +27,52 @@ def mol_to_smiles(mol: "Chem.Mol", canonical: bool = True) -> str: atom.SetAtomMapNum(atom.GetIdx() + 1) return Chem.MolToSmiles(mol) + + +def unmap_smiles(smiles: str) -> str: + """Remove atom mapping from a SMILES string. + + Args: + smiles: The SMILES string to unmap. + + Returns: + The unmapped SMILES string. + """ + from rdkit import Chem + + mol = Chem.MolFromSmiles(smiles) + + for atom in mol.GetAtoms(): + atom.SetAtomMapNum(0) + + return Chem.MolToSmiles(mol) + + +def map_smiles(smiles: str) -> str: + """Add atom mapping to a SMILES string. + + Notes: + Fully mapped SMILES strings are returned as-is. + + Args: + smiles: The SMILES string to add map indices to. + + Returns: + The mapped SMILES string. 
+ """ + from rdkit import Chem + + params = Chem.SmilesParserParams() + params.removeHs = False + + mol = Chem.AddHs(Chem.MolFromSmiles(smiles, params)) + + map_idxs = sorted(atom.GetAtomMapNum() for atom in mol.GetAtoms()) + + if map_idxs == list(range(1, len(map_idxs) + 1)): + return smiles + + for i, atom in enumerate(mol.GetAtoms()): + atom.SetAtomMapNum(i + 1) + + return Chem.MolToSmiles(mol) diff --git a/devtools/envs/base.yaml b/devtools/envs/base.yaml index 7cf0eba..f01e335 100644 --- a/devtools/envs/base.yaml +++ b/devtools/envs/base.yaml @@ -9,7 +9,7 @@ dependencies: - pip # Core packages - - smee >=0.8.0 + - smee >=0.10.0 - pytorch - pydantic diff --git a/mkdocs.yml b/mkdocs.yml index 180e6a9..6e279c4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -5,7 +5,7 @@ repo_url: "https://github.com/SimonBoothroyd/descent" repo_name: "SimonBoothroyd/descent" site_dir: "site" watch: [mkdocs.yml, README.md, descent/, docs] -copyright: Copyright © 2023 Simon Boothroyd +copyright: Copyright © 2024 Simon Boothroyd edit_uri: edit/main/docs/ validation: @@ -13,6 +13,10 @@ validation: absolute_links: warn unrecognized_links: warn +extra: + version: + provider: mike + nav: - Home: - Overview: index.md @@ -36,10 +40,6 @@ theme: - search.suggest - toc.follow palette: - - media: "(prefers-color-scheme)" - toggle: - icon: material/brightness-auto - name: Switch to light mode - media: "(prefers-color-scheme: light)" scheme: default primary: teal @@ -53,10 +53,11 @@ theme: accent: lime toggle: icon: material/weather-night - name: Switch to system preference + name: Switch to light mode markdown_extensions: - attr_list +- md_in_html - def_list - admonition - footnotes @@ -91,6 +92,8 @@ plugins: handlers: python: paths: [descent/] + import: + - http://docs.openmm.org/latest/api-python/objects.inv options: # extensions: [ griffe_pydantic ] docstring_options: @@ -98,7 +101,6 @@ plugins: returns_multiple_items: false returns_named_value: false docstring_section_style: list - filters: ["!^_"] heading_level: 1 inherited_members: true merge_init_into_class: true @@ -108,6 +110,7 @@ plugins: show_signature_annotations: true show_symbol_type_heading: true show_symbol_type_toc: true + show_if_no_docstring: false signature_crossrefs: true summary: true members_order: source