Commit

Merge branch 'main' into report-multi-tops

jthorton authored Jun 3, 2024
2 parents d78f39a + feea215 commit bdea278
Showing 18 changed files with 417 additions and 127 deletions.
9 changes: 4 additions & 5 deletions .github/workflows/docs.yaml
@@ -2,10 +2,8 @@ name: Publish Documentation

on:
push:
branches:
- main
tags:
- '*'
branches: ["main"]
tags: ["*"]

jobs:
deploy-docs:
@@ -43,11 +41,12 @@ jobs:
git config --global --add safe.directory "$PWD"
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}
git pull origin gh-pages --allow-unrelated-histories
git fetch --all --prune
make env
sed -i 's/# extensions/extensions/' mkdocs.yml
make docs-insiders INSIDER_DOCS_TOKEN="${INSIDER_DOCS_TOKEN}"
make docs-insiders INSIDER_DOCS_TOKEN="${INSIDER_DOCS_TOKEN}"
make docs-deploy VERSION="$VERSION"
37 changes: 4 additions & 33 deletions .pre-commit-config.yaml
@@ -1,38 +1,9 @@
repos:
- repo: local
hooks:
- id: isort
name: "[Package] Import formatting"
- id: ruff
name: "[Package] Formatting"
language: system
entry: isort
entry: make
args: [ lint ]
files: \.py$

- id: black
name: "[Package] Code formatting"
language: system
entry: black
files: \.py$

- id: flake8
name: "[Package] Linting"
language: system
entry: flake8
files: \.py$

- id: isort-examples
name: "[Examples] Import formatting"
language: system
entry: nbqa isort
files: examples/.+\.ipynb$

- id: black-examples
name: "[Examples] Code formatting"
language: system
entry: nbqa black
files: examples/.+\.ipynb$

- id: flake8-examples
name: "[Examples] Linting"
language: system
entry: nbqa flake8 --ignore=E402
files: examples/.+\.ipynb$
23 changes: 9 additions & 14 deletions Makefile
@@ -1,4 +1,6 @@
PACKAGE_NAME := descent
PACKAGE_NAME := descent
PACKAGE_DIR := $(PACKAGE_NAME)

CONDA_ENV_RUN := conda run --no-capture-output --name $(PACKAGE_NAME)

.PHONY: pip-install env lint format test test-examples
@@ -13,23 +15,16 @@ env:
$(CONDA_ENV_RUN) pre-commit install || true

lint:
$(CONDA_ENV_RUN) isort --check-only $(PACKAGE_NAME)
$(CONDA_ENV_RUN) black --check $(PACKAGE_NAME)
$(CONDA_ENV_RUN) flake8 $(PACKAGE_NAME)
$(CONDA_ENV_RUN) nbqa isort --check-only examples
$(CONDA_ENV_RUN) nbqa black --check examples
$(CONDA_ENV_RUN) nbqa flake8 --ignore=E402 examples
$(CONDA_ENV_RUN) ruff check $(PACKAGE_DIR)

format:
$(CONDA_ENV_RUN) isort $(PACKAGE_NAME)
$(CONDA_ENV_RUN) black $(PACKAGE_NAME)
$(CONDA_ENV_RUN) flake8 $(PACKAGE_NAME)
$(CONDA_ENV_RUN) nbqa isort examples
$(CONDA_ENV_RUN) nbqa black examples
$(CONDA_ENV_RUN) nbqa flake8 --ignore=E402 examples
$(CONDA_ENV_RUN) ruff format $(PACKAGE_DIR)
$(CONDA_ENV_RUN) ruff check --fix --select I $(PACKAGE_DIR)
$(CONDA_ENV_RUN) nbqa 'ruff format' examples
$(CONDA_ENV_RUN) nbqa 'ruff check' --fix --select=I examples

test:
$(CONDA_ENV_RUN) pytest -v --cov=$(PACKAGE_NAME) --cov-report=xml --color=yes $(PACKAGE_NAME)/tests/
$(CONDA_ENV_RUN) pytest -v --cov=$(PACKAGE_NAME) --cov-report=xml --color=yes $(PACKAGE_DIR)/tests/

docs-build:
$(CONDA_ENV_RUN) mkdocs build
4 changes: 2 additions & 2 deletions descent/optim/__init__.py
@@ -1,5 +1,5 @@
"""Custom parameter optimizers."""

from descent.optim._lm import LevenbergMarquardtConfig, levenberg_marquardt
from descent.optim._lm import ClosureFn, LevenbergMarquardtConfig, levenberg_marquardt

__all__ = ["LevenbergMarquardtConfig", "levenberg_marquardt"]
__all__ = ["ClosureFn", "LevenbergMarquardtConfig", "levenberg_marquardt"]
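Note (not part of the diff): ClosureFn, newly re-exported here, is the closure signature consumed by descent.optim.levenberg_marquardt. The alias itself is defined in descent/optim/_lm.py and is not shown in this commit, so the sketch below infers its shape from the default_closure helpers added further down; treat the signature as an assumption.

    import torch

    def my_closure(
        x: torch.Tensor,
        compute_gradient: bool,
        compute_hessian: bool,
    ) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]:
        # toy quadratic loss so the example stands alone
        loss = (x**2).sum()

        gradient = 2.0 * x if compute_gradient else None  # d(loss)/dx
        hessian = 2.0 * torch.eye(len(x)) if compute_hessian else None  # d2(loss)/dx2

        return loss.detach(), gradient, hessian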
32 changes: 31 additions & 1 deletion descent/targets/dimers.py
@@ -12,12 +12,15 @@
import tqdm

import descent.utils.dataset
import descent.utils.loss
import descent.utils.molecule
import descent.utils.reporting

if typing.TYPE_CHECKING:
import pandas

import descent.train


EnergyFn = typing.Callable[
["pandas.DataFrame", tuple[str, ...], torch.Tensor], torch.Tensor
@@ -272,11 +275,38 @@ def predict(
*[
_predict(dimer, force_field, topologies)
for dimer in descent.utils.dataset.iter_dataset(dataset)
]
],
strict=True,
)
return torch.cat(reference), torch.cat(predicted)


def default_closure(
trainable: "descent.train.Trainable",
topologies: dict[str, smee.TensorTopology],
dataset: datasets.Dataset,
):
"""Return a default closure function for training against dimer energies.
Args:
trainable: The wrapper around trainable parameters.
topologies: The topologies of the molecules present in the dataset, with keys
of mapped SMILES patterns.
dataset: The dataset to train against.
Returns:
The default closure function.
"""

def loss_fn(_x: torch.Tensor) -> torch.Tensor:
y_ref, y_pred = descent.targets.dimers.predict(
dataset, trainable.to_force_field(_x), topologies
)
return ((y_pred - y_ref) ** 2).sum()

return descent.utils.loss.to_closure(loss_fn)


def _plot_energies(energies: dict[str, torch.Tensor]) -> str:
from matplotlib import pyplot

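Note (not part of the diff): a minimal usage sketch for the new dimers.default_closure. It assumes a descent.train.Trainable, a dict of smee.TensorTopology objects keyed by mapped SMILES, a dimer dataset built with create_dataset, and that the closure produced by descent.utils.loss.to_closure follows the (x, compute_gradient, compute_hessian) form expected by descent.optim.levenberg_marquardt.

    import datasets
    import smee
    import torch

    import descent.targets.dimers
    import descent.train


    def evaluate_dimer_closure(
        trainable: descent.train.Trainable,
        topologies: dict[str, smee.TensorTopology],
        dataset: datasets.Dataset,
        x0: torch.Tensor,  # flat tensor of trainable values, requires_grad=True
    ) -> torch.Tensor:
        """Illustrative only: build the default dimer closure and evaluate it once."""
        closure_fn = descent.targets.dimers.default_closure(trainable, topologies, dataset)

        # positional args: x, compute_gradient, compute_hessian
        loss, gradient, hessian = closure_fn(x0, True, True)
        return loss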
109 changes: 87 additions & 22 deletions descent/targets/thermo.py
@@ -16,9 +16,15 @@
import smee.mm
import smee.utils
import torch
from rdkit import Chem

import descent.optim
import descent.utils.dataset
import descent.utils.loss
import descent.utils.molecule

if typing.TYPE_CHECKING:
import descent.train


_LOGGER = logging.getLogger(__name__)

@@ -138,24 +144,6 @@ class _Observables(typing.NamedTuple):
_SystemDict = dict[SimulationKey, smee.TensorSystem]


def _map_smiles(smiles: str) -> str:
"""Add atom mapping to a SMILES string if it is not already present."""
params = Chem.SmilesParserParams()
params.removeHs = False

mol = Chem.AddHs(Chem.MolFromSmiles(smiles, params))

map_idxs = sorted(atom.GetAtomMapNum() for atom in mol.GetAtoms())

if map_idxs == list(range(1, len(map_idxs) + 1)):
return smiles

for i, atom in enumerate(mol.GetAtoms()):
atom.SetAtomMapNum(i + 1)

return Chem.MolToSmiles(mol)


def create_dataset(*rows: DataEntry) -> datasets.Dataset:
"""Create a dataset from a list of existing data points.
@@ -167,12 +155,12 @@ def create_dataset(*rows: DataEntry) -> datasets.Dataset:
"""

for row in rows:
row["smiles_a"] = _map_smiles(row["smiles_a"])
row["smiles_a"] = descent.utils.molecule.map_smiles(row["smiles_a"])

if row["smiles_b"] is None:
continue

row["smiles_b"] = _map_smiles(row["smiles_b"])
row["smiles_b"] = descent.utils.molecule.map_smiles(row["smiles_b"])

# TODO: validate rows
table = pyarrow.Table.from_pylist([*rows], schema=DATA_SCHEMA)
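Note (not part of the diff): create_dataset now defers to descent.utils.molecule.map_smiles rather than the local _map_smiles helper removed above. Assuming the relocated function keeps the same behaviour (hydrogens made explicit and atoms numbered 1..N when no complete mapping is present), a quick sanity check could look like this:

    from rdkit import Chem

    import descent.utils.molecule

    mapped = descent.utils.molecule.map_smiles("CO")  # methanol, no mapping yet

    params = Chem.SmilesParserParams()
    params.removeHs = False
    mol = Chem.MolFromSmiles(mapped, params)

    # every atom, including the explicit hydrogens, should carry a map index 1..N
    assert sorted(a.GetAtomMapNum() for a in mol.GetAtoms()) == list(
        range(1, mol.GetNumAtoms() + 1)
    )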
@@ -582,6 +570,7 @@ def predict(
output_dir: pathlib.Path,
cached_dir: pathlib.Path | None = None,
per_type_scales: dict[DataType, float] | None = None,
verbose: bool = False,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
"""Predict the properties in a dataset using molecular simulation, or by reweighting
previous simulation data.
@@ -596,6 +585,7 @@
from.
per_type_scales: The scale factor to apply to each data type. A default of 1.0
will be used for any data type not specified.
verbose: Whether to log additional information.
"""

entries: list[DataEntry] = [*descent.utils.dataset.iter_dataset(dataset)]
@@ -616,9 +606,11 @@
reference = []
reference_std = []

verbose_rows = []

per_type_scales = per_type_scales if per_type_scales is not None else {}

for entry, keys in zip(entries, entry_to_simulation):
for entry, keys in zip(entries, entry_to_simulation, strict=True):
value, std = _predict(entry, keys, observables, required_simulations)

type_scale = per_type_scales.get(entry["type"], 1.0)
@@ -631,10 +623,83 @@
torch.nan if entry["std"] is None else entry["std"] * abs(type_scale)
)

if verbose:
std_ref = "" if entry["std"] is None else f" ± {float(entry['std']):.3f}"

verbose_rows.append(
{
"type": f'{entry["type"]} [{entry["units"]}]',
"smiles_a": descent.utils.molecule.unmap_smiles(entry["smiles_a"]),
"smiles_b": (
""
if entry["smiles_b"] is None
else descent.utils.molecule.unmap_smiles(entry["smiles_b"])
),
"pred": f"{float(value):.3f} ± {float(std):.3f}",
"ref": f"{float(entry['value']):.3f}{std_ref}",
}
)

if verbose:
import pandas

_LOGGER.info(f"predicted {len(entries)} properties")
_LOGGER.info("\n" + pandas.DataFrame(verbose_rows).to_string(index=False))

predicted = torch.stack(predicted)
predicted_std = torch.stack(predicted_std)

reference = smee.utils.tensor_like(reference, predicted)
reference_std = smee.utils.tensor_like(reference_std, predicted_std)

return reference, reference_std, predicted, predicted_std
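Note (not part of the diff): a sketch of the new verbose reporting path in predict. The objects passed in are assumed to exist already, "density" is assumed to be a registered DataType, and the output directory and scale factor are purely illustrative.

    import pathlib

    import datasets
    import smee

    import descent.targets.thermo


    def predict_with_report(
        dataset: datasets.Dataset,
        force_field: smee.TensorForceField,
        topologies: dict[str, smee.TensorTopology],
    ):
        # with verbose=True the new code logs a per-entry table of predictions
        return descent.targets.thermo.predict(
            dataset,
            force_field,
            topologies,
            output_dir=pathlib.Path("thermo-outputs"),  # hypothetical output directory
            cached_dir=None,
            per_type_scales={"density": 100.0},  # hypothetical per-type scale
            verbose=True,
        )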


def default_closure(
trainable: "descent.train.Trainable",
topologies: dict[str, smee.TensorTopology],
dataset: datasets.Dataset,
per_type_scales: dict[DataType, float] | None = None,
verbose: bool = False,
) -> descent.optim.ClosureFn:
"""Return a default closure function for training against thermodynamic
properties.
Args:
trainable: The wrapper around trainable parameters.
topologies: The topologies of the molecules present in the dataset, with keys
of mapped SMILES patterns.
dataset: The dataset to train against.
per_type_scales: The scale factor to apply to each data type.
verbose: Whether to log additional information about predictions.
Returns:
The default closure function.
"""

def closure_fn(
x: torch.Tensor,
compute_gradient: bool,
compute_hessian: bool,
):
force_field = trainable.to_force_field(x)

y_ref, _, y_pred, _ = descent.targets.thermo.predict(
dataset,
force_field,
topologies,
pathlib.Path.cwd(),
None,
per_type_scales,
verbose,
)
loss, gradient, hessian = ((y_pred - y_ref) ** 2).sum(), None, None

if compute_hessian:
hessian = descent.utils.loss.approximate_hessian(x, y_pred)
if compute_gradient:
gradient = torch.autograd.grad(loss, x, retain_graph=True)[0].detach()

return loss.detach(), gradient, hessian

return closure_fn
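Note (not part of the diff): the thermo default_closure mirrors the dimer one but assembles the gradient and an approximate Hessian itself. A hedged sketch of constructing and evaluating it once, assuming the surrounding objects exist and that "density" is a valid DataType:

    import datasets
    import smee
    import torch

    import descent.targets.thermo
    import descent.train


    def evaluate_thermo_closure(
        trainable: descent.train.Trainable,
        topologies: dict[str, smee.TensorTopology],
        dataset: datasets.Dataset,
        x0: torch.Tensor,  # flat trainable values with requires_grad=True
    ) -> torch.Tensor:
        """Illustrative only: the returned closure is a ClosureFn and can be handed
        to descent.optim.levenberg_marquardt."""
        closure_fn = descent.targets.thermo.default_closure(
            trainable,
            topologies,
            dataset,
            per_type_scales={"density": 100.0},  # hypothetical scale factor
            verbose=False,
        )
        loss, gradient, hessian = closure_fn(x0, True, True)  # compute both grad and hess
        return loss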
5 changes: 3 additions & 2 deletions descent/tests/optim/test_lm.py
@@ -143,7 +143,8 @@ def test_damping_factor_loss_fn(mocker):


@pytest.mark.parametrize(
"n_convergence_criteria, n_convergence_steps, step_quality, expected_converged, expected_logs",
"n_convergence_criteria, n_convergence_steps, step_quality, expected_converged, "
"expected_logs",
[
(0, 2, 1.0, False, []),
(1, 2, 0.0, False, []),
@@ -283,7 +284,7 @@ def mock_loss_fn(_x, *_):
]
assert len(trust_radius_messages) == len(expected_messages)

for message, expected in zip(trust_radius_messages, expected_messages):
for message, expected in zip(trust_radius_messages, expected_messages, strict=True):
assert message.startswith(expected)

# mock_step_fn.assert_has_calls(expected_loss_traj, any_order=False)