diff --git a/descent/targets/thermo.py b/descent/targets/thermo.py index 13f475b..4c87d2b 100644 --- a/descent/targets/thermo.py +++ b/descent/targets/thermo.py @@ -13,6 +13,7 @@ import pyarrow import pydantic import smee.mm +import smee.utils import torch from rdkit import Chem @@ -108,6 +109,10 @@ class SimulationConfig(pydantic.BaseModel): ..., description="Configuration for generating initial coordinates." ) + apply_hmr: bool = pydantic.Field( + False, description="Whether to apply hydrogen mass repartitioning." + ) + equilibrate: list[ smee.mm.MinimizationConfig | smee.mm.SimulationConfig ] = pydantic.Field(..., description="Configuration for equilibration simulations.") @@ -120,6 +125,15 @@ class SimulationConfig(pydantic.BaseModel): ) +class _Observables(typing.NamedTuple): + """Ensemble averages of the observables computed from a simulation.""" + + mean: dict[str, torch.Tensor] + """The mean value of each observable with ``shape=()``.""" + std: dict[str, torch.Tensor] + """The standard deviation of each observable with ``shape=()``.""" + + _SystemDict = dict[SimulationKey, smee.TensorSystem] @@ -399,17 +413,18 @@ def _simulate( config.equilibrate, config.production, [reporter], + config.apply_hmr, ) -def _compute_averages( +def _compute_observables( phase: Phase, key: SimulationKey, system: smee.TensorSystem, force_field: smee.TensorForceField, output_dir: pathlib.Path, cached_dir: pathlib.Path | None, -) -> dict[str, torch.Tensor]: +) -> _Observables: traj_hash = hashlib.sha256(pickle.dumps(key)).hexdigest() traj_name = f"{phase}-{traj_hash}-frames.msgpack" @@ -420,9 +435,12 @@ def _compute_averages( if cached_path is not None and cached_path.exists(): with contextlib.suppress(smee.mm.NotEnoughSamplesError): - return smee.mm.reweight_ensemble_averages( + means = smee.mm.reweight_ensemble_averages( system, force_field, cached_path, temperature, pressure ) + stds = {key: smee.utils.tensor_like(torch.nan, means[key]) for key in means} + + return _Observables(means, stds) if cached_path is not None: _LOGGER.debug(f"unable to re-weight {key}: data exists={cached_path.exists()}") @@ -432,80 +450,104 @@ def _compute_averages( config = default_config(phase, key.temperature, key.pressure) _simulate(system, force_field, config, output_path) - return smee.mm.compute_ensemble_averages( - system, force_field, output_path, temperature, pressure + return _Observables( + *smee.mm.compute_ensemble_averages( + system, force_field, output_path, temperature, pressure + ) ) def _predict_density( - entry: DataEntry, averages: dict[str, torch.Tensor] -) -> torch.Tensor: + entry: DataEntry, observables: _Observables +) -> tuple[torch.Tensor, torch.Tensor | None]: assert entry["units"] == "g/mL" - return averages["density"] + return observables.mean["density"], observables.std["density"] def _predict_hvap( entry: DataEntry, - averages_bulk: dict[str, torch.Tensor], - averages_vacuum: dict[str, torch.Tensor], + observables_bulk: _Observables, + observables_vacuum: _Observables, system_bulk: smee.TensorSystem, -) -> torch.Tensor: +) -> tuple[torch.Tensor, torch.Tensor]: assert entry["units"] == "kcal/mol" temperature = entry["temperature"] * openmm.unit.kelvin + n_mols = sum(system_bulk.n_copies) - potential_bulk = averages_bulk["potential_energy"] / sum(system_bulk.n_copies) - potential_vacuum = averages_vacuum["potential_energy"] + potential_bulk = observables_bulk.mean["potential_energy"] / n_mols + potential_bulk_std = observables_bulk.std["potential_energy"] / n_mols + + potential_vacuum = observables_vacuum.mean["potential_energy"] + potential_vacuum_std = observables_vacuum.std["potential_energy"] rt = (temperature * openmm.unit.MOLAR_GAS_CONSTANT_R).value_in_unit( openmm.unit.kilocalorie_per_mole ) - return potential_vacuum - potential_bulk + rt + + value = potential_vacuum - potential_bulk + rt + std = torch.sqrt(potential_vacuum_std**2 + potential_bulk_std**2) + + return value, std def _predict_hmix( entry: DataEntry, - averages_mix: dict[str, torch.Tensor], - averages_0: dict[str, torch.Tensor], - averages_1: dict[str, torch.Tensor], + observables_mix: _Observables, + observables_0: _Observables, + observables_1: _Observables, system_mix: smee.TensorSystem, system_0: smee.TensorSystem, system_1: smee.TensorSystem, -) -> torch.Tensor: +) -> tuple[torch.Tensor, torch.Tensor | None]: assert entry["units"] == "kcal/mol" - x_0 = system_mix.n_copies[0] / sum(system_mix.n_copies) + n_mols_mix = sum(system_mix.n_copies) + n_mols_0 = sum(system_0.n_copies) + n_mols_1 = sum(system_1.n_copies) + + x_0 = system_mix.n_copies[0] / n_mols_mix x_1 = 1.0 - x_0 - enthalpy_mix = averages_mix["enthalpy"] / sum(system_mix.n_copies) + enthalpy_mix = observables_mix.mean["enthalpy"] / n_mols_mix + enthalpy_mix_std = observables_mix.std["enthalpy"] / n_mols_mix - enthalpy_0 = averages_0["enthalpy"] / sum(system_0.n_copies) - enthalpy_1 = averages_1["enthalpy"] / sum(system_1.n_copies) + enthalpy_0 = observables_0.mean["enthalpy"] / n_mols_0 + enthalpy_0_std = observables_0.std["enthalpy"] / n_mols_0 + enthalpy_1 = observables_1.mean["enthalpy"] / n_mols_1 + enthalpy_1_std = observables_1.std["enthalpy"] / n_mols_1 - return enthalpy_mix - x_0 * enthalpy_0 - x_1 * enthalpy_1 + value = enthalpy_mix - x_0 * enthalpy_0 - x_1 * enthalpy_1 + std = torch.sqrt( + enthalpy_mix_std**2 + + x_0**2 * enthalpy_0_std**2 + + x_1**2 * enthalpy_1_std**2 + ) + + return value, std def _predict( entry: DataEntry, keys: dict[str, SimulationKey], - averages: dict[Phase, dict[SimulationKey, dict[str, torch.Tensor]]], + observables: dict[Phase, dict[SimulationKey, _Observables]], systems: dict[Phase, dict[SimulationKey, smee.TensorSystem]], -): +) -> tuple[torch.Tensor, torch.Tensor]: if entry["type"] == "density": - value = _predict_density(entry, averages["bulk"][keys["bulk"]]) + value = _predict_density(entry, observables["bulk"][keys["bulk"]]) elif entry["type"] == "hvap": value = _predict_hvap( entry, - averages["bulk"][keys["bulk"]], - averages["vacuum"][keys["vacuum"]], + observables["bulk"][keys["bulk"]], + observables["vacuum"][keys["vacuum"]], systems["bulk"][keys["bulk"]], ) elif entry["type"] == "hmix": value = _predict_hmix( entry, - averages["bulk"][keys["bulk"]], - averages["bulk"][keys["bulk_0"]], - averages["bulk"][keys["bulk_1"]], + observables["bulk"][keys["bulk"]], + observables["bulk"][keys["bulk_0"]], + observables["bulk"][keys["bulk_1"]], systems["bulk"][keys["bulk"]], systems["bulk"][keys["bulk_0"]], systems["bulk"][keys["bulk_1"]], @@ -523,7 +565,7 @@ def predict( output_dir: pathlib.Path, cached_dir: pathlib.Path | None = None, per_type_scales: dict[DataType, float] | None = None, -) -> tuple[torch.Tensor, torch.Tensor]: +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """Predict the properties in a dataset using molecular simulation, or by reweighting previous simulation data. @@ -542,9 +584,9 @@ def predict( entries: list[DataEntry] = [*descent.utils.dataset.iter_dataset(dataset)] required_simulations, entry_to_simulation = _plan_simulations(entries, topologies) - averages = { + observables = { phase: { - key: _compute_averages( + key: _compute_observables( phase, key, system, force_field, output_dir, cached_dir ) for key, system in systems.items() @@ -553,19 +595,29 @@ def predict( } predicted = [] + predicted_std = [] reference = [] + reference_std = [] per_type_scales = per_type_scales if per_type_scales is not None else {} for entry, keys in zip(entries, entry_to_simulation): - value = _predict(entry, keys, averages, required_simulations) + value, std = _predict(entry, keys, observables, required_simulations) + + type_scale = per_type_scales.get(entry["type"], 1.0) - predicted.append(value * per_type_scales.get(entry["type"], 1.0)) - reference.append( - torch.tensor(entry["value"]) * per_type_scales.get(entry["type"], 1.0) + predicted.append(value * type_scale) + predicted_std.append(torch.nan if std is None else std * abs(type_scale)) + + reference.append(entry["value"] * type_scale) + reference_std.append( + torch.nan if entry["std"] is None else entry["std"] * abs(type_scale) ) predicted = torch.stack(predicted) - reference = torch.stack(reference).to(predicted.device) + predicted_std = torch.stack(predicted_std) + + reference = smee.utils.tensor_like(reference, predicted) + reference_std = smee.utils.tensor_like(reference_std, predicted_std) - return reference, predicted + return reference, reference_std, predicted, predicted_std diff --git a/descent/tests/targets/test_thermo.py b/descent/tests/targets/test_thermo.py index c19c77c..1cebce0 100644 --- a/descent/tests/targets/test_thermo.py +++ b/descent/tests/targets/test_thermo.py @@ -3,13 +3,15 @@ import pytest import smee.mm import torch +import uncertainties.unumpy import descent.utils.dataset from descent.targets.thermo import ( DataEntry, SimulationKey, - _compute_averages, + _compute_observables, _convert_entry_to_system, + _Observables, _plan_simulations, _predict, _simulate, @@ -286,6 +288,7 @@ def test_simulation(tmp_cwd, mocker): config.equilibrate, config.production, [mocker.ANY], + False, ) assert expected_output.exists() @@ -301,8 +304,8 @@ def test_simulation(tmp_cwd, mocker): ) -def test_compute_averages_reweighted(tmp_cwd, mocker): - mock_result = mocker.Mock() +def test_compute_observables_reweighted(tmp_cwd, mocker): + mock_result = {"density": torch.randn(1)} mock_reweight = mocker.patch( "smee.mm.reweight_ensemble_averages", autospec=True, return_value=mock_result ) @@ -326,16 +329,17 @@ def test_compute_averages_reweighted(tmp_cwd, mocker): expected_path = cached_dir / f"{phase}-{expected_hash}-frames.msgpack" expected_path.touch() - result = _compute_averages(phase, key, mock_system, mock_ff, tmp_cwd, cached_dir) - assert result == mock_result + result = _compute_observables(phase, key, mock_system, mock_ff, tmp_cwd, cached_dir) + assert result.mean == mock_result + assert {*result.std} == {*result.mean} mock_reweight.assert_called_once_with( mock_system, mock_ff, expected_path, 298.15 * openmm.unit.kelvin, None ) -def test_compute_averages_simulated(tmp_cwd, mocker): - mock_result = mocker.Mock() +def test_compute_observables_simulated(tmp_cwd, mocker): + mock_result = {"density": torch.randn(1)}, {"density": torch.randn(1)} mocker.patch( "smee.mm.reweight_ensemble_averages", autospec=True, @@ -366,7 +370,7 @@ def test_compute_averages_simulated(tmp_cwd, mocker): expected_path = tmp_cwd / f"{phase}-{expected_hash}-frames.msgpack" expected_path.touch() - result = _compute_averages(phase, key, mock_system, mock_ff, tmp_cwd, cached_dir) + result = _compute_observables(phase, key, mock_system, mock_ff, tmp_cwd, cached_dir) assert result == mock_result mock_simulate.assert_called_once_with( @@ -381,13 +385,19 @@ def test_predict_density(mock_density_pure, mocker): topologies = {"CO": mocker.Mock()} key, system = _convert_entry_to_system(mock_density_pure, topologies, 123) - expected_result = mocker.Mock() + expected_result = torch.randn(1) + expected_std = torch.randn(1) - averages = {"bulk": {key: {"density": expected_result}}} + observables = { + "bulk": { + key: _Observables({"density": expected_result}, {"density": expected_std}) + } + } systems = {"bulk": {key: system}} - result = _predict(mock_density_pure, {"bulk": key}, averages, systems) + result, std = _predict(mock_density_pure, {"bulk": key}, observables, systems) assert result == expected_result + assert std == expected_std def test_predict_hvap(mock_hvap, mocker): @@ -401,11 +411,24 @@ def test_predict_hvap(mock_hvap, mocker): system_vacuum = smee.TensorSystem([topologies["CCCC"]], [1], False) potential_bulk = torch.tensor([7.0]) + potential_bulk_std = torch.randn(1).abs() + potential_vacuum = torch.tensor([3.0]) + potential_vacuum_std = torch.randn(1).abs() averages = { - "bulk": {key_bulk: {"potential_energy": potential_bulk}}, - "vacuum": {key_vaccum: {"potential_energy": potential_vacuum}}, + "bulk": { + key_bulk: _Observables( + {"potential_energy": potential_bulk}, + {"potential_energy": potential_bulk_std}, + ) + }, + "vacuum": { + key_vaccum: _Observables( + {"potential_energy": potential_vacuum}, + {"potential_energy": potential_vacuum_std}, + ) + }, } systems = {"bulk": {key_bulk: system_bulk}, "vacuum": {key_vaccum: system_vacuum}} keys = {"bulk": key_bulk, "vacuum": key_vaccum} @@ -414,10 +437,15 @@ def test_predict_hvap(mock_hvap, mocker): mock_hvap["temperature"] * openmm.unit.kelvin * openmm.unit.MOLAR_GAS_CONSTANT_R ).value_in_unit(openmm.unit.kilocalorie_per_mole) - expected = potential_vacuum - potential_bulk / n_mols + rt + expected = ( + uncertainties.ufloat(potential_vacuum, potential_vacuum_std) + - uncertainties.ufloat(potential_bulk, potential_bulk_std) / n_mols + + rt + ) - result = _predict(mock_hvap, keys, averages, systems) - assert result == pytest.approx(expected) + result, std = _predict(mock_hvap, keys, averages, systems) + assert result == pytest.approx(expected.n) + assert std == pytest.approx(expected.s) def test_predict_hmix(mock_hmix, mocker): @@ -437,25 +465,33 @@ def test_predict_hmix(mock_hmix, mocker): system_1 = smee.TensorSystem([topologies["CO"]], [n_mols], False) enthalpy_bulk = torch.tensor([16.0]) + enthalpy_bulk_std = torch.randn(1).abs() enthalpy_0 = torch.tensor([4.0]) + enthalpy_0_std = torch.randn(1).abs() enthalpy_1 = torch.tensor([3.0]) + enthalpy_1_std = torch.randn(1).abs() averages = { "bulk": { - key_bulk: {"enthalpy": enthalpy_bulk}, - key_0: {"enthalpy": enthalpy_0}, - key_1: {"enthalpy": enthalpy_1}, + key_bulk: _Observables( + {"enthalpy": enthalpy_bulk}, {"enthalpy": enthalpy_bulk_std} + ), + key_0: _Observables({"enthalpy": enthalpy_0}, {"enthalpy": enthalpy_0_std}), + key_1: _Observables({"enthalpy": enthalpy_1}, {"enthalpy": enthalpy_1_std}), }, } systems = {"bulk": {key_bulk: system_bulk, key_0: system_0, key_1: system_1}} keys = {"bulk": key_bulk, "bulk_0": key_0, "bulk_1": key_1} expected = ( - enthalpy_bulk / n_mols - 0.5 * enthalpy_0 / n_mols - 0.5 * enthalpy_1 / n_mols + uncertainties.ufloat(enthalpy_bulk, enthalpy_bulk_std) / n_mols + - 0.5 * uncertainties.ufloat(enthalpy_0, enthalpy_0_std) / n_mols + - 0.5 * uncertainties.ufloat(enthalpy_1, enthalpy_1_std) / n_mols ) - result = _predict(mock_hmix, keys, averages, systems) - assert result == pytest.approx(expected) + result, std = _predict(mock_hmix, keys, averages, systems) + assert result == pytest.approx(expected.n) + assert std == pytest.approx(expected.s) def test_predict(tmp_cwd, mock_density_pure, mocker): @@ -465,16 +501,19 @@ def test_predict(tmp_cwd, mock_density_pure, mocker): mock_ff = mocker.Mock() mock_density = torch.tensor(123.0) + mock_density_std = torch.tensor(0.456) mock_compute = mocker.patch( - "descent.targets.thermo._compute_averages", + "descent.targets.thermo._compute_observables", autospec=True, - return_value={"density": mock_density}, + return_value=_Observables( + {"density": mock_density}, {"density": mock_density_std} + ), ) mock_scale = 3.0 - y_ref, y_pred = predict( + y_ref, y_ref_std, y_pred, y_pred_std = predict( dataset, mock_ff, mock_topologies, tmp_cwd, None, {"density": mock_scale} ) @@ -493,10 +532,17 @@ def test_predict(tmp_cwd, mock_density_pure, mocker): ) expected_y_ref = torch.tensor([mock_density_pure["value"] * mock_scale]) + expected_y_ref_std = torch.tensor([mock_density_pure["std"] * mock_scale]) + expected_y_pred = torch.tensor([mock_density * mock_scale]) + expected_y_pred_std = torch.tensor([mock_density_std * mock_scale]) assert y_ref.shape == expected_y_ref.shape assert torch.allclose(y_ref, expected_y_ref) + assert y_ref_std.shape == expected_y_ref_std.shape + assert torch.allclose(y_ref_std, expected_y_ref_std) assert y_pred.shape == expected_y_pred.shape assert torch.allclose(y_pred, expected_y_pred) + assert y_pred_std.shape == expected_y_pred_std.shape + assert torch.allclose(y_pred_std, expected_y_pred_std) diff --git a/devtools/envs/base.yaml b/devtools/envs/base.yaml index 4c50f88..87355e7 100644 --- a/devtools/envs/base.yaml +++ b/devtools/envs/base.yaml @@ -9,7 +9,7 @@ dependencies: - pip # Core packages - - smee >=0.7.0 + - smee >=0.8.0 - pytorch - pydantic @@ -28,9 +28,15 @@ dependencies: - jupyter - nbconvert + # Training +# - ray-default + - tensorboard + - tensorboardX + # Dev / Testing - ambertools - rdkit + - uncertainties - versioneer