Skip to content

Commit

Permalink
Thermo dataset from evaluator (#67)
Browse files Browse the repository at this point in the history
  • Loading branch information
jthorton authored Jun 3, 2024
1 parent 9ff3a92 commit 6d4b216
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 0 deletions.
75 changes: 75 additions & 0 deletions descent/targets/thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,81 @@ def create_dataset(*rows: DataEntry) -> datasets.Dataset:
return dataset


def create_from_evaluator(dataset_file: pathlib.Path) -> datasets.Dataset:
    """Create a dataset from a JSON-serialized evaluator PhysicalPropertyDataSet.

    Each entry under the ``"properties"`` key of the file is converted into one
    row: temperature is stored in kelvin, pressure in atm, and the value and its
    uncertainty in the default units of the property type (``g/mL`` for
    densities, ``kcal/mol`` for enthalpies of mixing and vaporization).

    Args:
        dataset_file: The path to the evaluator dataset.

    Returns:
        The created dataset.

    Raises:
        KeyError: If a property has a type other than density, enthalpy of
            mixing or enthalpy of vaporization.
        NotImplementedError: If a substance has more than two components.
    """
    import json

    from openff.units import unit

    _evaluator_to_prop = {
        "openff.evaluator.properties.density.Density": "density",
        "openff.evaluator.properties.enthalpy.EnthalpyOfMixing": "hmix",
        "openff.evaluator.properties.enthalpy.EnthalpyOfVaporization": "hvap",
    }
    _prop_units = {"density": "g/mL", "hmix": "kcal/mol", "hvap": "kcal/mol"}

    def _quantity(raw):
        """Turn a serialized ``{"value": ..., "unit": ...}`` mapping into a quantity."""
        return raw["value"] * unit.Unit(raw["unit"])

    # Read inside a context manager so the file handle is closed as soon as the
    # JSON has been parsed rather than whenever the garbage collector runs.
    with dataset_file.open(encoding="utf-8") as file:
        property_data = json.load(file)

    properties: list[DataEntry] = []

    for phys_prop in property_data["properties"]:
        try:
            prop_type = _evaluator_to_prop[phys_prop["@type"]]
        except KeyError:
            raise KeyError(f"{phys_prop['@type']} not currently supported.") from None

        substance = phys_prop["substance"]
        components = substance["components"]

        if len(components) > 2:
            raise NotImplementedError("up to binary mixtures are currently supported")

        # The amounts are keyed by "<smiles>{<role>}"; only the first amount
        # listed for each component is read.
        smiles_and_amount = [
            (
                comp["smiles"],
                substance["amounts"][
                    comp["smiles"] + "{" + comp["role"]["value"] + "}"
                ][0]["value"],
            )
            for comp in components
        ]

        smiles_a, x_a = smiles_and_amount[0]
        # Pure substances have no second component.
        smiles_b, x_b = (
            smiles_and_amount[1] if len(smiles_and_amount) == 2 else (None, None)
        )

        state = phys_prop["thermodynamic_state"]
        temperature = _quantity(state["temperature"])
        pressure = _quantity(state["pressure"])
        value = _quantity(phys_prop["value"])
        std = _quantity(phys_prop["uncertainty"])

        default_units = unit.Unit(_prop_units[prop_type])

        properties.append(
            {
                "type": prop_type,
                "smiles_a": smiles_a,
                "x_a": x_a,
                "smiles_b": smiles_b,
                "x_b": x_b,
                "temperature": temperature.to(unit.kelvin).m,
                "pressure": pressure.to(unit.atm).m,
                "value": value.to(default_units).m,
                "units": _prop_units[prop_type],
                "std": std.to(default_units).m,
                "source": phys_prop["source"]["doi"],
            }
        )

    return create_dataset(*properties)


def extract_smiles(dataset: datasets.Dataset) -> list[str]:
"""Return a list of unique SMILES strings in the dataset.
Expand Down
1 change: 1 addition & 0 deletions descent/tests/data/evaluator_mock.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"properties": [{"id": "1", "substance": {"components": [{"smiles": "CCO", "role": {"value": "solv", "@type": "openff.evaluator.substances.components.Component.Role"}, "@type": "openff.evaluator.substances.components.Component"}, {"smiles": "O", "role": {"value": "solv", "@type": "openff.evaluator.substances.components.Component.Role"}, "@type": "openff.evaluator.substances.components.Component"}], "amounts": {"CCO{solv}": [{"value": 0.48268, "@type": "openff.evaluator.substances.amounts.MoleFraction"}], "O{solv}": [{"value": 0.51732, "@type": "openff.evaluator.substances.amounts.MoleFraction"}]}, "@type": "openff.evaluator.substances.substances.Substance"}, "phase": 2, "thermodynamic_state": {"temperature": {"value": 298.15, "unit": "kelvin", "@type": "openff.evaluator.unit.Quantity"}, "pressure": {"value": 101.3, "unit": "kilopascal", "@type": "openff.evaluator.unit.Quantity"}, "@type": "openff.evaluator.thermodynamics.ThermodynamicState"}, "value": {"value": 0.99, "unit": "gram / milliliter", "@type": "openff.evaluator.unit.Quantity"}, "uncertainty": {"value": 0.000505, "unit": "gram / milliliter", "@type": "openff.evaluator.unit.Quantity"}, "source": {"doi": "mock", "reference": "", "@type": "openff.evaluator.datasets.provenance.MeasurementSource"}, "gradients": [], "@type": "openff.evaluator.properties.density.Density"}]}
1 change: 1 addition & 0 deletions descent/tests/data/missing_property_evaluator.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"properties": [{"id": "1", "substance": {"components": [{"smiles": "CC(C)(C)O", "role": {"value": "solv", "@type": "openff.evaluator.substances.components.Component.Role"}, "@type": "openff.evaluator.substances.components.Component"}, {"smiles": "O", "role": {"value": "solv", "@type": "openff.evaluator.substances.components.Component.Role"}, "@type": "openff.evaluator.substances.components.Component"}], "amounts": {"CC(C)(C)O{solv}": [{"value": 0.48268, "@type": "openff.evaluator.substances.amounts.MoleFraction"}], "O{solv}": [{"value": 0.51732, "@type": "openff.evaluator.substances.amounts.MoleFraction"}]}, "@type": "openff.evaluator.substances.substances.Substance"}, "phase": 2, "thermodynamic_state": {"temperature": {"value": 298.15, "unit": "kelvin", "@type": "openff.evaluator.unit.Quantity"}, "pressure": {"value": 101.3, "unit": "kilopascal", "@type": "openff.evaluator.unit.Quantity"}, "@type": "openff.evaluator.thermodynamics.ThermodynamicState"}, "value": {"value": 0.99, "unit": "gram / milliliter", "@type": "openff.evaluator.unit.Quantity"}, "uncertainty": {"value": 0.000505, "unit": "gram / milliliter", "@type": "openff.evaluator.unit.Quantity"}, "source": {"doi": "mock", "reference": "", "@type": "openff.evaluator.datasets.provenance.MeasurementSource"}, "gradients": [], "@type": "openff.evaluator.properties.density.DielectricConstant"}]}
28 changes: 28 additions & 0 deletions descent/tests/targets/test_thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
_predict,
_simulate,
create_dataset,
create_from_evaluator,
default_closure,
default_config,
extract_smiles,
Expand Down Expand Up @@ -583,3 +584,30 @@ def test_default_closure(tmp_cwd, mock_density_pure, mocker):
assert torch.isclose(loss, expected_loss)
assert grad.shape == mock_x.shape
assert hess.shape == (1, 1)


def test_create_from_evaluator(data_dir):
    """Check the row built from the mock evaluator density data set."""
    mock_file = data_dir / "evaluator_mock.json"
    converted = create_from_evaluator(dataset_file=mock_file)

    rows = list(descent.utils.dataset.iter_dataset(converted))

    # The mock file states the pressure as 101.3 kilopascal; the row holds atm.
    reference = {
        "type": "density",
        "smiles_a": "[C:1]([C:2]([O:3][H:9])([H:7])[H:8])([H:4])([H:5])[H:6]",
        "x_a": 0.48268,
        "smiles_b": "[O:1]([H:2])[H:3]",
        "x_b": 0.51732,
        "temperature": 298.15,
        "pressure": 0.999753269183321,
        "value": 0.99,
        "units": "g/mL",
        "std": 0.000505,
        "source": "mock",
    }
    assert rows[0] == reference


def test_unsupported_property(data_dir):
    """Check that an unmapped evaluator property type raises ``KeyError``."""
    unsupported_file = data_dir / "missing_property_evaluator.json"

    with pytest.raises(KeyError):
        create_from_evaluator(dataset_file=unsupported_file)

0 comments on commit 6d4b216

Please sign in to comment.