Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Thermo dataset from evaluator #67

Merged
merged 5 commits into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions descent/targets/thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,79 @@ def create_dataset(*rows: DataEntry) -> datasets.Dataset:
return dataset


def create_from_evaluator(dataset_file: pathlib.Path) -> datasets.Dataset:
    """
    Create a dataset from an evaluator PhysicalPropertyDataSet

    Each entry under the top-level ``"properties"`` key of the JSON file is
    converted into one row: temperature is stored in kelvin, pressure in atm,
    and the value and its uncertainty in the default units of the property
    type (g/mL for densities, kcal/mol for enthalpies of mixing and
    vaporization).

    Args:
        dataset_file: The path to the evaluator dataset

    Returns:
        The created dataset

    Raises:
        KeyError: If a property has a type other than density, enthalpy of
            mixing or enthalpy of vaporization.
        ValueError: If a substance does not contain exactly one or two
            components, as a row only has room for components ``a`` and ``b``.
    """
    import json

    from openff.units import unit

    _evaluator_to_prop = {
        "openff.evaluator.properties.density.Density": "density",
        "openff.evaluator.properties.enthalpy.EnthalpyOfMixing": "hmix",
        "openff.evaluator.properties.enthalpy.EnthalpyOfVaporization": "hvap",
    }
    _prop_units = {"density": "g/mL", "hmix": "kcal/mol", "hvap": "kcal/mol"}

    def _quantity(serialized):
        """Rebuild a quantity from a serialized ``{"value": ..., "unit": ...}``."""
        # Let the registry parse the unit string (e.g. "gram / milliliter")
        # rather than multiplying by a unit, which fails for offset units.
        return unit.Quantity(serialized["value"], serialized["unit"])

    # Use a context manager so the file handle is closed deterministically.
    with dataset_file.open() as file:
        property_data = json.load(file)

    properties: list[DataEntry] = []

    for phys_prop in property_data["properties"]:
        try:
            prop_type = _evaluator_to_prop[phys_prop["@type"]]
        except KeyError:
            # ``from None`` suppresses the chained lookup error so that only
            # the descriptive message below appears in the traceback.
            raise KeyError(f"{phys_prop['@type']} not currently supported.") from None

        substance = phys_prop["substance"]
        components = substance["components"]

        if not 1 <= len(components) <= 2:
            # Silently dropping extra components would yield a row whose
            # composition no longer describes the measured system.
            raise ValueError(
                f"Expected a substance with one or two components, "
                f"found {len(components)}."
            )

        smiles_and_amounts = []

        for component in components:
            role = component["role"]["value"]
            # Amounts are keyed by ``<smiles>{<role>}``, e.g. ``CCO{solv}``.
            identifier = f"{component['smiles']}{{{role}}}"
            # Only the first listed amount is read; presumably this is the
            # mole fraction of the component - verify for other amount types.
            amount = substance["amounts"][identifier][0]["value"]
            smiles_and_amounts.append((component["smiles"], amount))

        smiles_a, x_a = smiles_and_amounts[0]
        smiles_b, x_b = (
            smiles_and_amounts[1] if len(smiles_and_amounts) == 2 else (None, None)
        )

        state = phys_prop["thermodynamic_state"]
        default_units = _prop_units[prop_type]

        prop = {
            "type": prop_type,
            "smiles_a": smiles_a,
            "x_a": x_a,
            "smiles_b": smiles_b,
            "x_b": x_b,
            "temperature": _quantity(state["temperature"]).m_as(unit.kelvin),
            "pressure": _quantity(state["pressure"]).m_as(unit.atm),
            "value": _quantity(phys_prop["value"]).m_as(default_units),
            "units": default_units,
            "std": _quantity(phys_prop["uncertainty"]).m_as(default_units),
            "source": phys_prop["source"]["doi"],
        }
        properties.append(prop)

    return create_dataset(*properties)


def extract_smiles(dataset: datasets.Dataset) -> list[str]:
"""Return a list of unique SMILES strings in the dataset.

Expand Down
1 change: 1 addition & 0 deletions descent/tests/data/evaluator_mock.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"properties": [{"id": "1", "substance": {"components": [{"smiles": "CCO", "role": {"value": "solv", "@type": "openff.evaluator.substances.components.Component.Role"}, "@type": "openff.evaluator.substances.components.Component"}, {"smiles": "O", "role": {"value": "solv", "@type": "openff.evaluator.substances.components.Component.Role"}, "@type": "openff.evaluator.substances.components.Component"}], "amounts": {"CCO{solv}": [{"value": 0.48268, "@type": "openff.evaluator.substances.amounts.MoleFraction"}], "O{solv}": [{"value": 0.51732, "@type": "openff.evaluator.substances.amounts.MoleFraction"}]}, "@type": "openff.evaluator.substances.substances.Substance"}, "phase": 2, "thermodynamic_state": {"temperature": {"value": 298.15, "unit": "kelvin", "@type": "openff.evaluator.unit.Quantity"}, "pressure": {"value": 101.3, "unit": "kilopascal", "@type": "openff.evaluator.unit.Quantity"}, "@type": "openff.evaluator.thermodynamics.ThermodynamicState"}, "value": {"value": 0.99, "unit": "gram / milliliter", "@type": "openff.evaluator.unit.Quantity"}, "uncertainty": {"value": 0.000505, "unit": "gram / milliliter", "@type": "openff.evaluator.unit.Quantity"}, "source": {"doi": "mock", "reference": "", "@type": "openff.evaluator.datasets.provenance.MeasurementSource"}, "gradients": [], "@type": "openff.evaluator.properties.density.Density"}]}
1 change: 1 addition & 0 deletions descent/tests/data/missing_property_evaluator.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"properties": [{"id": "1", "substance": {"components": [{"smiles": "CC(C)(C)O", "role": {"value": "solv", "@type": "openff.evaluator.substances.components.Component.Role"}, "@type": "openff.evaluator.substances.components.Component"}, {"smiles": "O", "role": {"value": "solv", "@type": "openff.evaluator.substances.components.Component.Role"}, "@type": "openff.evaluator.substances.components.Component"}], "amounts": {"CC(C)(C)O{solv}": [{"value": 0.48268, "@type": "openff.evaluator.substances.amounts.MoleFraction"}], "O{solv}": [{"value": 0.51732, "@type": "openff.evaluator.substances.amounts.MoleFraction"}]}, "@type": "openff.evaluator.substances.substances.Substance"}, "phase": 2, "thermodynamic_state": {"temperature": {"value": 298.15, "unit": "kelvin", "@type": "openff.evaluator.unit.Quantity"}, "pressure": {"value": 101.3, "unit": "kilopascal", "@type": "openff.evaluator.unit.Quantity"}, "@type": "openff.evaluator.thermodynamics.ThermodynamicState"}, "value": {"value": 0.99, "unit": "gram / milliliter", "@type": "openff.evaluator.unit.Quantity"}, "uncertainty": {"value": 0.000505, "unit": "gram / milliliter", "@type": "openff.evaluator.unit.Quantity"}, "source": {"doi": "mock", "reference": "", "@type": "openff.evaluator.datasets.provenance.MeasurementSource"}, "gradients": [], "@type": "openff.evaluator.properties.density.DielectricConstant"}]}
28 changes: 28 additions & 0 deletions descent/tests/targets/test_thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
_predict,
_simulate,
create_dataset,
create_from_evaluator,
default_closure,
default_config,
extract_smiles,
Expand Down Expand Up @@ -583,3 +584,30 @@ def test_default_closure(tmp_cwd, mock_density_pure, mocker):
assert torch.isclose(loss, expected_loss)
assert grad.shape == mock_x.shape
assert hess.shape == (1, 1)


def test_create_from_evaluator(data_dir):
    """Check the mock evaluator density entry is converted into the expected row."""
    dataset = create_from_evaluator(dataset_file=data_dir / "evaluator_mock.json")

    entries = list(descent.utils.dataset.iter_dataset(dataset))
    # The mock file contains a single property, so exactly one row is expected.
    assert len(entries) == 1

    expected = {
        "smiles_a": "[C:1]([C:2]([O:3][H:9])([H:7])[H:8])([H:4])([H:5])[H:6]",
        "x_a": 0.48268,
        "smiles_b": "[O:1]([H:2])[H:3]",
        "x_b": 0.51732,
        "temperature": 298.15,
        "pressure": 0.999753269183321,
        "value": 0.99,
        "std": 0.000505,
        "units": "g/mL",
        "source": "mock",
        "type": "density",
    }
    entry = entries[0]
    assert set(entry) == set(expected)

    for key, expected_value in expected.items():
        if isinstance(expected_value, float):
            # Unit conversions (e.g. kPa -> atm) go through floating point
            # arithmetic, so compare with a tolerance rather than exactly.
            assert entry[key] == pytest.approx(expected_value), key
        else:
            assert entry[key] == expected_value, key


def test_unsupported_property(data_dir):
    """Check that a property type with no known mapping is rejected."""
    # Matching on the message ensures the error comes from the unsupported
    # property check and not from an unrelated missing key in the JSON.
    with pytest.raises(KeyError, match="not currently supported"):
        create_from_evaluator(
            dataset_file=data_dir / "missing_property_evaluator.json"
        )
Loading