Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Chemfiles as a coordinate reader/writer #1862

Merged
merged 21 commits into from
Mar 10, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
240c3d3
Add a dot between filename and extension
Apr 10, 2018
1862eff
Implement CHEMFILES reader and writer
Apr 10, 2018
65c5c86
Add chemfiles to conda dependencies on CI
Mar 28, 2019
ad5d199
Allow passing the format name to chemfiles
Luthaf May 8, 2019
b450ee3
Add documentation to the code
Luthaf May 15, 2019
634e326
Convert MDAnalysis topology to chemfiles when writing
Luthaf May 15, 2019
ca33ab2
added standard XYZ tests via Chemfiles
richardjgowers Jul 8, 2019
5eadec7
minor tweaks to get tests passing
richardjgowers Jul 8, 2019
b4410db
Also translate segindex attribute to property
Luthaf Feb 10, 2020
93ddfd7
Move version checking inside a function
Luthaf Feb 11, 2020
6f195ae
Documentation improvements
Luthaf Feb 11, 2020
c3d5321
Add a test when opening a file without extension
Luthaf Feb 11, 2020
eaf172c
Add a comment about unit conversion
Luthaf Feb 11, 2020
359ea4e
Add a test checking that velocities and unit cell are written
Luthaf Feb 12, 2020
b1aa7d2
made chemfiles optional dependency using FORMAT_HINT
richardjgowers Feb 18, 2020
55e97e7
properly skip chemfiles tests if bad version too
richardjgowers Feb 18, 2020
8fcdc23
changelog and authors
richardjgowers Feb 18, 2020
0338863
fixed logic in chemfiles skipping
richardjgowers Feb 21, 2020
85b7ac1
Merge branch 'develop' into chemfiles
richardjgowers Feb 26, 2020
570e191
TST: skip test if wrong chemfiles version is installed.
kain88-de Mar 10, 2020
7f6395f
Merge branch 'develop' into chemfiles
richardjgowers Mar 10, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ env:
- MAIN_CMD="pytest ${PYTEST_LIST}"
- SETUP_CMD="${PYTEST_FLAGS}"
- BUILD_CMD="pip install -e package/ && (cd testsuite/ && python setup.py build)"
- CONDA_MIN_DEPENDENCIES="mmtf-python mock six biopython networkx cython matplotlib scipy griddataformats hypothesis gsd codecov chemfiles"
- CONDA_MIN_DEPENDENCIES="mmtf-python mock six biopython networkx cython matplotlib scipy griddataformats hypothesis gsd codecov"
# Cap seaborn to 0.9 since 0.10 drops Python 2 support (temporarily solve #2495)
- CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0,<=0.9 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12"
- CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0,<=0.9 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles"
- CONDA_CHANNELS='biobuilds conda-forge'
- CONDA_CHANNEL_PRIORITY=True
- PIP_DEPENDENCIES="duecredit parmed"
Expand Down
1 change: 1 addition & 0 deletions package/MDAnalysis/coordinates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,7 @@ class can choose an appropriate reader automatically.
from . import base
from .core import reader, writer
from . import chain
from . import chemfiles
from . import CRD
from . import DCD
from . import DLPoly
Expand Down
55 changes: 37 additions & 18 deletions package/MDAnalysis/coordinates/chemfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,25 @@

from . import base, core

import chemfiles
from chemfiles import Trajectory, Frame, Atom, UnitCell, Residue, Topology
try:
import chemfiles
except ImportError:
HAS_CHEMFILES = False
else:
HAS_CHEMFILES = True



MIN_CHEMFILES_VERSION = LooseVersion("0.9")
MAX_CHEMFILES_VERSION = LooseVersion("0.10")


def check_chemfiles_version():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@richardjgowers you want a nicer error message than this? I notice the imports should be in the classes themselves to avoid an error when importing mda without chemfiles.

if not HAS_CHEMFILES:
raise RuntimeError(
"No Chemfiles package found. "
"Try installing with 'pip install chemfiles'"
)
version = LooseVersion(chemfiles.__version__)
if version < MIN_CHEMFILES_VERSION or version >= MAX_CHEMFILES_VERSION:
raise RuntimeError(
Expand All @@ -65,8 +75,7 @@ def check_chemfiles_version():


class ChemfilesReader(base.ReaderBase):
"""
Coordinate reader using chemfiles.
"""Coordinate reader using chemfiles.

The file format to used is guessed based on the file extension. If no
matching format is found, a ``ChemfilesError`` is raised. It is also
Expand All @@ -79,12 +88,13 @@ def __init__(self, filename, chemfiles_format="", **kwargs):
"""
Parameters
----------
filename : str
trajectory filename
filename : chemfiles.Trajectory or str
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure I understand why passing a chemfiles.Trajectory should work. What would be the use case for this? Users creating chemfiles trajectories and then wanting to do analysis with MDA?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it's so you can open a chemfiles thing as you want then pass this as the trajectory MDA will use. Passing a filename and specifying format='CHEMFILES' still works, but that's relying on the baked in way to open a trajectory

the chemfiles object to read or filename to read
chemfiles_format : str (optional)
use the given format name instead of guessing from the extension.
The `list of supported formats <formats>`_ and the associated names
is available in chemfiles documentation.
if *filename* was a string, use the given format name instead of
guessing from the extension. The `list of supported formats
<formats>`_ and the associated names is available in chemfiles
documentation.
**kwargs : dict
General reader arguments.

Expand All @@ -99,13 +109,21 @@ def __init__(self, filename, chemfiles_format="", **kwargs):
self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs)
self.next()

@staticmethod
def _format_hint(thing):
"""Can this Reader read *thing*"""
# nb, filename strings can still get passed through if
# format='CHEMFILES' is used
return HAS_CHEMFILES and isinstance(thing, chemfiles.Trajectory)

def _open(self):
self._closed = False
self._step = 0
self._frame = -1
# Open file last to ensure that all other attributes are set
# in case of exception
self._file = Trajectory(self.filename, 'r', self._format)
if isinstance(self.filename, chemfiles.Trajectory):
self._file = self.filename
else:
self._file = chemfiles.Trajectory(self.filename, 'r', self._format)

def close(self):
"""close reader"""
Expand Down Expand Up @@ -224,7 +242,7 @@ def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", topology=
self.n_atoms = n_atoms
if mode != "a" and mode != "w":
raise IOError("Expected 'a' or 'w' as mode in ChemfilesWriter")
self._file = Trajectory(self.filename, mode, chemfiles_format)
self._file = chemfiles.Trajectory(self.filename, mode, chemfiles_format)
self._closed = False
if topology is not None:
if isinstance(topology, string_types):
Expand Down Expand Up @@ -293,21 +311,22 @@ def _timestep_to_chemfiles(self, ts):
"""
Convert a Timestep to a chemfiles Frame
"""
frame = Frame()
# TODO: CONVERTERS?
frame = chemfiles.Frame()
frame.resize(ts.n_atoms)
if ts.has_positions:
frame.positions[:] = ts.positions[:]
if ts.has_velocities:
frame.add_velocities()
frame.velocities[:] = ts.velocities[:]
Luthaf marked this conversation as resolved.
Show resolved Hide resolved
frame.cell = UnitCell(*ts.dimensions)
frame.cell = chemfiles.UnitCell(*ts.dimensions)
return frame

def _topology_to_chemfiles(self, obj, n_atoms):
"""
Convert an AtomGroup or an Universe to a chemfiles Topology
"""
topology = Topology()
topology = chemfiles.Topology()
if not hasattr(obj, "atoms"):
# use an empty topology
topology.resize(n_atoms)
Expand All @@ -318,7 +337,7 @@ def _topology_to_chemfiles(self, obj, n_atoms):
for atom in obj.atoms:
name = getattr(atom, 'name', "")
type = getattr(atom, 'type', name)
chemfiles_atom = Atom(name, type)
chemfiles_atom = chemfiles.Atom(name, type)

if hasattr(atom, 'altLoc'):
chemfiles_atom["altloc"] = str(atom.altLoc)
Expand All @@ -332,7 +351,7 @@ def _topology_to_chemfiles(self, obj, n_atoms):
if hasattr(atom, 'resid'):
resname = getattr(atom, 'resname', "")
if atom.resid not in residues.keys():
residues[atom.resid] = Residue(resname, atom.resid)
residues[atom.resid] = chemfiles.Residue(resname, atom.resid)
residue = residues[atom.resid]

atom_idx = len(topology.atoms)
Expand Down
2 changes: 0 additions & 2 deletions package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,6 @@ def dynamic_author_list():
'matplotlib>=1.5.1',
'mock',
'parmed',
'chemfiles>=0.9.3',
]
if not os.name == 'nt':
install_requires.append('gsd>=1.4.0')
Expand Down Expand Up @@ -602,7 +601,6 @@ def dynamic_author_list():
'scipy (>=1.0.0)',
'matplotlib (>=1.5.1)',
'parmed',
'chemfiles (>=0.9.3)',
],
# all standard requirements are available through PyPi and
# typically can be installed without difficulties through setuptools
Expand Down
45 changes: 32 additions & 13 deletions testsuite/MDAnalysisTests/coordinates/test_chemfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,39 @@
#
from __future__ import absolute_import

import numpy as np
import pytest

import MDAnalysis as mda
import numpy as np
from unittest import TestCase

from chemfiles import ChemfilesError

from MDAnalysis.coordinates.chemfiles import ChemfilesReader, ChemfilesWriter

from MDAnalysisTests import datafiles, tempdir
from MDAnalysisTests.coordinates.base import (
MultiframeReaderTest, BaseWriterTest, BaseReference
)
from MDAnalysisTests.coordinates.test_xyz import XYZReference


chemfiles = pytest.importorskip('chemfiles')


class TestChemFileXYZ(MultiframeReaderTest):
@staticmethod
@pytest.fixture
def ref():
base = XYZReference()
base.writer = ChemfilesWriter
base.dimensions = np.array([0, 0, 0, 90, 90, 90], dtype=np.float32)

return base

@pytest.fixture
def reader(self, ref):
reader = ChemfilesReader(ref.trajectory)
reader.add_auxiliary('lowf', ref.aux_lowf, dt=ref.aux_lowf_dt, initial_time=0, time_selector=None)
reader.add_auxiliary('highf', ref.aux_highf, dt=ref.aux_highf_dt, initial_time=0, time_selector=None)
return reader



class ChemfilesXYZReference(BaseReference):
Expand Down Expand Up @@ -69,11 +88,11 @@ def test_no_container(self, ref):
pass
Luthaf marked this conversation as resolved.
Show resolved Hide resolved

def test_no_extension_raises(self, ref):
with pytest.raises(ChemfilesError):
with pytest.raises(chemfiles.ChemfilesError):
ref.writer('foo')


class TestChemfiles(TestCase):
class TestChemfiles(object):
def test_read_chemfiles_format(self):
u = mda.Universe(
datafiles.LAMMPSdata,
Expand All @@ -83,7 +102,7 @@ def test_read_chemfiles_format(self):
)

for ts in u.trajectory:
self.assertEqual(ts.n_atoms, 18364)
assert ts.n_atoms == 18364

def test_changing_system_size(self):
outfile = tempdir.TempDir().name + "chemfiles-changing-size.xyz"
Expand All @@ -92,11 +111,11 @@ def test_changing_system_size(self):

u = mda.Universe(outfile, format="chemfiles", topology_format="XYZ")

with self.assertRaises(IOError):
with pytest.raises(IOError):
u.trajectory._read_next_timestep()

def test_wrong_open_mode(self):
with self.assertRaises(IOError):
with pytest.raises(IOError):
_ = ChemfilesWriter("", mode="r")

def check_topology(self, reference, file):
Expand All @@ -117,10 +136,10 @@ def check_topology(self, reference, file):
)

for atom in check.atoms:
self.assertIn((atom.name, atom.type, atom.record_type), atoms)
assert (atom.name, atom.type, atom.record_type) in atoms

for bond in check.bonds:
self.assertIn((bond.atoms[0].ix, bond.atoms[1].ix), bonds)
assert (bond.atoms[0].ix, bond.atoms[1].ix) in bonds

def test_write_topology(self):
u = mda.Universe(datafiles.CONECT)
Expand Down Expand Up @@ -174,7 +193,7 @@ def test_write_velocities(self):

with open(outfile) as file:
content = file.read()
self.assertEqual(content, EXPECTED_LAMMPS_DATA)
assert content == EXPECTED_LAMMPS_DATA


VARYING_XYZ = """2
Expand Down
20 changes: 0 additions & 20 deletions testsuite/MDAnalysisTests/coordinates/test_xyz.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@
assert_almost_equal,
)

from MDAnalysis.coordinates import chemfiles

from MDAnalysis.coordinates.XYZ import XYZWriter

from MDAnalysisTests.datafiles import COORDINATES_XYZ, COORDINATES_XYZ_BZ2
Expand Down Expand Up @@ -178,21 +176,3 @@ def test_elements_and_names(self, outfile, attr):
names = ''.join(l.split()[0].strip() for l in r.readlines()[2:-1])

assert names[:-1].lower() == 'testing'


class TestChemFileXYZ(MultiframeReaderTest):
@staticmethod
@pytest.fixture
def ref():
base = XYZReference()
base.writer = chemfiles.ChemfilesWriter
base.dimensions = np.array([0, 0, 0, 90, 90, 90], dtype=np.float32)

return base

@pytest.fixture
def reader(self, ref):
reader = chemfiles.ChemfilesReader(ref.trajectory)
reader.add_auxiliary('lowf', ref.aux_lowf, dt=ref.aux_lowf_dt, initial_time=0, time_selector=None)
reader.add_auxiliary('highf', ref.aux_highf, dt=ref.aux_highf_dt, initial_time=0, time_selector=None)
return reader