Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[WIP] Cythonizes GROParser & GROReader very minimally #2227

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
from ..core import flags
from ..exceptions import NoDataError
from ..lib import util
cimport cython


class Timestep(base.Timestep):
Expand Down Expand Up @@ -167,11 +168,17 @@ class GROReader(base.SingleFrameReaderBase):
.. versionchanged:: 0.11.0
Frames now 0-based instead of 1-based
"""
format = 'GRO'
units = {'time': None, 'length': 'nm', 'velocity': 'nm/ps'}
_Timestep = Timestep

def _read_first_frame(self):
cdef str format = 'GRO'
cpdef dict units = {'time': None, 'length': 'nm', 'velocity': 'nm/ps'}
cdef int n_atoms
cdef str first_atomline
cdef int cs
cdef float[:,:] velocities
cdef int pos
cdef str line
cdef float unitcell
with util.openany(self.filename, 'rt') as grofile:
# Read first two lines to get number of atoms
grofile.readline()
Expand Down
3 changes: 3 additions & 0 deletions package/MDAnalysis/coordinates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,7 @@ class can choose an appropriate reader automatically.
import six

from . import base
from .base import _READERS
from .core import reader, writer
from . import chain
from . import CRD
Expand All @@ -749,3 +750,5 @@ class can choose an appropriate reader automatically.
from . import MMTF
from . import GSD
from . import null

_READERS['GRO'] = GRO.GROReader
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ def parse(self, **kwargs):
"""Return the *Topology* object for this file"""
# Gro has the following columns
# resid, resname, name, index, (x,y,z)
cdef int n_atoms;
cdef int i;
cdef str line
cdef int count = 0
cdef int[:] resids
cdef object[:] resnames
cdef object[:] names
cdef int[:] indices
cdef int[:] starts
cdef int[:] wraps
with openany(self.filename) as inf:
next(inf)
n_atoms = int(next(inf))
Expand All @@ -93,51 +103,49 @@ def parse(self, **kwargs):
resnames = np.zeros(n_atoms, dtype=object)
names = np.zeros(n_atoms, dtype=object)
indices = np.zeros(n_atoms, dtype=np.int32)

for i, line in enumerate(inf):
if i == n_atoms:
for line in inf:
if count == n_atoms:
break
try:
resids[i] = int(line[:5])
resnames[i] = line[5:10].strip()
names[i] = line[10:15].strip()
indices[i] = int(line[15:20])
resids[count] = int(line[:5])
resnames[count] = line[5:10].strip()
names[count] = line[10:15].strip()
indices[count] = int(line[15:20])
except (ValueError, TypeError):
raise IOError(
"Couldn't read the following line of the .gro file:\n"
"{0}".format(line))
count += 1
# Check all lines had names
if not np.all(names):
missing = np.where(names == '')
missing = np.where(np.asarray(names) == '')
raise IOError("Missing atom name on line: {0}"
"".format(missing[0][0] + 3)) # 2 header, 1 based

# Fix wrapping of resids (if we ever saw a wrap)
if np.any(resids == 0):
if np.any(np.asarray(resids) == 0):
# find places where resid hit zero again
wraps = np.where(resids == 0)[0]
wraps = np.where(np.asarray(resids) == 0)[0].astype(np.int32)
# group these places together:
# find indices of first 0 in each block of zeroes
# 1) find large changes in index, (ie non sequential blocks)
diff = np.diff(wraps) != 1
diff = np.diff(np.asarray(wraps)) != 1
# 2) make array of where 0-blocks start
starts = np.hstack([wraps[0], wraps[1:][diff]])
starts = np.hstack([np.asarray(wraps)[0], np.asarray(wraps)[1:][diff]]).astype(np.int32)

# remove 0 in starts, ie the first residue **can** be 0
if starts[0] == 0:
starts = starts[1:]

# for each resid after a wrap, add 100k (5 digit wrap)
for s in starts:
resids[s:] += 100000
np.asarray(resids)[s:] += 100000

# Guess types and masses
atomtypes = guessers.guess_types(names)
atomtypes = guessers.guess_types(np.asarray(names, dtype=object))
masses = guessers.guess_masses(atomtypes)

residx, (new_resids, new_resnames) = change_squash(
(resids, resnames), (resids, resnames))

(np.asarray(resids, dtype=np.int32), np.asarray(resnames, dtype = object)), (np.asarray(resids, dtype=np.int32), np.asarray(resnames, dtype = object)))
# new_resids is len(residues)
# so resindex 0 has resid new_resids[0]
attrs = [
Expand Down
12 changes: 11 additions & 1 deletion package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,16 @@ def extensions(config):
include_dirs=include_dirs + ['MDAnalysis/lib/formats/include'],
define_macros=define_macros,
extra_compile_args=extra_compile_args)
groparser = MDAExtension('MDAnalysis.topology.GROParser',
['MDAnalysis/topology/GROParser' + source_suffix],
include_dirs = include_dirs + ['MDAnalysis/topology/include'],
define_macros = define_macros,
extra_compile_args= extra_compile_args)
groreader = MDAExtension('MDAnalysis.coordinates.GRO',
['MDAnalysis/coordinates/GRO' + source_suffix],
include_dirs = include_dirs + ['MDAnalysis/coordinates/include'],
define_macros = define_macros,
extra_compile_args= extra_compile_args)
distances = MDAExtension('MDAnalysis.lib.c_distances',
['MDAnalysis/lib/c_distances' + source_suffix],
include_dirs=include_dirs + ['MDAnalysis/lib/include'],
Expand Down Expand Up @@ -416,7 +426,7 @@ def extensions(config):
extra_link_args= cpp_extra_link_args)
pre_exts = [libdcd, distances, distances_omp, qcprot,
transformation, libmdaxdr, util, encore_utils,
ap_clustering, spe_dimred, cutil, augment, nsgrid]
ap_clustering, spe_dimred, cutil, augment, nsgrid, groparser, groreader]


cython_generated = []
Expand Down