Skip to content

Commit

Permalink
removed some code duplication
Browse files Browse the repository at this point in the history
made protein selection faster, 48ms -> 0.5ms on GRO testfile
  • Loading branch information
richardjgowers committed Jul 5, 2020
1 parent f48d4b4 commit 00ba0ee
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 38 deletions.
4 changes: 3 additions & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Fixes
* TOPParser no longer guesses elements when missing atomic number records
(Issues #2449, #2651)
* Testsuite no longer calls matplotlib.use('agg') (#2191)
* In ChainReader, read_frame does not trigger change of iterating position.
* In ChainReader, read_frame does not trigger change of iterating position.
(Issue #2723, PR #2815)

Enhancements
Expand All @@ -39,6 +39,8 @@ Enhancements
* Added computation of Mean Squared Displacements (#2438, PR #2619)
* Improved performances when parsing TPR files (PR #2804)
* Added converter between Cartesian and Bond-Angle-Torsion coordinates (PR #2668)
* Improved performance of select_atoms on strings (e.g. name, type, resname) and
'protein' selection (#2751, PR #2755)

Changes
* Changes development status from Beta to Mature (Issue #2773)
Expand Down
57 changes: 20 additions & 37 deletions package/MDAnalysis/core/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ def apply(self, group):
return group[mask]


class StringSelection(Selection):
class _ProtoStringSelection(Selection):
"""Selections based on text attributes
.. versionchanged:: 1.0.0
Expand All @@ -540,11 +540,15 @@ def apply(self, group):
matches.append(ix)

# atomname indices for members of this group
nmidx = nmattr.nmidx[group.ix]
nmidx = nmattr.nmidx[getattr(group, self.level)]

return group[np.in1d(nmidx, matches)].unique


class StringSelection(_ProtoStringSelection):
level = 'ix' # operates on atom level attribute, i.e. '.ix'


class AtomNameSelection(StringSelection):
"""Select atoms based on 'names' attribute"""
token = 'name'
Expand All @@ -569,21 +573,8 @@ class AtomICodeSelection(StringSelection):
field = 'icodes'


class _ResidueStringSelection(StringSelection):
def apply(self, group):
# rather than work on group.names, cheat and look at the lookup table
nmattr = getattr(group.universe._topology, self.field)

matches = [] # list of passing indices
# iterate through set of known atom names, check which pass
for nm, ix in nmattr.namedict.items():
if any(fnmatch.fnmatch(nm, val) for val in self.values):
matches.append(ix)

# atomname indices for members of this group
nmidx = nmattr.nmidx[group.resindices]

return group[np.in1d(nmidx, matches)].unique
class _ResidueStringSelection(_ProtoStringSelection):
level= 'resindices'


class ResidueNameSelection(_ResidueStringSelection):
Expand All @@ -598,25 +589,11 @@ class MoleculeTypeSelection(_ResidueStringSelection):
field = 'moltypes'


class SegmentNameSelection(StringSelection):
class SegmentNameSelection(_ProtoStringSelection):
"""Select atoms based on 'segids' attribute"""
token = 'segid'
field = 'segids'

def apply(self, group):
# rather than work on group.names, cheat and look at the lookup table
nmattr = group.universe._topology.segids

matches = [] # list of passing indices
# iterate through set of known atom names, check which pass
for nm, ix in nmattr.namedict.items():
if any(fnmatch.fnmatch(nm, val) for val in self.values):
matches.append(ix)

# atomname indices for members of this group
nmidx = nmattr.nmidx[group.segindices]

return group[np.in1d(nmidx, matches)].unique
level = 'segindices'


class AltlocSelection(StringSelection):
Expand Down Expand Up @@ -845,7 +822,7 @@ class ProteinSelection(Selection):
"""
token = 'protein'

prot_res = np.array([
prot_res = {
# CHARMM top_all27_prot_lipid.rtf
'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HSD',
'HSE', 'HSP', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR',
Expand All @@ -868,14 +845,20 @@ class ProteinSelection(Selection):
'CLEU', 'CILE', 'CVAL', 'CASF', 'CASN', 'CGLN', 'CARG', 'CHID', 'CHIE',
'CHIP', 'CTRP', 'CPHE', 'CTYR', 'CGLU', 'CASP', 'CLYS', 'CPRO', 'CCYS',
'CCYX', 'CMET', 'CME', 'ASF',
])
}

def __init__(self, parser, tokens):
pass

def apply(self, group):
mask = np.in1d(group.resnames, self.prot_res)
return group[mask].unique
resname_attr = group.universe._topology.resnames
# which values in resname attr are in prot_res?
matches = [ix for (nm, ix) in resname_attr.namedict.items()
if nm in self.prot_res]
# index of each atom's resname
nmidx = resname_attr.nmidx[group.resindices]
# intersect atom's resname index and matches to prot_res
return group[np.in1d(nmidx, matches)].unique


class NucleicSelection(Selection):
Expand Down

0 comments on commit 00ba0ee

Please sign in to comment.