Skip to content

Commit

Permalink
improved nucleic/backbone selections
Browse files Browse the repository at this point in the history
  • Loading branch information
richardjgowers committed Jul 11, 2020
1 parent 856e434 commit 55f9a5b
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 21 deletions.
2 changes: 1 addition & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Fixes
(Issue #2723, PR #2815)
* TRZReader now checks `n_atoms` on reading. (Issue #2817, PR #2820)
* TRZWriter now writes `n_atoms` to the file. (Issue #2817, PR #2820)

* Fixed performance regression on select_atoms for string selections (#2751)

Enhancements
* Added the RDKitParser which creates a `core.topology.Topology` object from
Expand Down
119 changes: 100 additions & 19 deletions package/MDAnalysis/core/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,10 @@ class ProteinSelection(Selection):
See Also
--------
:func:`MDAnalysis.lib.util.convert_aa_code`
.. versionchanged:: 2.0.0
prot_res changed to set (from numpy array)
performance improved by ~100x on larger systems
"""
token = 'protein'

Expand Down Expand Up @@ -873,23 +877,32 @@ class NucleicSelection(Selection):
.. versionchanged:: 0.8
additional Gromacs selections
.. versionchanged:: 2.0.0
nucl_res changed to set (from numpy array)
performance improved by ~100x on larger systems
"""
token = 'nucleic'

nucl_res = np.array([
nucl_res = {
'ADE', 'URA', 'CYT', 'GUA', 'THY', 'DA', 'DC', 'DG', 'DT', 'RA',
'RU', 'RG', 'RC', 'A', 'T', 'U', 'C', 'G',
'DA5', 'DC5', 'DG5', 'DT5',
'DA3', 'DC3', 'DG3', 'DT3',
'RA5', 'RU5', 'RG5', 'RC5',
'RA3', 'RU3', 'RG3', 'RC3'
])
}

def __init__(self, parser, tokens):
pass

def apply(self, group):
mask = np.in1d(group.resnames, self.nucl_res)
resnames = group.universe._topology.resnames
nmidx = resnames.nmidx[group.resindices]

matches = [ix for (nm, ix) in resnames.namedict.items()
if nm in self.nucl_res]
mask = np.in1d(nmidx, matches)

return group[mask].unique


Expand All @@ -898,29 +911,63 @@ class BackboneSelection(ProteinSelection):
This excludes OT* on C-termini
(which are included by, eg VMD's backbone selection).
.. versionchanged:: 2.0.0
bb_atoms changed to set (from numpy array)
performance improved by ~100x on larger systems
"""
token = 'backbone'
bb_atoms = np.array(['N', 'CA', 'C', 'O'])
bb_atoms = {'N', 'CA', 'C', 'O'}

def apply(self, group):
mask = np.in1d(group.names, self.bb_atoms)
mask &= np.in1d(group.resnames, self.prot_res)
return group[mask].unique
atomnames = group.universe._topology.names
resnames = group.universe._topology.resnames

# filter by atom names
name_matches = [ix for (nm, ix) in atomnames.namedict.items()
if nm in self.bb_atoms]
nmidx = atomnames.nmidx[group.ix]
group = group[np.in1d(nmidx, name_matches)]

# filter by resnames
resname_matches = [ix for (nm, ix) in resnames.namedict.items()
if nm in self.prot_res]
nmidx = resnames.nmidx[group.resindices]
group = group[np.in1d(nmidx, resname_matches)]

return group.unique


class NucleicBackboneSelection(NucleicSelection):
"""Contains all atoms with name "P", "C5'", C3'", "O3'", "O5'".
These atoms are only recognized if they are in a residue matched
by the :class:`NucleicSelection`.
.. versionchanged:: 2.0.0
bb_atoms changed to set (from numpy array)
performance improved by ~100x on larger systems
"""
token = 'nucleicbackbone'
bb_atoms = np.array(["P", "C5'", "C3'", "O3'", "O5'"])
bb_atoms = {"P", "C5'", "C3'", "O3'", "O5'"}

def apply(self, group):
mask = np.in1d(group.names, self.bb_atoms)
mask &= np.in1d(group.resnames, self.nucl_res)
return group[mask].unique
atomnames = group.universe._topology.names
resnames = group.universe._topology.resnames

# filter by atom names
name_matches = [ix for (nm, ix) in atomnames.namedict.items()
if nm in self.bb_atoms]
nmidx = atomnames.nmidx[group.ix]
group = group[np.in1d(nmidx, name_matches)]

# filter by resnames
resname_matches = [ix for (nm, ix) in resnames.namedict.items()
if nm in self.nucl_res]
nmidx = resnames.nmidx[group.resindices]
group = group[np.in1d(nmidx, resname_matches)]

return group.unique


class BaseSelection(NucleicSelection):
Expand All @@ -930,29 +977,63 @@ class BaseSelection(NucleicSelection):
'N9', 'N7', 'C8', 'C5', 'C4', 'N3', 'C2', 'N1', 'C6',
'O6','N2','N6', 'O2','N4','O4','C5M'
.. versionchanged:: 2.0.0
base_atoms changed to set (from numpy array)
performance improved by ~100x on larger systems
"""
token = 'nucleicbase'
base_atoms = np.array([
base_atoms = {
'N9', 'N7', 'C8', 'C5', 'C4', 'N3', 'C2', 'N1', 'C6',
'O6', 'N2', 'N6',
'O2', 'N4', 'O4', 'C5M'])
'O2', 'N4', 'O4', 'C5M'}

def apply(self, group):
mask = np.in1d(group.names, self.base_atoms)
mask &= np.in1d(group.resnames, self.nucl_res)
return group[mask].unique
atomnames = group.universe._topology.names
resnames = group.universe._topology.resnames

# filter by atom names
name_matches = [ix for (nm, ix) in atomnames.namedict.items()
if nm in self.base_atoms]
nmidx = atomnames.nmidx[group.ix]
group = group[np.in1d(nmidx, name_matches)]

# filter by resnames
resname_matches = [ix for (nm, ix) in resnames.namedict.items()
if nm in self.nucl_res]
nmidx = resnames.nmidx[group.resindices]
group = group[np.in1d(nmidx, resname_matches)]

return group.unique


class NucleicSugarSelection(NucleicSelection):
"""Contains all atoms with name C1', C2', C3', C4', O2', O4', O3'.
.. versionchanged:: 2.0.0
sug_atoms changed to set (from numpy array)
performance improved by ~100x on larger systems
"""
token = 'nucleicsugar'
sug_atoms = np.array(["C1'", "C2'", "C3'", "C4'", "O4'"])

def apply(self, group):
mask = np.in1d(group.names, self.sug_atoms)
mask &= np.in1d(group.resnames, self.nucl_res)
return group[mask].unique
atomnames = group.universe._topology.names
resnames = group.universe._topology.resnames

# filter by atom names
name_matches = [ix for (nm, ix) in atomnames.namedict.items()
if nm in self.sug_atoms]
nmidx = atomnames.nmidx[group.ix]
group = group[np.in1d(nmidx, name_matches)]

# filter by resnames
resname_matches = [ix for (nm, ix) in resnames.namedict.items()
if nm in self.nucl_res]
nmidx = resnames.nmidx[group.resindices]
group = group[np.in1d(nmidx, resname_matches)]

return group.unique


class PropertySelection(Selection):
Expand Down
2 changes: 1 addition & 1 deletion testsuite/MDAnalysisTests/core/test_atomselections.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def test_protein(self, universe):
sorted(universe.select_atoms('segid 4AKE').indices),
"selected protein is not the same as auto-generated protein segment s4AKE")

@pytest.mark.parametrize('resname', MDAnalysis.core.selection.ProteinSelection.prot_res)
@pytest.mark.parametrize('resname', sorted(MDAnalysis.core.selection.ProteinSelection.prot_res))
def test_protein_resnames(self, resname):
u = make_Universe(('resnames',))
# set half the residues' names to the resname we're testing
Expand Down

0 comments on commit 55f9a5b

Please sign in to comment.