Skip to content

Commit

Permalink
use dict-lookup string attrs EVERYWHERE
Browse files Browse the repository at this point in the history
  • Loading branch information
richardjgowers committed Jul 5, 2020
1 parent 18b97ec commit f48d4b4
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 47 deletions.
57 changes: 41 additions & 16 deletions package/MDAnalysis/core/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,22 +529,9 @@ def __init__(self, parser, tokens):
self.values = vals

@return_empty_on_apply
def apply(self, group):
mask = np.zeros(len(group), dtype=np.bool)
values = getattr(group, self.field)
for val in self.values:
mask |= [fnmatch.fnmatch(x, val) for x in values]
return group[mask].unique


class AtomNameSelection(StringSelection):
"""Select atoms based on 'names' attribute"""
token = 'name'
field = 'names'

def apply(self, group):
# rather than work on group.names, cheat and look at the lookup table
nmattr = group.universe._topology.names
nmattr = getattr(group.universe._topology, self.field)

matches = [] # list of passing indices
# iterate through set of known atom names, check which pass
Expand All @@ -558,6 +545,12 @@ def apply(self, group):
return group[np.in1d(nmidx, matches)].unique


class AtomNameSelection(StringSelection):
    """Atom selection driven by the topology's 'names' attribute."""
    # attribute looked up on the topology by the inherited ``apply``
    field = 'names'
    # selection token associated with this class
    token = 'name'


class AtomTypeSelection(StringSelection):
"""Select atoms based on 'types' attribute"""
token = 'type'
Expand All @@ -576,13 +569,30 @@ class AtomICodeSelection(StringSelection):
field = 'icodes'


class ResidueNameSelection(StringSelection):
class _ResidueStringSelection(StringSelection):
    """String selection for attributes that are stored once per residue.

    The topology attribute named by ``self.field`` is expected to expose
    ``namedict`` (string -> lookup index) and ``nmidx`` (lookup index held by
    each residue).  Patterns are tested against the distinct strings only and
    the result is mapped onto the group through ``group.resindices``.
    """

    def apply(self, group):
        """Return the unique members of ``group`` matching ``self.values``.

        Parameters
        ----------
        group
            Group to filter; must provide ``universe`` and ``resindices``.

        Returns
        -------
        ``group[mask].unique`` where ``mask`` flags members whose
        residue-level string matches any ``fnmatch`` pattern in
        ``self.values``.
        """
        # rather than compare every value, work on the lookup table
        attr = getattr(group.universe._topology, self.field)

        # lookup indices of every known string that passes some pattern
        matches = [
            ix for nm, ix in attr.namedict.items()
            if any(fnmatch.fnmatch(nm, val) for val in self.values)
        ]

        # lookup index for each member of this group, via its residue
        nmidx = attr.nmidx[group.resindices]

        # np.isin is the supported replacement for the deprecated np.in1d
        return group[np.isin(nmidx, matches)].unique


class ResidueNameSelection(_ResidueStringSelection):
    """Atom selection driven by the topology's 'resnames' attribute."""
    # attribute looked up on the topology by the inherited ``apply``
    field = 'resnames'
    # selection token associated with this class
    token = 'resname'


class MoleculeTypeSelection(StringSelection):
class MoleculeTypeSelection(_ResidueStringSelection):
"""Select atoms based on 'moltypes' attribute"""
token = 'moltype'
field = 'moltypes'
Expand All @@ -593,6 +603,21 @@ class SegmentNameSelection(StringSelection):
token = 'segid'
field = 'segids'

def apply(self, group):
    """Return the unique members of ``group`` whose segment string matches.

    The topology attribute named by ``self.field`` is expected to expose
    ``namedict`` (string -> lookup index) and ``nmidx`` (lookup index held
    by each segment).  Each pattern in ``self.values`` is tested with
    ``fnmatch`` against the distinct strings only; the result is mapped
    onto the group through ``group.segindices``.
    """
    # rather than compare every value, work on the lookup table; resolve it
    # through ``self.field`` like the other string selections do
    attr = getattr(group.universe._topology, self.field)

    # lookup indices of every known string that passes some pattern
    matches = [
        ix for nm, ix in attr.namedict.items()
        if any(fnmatch.fnmatch(nm, val) for val in self.values)
    ]

    # lookup index for each member of this group, via its segment
    nmidx = attr.nmidx[group.segindices]

    # np.isin is the supported replacement for the deprecated np.in1d
    return group[np.isin(nmidx, matches)].unique


class AltlocSelection(StringSelection):
"""Select atoms based on 'altLoc' attribute"""
Expand Down
166 changes: 135 additions & 31 deletions package/MDAnalysis/core/topologyattrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,16 +473,7 @@ def _gen_initial_values(na, nr, ns):
return np.arange(1, na + 1)


# TODO: update docs to property doc
class Atomnames(AtomAttr):
"""Name for each atom.
"""
attrname = 'names'
singular = 'name'
per_object = 'atom'
dtype = object
transplants = defaultdict(list)

class _AtomStringAttr(AtomAttr):
def __init__(self, vals, guessed=False):
self._guessed = guessed

Expand Down Expand Up @@ -538,6 +529,17 @@ def set_atoms(self, ag, values):
self.name_lookup = np.concatenate([self.name_lookup, newnames])
self.values = self.name_lookup[self.nmidx]


# TODO: update docs to property doc
class Atomnames(_AtomStringAttr):
"""Name for each atom.
"""
attrname = 'names'
singular = 'name'
per_object = 'atom'
dtype = object
transplants = defaultdict(list)

def phi_selection(residue, c_name='C', n_name='N', ca_name='CA'):
"""Select AtomGroup corresponding to the phi protein backbone dihedral
C'-N-CA-C.
Expand Down Expand Up @@ -1011,20 +1013,16 @@ def chi1_selections(residues, n_name='N', ca_name='CA', cb_name='CB',


# TODO: update docs to property doc
class Atomtypes(AtomAttr):
class Atomtypes(_AtomStringAttr):
"""Type for each atom"""
attrname = 'types'
singular = 'type'
per_object = 'atom'
dtype = object

@staticmethod
def _gen_initial_values(na, nr, ns):
return np.array(['' for _ in range(na)], dtype=object)


# TODO: update docs to property doc
class Elements(AtomAttr):
class Elements(_AtomStringAttr):
"""Element for each atom"""
attrname = 'elements'
singular = 'element'
Expand All @@ -1048,7 +1046,7 @@ def _gen_initial_values(na, nr, ns):
return np.zeros(na)


class RecordTypes(AtomAttr):
class RecordTypes(_AtomStringAttr):
"""For PDB-like formats, indicates if ATOM or HETATM
Defaults to 'ATOM'
Expand All @@ -1066,7 +1064,7 @@ def _gen_initial_values(na, nr, ns):
return np.array(['ATOM'] * na, dtype=object)


class ChainIDs(AtomAttr):
class ChainIDs(_AtomStringAttr):
"""ChainID per atom
Note
Expand All @@ -1078,10 +1076,6 @@ class ChainIDs(AtomAttr):
per_object = 'atom'
dtype = object

@staticmethod
def _gen_initial_values(na, nr, ns):
return np.array(['' for _ in range(na)], dtype=object)


class Tempfactors(AtomAttr):
"""Tempfactor for atoms"""
Expand Down Expand Up @@ -1627,7 +1621,7 @@ def _gen_initial_values(na, nr, ns):


# TODO: update docs to property doc
class AltLocs(AtomAttr):
class AltLocs(_AtomStringAttr):
"""AltLocs for each atom"""
attrname = 'altLocs'
singular = 'altLoc'
Expand Down Expand Up @@ -1781,8 +1775,65 @@ def _gen_initial_values(na, nr, ns):
return np.arange(1, nr + 1)


class _ResidueStringAttr(ResidueAttr):
    """Per-residue string attribute stored through a lookup table.

    Each distinct string is kept once in ``name_lookup``; ``namedict`` maps a
    string to its position in ``name_lookup`` and ``nmidx`` holds, for every
    residue, the position of that residue's string.  ``values`` is rebuilt
    from ``name_lookup[nmidx]`` whenever either changes.
    """

    def __init__(self, vals, guessed=False):
        """Build the lookup table from ``vals`` (one string per residue)."""
        self._guessed = guessed

        self.namedict = {}  # maps str to nmidx
        name_lookup = []  # maps idx to str
        # eg namedict['ALA'] = 5 & name_lookup[5] = 'ALA'

        # the lookup for each residue
        # eg residue 5 is 'GLY', so nmidx[5] = 7, where name_lookup[7] = 'GLY'
        self.nmidx = np.zeros_like(vals, dtype=int)

        for i, val in enumerate(vals):
            self.nmidx[i] = self._intern(val, name_lookup)

        self.name_lookup = np.array(name_lookup, dtype=object)
        self.values = self.name_lookup[self.nmidx]

    def _intern(self, name, added):
        """Return the lookup index of ``name``, registering it if unseen.

        A previously unknown ``name`` gets the next free index in
        ``namedict`` and is appended to ``added`` so the caller can extend
        ``name_lookup`` afterwards.
        """
        try:
            return self.namedict[name]
        except KeyError:
            nextidx = len(self.namedict)
            self.namedict[name] = nextidx
            added.append(name)
            return nextidx

    @staticmethod
    def _gen_initial_values(na, nr, ns):
        """Return one empty string per residue (``nr`` entries)."""
        return np.array(['' for _ in range(nr)], dtype=object)

    @_check_length
    def set_residues(self, rg, values):
        """Assign ``values`` to the residues indexed by ``rg.ix``.

        ``values`` is either a single string, used for every residue in
        ``rg``, or a sequence holding one string per residue.
        """
        newnames = []

        # two possibilities, either single value given, or one per residue
        if isinstance(values, str):
            newidx = self._intern(values, newnames)
        else:
            newidx = np.zeros_like(values, dtype=int)
            for i, val in enumerate(values):
                newidx[i] = self._intern(val, newnames)

        # newidx either single value or same size array
        self.nmidx[rg.ix] = newidx
        if newnames:
            self.name_lookup = np.concatenate([self.name_lookup, newnames])
        self.values = self.name_lookup[self.nmidx]


# TODO: update docs to property doc
class Resnames(ResidueAttr):
class Resnames(_ResidueStringAttr):
attrname = 'resnames'
singular = 'resname'
target_classes = [AtomGroup, ResidueGroup, SegmentGroup, Atom, Residue]
Expand Down Expand Up @@ -1903,18 +1954,14 @@ def _gen_initial_values(na, nr, ns):
return np.arange(1, nr + 1)


class ICodes(ResidueAttr):
class ICodes(_ResidueStringAttr):
"""Insertion code for Atoms"""
attrname = 'icodes'
singular = 'icode'
dtype = object

@staticmethod
def _gen_initial_values(na, nr, ns):
return np.array(['' for _ in range(nr)], dtype=object)


class Moltypes(ResidueAttr):
class Moltypes(_ResidueStringAttr):
"""Name of the molecule type
Two molecules that share a molecule type share a common template topology.
Expand Down Expand Up @@ -1969,8 +2016,65 @@ def set_segments(self, sg, values):
self.values[sg.ix] = values


class _SegmentStringAttr(SegmentAttr):
    """Per-segment string attribute stored through a lookup table.

    Each distinct string is kept once in ``name_lookup``; ``namedict`` maps a
    string to its position in ``name_lookup`` and ``nmidx`` holds, for every
    segment, the position of that segment's string.  ``values`` is rebuilt
    from ``name_lookup[nmidx]`` whenever either changes.
    """

    def __init__(self, vals, guessed=False):
        """Build the lookup table from ``vals`` (one string per segment)."""
        self._guessed = guessed

        self.namedict = {}  # maps str to nmidx
        name_lookup = []  # maps idx to str
        # eg namedict['PROA'] = 5 & name_lookup[5] = 'PROA'

        # the lookup for each segment
        # eg segment 5 is 'SOLV', so nmidx[5] = 7, where name_lookup[7] = 'SOLV'
        self.nmidx = np.zeros_like(vals, dtype=int)

        for i, val in enumerate(vals):
            self.nmidx[i] = self._intern(val, name_lookup)

        self.name_lookup = np.array(name_lookup, dtype=object)
        self.values = self.name_lookup[self.nmidx]

    def _intern(self, name, added):
        """Return the lookup index of ``name``, registering it if unseen.

        A previously unknown ``name`` gets the next free index in
        ``namedict`` and is appended to ``added`` so the caller can extend
        ``name_lookup`` afterwards.
        """
        try:
            return self.namedict[name]
        except KeyError:
            nextidx = len(self.namedict)
            self.namedict[name] = nextidx
            added.append(name)
            return nextidx

    @staticmethod
    def _gen_initial_values(na, nr, ns):
        """Return one empty string per segment (``ns`` entries)."""
        # segment-level attribute: size by the segment count, not residues
        return np.array(['' for _ in range(ns)], dtype=object)

    @_check_length
    def set_segments(self, sg, values):
        """Assign ``values`` to the segments indexed by ``sg.ix``.

        ``values`` is either a single string, used for every segment in
        ``sg``, or a sequence holding one string per segment.
        """
        newnames = []

        # two possibilities, either single value given, or one per segment
        if isinstance(values, str):
            newidx = self._intern(values, newnames)
        else:
            newidx = np.zeros_like(values, dtype=int)
            for i, val in enumerate(values):
                newidx[i] = self._intern(val, newnames)

        # newidx either single value or same size array
        self.nmidx[sg.ix] = newidx
        if newnames:
            self.name_lookup = np.concatenate([self.name_lookup, newnames])
        self.values = self.name_lookup[self.nmidx]


# TODO: update docs to property doc
class Segids(SegmentAttr):
class Segids(_SegmentStringAttr):
attrname = 'segids'
singular = 'segid'
target_classes = [AtomGroup, ResidueGroup, SegmentGroup,
Expand Down

0 comments on commit f48d4b4

Please sign in to comment.