From f48d4b44184001e87fddde9f6ca3ed5a14b8b520 Mon Sep 17 00:00:00 2001
From: richard
Date: Sun, 21 Jun 2020 20:59:06 +0100
Subject: [PATCH] use dict-lookup string attrs EVERYWHERERE

---
Review notes (text between the three-dash line and the first diff is not
part of the commit):
- _SegmentStringAttr._gen_initial_values sized its default array with `nr`
  (number of residues); a segment-level attribute needs `ns`.
- SegmentNameSelection.apply now reads its attribute through `self.field`,
  like the atom- and residue-level selections.
- Comments on added lines that spoke of "atom names" in residue/segment
  code were corrected. All edits are line-count neutral.

 package/MDAnalysis/core/selection.py     |  57 +++++---
 package/MDAnalysis/core/topologyattrs.py | 166 ++++++++++++++++++-----
 2 files changed, 176 insertions(+), 47 deletions(-)

diff --git a/package/MDAnalysis/core/selection.py b/package/MDAnalysis/core/selection.py
index 40b3e5a8a24..c6b2e779816 100644
--- a/package/MDAnalysis/core/selection.py
+++ b/package/MDAnalysis/core/selection.py
@@ -529,22 +529,9 @@ def __init__(self, parser, tokens):
         self.values = vals
 
     @return_empty_on_apply
-    def apply(self, group):
-        mask = np.zeros(len(group), dtype=np.bool)
-        values = getattr(group, self.field)
-        for val in self.values:
-            mask |= [fnmatch.fnmatch(x, val) for x in values]
-        return group[mask].unique
-
-
-class AtomNameSelection(StringSelection):
-    """Select atoms based on 'names' attribute"""
-    token = 'name'
-    field = 'names'
-
     def apply(self, group):
         # rather than work on group.names, cheat and look at the lookup table
-        nmattr = group.universe._topology.names
+        nmattr = getattr(group.universe._topology, self.field)
 
         matches = []  # list of passing indices
         # iterate through set of known atom names, check which pass
@@ -558,6 +545,12 @@ def apply(self, group):
         return group[np.in1d(nmidx, matches)].unique
 
 
+class AtomNameSelection(StringSelection):
+    """Select atoms based on 'names' attribute"""
+    token = 'name'
+    field = 'names'
+
+
 class AtomTypeSelection(StringSelection):
     """Select atoms based on 'types' attribute"""
     token = 'type'
@@ -576,13 +569,30 @@ class AtomICodeSelection(StringSelection):
     field = 'icodes'
 
 
-class ResidueNameSelection(StringSelection):
+class _ResidueStringSelection(StringSelection):
+    def apply(self, group):
+        # rather than scan per-atom values, match against the lookup table
+        nmattr = getattr(group.universe._topology, self.field)
+
+        matches = []  # list of passing indices
+        # iterate through the unique residue-level names, check which pass
+        for nm, ix in nmattr.namedict.items():
+            if any(fnmatch.fnmatch(nm, val) for val in self.values):
+                matches.append(ix)
+
+        # name index of the residue that each atom of this group belongs to
+        nmidx = nmattr.nmidx[group.resindices]
+
+        return group[np.in1d(nmidx, matches)].unique
+
+
+class ResidueNameSelection(_ResidueStringSelection):
     """Select atoms based on 'resnames' attribute"""
     token = 'resname'
     field = 'resnames'
 
 
-class MoleculeTypeSelection(StringSelection):
+class MoleculeTypeSelection(_ResidueStringSelection):
     """Select atoms based on 'moltypes' attribute"""
     token = 'moltype'
     field = 'moltypes'
@@ -593,6 +603,21 @@ class SegmentNameSelection(StringSelection):
     token = 'segid'
     field = 'segids'
 
+    def apply(self, group):
+        # rather than scan per-atom values, match against the lookup table
+        nmattr = getattr(group.universe._topology, self.field)
+
+        matches = []  # list of passing indices
+        # iterate through the unique segment-level names, check which pass
+        for nm, ix in nmattr.namedict.items():
+            if any(fnmatch.fnmatch(nm, val) for val in self.values):
+                matches.append(ix)
+
+        # name index of the segment that each atom of this group belongs to
+        nmidx = nmattr.nmidx[group.segindices]
+
+        return group[np.in1d(nmidx, matches)].unique
+
 
 class AltlocSelection(StringSelection):
     """Select atoms based on 'altLoc' attribute"""
diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py
index f36b58fda5f..d6328780765 100644
--- a/package/MDAnalysis/core/topologyattrs.py
+++ b/package/MDAnalysis/core/topologyattrs.py
@@ -473,16 +473,7 @@ def _gen_initial_values(na, nr, ns):
         return np.arange(1, na + 1)
 
 
-# TODO: update docs to property doc
-class Atomnames(AtomAttr):
-    """Name for each atom.
-    """
-    attrname = 'names'
-    singular = 'name'
-    per_object = 'atom'
-    dtype = object
-    transplants = defaultdict(list)
-
+class _AtomStringAttr(AtomAttr):
     def __init__(self, vals, guessed=False):
         self._guessed = guessed
 
@@ -538,6 +529,17 @@ def set_atoms(self, ag, values):
             self.name_lookup = np.concatenate([self.name_lookup, newnames])
         self.values = self.name_lookup[self.nmidx]
 
+
+# TODO: update docs to property doc
+class Atomnames(_AtomStringAttr):
+    """Name for each atom.
+    """
+    attrname = 'names'
+    singular = 'name'
+    per_object = 'atom'
+    dtype = object
+    transplants = defaultdict(list)
+
     def phi_selection(residue, c_name='C', n_name='N', ca_name='CA'):
         """Select AtomGroup corresponding to the phi protein backbone dihedral
         C'-N-CA-C.
@@ -1011,20 +1013,16 @@ def chi1_selections(residues, n_name='N', ca_name='CA', cb_name='CB',
 
 
 # TODO: update docs to property doc
-class Atomtypes(AtomAttr):
+class Atomtypes(_AtomStringAttr):
     """Type for each atom"""
     attrname = 'types'
     singular = 'type'
     per_object = 'atom'
     dtype = object
 
-    @staticmethod
-    def _gen_initial_values(na, nr, ns):
-        return np.array(['' for _ in range(na)], dtype=object)
-
 
 # TODO: update docs to property doc
-class Elements(AtomAttr):
+class Elements(_AtomStringAttr):
     """Element for each atom"""
     attrname = 'elements'
     singular = 'element'
@@ -1048,7 +1046,7 @@ def _gen_initial_values(na, nr, ns):
         return np.zeros(na)
 
 
-class RecordTypes(AtomAttr):
+class RecordTypes(_AtomStringAttr):
     """For PDB-like formats, indicates if ATOM or HETATM
 
     Defaults to 'ATOM'
@@ -1066,7 +1064,7 @@ def _gen_initial_values(na, nr, ns):
         return np.array(['ATOM'] * na, dtype=object)
 
 
-class ChainIDs(AtomAttr):
+class ChainIDs(_AtomStringAttr):
     """ChainID per atom
 
     Note
@@ -1078,10 +1076,6 @@ class ChainIDs(AtomAttr):
     per_object = 'atom'
     dtype = object
 
-    @staticmethod
-    def _gen_initial_values(na, nr, ns):
-        return np.array(['' for _ in range(na)], dtype=object)
-
 
 class Tempfactors(AtomAttr):
     """Tempfactor for atoms"""
@@ -1627,7 +1621,7 @@ def _gen_initial_values(na, nr, ns):
 
 
 # TODO: update docs to property doc
-class AltLocs(AtomAttr):
+class AltLocs(_AtomStringAttr):
     """AltLocs for each atom"""
     attrname = 'altLocs'
     singular = 'altLoc'
@@ -1781,8 +1775,65 @@ def _gen_initial_values(na, nr, ns):
         return np.arange(1, nr + 1)
 
 
+class _ResidueStringAttr(ResidueAttr):
+    def __init__(self, vals, guessed=False):
+        self._guessed = guessed
+
+        self.namedict = dict()  # maps str to nmidx
+        name_lookup = []  # maps idx to str
+        # eg namedict['ALA'] = 5 & name_lookup[5] = 'ALA'
+
+        self.nmidx = np.zeros_like(vals, dtype=int)  # lookup for each residue
+        # eg Residue 5 is 'GLY', so nmidx[5] = 7, where name_lookup[7] = 'GLY'
+
+        for i, val in enumerate(vals):
+            try:
+                self.nmidx[i] = self.namedict[val]
+            except KeyError:
+                nextidx = len(self.namedict)
+                self.namedict[val] = nextidx
+                name_lookup.append(val)
+
+                self.nmidx[i] = nextidx
+
+        self.name_lookup = np.array(name_lookup, dtype=object)
+        self.values = self.name_lookup[self.nmidx]
+
+    @staticmethod
+    def _gen_initial_values(na, nr, ns):
+        return np.array(['' for _ in range(nr)], dtype=object)
+
+    @_check_length
+    def set_residues(self, rg, values):
+        newnames = []
+
+        # two possibilities, either single value given, or one per Residue
+        if isinstance(values, str):
+            try:
+                newidx = self.namedict[values]
+            except KeyError:
+                newidx = len(self.namedict)
+                self.namedict[values] = newidx
+                newnames.append(values)
+        else:
+            newidx = np.zeros_like(values, dtype=int)
+            for i, val in enumerate(values):
+                try:
+                    newidx[i] = self.namedict[val]
+                except KeyError:
+                    nextidx = len(self.namedict)
+                    self.namedict[val] = nextidx
+                    newnames.append(val)
+                    newidx[i] = nextidx
+
+        self.nmidx[rg.ix] = newidx  # newidx either single value or same size array
+        if newnames:
+            self.name_lookup = np.concatenate([self.name_lookup, newnames])
+        self.values = self.name_lookup[self.nmidx]
+
+
 # TODO: update docs to property doc
-class Resnames(ResidueAttr):
+class Resnames(_ResidueStringAttr):
     attrname = 'resnames'
     singular = 'resname'
     target_classes = [AtomGroup, ResidueGroup, SegmentGroup, Atom, Residue]
@@ -1903,18 +1954,14 @@ def _gen_initial_values(na, nr, ns):
         return np.arange(1, nr + 1)
 
 
-class ICodes(ResidueAttr):
+class ICodes(_ResidueStringAttr):
     """Insertion code for Atoms"""
     attrname = 'icodes'
     singular = 'icode'
     dtype = object
 
-    @staticmethod
-    def _gen_initial_values(na, nr, ns):
-        return np.array(['' for _ in range(nr)], dtype=object)
-
 
-class Moltypes(ResidueAttr):
+class Moltypes(_ResidueStringAttr):
     """Name of the molecule type
 
     Two molecules that share a molecule type share a common template topology.
@@ -1969,8 +2016,65 @@ def set_segments(self, sg, values):
         self.values[sg.ix] = values
 
 
+class _SegmentStringAttr(SegmentAttr):
+    def __init__(self, vals, guessed=False):
+        self._guessed = guessed
+
+        self.namedict = dict()  # maps str to nmidx
+        name_lookup = []  # maps idx to str
+        # eg namedict['A'] = 5 & name_lookup[5] = 'A'
+
+        self.nmidx = np.zeros_like(vals, dtype=int)  # lookup for each segment
+        # eg Segment 5 is 'B', so nmidx[5] = 7, where name_lookup[7] = 'B'
+
+        for i, val in enumerate(vals):
+            try:
+                self.nmidx[i] = self.namedict[val]
+            except KeyError:
+                nextidx = len(self.namedict)
+                self.namedict[val] = nextidx
+                name_lookup.append(val)
+
+                self.nmidx[i] = nextidx
+
+        self.name_lookup = np.array(name_lookup, dtype=object)
+        self.values = self.name_lookup[self.nmidx]
+
+    @staticmethod
+    def _gen_initial_values(na, nr, ns):
+        return np.array(['' for _ in range(ns)], dtype=object)
+
+    @_check_length
+    def set_segments(self, sg, values):
+        newnames = []
+
+        # two possibilities, either single value given, or one per Segment
+        if isinstance(values, str):
+            try:
+                newidx = self.namedict[values]
+            except KeyError:
+                newidx = len(self.namedict)
+                self.namedict[values] = newidx
+                newnames.append(values)
+        else:
+            newidx = np.zeros_like(values, dtype=int)
+            for i, val in enumerate(values):
+                try:
+                    newidx[i] = self.namedict[val]
+                except KeyError:
+                    nextidx = len(self.namedict)
+                    self.namedict[val] = nextidx
+                    newnames.append(val)
+                    newidx[i] = nextidx
+
+        self.nmidx[sg.ix] = newidx  # newidx either single value or same size array
+        if newnames:
+            self.name_lookup = np.concatenate([self.name_lookup, newnames])
+        self.values = self.name_lookup[self.nmidx]
+
+
 # TODO: update docs to property doc
-class Segids(SegmentAttr):
+class Segids(_SegmentStringAttr):
     attrname = 'segids'
     singular = 'segid'
     target_classes = [AtomGroup, ResidueGroup, SegmentGroup,