From 2a1a092cb6af987b438211155313b940a100373c Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Tue, 18 Aug 2020 11:51:44 +0100 Subject: [PATCH 01/26] depricate hydrogen bond analysis in water bridge --- .../analysis/hbonds/wbridge_analysis.py | 1525 +--------------- .../hydrogenbonds/wbridge_analysis.py | 1620 +++++++++++++++++ .../MDAnalysisTests/analysis/test_wbridge.py | 2 +- 3 files changed, 1623 insertions(+), 1524 deletions(-) create mode 100644 package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py diff --git a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py index c86c4f96572..5f709cb02b4 100644 --- a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py @@ -33,1527 +33,6 @@ .. _`@xiki-tempula`: https://github.com/xiki-tempula +This module is moved to :class:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis`.""" -Given a :class:`~MDAnalysis.core.universe.Universe` (simulation -trajectory with 1 or more frames) measure all water bridges for each -frame between selections 1 and 2. -Water bridge is defined as a bridging water which simultaneously forms -two hydrogen bonds with atoms from both selection 1 and selection 2. - -A water bridge can form between two hydrogen bond acceptors. - -e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O−H···:\ :sup:`-`\ O\ :sub:`2`\ C- - -A water bridge can also form between two hydrogen bond donors. - -e.g. -NH···:O:···HN- (where O is the oxygen of a bridging water) - -A hydrogen bond acceptor and another hydrogen bond donor can be bridged by a -water. - -e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···HN- (where H−O is part of **H−O**\ −H) - -A higher order water bridge is defined as more than one water bridging -hydrogen bond acceptor and donor. An example of a second order water bridge: - -e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···H−O:···HN- (where H−O is part of **H−O**\ −H) - -The following keyword arguments are important to control the behaviour of the -water bridge analysis: - - - *water_selection* (``resname SOL``): the selection string for the bridging - water - - *order* the maximum number of water bridging both ends - - donor-acceptor *distance* (Å): 3.0 - - Angle *cutoff* (degrees): 120.0 - - *forcefield* to switch between default values for different force fields - - *donors* and *acceptors* atom types (to add additional atom names) - -Theory ------- - -This module attempts to find multi-order water bridge by an approach similar -to breadth-first search, where the first solvation shell of selection 1 is -selected, followed by the selection of the second solvation shell as well as -any hydrogen bonding partner from selection 1. After that, the third solvation -shell, as well as any binding partners from selection 2, are detected. This -process is repeated until the maximum order of water bridges is reached. - -.. _wb_Analysis_Network: - -Output as Network ------------------ - -Since the waters connecting the two ends of the selections are by nature a network. -We provide a network representation of the water network. Water bridge data are -returned per frame, which is stored in :attr:`WaterBridgeAnalysis.network`. Each -frame is represented as a dictionary, where the keys are the hydrogen bonds -originating from selection 1 and the values are new dictionaries representing -the hydrogen bonds coming out of the corresponding molecules making hydrogen bonds -with selection 1. - -As for the hydrogen bonds which reach the selection 2, the values of the -corresponding keys are None. One example where selection 1 and selection 2 are -joined by one water molecule (A) which also hydrogen bond to another water (B) -which also hydrogen bond to selection 2 would be represented as :: - - # (selection 1)-O:···H-O(water 1):···H-(selection 2) - # | : - # H·············O-H(water2) - # H - {(sele1_acceptor, None, water1_donor, water1_donor_heavy, distance, angle): - {(water1_acceptor, None, sele2_donor, sele2_donor_heavy, distance, angle): None}, - {(water1_donor, water1_donor_heavy, water2_acceptor, None, distance, angle): - {(water2_acceptor, None, sele2_donor, sele2_donor_heavy, distance, angle): None} - }, - } - -The atoms are represented by atom index and if the atom is hydrogen bond donor, -it is followed by the index of the corresponding heavy atom -``(donor_proton, donor_heavy_atom)``. -If the atom is a hydrogen bond acceptor, it is followed by none. - -.. _wb_Analysis_Timeseries: - -Output as Timeseries --------------------- - -For lower order water bridges, it might be desirable to represent the connections as -:attr:`WaterBridgeAnalysis.timeseries`. The results are returned per frame and -are a list of hydrogen bonds between the selection 1 or selection 2 and the -bridging waters. Due to the complexity of the higher order water bridge and the -fact that one hydrogen bond between two waters can appear in both third and -fourth order water bridge, the hydrogen bonds in the -:attr:`WaterBridgeAnalysis.timeseries` attribute are generated in a depth-first -search manner to avoid duplication. Example code of how -:attr:`WaterBridgeAnalysis.timeseries` is generated:: - - def network2timeseries(network, timeseries): - '''Traverse the network in a depth-first fashion. - expand_timeseries will expand the compact representation to the familiar - timeseries representation.''' - - if network is None: - return - else: - for node in network: - timeseries.append(expand_timeseries(node)) - network2timeseries(network[node], timeseries) - - timeseries = [] - network2timeseries(network, timeseries) - -The list is formatted similar to the \ :attr:`HydrogenBondAnalysis.timeseries -` -except that the atom identifier is expressed as (residue name, residue number, -atom name). An example would be. :: - - results = [ - [ # frame 1 - [ , , - (, , ), - (, , ), - , ], - .... - ], - [ # frame 2 - [ ... ], [ ... ], ... - ], - ... - ] - -Using the :meth:`WaterBridgeAnalysis.generate_table` method one can reformat -the results as a flat "normalised" table that is easier to import into a -database or dataframe for further processing. - -Detection of water bridges --------------------------- -Water bridges are recorded if a bridging water simultaneously forms -hydrogen bonds with selection 1 and selection 2. - -Hydrogen bonds are detected as is described in \ -:class:`~MDAnalysis.analysis.hbonds.hbond_analysis.HydrogenBondAnalysis`, see \ -:ref:`Detection-of-hydrogen-bonds`. - -The lists of donor and acceptor names can be extended by providing lists of -atom names in the `donors` and `acceptors` keywords to -:class:`WaterBridgeAnalysis`. If the lists are entirely inappropriate -(e.g. when analysing simulations done with a force field that uses very -different atom names) then one should either use the value "other" for -`forcefield` to set no default values or derive a new class and set the -default list oneself:: - - class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): - DEFAULT_DONORS = {"OtherFF": tuple(set([...]))} - DEFAULT_ACCEPTORS = {"OtherFF": tuple(set([...]))} - -Then simply use the new class instead of the parent class and call it with -```forcefield` = "OtherFF"``. Please also consider contributing the list of heavy -atom names to MDAnalysis. - -How to perform WaterBridgeAnalysis ----------------------------------- - -All water bridges between arginine and aspartic acid can be analysed with :: - - import MDAnalysis - import MDAnalysis.analysis.hbonds - - u = MDAnalysis.Universe('topology', 'trajectory') - w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', 'resname ASP') - w.run() - -The maximum number of bridging waters detected can be changed using the order keyword. :: - - w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', 'resname ASP', - order=3) - -Thus, a maximum of three bridging waters will be detected. - -An example of using the :attr:`~WaterBridgeAnalysis` would be -detecting the percentage of time a certain water bridge exits. - -Trajectory :code:`u` has two frames, where the first frame contains a water -bridge from the oxygen of the first arginine to one of the oxygens in the carboxylic -group of aspartate (ASP3:OD1). In the second frame, the same water bridge forms but -is between the oxygen of the arginine and the other oxygen in the carboxylic -group (ASP3:OD2). :: - - # index residue id residue name atom name - # 0 1 ARG O - # 1 2 SOL OW - # 2 2 SOL HW1 - # 3 2 SOL HW2 - # 4 3 ASP OD1 - # 5 3 ASP OD2 - print(w.timeseries) - -prints out. :: - - [ # frame 1 - # A water bridge SOL2 links O from ARG1 to the carboxylic group OD1 of ASP3 - [[0,2,('ARG',1, 'O'),('SOL',2,'HW1'), 3.0,180], - [3,4,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180], - ], - # frame 2 - # Another water bridge SOL2 links O from ARG1 to the other oxygen of the - # carboxylic group OD2 of ASP3 - [[0,2,('ARG',1, 'O'),('SOL',2,'HW1'), 3.0,180], - [3,5,('SOL',2,'HW2'),('ASP',3,'OD2'), 3.0,180], - ], - ] - - -.. _wb_count_by_type: - -Use count_by_type ------------------ - -We can use the :meth:`~WaterBridgeAnalysis.count_by_type` to -generate the frequence of all water bridges in the simulation. :: - - w.count_by_type() - -Returns :: - - [(0, 3, 'ARG', 1, 'O', 'ASP', 3, 'OD1', 0.5), - (0, 4, 'ARG', 1, 'O', 'ASP', 3, 'OD2', 0.5),] - -You might think that the OD1 and OD2 are the same oxygen and the aspartate has just flipped -and thus, they should be counted as the same type of water bridge. The type of the water -bridge can be customised by supplying an analysis function to -:meth:`~WaterBridgeAnalysis.count_by_type`. - -The analysis function has two parameters. The current and the output. The current is a list -of hydrogen bonds from selection 1 to selection 2, formatted in the same fashion as -:attr:`WaterBridgeAnalysis.network`, and an example will be :: - - [ # sele1 acceptor idx, , water donor index, donor heavy atom idx, dist, ang. - [ 0, None, 2, 1, 3.0,180], - # water donor idx, donor heavy atom idx, sele2 acceptor idx, distance, angle. - [ 3, 1, 4, None, 3.0,180],] - -Where ``current[0]`` is the first hydrogen bond originating from selection 1 and ``current[-1]`` is -the final hydrogen bond ending in selection 2. The output sums up all the information in the -current frame and is a dictionary with a user-defined key and the value is the weight of the -corresponding key. During the analysis phase, the function analysis iterates through all the -water bridges and modify the output in-place. At the end of the analysis, the keys from -all the frames are collected and the corresponding values will be summed up and returned. :: - - def analysis(current, output, u): - r'''This function defines how the type of water bridge should be specified. - - Parameters - ---------- - current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. - output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the weight of this type of water bridge. - u : MDAnalysis.universe - The current Universe for looking up atoms.''' - - # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] - # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] - # expand the atom index to the resname, resid, atom names - sele1 = u.atoms[sele1_index] - sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) - # if the residue name is ASP and the atom name is OD2 or OD1, - # the atom name is changed to OD - if s2_resname == 'ASP' and (s2_name == 'OD1' or s2_name == 'OD2'): - s2_name = 'OD' - # setting up the key which defines this type of water bridge. - key = (s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) - # The number of this type of water bridge is incremented by 1. - output[key] += 1 - - w.count_by_type(analysis_func=analysis) - -Returns :: - - [(('ARG', 1, 'O', 'ASP', 3, 'OD'), 1.0),] - -Note that the result is arranged in the format of ``(key, the proportion of time)``. When no -custom analysis function is supplied, the key is expanded for backward compatibility. So -that when the same code is executed, the result returned will be the same as the result -given since version 0.17.0 and the same as the -:meth:`HydrogenBondAnalysis.count_by_type`. - -Some people might only interested in contacts between residues and pay no attention -to the details regarding the atom name. However, since multiple water bridges can -exist between two residues, which sometimes can give a result such that the water -bridge between two residues exists 300% of the time. Though this might be a desirable -result for some people, others might want the water bridge between two residues to be -only counted once per frame. This can also be achieved by supplying an analysis function -to :meth:`~WaterBridgeAnalysis.count_by_type`. :: - - def analysis(current, output, u): - '''This function defines how the type of water bridge should be specified. - - Parameters - ---------- - current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. - output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the weight of this type of water bridge. - u : MDAnalysis.universe - The current Universe for looking up atoms. - ''' - - # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] - # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] - # expand the atom index to the resname, resid, atom names - sele1 = u.atoms[sele1_index] - sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) - # s1_name and s2_name are not included in the key - key = (s1_resname, s1_resid, s2_resname, s2_resid) - - # Each residue is only counted once per frame - output[key] = 1 - - w.count_by_type(analysis_func=analysis) - -Returns :: - - [(('ARG', 1, 'ASP', 3), 1.0),] - -On the other hand, other people may insist that the first order and second-order water -bridges shouldn't be mixed together, which can also be achieved by supplying an analysis -function to :meth:`~WaterBridgeAnalysis.count_by_type`. :: - - def analysis(current, output, u): - '''This function defines how the type of water bridge should be specified. - - Parameters - ---------- - current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. - output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the weight of this type of water bridge. - u : MDAnalysis.universe - The current Universe for looking up atoms. - ''' - - # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] - # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] - # expand the atom index to the resname, resid, atom names - sele1 = u.atoms[sele1_index] - sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) - # order of the current water bridge is computed - order_of_water_bridge = len(current) - 1 - # and is included in the key - key = (s1_resname, s1_resid, s2_resname, s2_resid, order_of_water_bridge) - # The number of this type of water bridge is incremented by 1. - output[key] += 1 - - w.count_by_type(analysis_func=analysis) - -The extra number 1 precede the 1.0 indicate that this is a first order water bridge :: - - [(('ARG', 1, 'ASP', 3, 1), 1.0),] - -Some people might not be interested in the interactions related to arginine. The undesirable -interactions can be discarded by supplying an analysis function to -:meth:`~WaterBridgeAnalysis.count_by_type`. :: - - def analysis(current, output, u): - '''This function defines how the type of water bridge should be specified. - - Parameters - ---------- - current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. - output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the number of this type of water bridge. - u : MDAnalysis.universe - The current Universe for looking up atoms. - ''' - - # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] - # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] - # expand the atom index to the resname, resid, atom names - sele1 = u.atoms[sele1_index] - sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) - if not s1_resname == 'ARG': - key = (s1_resname, s1_resid, s2_resname, s2_resid) - output[key] += 1 - - w.count_by_type(analysis_func=analysis) - -Returns nothing in this case :: - - [,] - -Additional keywords can be supplied to the analysis function by passing through -:meth:`~WaterBridgeAnalysis.count_by_type`. :: - - def analysis(current, output, **kwargs): - ... - w.count_by_type(analysis_func=analysis, **kwargs) - - -.. _wb_count_by_time: - -Use count_by_time ------------------ - -:meth:`~WaterBridgeAnalysis.count_by_type` aggregates data across frames, which -might be desirable in some cases but not the others. :meth:`~WaterBridgeAnalysis.count_by_time` -provides additional functionality for aggregating results for each frame. - -The default behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` is counting the number of -water bridges from selection 1 to selection 2 for each frame. Take the previous ASP, ARG salt -bridge for example: :: - - w.count_by_time() - -As one water bridge is found in both frames, the method returns :: - - [(1.0, 1), (2.0, 1), ] - -Similar to :meth:`~WaterBridgeAnalysis.count_by_type` -The behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` can also be modified by supplying -an analysis function. - -Suppose we want to count - - - the **number** of water molecules involved in bridging selection 1 to selection 2. - - only if water bridge terminates in atom name **OD1 of ASP**. - - only when water bridge is joined by less than **two** water. - -The analysis function can be written as:: - - def analysis(current, output, u, **kwargs): - '''This function defines how the counting of water bridge should be specified. - - Parameters - ---------- - current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. - output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the number of this type of water bridge. - u : MDAnalysis.universe - The current Universe for looking up atoms. - ''' - - # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] - # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] - # expand the atom index to the resname, resid, atom names - sele1 = u.atoms[sele1_index] - sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) - - # only the residue name is ASP and the atom name is OD1, - if s2_resname == 'ASP' and s2_name == 'OD1': - # only if the order of water bridge is less than 2 - if len(current) -1 < 2: - # extract all water molecules involved in the water bridge - # extract the first water from selection 1 - s1_index, to_index, (s1_resname, s1_resid, s1_name), - (to_resname, to_resid, to_name), dist, angle = current[0] - key = (to_resname, to_resid) - output[key] = 1 - - # extract all the waters between selection 1 and selection 2 - for hbond in current[1:-1]: - # decompose the hydrogen bond. - from_index, to_index, (from_resname, from_resid, from_name), - (to_resname, to_resid, to_name), dist, angle = hbond - # add first water - key1 = (from_resname, from_resid) - output[key1] = 1 - # add second water - key2 = (to_resname, to_resid) - output[key2] = 1 - - # extract the last water to selection 2 - from_index, s2_index, (from_resname, from_resid, from_name), - (s2_resname, s2_resid, s2_name), dist, angle = current[-1] - key = (from_resname, from_resid) - output[key] = 1 - - w.count_by_time(analysis_func=analysis) - -Returns :: - - [(1.0, 1), (2.0, 0),] - -Classes -------- - -.. autoclass:: WaterBridgeAnalysis - :members: - - .. attribute:: timesteps - - List of the times of each timestep. This can be used together with - :attr:`~WaterBridgeAnalysis.timeseries` to find the specific time point - of a water bridge existence. - -""" -from collections import defaultdict -import logging -import warnings -import numpy as np - -from ..base import AnalysisBase -from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch -from MDAnalysis.lib.distances import capped_distance, calc_angles -from MDAnalysis import NoDataError, MissingDataWarning, SelectionError -from MDAnalysis.lib import distances - -logger = logging.getLogger('MDAnalysis.analysis.WaterBridgeAnalysis') - -class WaterBridgeAnalysis(AnalysisBase): - """Perform a water bridge analysis - - The analysis of the trajectory is performed with the - :meth:`WaterBridgeAnalysis.run` method. The result is stored in - :attr:`WaterBridgeAnalysis.timeseries`. See - :meth:`~WaterBridgeAnalysis.run` for the format. - - :class:`WaterBridgeAnalysis` uses the same default atom names as - :class:`~MDAnalysis.analysis.hbonds.hbond_analysis.HydrogenBondAnalysis`, - see :ref:`Default atom names for hydrogen bonding analysis` - - - .. versionadded:: 0.17.0 - - - """ - # use tuple(set()) here so that one can just copy&paste names from the - # table; set() takes care for removing duplicates. At the end the - # DEFAULT_DONORS and DEFAULT_ACCEPTORS should simply be tuples. - - #: default heavy atom names whose hydrogens are treated as *donors* - #: (see :ref:`Default atom names for hydrogen bonding analysis`); - #: use the keyword `donors` to add a list of additional donor names. - DEFAULT_DONORS = { - 'CHARMM27': tuple(set([ - 'N', 'OH2', 'OW', 'NE', 'NH1', 'NH2', 'ND2', 'SG', 'NE2', 'ND1', - 'NZ', 'OG', 'OG1', 'NE1', 'OH'])), - 'GLYCAM06': tuple(set(['N', 'NT', 'N3', 'OH', 'OW'])), - 'other': tuple(set([]))} - - #: default atom names that are treated as hydrogen *acceptors* - #: (see :ref:`Default atom names for hydrogen bonding analysis`); - #: use the keyword `acceptors` to add a list of additional acceptor names. - DEFAULT_ACCEPTORS = { - 'CHARMM27': tuple(set([ - 'O', 'OC1', 'OC2', 'OH2', 'OW', 'OD1', 'OD2', 'SG', 'OE1', 'OE1', - 'OE2', 'ND1', 'NE2', 'SD', 'OG', 'OG1', 'OH'])), - 'GLYCAM06': tuple(set(['N', 'NT', 'O', 'O2', 'OH', 'OS', 'OW', 'OY', 'SM'])), - 'other': tuple(set([]))} - - #: A :class:`collections.defaultdict` of covalent radii of common donors - #: (used in :meth`_get_bonded_hydrogens_list` to check if a hydrogen is - #: sufficiently close to its donor heavy atom). Values are stored for - #: N, O, P, and S. Any other heavy atoms are assumed to have hydrogens - #: covalently bound at a maximum distance of 1.5 Å. - r_cov = defaultdict(lambda: 1.5, # default value - N=1.31, O=1.31, P=1.58, S=1.55) - - def __init__(self, universe, selection1='protein', - selection2='not resname SOL', water_selection='resname SOL', order=1, - selection1_type='both', update_selection=False, update_water_selection=True, - filter_first=True, distance_type='hydrogen', distance=3.0, - angle=120.0, forcefield='CHARMM27', donors=None, - acceptors=None, output_format="sele1_sele2", debug=None, verbose=False, - pbc=False, **kwargs): - """Set up the calculation of water bridges between two selections in a - universe. - - The timeseries is accessible as the attribute - :attr:`WaterBridgeAnalysis.timeseries`. - - If no hydrogen bonds are detected or if the initial check fails, look - at the log output (enable with :func:`MDAnalysis.start_logging` and set - `verbose` ``=True``). It is likely that the default names for donors - and acceptors are not suitable (especially for non-standard - ligands). In this case, either change the `forcefield` or use - customized `donors` and/or `acceptors`. - - Parameters - ---------- - universe : Universe - Universe object - selection1 : str (optional) - Selection string for first selection ['protein'] - selection2 : str (optional) - Selection string for second selection ['not resname SOL'] - This string selects everything except water where water is assumed - to have a residue name as SOL. - water_selection : str (optional) - Selection string for bridging water selection ['resname SOL'] - The default selection assumes that the water molecules have residue - name "SOL". Change it to the appropriate selection for your - specific force field. - - However, in theory, this selection can be anything which forms - a hydrogen bond with selection 1 and selection 2. - order : int (optional) - The maximum number of water bridges linking both selections. - if the order is set to 3, then all the residues linked with less than - three water molecules will be detected. [1] - - Computation of high order water bridges can be very time-consuming. - Think carefully before running the calculation, do you really want - to compute the 20th order water bridge between domain A and domain B - or you just want to know the third order water bridge between two residues. - selection1_type : {"donor", "acceptor", "both"} (optional) - Selection 1 can be 'donor', 'acceptor' or 'both'. Note that the - value for `selection1_type` automatically determines how - `selection2` handles donors and acceptors: If `selection1` contains - 'both' then `selection2` will also contain 'both'. If `selection1` - is set to 'donor' then `selection2` is 'acceptor' (and vice versa). - ['both']. - update_selection : bool (optional) - Update selection 1 and 2 at each frame. Setting to ``True`` if the - selection is not static. Selections are filtered first to speed up - performance. Thus, setting to ``True`` is recommended if contact - surface between selection 1 and selection 2 is constantly - changing. [``False``] - update_water_selection : bool (optional) - Update selection of water at each frame. Setting to ``False`` is - **only** recommended when the total amount of water molecules in the - simulation are small and when water molecules remain static across - the simulation. - - However, in normal simulations, only a tiny proportion of water is - engaged in the formation of water bridge. It is recommended to - update the water selection and set keyword `filter_first` to - ``True`` so as to filter out water not residing between the two - selections. [``True``] - filter_first : bool (optional) - Filter the water selection to only include water within 4 Å + `order` * - (2 Å + `distance`) away from `both` selection 1 and selection 2. - Selection 1 and selection 2 are both filtered to only include atoms - with the same distance away from the other selection. [``True``] - distance : float (optional) - Distance cutoff for hydrogen bonds; only interactions with a H-A - distance <= `distance` (and the appropriate D-H-A angle, see - `angle`) are recorded. (Note: `distance_type` can change this to - the D-A distance.) [3.0] - angle : float (optional) - Angle cutoff for hydrogen bonds; an ideal H-bond has an angle of - 180º. A hydrogen bond is only recorded if the D-H-A angle is - >= `angle`. The default of 120º also finds fairly non-specific - hydrogen interactions and possibly better value is 150º. [120.0] - forcefield : {"CHARMM27", "GLYCAM06", "other"} (optional) - Name of the forcefield used. Switches between different - :attr:`~HydrogenBondAnalysis.DEFAULT_DONORS` and - :attr:`~HydrogenBondAnalysis.DEFAULT_ACCEPTORS` values. - ["CHARMM27"] - donors : sequence (optional) - Extra H donor atom types (in addition to those in - :attr:`~HydrogenBondAnalysis.DEFAULT_DONORS`), must be a sequence. - acceptors : sequence (optional) - Extra H acceptor atom types (in addition to those in - :attr:`~HydrogenBondAnalysis.DEFAULT_ACCEPTORS`), must be a - sequence. - distance_type : {"hydrogen", "heavy"} (optional) - Measure hydrogen bond lengths between donor and acceptor heavy - atoms ("heavy") or between donor hydrogen and acceptor heavy - atom ("hydrogen"). If using "heavy" then one should set the - *distance* cutoff to a higher value such as 3.5 Å. ["hydrogen"] - output_format: {"sele1_sele2", "donor_acceptor"} (optional) - Setting the output format for timeseries and table. If set to - "sele1_sele2", for each hydrogen bond, the one close to selection 1 - will be placed before selection 2. If set to "donor_acceptor", the - donor will be placed before acceptor. "sele1_sele2"] - debug : bool (optional) - If set to ``True`` enables per-frame debug logging. This is disabled - by default because it generates a very large amount of output in - the log file. (Note that a logger must have been started to see - the output, e.g. using :func:`MDAnalysis.start_logging`.) - verbose : bool (optional) - Toggle progress output. (Can also be given as keyword argument to - :meth:`run`.) - - Notes - ----- - In order to speed up processing, atoms are filtered by a coarse - distance criterion before a detailed hydrogen bonding analysis is - performed (`filter_first` = ``True``). - - If selection 1 and selection 2 are very mobile during the simulation - and the contact surface is constantly changing (i.e. residues are - moving farther than 4 Å + `order` * (2 Å + `distance`)), you might - consider setting the `update_selection` keywords to ``True`` - to ensure correctness. - - .. versionchanged 0.20.0 - The :attr:`WaterBridgeAnalysis.timeseries` has been updated - see :attr:`WaterBridgeAnalysis.timeseries` for detail. - This class is now based on - :class:`~MDAnalysis.analysis.base.AnalysisBase`. - - - """ - super(WaterBridgeAnalysis, self).__init__(universe.trajectory, - **kwargs) - self.water_selection = water_selection - self.update_water_selection = update_water_selection - # per-frame debugging output? - self.debug = debug - - # set the output format - self.output_format = output_format - - self.u = universe - self.selection1 = selection1 - self.selection2 = selection2 - self.selection1_type = selection1_type - - # if the selection 1 and selection 2 are the same - if selection1 == selection2: - # eliminate the duplication - self.selection1_type = "donor" - self.update_selection = update_selection - self.filter_first = filter_first - self.distance = distance - self.distance_type = distance_type # note: everything except 'heavy' will give the default behavior - self.angle = angle - self.pbc = pbc and all(self.u.dimensions[:3]) - self.order = order - - # set up the donors/acceptors lists - if donors is None: - donors = () - if acceptors is None: - acceptors = () - self.forcefield = forcefield - self.donors = tuple(set(self.DEFAULT_DONORS[forcefield]).union(donors)) - self.acceptors = tuple(set(self.DEFAULT_ACCEPTORS[forcefield]).union(acceptors)) - - if self.selection1_type not in ('both', 'donor', 'acceptor'): - raise ValueError('HydrogenBondAnalysis: Invalid selection type {0!s}'.format( - self.selection1_type)) - - self._network = [] # final result accessed as self.network - self.timesteps = None # time for each frame - - self._log_parameters() - - def _log_parameters(self): - """Log important parameters to the logfile.""" - logger.info("WaterBridgeAnalysis: selection = %r (update: %r)", - self.selection2, self.update_selection) - logger.info("WaterBridgeAnalysis: water selection = %r (update: %r)", - self.water_selection, self.update_water_selection) - logger.info("WaterBridgeAnalysis: criterion: donor %s atom and acceptor \ - atom distance <= %.3f A", self.distance_type, - self.distance) - logger.info("WaterBridgeAnalysis: criterion: angle D-H-A >= %.3f degrees", - self.angle) - logger.info("WaterBridgeAnalysis: force field %s to guess donor and \ - acceptor names", self.forcefield) - - def _build_residue_dict(self, selection): - # Build the residue_dict where the key is the residue name - # The content is a dictionary where hydrogen bond donor heavy atom names is the key - # The content is the hydrogen bond donor hydrogen atom names - atom_group = self.u.select_atoms(selection) - for residue in atom_group.residues: - if not residue.resname in self._residue_dict: - self._residue_dict[residue.resname] = defaultdict(set) - for atom in residue.atoms: - if atom.name in self.donors: - self._residue_dict[residue.resname][atom.name].update(self._get_bonded_hydrogens(atom).names) - - def _update_donor_h(self, atom_ix, h_donors, donors_h): - atom = self.u.atoms[atom_ix] - residue = atom.residue - hydrogen_names = self._residue_dict[residue.resname][atom.name] - if hydrogen_names: - hydrogens = residue.atoms.select_atoms('name {0}'.format( - ' '.join(hydrogen_names))).ix - for atom in hydrogens: - h_donors[atom] = atom_ix - donors_h[atom_ix].append(atom) - - def _update_selection(self): - self._s1_donors = [] - self._s1_h_donors = {} - self._s1_donors_h = defaultdict(list) - self._s1_acceptors = [] - - self._s2_donors = [] - self._s2_h_donors = {} - self._s2_donors_h = defaultdict(list) - self._s2_acceptors = [] - - self._s1 = self.u.select_atoms(self.selection1).ix - self._s2 = self.u.select_atoms(self.selection2).ix - - if self.filter_first and len(self._s1): - self.logger_debug('Size of selection 1 before filtering:' - ' {} atoms'.format(len(self._s1))) - ns_selection_1 = AtomNeighborSearch(self.u.atoms[self._s1], box=self.box) - self._s1 = ns_selection_1.search(self.u.atoms[self._s2], self.selection_distance).ix - self.logger_debug("Size of selection 1: {0} atoms".format(len(self._s1))) - - if len(self._s1) == 0: - logger.warning('Selection 1 "{0}" did not select any atoms.' - .format(str(self.selection1)[:80])) - return - - if self.filter_first and len(self._s2): - self.logger_debug('Size of selection 2 before filtering:' - ' {} atoms'.format(len(self._s2))) - ns_selection_2 = AtomNeighborSearch(self.u.atoms[self._s2], box=self.box) - self._s2 = ns_selection_2.search(self.u.atoms[self._s1], self.selection_distance).ix - self.logger_debug('Size of selection 2: {0} atoms'.format(len(self._s2))) - - if len(self._s2) == 0: - logger.warning('Selection 2 "{0}" did not select any atoms.' - .format(str(self.selection2)[:80])) - return - - if self.selection1_type in ('donor', 'both'): - self._s1_donors = self.u.atoms[self._s1].select_atoms( - 'name {0}'.format(' '.join(self.donors))).ix - for atom_ix in self._s1_donors: - self._update_donor_h(atom_ix, self._s1_h_donors, self._s1_donors_h) - self.logger_debug("Selection 1 donors: {0}".format(len(self._s1_donors))) - self.logger_debug("Selection 1 donor hydrogens: {0}".format(len(self._s1_h_donors))) - if self.selection1_type in ('acceptor', 'both'): - self._s1_acceptors = self.u.atoms[self._s1].select_atoms( - 'name {0}'.format(' '.join(self.acceptors))).ix - self.logger_debug("Selection 1 acceptors: {0}".format(len(self._s1_acceptors))) - - if len(self._s2) == 0: - return None - if self.selection1_type in ('donor', 'both'): - self._s2_acceptors = self.u.atoms[self._s2].select_atoms( - 'name {0}'.format(' '.join(self.acceptors))).ix - self.logger_debug("Selection 2 acceptors: {0:d}".format(len(self._s2_acceptors))) - if self.selection1_type in ('acceptor', 'both'): - self._s2_donors = self.u.atoms[self._s2].select_atoms( - 'name {0}'.format(' '.join(self.donors))).ix - for atom_ix in self._s2_donors: - self._update_donor_h(atom_ix, self._s2_h_donors, self._s2_donors_h) - self.logger_debug("Selection 2 donors: {0:d}".format(len(self._s2_donors))) - self.logger_debug("Selection 2 donor hydrogens: {0:d}".format(len(self._s2_h_donors))) - - def _update_water_selection(self): - self._water_donors = [] - self._water_h_donors = {} - self._water_donors_h = defaultdict(list) - self._water_acceptors = [] - - self._water = self.u.select_atoms(self.water_selection).ix - self.logger_debug('Size of water selection before filtering:' - ' {} atoms'.format(len(self._water))) - if len(self._water) and self.filter_first: - filtered_s1 = AtomNeighborSearch(self.u.atoms[self._water], box=self.box).search(self.u.atoms[self._s1], - self.selection_distance) - if filtered_s1: - self._water = AtomNeighborSearch(filtered_s1, box=self.box).search(self.u.atoms[self._s2], - self.selection_distance).ix - - self.logger_debug("Size of water selection: {0} atoms".format(len(self._water))) - - if len(self._water) == 0: - logger.warning("Water selection '{0}' did not select any atoms." - .format(str(self.water_selection)[:80])) - else: - self._water_donors = self.u.atoms[self._water].select_atoms( - 'name {0}'.format(' '.join(self.donors))).ix - for atom_ix in self._water_donors: - self._update_donor_h(atom_ix, self._water_h_donors, self._water_donors_h) - self.logger_debug("Water donors: {0}".format(len(self._water_donors))) - self.logger_debug("Water donor hydrogens: {0}".format(len(self._water_h_donors))) - self._water_acceptors = self.u.atoms[self._water].select_atoms( - 'name {0}'.format(' '.join(self.acceptors))).ix - self.logger_debug("Water acceptors: {0}".format(len(self._water_acceptors))) - - def _get_bonded_hydrogens(self, atom): - """Find hydrogens bonded within cutoff to `atom`. - - Hydrogens are detected by either name ("H*", "[123]H*") or type ("H"); - this is not fool-proof as the atom type is not always a character but - the name pattern should catch most typical occurrences. - - The distance from `atom` is calculated for all hydrogens in the residue - and only those within a cutoff are kept. The cutoff depends on the - heavy atom (more precisely, on its element, which is taken as the first - letter of its name ``atom.name[0]``) and is parameterized in - :attr:`HydrogenBondAnalysis.r_cov`. If no match is found then the - default of 1.5 Å is used. - - - Parameters - ---------- - atom : groups.Atom - heavy atom - - Returns - ------- - hydrogen_atoms : AtomGroup or [] - list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) - or empty list ``[]`` if none were found. - """ - try: - return atom.residue.atoms.select_atoms( - "(name H* 1H* 2H* 3H* or type H) and around {0:f} name {1!s}" - "".format(self.r_cov[atom.name[0]], atom.name)) - except NoDataError: - return [] - - def logger_debug(self, *args): - if self.debug: - logger.debug(*args) - - - def _prepare(self): - # The distance for selection is defined as twice the maximum bond length of an O-H bond (2A) - # plus order of water bridge times the length of OH bond plus hydrogne bond distance - self.selection_distance = (2 * 2 + self.order * (2 + self.distance)) - - self.box = self.u.dimensions if self.pbc else None - self._residue_dict = {} - self._build_residue_dict(self.selection1) - self._build_residue_dict(self.selection2) - self._build_residue_dict(self.water_selection) - - self._update_selection() - - self.timesteps = [] - if len(self._s1) and len(self._s2): - self._update_water_selection() - else: - logger.info("WaterBridgeAnalysis: no atoms found in the selection.") - - logger.info("WaterBridgeAnalysis: initial checks passed.") - - logger.info("WaterBridgeAnalysis: starting") - logger.debug("WaterBridgeAnalysis: donors %r", self.donors) - logger.debug("WaterBridgeAnalysis: acceptors %r", self.acceptors) - logger.debug("WaterBridgeAnalysis: water bridge %r", self.water_selection) - - if self.debug: - logger.debug("Toggling debug to %r", self.debug) - else: - logger.debug("WaterBridgeAnalysis: For full step-by-step debugging output use debug=True") - - logger.info("Starting analysis (frame index start=%d stop=%d, step=%d)", - self.start, self.stop, self.step) - - def _donor2acceptor(self, donors, h_donors, acceptor): - if len(donors) == 0 or len(acceptor) == 0: - return [] - if self.distance_type != 'heavy': - donors_idx = list(h_donors.keys()) - else: - donors_idx = list(donors.keys()) - result = [] - # Code modified from p-j-smith - pairs, distances = capped_distance(self.u.atoms[donors_idx].positions, - self.u.atoms[acceptor].positions, - max_cutoff=self.distance, box=self.box, - return_distances=True) - if self.distance_type != 'heavy': - tmp_donors = [h_donors[donors_idx[idx]] for idx in pairs[:,0]] - tmp_hydrogens = [donors_idx[idx] for idx in pairs[:,0]] - tmp_acceptors = [acceptor[idx] for idx in pairs[:,1]] - else: - tmp_donors = [] - tmp_hydrogens = [] - tmp_acceptors = [] - for idx in range(len(pairs[:,0])): - for h in donors[donors_idx[pairs[idx,0]]]: - tmp_donors.append(donors_idx[pairs[idx,0]]) - tmp_hydrogens.append(h) - tmp_acceptors.append(acceptor[pairs[idx,1]]) - - angles = np.rad2deg( - calc_angles( - self.u.atoms[tmp_donors].positions, - self.u.atoms[tmp_hydrogens].positions, - self.u.atoms[tmp_acceptors].positions, - box=self.box - ) - ) - hbond_indices = np.where(angles > self.angle)[0] - for index in hbond_indices: - h = tmp_hydrogens[index] - d = tmp_donors[index] - a = tmp_acceptors[index] - result.append((h, d, a, self._expand_index(h), self._expand_index(a), - distances[index], angles[index])) - return result - - def _single_frame(self): - self.timesteps.append(self._ts.time) - self.box = self.u.dimensions if self.pbc else None - - if self.update_selection: - self._update_selection() - if len(self._s1) and len(self._s2): - if self.update_water_selection: - self._update_water_selection() - else: - self._network.append(defaultdict(dict)) - return - - selection_1 = [] - water_pool = defaultdict(list) - next_round_water = set([]) - selection_2 = [] - - if self.selection1_type in ('donor', 'both'): - # check for direct hbond from s1 to s2 - self.logger_debug("Selection 1 Donors <-> Selection 2 Acceptors") - results = self._donor2acceptor(self._s1_donors_h, self._s1_h_donors,self._s2_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line - water_pool[(a_resname, a_resid)] = None - selection_1.append((h_index, d_index, a_index, None, dist, angle)) - selection_2.append((a_resname, a_resid)) - if self.order > 0: - self.logger_debug("Selection 1 Donors <-> Water Acceptors") - results = self._donor2acceptor(self._s1_donors_h, self._s1_h_donors, self._water_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line - selection_1.append((h_index, d_index, a_index, None, dist, angle)) - - self.logger_debug("Water Donors <-> Selection 2 Acceptors") - results = self._donor2acceptor(self._water_donors_h, self._water_h_donors, self._s2_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line - water_pool[(h_resname, h_resid)].append((h_index, d_index, a_index, None, dist, angle)) - selection_2.append((a_resname, a_resid)) - - if self.selection1_type in ('acceptor', 'both'): - self.logger_debug("Selection 2 Donors <-> Selection 1 Acceptors") - results = self._donor2acceptor(self._s2_donors_h, self._s2_h_donors, self._s1_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line - water_pool[(h_resname, h_resid)] = None - selection_1.append((a_index, None, h_index, d_index, dist, angle)) - selection_2.append((h_resname, h_resid)) - - if self.order > 0: - self.logger_debug("Selection 2 Donors <-> Water Acceptors") - results = self._donor2acceptor(self._s2_donors_h, self._s2_h_donors, self._water_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line - water_pool[(a_resname, a_resid)].append((a_index, None, h_index, d_index, dist, angle)) - selection_2.append((h_resname, h_resid)) - - self.logger_debug("Selection 1 Acceptors <-> Water Donors") - results = self._donor2acceptor(self._water_donors_h, self._water_h_donors, self._s1_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line - selection_1.append((a_index, None, h_index, d_index, dist, angle)) - - if self.order > 1: - self.logger_debug("Water donor <-> Water Acceptors") - results = self._donor2acceptor(self._water_donors_h, self._water_h_donors, self._water_acceptors) - for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line - water_pool[(a_resname, a_resid)].append((a_index, None, h_index, d_index, dist, angle)) - water_pool[(h_resname, h_resid)].append((h_index, d_index, a_index, None, dist, angle)) - - # solve the connectivity network - # The following code attempt to generate a water network which is formed by the class dict. - # Suppose we have a water bridge connection ARG1 to ASP3 via the two hydrogen bonds. - # [0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180], - # [2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180], - # The resulting network will be - #{(0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180): {(2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180): None}} - # Where the key of the a dict will be all the hydrogen bonds starting from this nodes. - # The corresponding value of a certain key will be a dictionary whose key will be all the hydrogen bonds from - # the destination of in the key. - # If the value of a certain key is None, which means it is reaching selection 2. - - result = {'start': defaultdict(dict), 'water': defaultdict(dict)} - - def add_route(result, route): - if len(route) == 1: - result['start'][route[0]] = None - else: - # exclude the the selection which goes back to itself - if (sorted(route[0][0:3:2]) == sorted(route[-1][0:3:2])): - return - - # selection 2 to water - result['water'][route[-1]] = None - # water to water - for i in range(1, len(route) - 1): - result['water'][route[i]][route[i+1]] = result['water'][route[i+1]] - # selection 1 to water - result['start'][route[0]][route[1]] = result['water'][route[1]] - - def traverse_water_network(graph, node, end, route, maxdepth, result): - if len(route) > self.order + 1: - return - else: - if node in end: - # check if any duplication happens - if len(route) == len(set(route)): - add_route(result, route) - else: - for new_node in graph[node]: - new_route = route[:] - new_route.append(new_node) - new_node = self._expand_timeseries(new_node,'sele1_sele2')[3][:2] - traverse_water_network(graph, new_node, end, new_route, maxdepth, result) - for s1 in selection_1: - route = [s1, ] - next_mol = self._expand_timeseries(s1,'sele1_sele2')[3][:2] - traverse_water_network(water_pool, next_mol, selection_2, route[:], self.order, result) - - self._network.append(result['start']) - - def _traverse_water_network(self, graph, current, analysis_func=None, output=None, link_func=None, **kwargs): - ''' - This function recursively traverses the water network self._network and finds the hydrogen bonds which connect - the current atom to the next atom. The newly found hydrogen bond will be appended to the hydrogen bonds - connecting the selection 1 to the current atom via link_func. When selection 2 is reached, the full list of - hydrogen bonds connecting the selection 1 to selection 2 will be fed into analysis_func, which will then modify - the output in place. - :param graph: The connection network describes the connection between the atoms in the water network. - :param current: The hydrogen bonds from selection 1 until now. - :param analysis_func: The analysis function which is called to analysis the hydrogen bonds. - :param output: where the result is stored. - :param link_func: The new hydrogen bonds will be appended to current. - :param kwargs: the keywords which are passed into the analysis_func. - :return: - ''' - if link_func is None: - # If no link_func is provided, the default link_func will be used - link_func = self._full_link - - if graph is None: - # if selection 2 is reached - if not analysis_func is None: - # the result is analysed by analysis_func which will change the output - analysis_func(current, output, self.u, **kwargs) - else: - # make sure no loop can occur - if len(current) <= self.order: - for node in graph: - # the new hydrogen bond will be added to the existing bonds - new = link_func(current, node) - self._traverse_water_network(graph[node], new, analysis_func, output, link_func, **kwargs) - - def _expand_index(self, index): - ''' - Expand the index into (resname, resid, name). - ''' - atom = self.u.atoms[index] - return (atom.resname, atom.resid, atom.name) - - def _expand_timeseries(self, entry, output_format=None): - ''' - Expand the compact data format into the old timeseries form. - The old is defined as the format for release up to 0.19.2. - As is discussed in Issue #2177, the internal storage of the hydrogen - bond information has been changed to the compact format. - The function takes in the argument `output_format` to see which output format will be chosen. - if `output_format` is not specified, the value will be taken from :attr:`output_format`. - If `output_format` is 'sele1_sele2', the output will be the old water bridge analysis format:: - - # donor from selection 1 to acceptor in selection 2 - [sele1_index, sele2_index, - (sele1_resname, sele1_resid, sele1_name), - (sele2_resname, sele2_resid, sele2_name), dist, angle] - - If `output_format` is 'donor_acceptor', the output will be the old hydrogen bond analysis format:: - - # From donor to acceptor - [donor_index, acceptor_index, - (donor_resname, donor_resid, donor_name), - (acceptor_resname, acceptor_resid, acceptor_name), dist, angle] - ''' - output_format = output_format or self.output_format - # Expand the compact entry into atom1, which is the first index in the output and atom2, which is the second - # entry. - atom1, heavy_atom1, atom2, heavy_atom2, dist, angle = entry - if output_format == 'sele1_sele2': - # If the output format is the sele1_sele2, no change will be executed - atom1, atom2 = atom1, atom2 - elif output_format == 'donor_acceptor': - # If the output format is donor_acceptor, use heavy atom position to check which is donor and which is - # acceptor - if heavy_atom1 is None: - # atom1 is hydrogen bond acceptor and thus, the position of atom1 and atom2 are swapped. - atom1, atom2 = atom2, atom1 - else: - # atom1 is hydrogen bond donor, position not swapped. - atom1, atom2 = atom1, atom2 - else: - raise KeyError("Only 'sele1_sele2' or 'donor_acceptor' are allowed as output format") - - return (atom1, atom2, self._expand_index(atom1), self._expand_index(atom2), dist, angle) - - def _generate_timeseries(self, output_format=None): - r'''Time series of water bridges. - - The output is generated per frame as is explained in :ref:`wb_Analysis_Timeseries`. - The format of output can be changed via the output_format selection. - If ``output_format="sele1_sele2"``, the hydrogen bond forms a directional - link from selection 1 to selection 2. If ``output_format="donor_acceptor"``, - for each hydrogen bond, the donor is always written before the acceptor. - - Note - ---- - To find an acceptor atom in :attr:`Universe.atoms` by - *index* one would use ``u.atoms[acceptor_index]``. - - The :attr:`timeseries` is a managed attribute and it is generated - from the underlying data in :attr:`_network` every time the - attribute is accessed. It is therefore costly to call and if - :attr:`timeseries` is needed repeatedly it is recommended that you - assign to a variable:: - - w = WaterBridgeAnalysis(u) - w.run() - timeseries = w.timeseries - - .. versionchanged 0.20.0 - The :attr:`WaterBridgeAnalysis.timeseries` has been updated where - the donor and acceptor string has been changed to tuple - (resname string, resid, name_string). - - - ''' - output_format = output_format or self.output_format - def analysis(current, output, *args, **kwargs): - output = current - - timeseries = [] - for frame in self._network: - new_frame = [] - self._traverse_water_network(frame, new_frame, analysis_func=analysis, - output=new_frame, link_func=self._compact_link) - timeseries.append([self._expand_timeseries(entry, output_format) for entry in new_frame]) - return timeseries - - timeseries = property(_generate_timeseries) - - def _get_network(self): - r'''Network representation of the water network. - - The output is generated per frame as is explained in :ref:`wb_Analysis_Network`. - Each hydrogen bond has a compact representation of :: - - [sele1_acceptor_idx, None, sele2_donor_idx, donor_heavy_idx, distance, angle] - - or :: - - [sele1_donor_idx, donor_heavy_idx, sele1_acceptor_idx, None, distance, angle] - - The donor_heavy_idx is the heavy atom bonding to the proton and atoms - can be retrived from the universe:: - - atom = u.atoms[idx] - - .. versionadded:: 0.20.0 - - ''' - return self._network - - def set_network(self, network): - self._network = network - - network = property(_get_network, set_network) - - @classmethod - def _full_link(self, output, node): - ''' - A function used in _traverse_water_network to add the new hydrogen bond to the existing bonds. - :param output: The existing hydrogen bonds from selection 1 - :param node: The new hydrogen bond - :return: The hydrogen bonds from selection 1 with the new hydrogen bond added - ''' - result = output[:] - result.append(node) - return result - - @classmethod - def _compact_link(self, output, node): - ''' - A function used in _traverse_water_network to add the new hydrogen bond to the existing bonds. - In this form no new list is created and thus, one bridge will only appear once. - :param output: The existing hydrogen bonds from selection 1 - :param node: The new hydrogen bond - :return: The hydrogen bonds from selection 1 with the new hydrogen bond added - ''' - output.append(node) - return output - - def _count_by_type_analysis(self, current, output, *args, **kwargs): - ''' - Generates the key for count_by_type analysis. - :return: - ''' - - s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ - self._expand_timeseries(current[0]) - from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ - self._expand_timeseries(current[-1]) - key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) - output[key] += 1 - - def count_by_type(self, analysis_func=None, **kwargs): - """Counts the frequency of water bridge of a specific type. - - If one atom *A* from *selection 1* is linked to atom *B* from - *selection 2* through one or more bridging waters, an entity will be created and - the proportion of time that this linkage exists in the whole simulation - will be calculated. - - The identification of a specific type of water bridge can be modified by - supplying the analysis_func function. See :ref:`wb_count_by_type` - for detail. - - Returns - ------- - counts : list - Returns a :class:`list` containing atom indices for *A* and - *B*, residue names, residue numbers, atom names (for both A and B) and - the fraction of the total time during which the water bridge was - detected. This method returns None if method - :meth:`WaterBridgeAnalysis.run` was not executed first. - - - """ - output = None - if analysis_func is None: - analysis_func = self._count_by_type_analysis - output = 'combined' - - if self._network: - length = len(self._network) - result_dict = defaultdict(int) - for frame in self._network: - frame_dict = defaultdict(int) - self._traverse_water_network(frame, [], analysis_func=analysis_func, - output=frame_dict, - link_func=self._full_link, **kwargs) - for key, value in frame_dict.items(): - result_dict[key] += frame_dict[key] - - if output == 'combined': - result = [[i for i in key] for key in result_dict] - [result[i].append(result_dict[key]/length) for i, key in enumerate(result_dict)] - else: - result = [(key, result_dict[key]/length) for key in result_dict] - return result - else: - return None - - def _count_by_time_analysis(self, current, output, *args, **kwargs): - s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ - self._expand_timeseries(current[0]) - from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ - self._expand_timeseries(current[-1]) - key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) - output[key] += 1 - - def count_by_time(self, analysis_func=None, **kwargs): - """Counts the number of water bridges per timestep. - - The counting behaviour can be adjusted by supplying analysis_func. - See :ref:`wb_count_by_time` for details. - - Returns - ------- - counts : list - Returns a time series ``N(t)`` where ``N`` is the total - number of observed water bridges at time ``t``. - - """ - if analysis_func is None: - analysis_func = self._count_by_time_analysis - if self._network: - result = [] - for time, frame in zip(self.timesteps, self._network): - result_dict = defaultdict(int) - self._traverse_water_network(frame, [], analysis_func=analysis_func, - output=result_dict, - link_func=self._full_link, **kwargs) - result.append((time, sum([result_dict[key] for key in result_dict]))) - return result - else: - return None - - def _timesteps_by_type_analysis(self, current, output, *args, **kwargs): - s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ - self._expand_timeseries(current[0]) - from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ - self._expand_timeseries(current[-1]) - key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) - output[key].append(kwargs.pop('time')) - - def timesteps_by_type(self, analysis_func=None, **kwargs): - """Frames during which each water bridges existed, sorted by each water bridges. - - Processes :attr:`WaterBridgeAnalysis._network` and returns a - :class:`list` containing atom indices, residue names, residue - numbers (from selection 1 and selection 2) and each timestep at which the - water bridge was detected. - - Similar to :meth:`~WaterBridgeAnalysis.count_by_type` and - :meth:`~WaterBridgeAnalysis.count_by_time`, the behavior can be adjusted by - supplying an analysis_func. - - Returns - ------- - data : list - - """ - output = None - if analysis_func is None: - analysis_func = self._timesteps_by_type_analysis - output = 'combined' - - if self._network: - result = defaultdict(list) - if self.timesteps is None: - timesteps = range(len(self._network)) - else: - timesteps = self.timesteps - for time, frame in zip(timesteps, self._network): - self._traverse_water_network(frame, [], analysis_func=analysis_func, - output=result, - link_func=self._full_link, time=time, **kwargs) - - result_list = [] - for key, time_list in result.items(): - for time in time_list: - if output == 'combined': - key = list(key) - key.append(time) - result_list.append(key) - else: - result_list.append((key, time)) - return result_list - else: - return None - - def generate_table(self, output_format=None): - """Generate a normalised table of the results. - - The table is stored as a :class:`numpy.recarray` in the - attribute :attr:`~WaterBridgeAnalysis.table`. - - The output format of :attr:`~WaterBridgeAnalysis.table` can also be - changed using output_format in a fashion similar to :attr:`WaterBridgeAnalysis.timeseries` - """ - output_format = output_format or self.output_format - if self._network == []: - msg = "No data computed, do run() first." - warnings.warn(msg, category=MissingDataWarning) - logger.warning(msg) - return None - timeseries = self._generate_timeseries(output_format) - - num_records = np.sum([len(hframe) for hframe in timeseries]) - # build empty output table - if output_format == 'sele1_sele2': - dtype = [ - ("time", float), - ("sele1_index", int), ("sele2_index", int), - ("sele1_resnm", "|U4"), ("sele1_resid", int), ("sele1_atom", "|U4"), - ("sele2_resnm", "|U4"), ("sele2_resid", int), ("sele2_atom", "|U4"), - ("distance", float), ("angle", float)] - elif output_format == 'donor_acceptor': - dtype = [ - ("time", float), - ("donor_index", int), ("acceptor_index", int), - ("donor_resnm", "|U4"), ("donor_resid", int), ("donor_atom", "|U4"), - ("acceptor_resnm", "|U4"), ("acceptor_resid", int), ("acceptor_atom", "|U4"), - ("distance", float), ("angle", float)] - - # according to Lukas' notes below, using a recarray at this stage is ineffective - # and speedups of ~x10 can be achieved by filling a standard array, like this: - out = np.empty((num_records,), dtype=dtype) - cursor = 0 # current row - for t, hframe in zip(self.timesteps, timeseries): - for (donor_index, acceptor_index, donor, - acceptor, distance, angle) in hframe: - # donor|acceptor = (resname, resid, atomid) - out[cursor] = (t, donor_index, acceptor_index) + \ - donor + acceptor + (distance, angle) - cursor += 1 - assert cursor == num_records, "Internal Error: Not all wb records stored" - table = out.view(np.recarray) - logger.debug("WBridge: Stored results as table with %(num_records)d entries.", vars()) - self.table = table +from MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis import WaterBridgeAnalysis diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py new file mode 100644 index 00000000000..e0fab139133 --- /dev/null +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -0,0 +1,1620 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +# Water Bridge Analysis +r"""Water Bridge analysis --- :mod:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis` +=============================================================================== + +:Author: Zhiyi Wu +:Year: 2017-2018 +:Copyright: GNU Public License v3 +:Maintainer: Zhiyi Wu , `@xiki-tempula`_ on GitHub + + +.. _`@xiki-tempula`: https://github.com/xiki-tempula + + +Given a :class:`~MDAnalysis.core.universe.Universe` (simulation +trajectory with 1 or more frames) measure all water bridges for each +frame between selections 1 and 2. +Water bridge is defined as a bridging water which simultaneously forms +two hydrogen bonds with atoms from both selection 1 and selection 2. + +A water bridge can form between two hydrogen bond acceptors. + +e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O−H···:\ :sup:`-`\ O\ :sub:`2`\ C- + +A water bridge can also form between two hydrogen bond donors. + +e.g. -NH···:O:···HN- (where O is the oxygen of a bridging water) + +A hydrogen bond acceptor and another hydrogen bond donor can be bridged by a +water. + +e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···HN- (where H−O is part of **H−O**\ −H) + +A higher order water bridge is defined as more than one water bridging +hydrogen bond acceptor and donor. An example of a second order water bridge: + +e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···H−O:···HN- (where H−O is part of **H−O**\ −H) + +The following keyword arguments are important to control the behaviour of the +water bridge analysis: + + - *water_selection* (``resname SOL``): the selection string for the bridging + water + - *order* the maximum number of water bridging both ends + - donor-acceptor *distance* (Å): 3.0 + - Angle *cutoff* (degrees): 120.0 + - *forcefield* to switch between default values for different force fields + - *donors* and *acceptors* atom types (to add additional atom names) + +Theory +------ + +This module attempts to find multi-order water bridge by an approach similar +to breadth-first search, where the first solvation shell of selection 1 is +selected, followed by the selection of the second solvation shell as well as +any hydrogen bonding partner from selection 1. After that, the third solvation +shell, as well as any binding partners from selection 2, are detected. This +process is repeated until the maximum order of water bridges is reached. + +.. _wb_Analysis_Network: + +Output as Network +----------------- + +Since the waters connecting the two ends of the selections are by nature a network. +We provide a network representation of the water network. Water bridge data are +returned per frame, which is stored in :attr:`WaterBridgeAnalysis.network`. Each +frame is represented as a dictionary, where the keys are the hydrogen bonds +originating from selection 1 and the values are new dictionaries representing +the hydrogen bonds coming out of the corresponding molecules making hydrogen bonds +with selection 1. + +As for the hydrogen bonds which reach the selection 2, the values of the +corresponding keys are None. One example where selection 1 and selection 2 are +joined by one water molecule (A) which also hydrogen bond to another water (B) +which also hydrogen bond to selection 2 would be represented as :: + + # (selection 1)-O:···H-O(water 1):···H-(selection 2) + # | : + # H·············O-H(water2) + # H + {(sele1_acceptor, None, water1_donor, water1_donor_heavy, distance, angle): + {(water1_acceptor, None, sele2_donor, sele2_donor_heavy, distance, angle): None}, + {(water1_donor, water1_donor_heavy, water2_acceptor, None, distance, angle): + {(water2_acceptor, None, sele2_donor, sele2_donor_heavy, distance, angle): None} + }, + } + +The atoms are represented by atom index and if the atom is hydrogen bond donor, +it is followed by the index of the corresponding heavy atom +``(donor_proton, donor_heavy_atom)``. +If the atom is a hydrogen bond acceptor, it is followed by none. + +.. _wb_Analysis_Timeseries: + +Output as Timeseries +-------------------- + +For lower order water bridges, it might be desirable to represent the connections as +:attr:`WaterBridgeAnalysis.timeseries`. The results are returned per frame and +are a list of hydrogen bonds between the selection 1 or selection 2 and the +bridging waters. Due to the complexity of the higher order water bridge and the +fact that one hydrogen bond between two waters can appear in both third and +fourth order water bridge, the hydrogen bonds in the +:attr:`WaterBridgeAnalysis.timeseries` attribute are generated in a depth-first +search manner to avoid duplication. Example code of how +:attr:`WaterBridgeAnalysis.timeseries` is generated:: + + def network2timeseries(network, timeseries): + '''Traverse the network in a depth-first fashion. + expand_timeseries will expand the compact representation to the familiar + timeseries representation.''' + + if network is None: + return + else: + for node in network: + timeseries.append(expand_timeseries(node)) + network2timeseries(network[node], timeseries) + + timeseries = [] + network2timeseries(network, timeseries) + +An example would be. :: + + results = [ + [ # frame 1 + [ , , + (, , ), + (, , ), + , ], + .... + ], + [ # frame 2 + [ ... ], [ ... ], ... + ], + ... + ] + +Using the :meth:`WaterBridgeAnalysis.generate_table` method one can reformat +the results as a flat "normalised" table that is easier to import into a +database or dataframe for further processing. + +Detection of water bridges +-------------------------- +Water bridges are recorded if a bridging water simultaneously forms +hydrogen bonds with selection 1 and selection 2. + +Hydrogen bonds are detected based on a geometric criterion: + +1. The distance between acceptor and hydrogen is less than or equal to + `distance` (default is 3 Å). + +2. The angle between donor-hydrogen-acceptor is greater than or equal to + `angle` (default is 120º). + +The cut-off values `angle` and `distance` can be set as keywords to +:class:`WaterBridgeAnalysis`. + +Donor and acceptor heavy atoms are detected from atom names. The current +defaults are appropriate for the CHARMM27 and GLYCAM06 force fields as defined +in Table `Default atom names for hydrogen bonding analysis`_. + +Hydrogen atoms bonded to a donor are searched based on its distance to the +donor. The algorithm searches for all hydrogens +(name "H*" or name "[123]H" or type "H") in the same residue as the donor atom +within a cut-off distance of 1.2 Å. + +.. _Default atom names for hydrogen bonding analysis: + +.. table:: Default heavy atom names for CHARMM27 force field. + + =========== ============== =========== ==================================== + group donor acceptor comments + =========== ============== =========== ==================================== + main chain N O, OC1, OC2 OC1, OC2 from amber99sb-ildn + (Gromacs) + water OH2, OW OH2, OW SPC, TIP3P, TIP4P (CHARMM27,Gromacs) + + ARG NE, NH1, NH2 + ASN ND2 OD1 + ASP OD1, OD2 + CYS SG + CYH SG possible false positives for CYS + GLN NE2 OE1 + GLU OE1, OE2 + HIS ND1, NE2 ND1, NE2 presence of H determines if donor + HSD ND1 NE2 + HSE NE2 ND1 + HSP ND1, NE2 + LYS NZ + MET SD see e.g. [Gregoret1991]_ + SER OG OG + THR OG1 OG1 + TRP NE1 + TYR OH OH + =========== ============== =========== ==================================== + +.. table:: Heavy atom types for GLYCAM06 force field. + + =========== =========== ================== + element donor acceptor + =========== =========== ================== + N N,NT,N3 N,NT + O OH,OW O,O2,OH,OS,OW,OY + S SM + =========== =========== ================== + +Donor and acceptor names for the CHARMM27 force field will also work for e.g. +OPLS/AA or amber (tested in Gromacs). Residue names in the table are for +information only and are not taken into account when determining acceptors and +donors. This can potentially lead to some ambiguity in the assignment of +donors/acceptors for residues such as histidine or cytosine. + +For more information about the naming convention in GLYCAM06 have a look at the +`Carbohydrate Naming Convention in Glycam`_. + +.. _`Carbohydrate Naming Convention in Glycam`: + http://glycam.ccrc.uga.edu/documents/FutureNomenclature.htm + +The lists of donor and acceptor names can be extended by providing lists of +atom names in the `donors` and `acceptors` keywords to +:class:`WaterBridgeAnalysis`. If the lists are entirely inappropriate +(e.g. when analysing simulations done with a force field that uses very +different atom names) then one should either use the value "other" for +`forcefield` to set no default values or derive a new class and set the +default list oneself:: + + class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): + DEFAULT_DONORS = {"OtherFF": tuple(set([...]))} + DEFAULT_ACCEPTORS = {"OtherFF": tuple(set([...]))} + +Then simply use the new class instead of the parent class and call it with +```forcefield` = "OtherFF"``. Please also consider contributing the list of heavy +atom names to MDAnalysis. + +How to perform WaterBridgeAnalysis +---------------------------------- + +All water bridges between arginine and aspartic acid can be analysed with :: + + import MDAnalysis + import MDAnalysis.analysis.hbonds + + u = MDAnalysis.Universe('topology', 'trajectory') + w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', 'resname ASP') + w.run() + +The maximum number of bridging waters detected can be changed using the order keyword. :: + + w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', 'resname ASP', + order=3) + +Thus, a maximum of three bridging waters will be detected. + +An example of using the :attr:`~WaterBridgeAnalysis` would be +detecting the percentage of time a certain water bridge exits. + +Trajectory :code:`u` has two frames, where the first frame contains a water +bridge from the oxygen of the first arginine to one of the oxygens in the carboxylic +group of aspartate (ASP3:OD1). In the second frame, the same water bridge forms but +is between the oxygen of the arginine and the other oxygen in the carboxylic +group (ASP3:OD2). :: + + # index residue id residue name atom name + # 0 1 ARG O + # 1 2 SOL OW + # 2 2 SOL HW1 + # 3 2 SOL HW2 + # 4 3 ASP OD1 + # 5 3 ASP OD2 + print(w.timeseries) + +prints out. :: + + [ # frame 1 + # A water bridge SOL2 links O from ARG1 to the carboxylic group OD1 of ASP3 + [[0,2,('ARG',1, 'O'),('SOL',2,'HW1'), 3.0,180], + [3,4,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180], + ], + # frame 2 + # Another water bridge SOL2 links O from ARG1 to the other oxygen of the + # carboxylic group OD2 of ASP3 + [[0,2,('ARG',1, 'O'),('SOL',2,'HW1'), 3.0,180], + [3,5,('SOL',2,'HW2'),('ASP',3,'OD2'), 3.0,180], + ], + ] + + +.. _wb_count_by_type: + +Use count_by_type +----------------- + +We can use the :meth:`~WaterBridgeAnalysis.count_by_type` to +generate the frequence of all water bridges in the simulation. :: + + w.count_by_type() + +Returns :: + + [(0, 3, 'ARG', 1, 'O', 'ASP', 3, 'OD1', 0.5), + (0, 4, 'ARG', 1, 'O', 'ASP', 3, 'OD2', 0.5),] + +You might think that the OD1 and OD2 are the same oxygen and the aspartate has just flipped +and thus, they should be counted as the same type of water bridge. The type of the water +bridge can be customised by supplying an analysis function to +:meth:`~WaterBridgeAnalysis.count_by_type`. + +The analysis function has two parameters. The current and the output. The current is a list +of hydrogen bonds from selection 1 to selection 2, formatted in the same fashion as +:attr:`WaterBridgeAnalysis.network`, and an example will be :: + + [ # sele1 acceptor idx, , water donor index, donor heavy atom idx, dist, ang. + [ 0, None, 2, 1, 3.0,180], + # water donor idx, donor heavy atom idx, sele2 acceptor idx, distance, angle. + [ 3, 1, 4, None, 3.0,180],] + +Where ``current[0]`` is the first hydrogen bond originating from selection 1 and ``current[-1]`` is +the final hydrogen bond ending in selection 2. The output sums up all the information in the +current frame and is a dictionary with a user-defined key and the value is the weight of the +corresponding key. During the analysis phase, the function analysis iterates through all the +water bridges and modify the output in-place. At the end of the analysis, the keys from +all the frames are collected and the corresponding values will be summed up and returned. :: + + def analysis(current, output, u): + r'''This function defines how the type of water bridge should be specified. + + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds from + selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type of + the water bridge and the value is the weight of this type of water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms.''' + + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + # if the residue name is ASP and the atom name is OD2 or OD1, + # the atom name is changed to OD + if s2_resname == 'ASP' and (s2_name == 'OD1' or s2_name == 'OD2'): + s2_name = 'OD' + # setting up the key which defines this type of water bridge. + key = (s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + # The number of this type of water bridge is incremented by 1. + output[key] += 1 + + w.count_by_type(analysis_func=analysis) + +Returns :: + + [(('ARG', 1, 'O', 'ASP', 3, 'OD'), 1.0),] + +Note that the result is arranged in the format of +``(key, the proportion of time)``. When no custom analysis function is supplied, +the key is expanded and is formatted as :: + + [('ARG', 1, 'O', 'ASP', 3, 'OD', 1.0),] + +Some people might only interested in contacts between residues and pay no attention +to the details regarding the atom name. However, since multiple water bridges can +exist between two residues, which sometimes can give a result such that the water +bridge between two residues exists 300% of the time. Though this might be a desirable +result for some people, others might want the water bridge between two residues to be +only counted once per frame. This can also be achieved by supplying an analysis function +to :meth:`~WaterBridgeAnalysis.count_by_type`. :: + + def analysis(current, output, u): + '''This function defines how the type of water bridge should be specified. + + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds from + selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type of + the water bridge and the value is the weight of this type of water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms. + ''' + + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + # s1_name and s2_name are not included in the key + key = (s1_resname, s1_resid, s2_resname, s2_resid) + + # Each residue is only counted once per frame + output[key] = 1 + + w.count_by_type(analysis_func=analysis) + +Returns :: + + [(('ARG', 1, 'ASP', 3), 1.0),] + +On the other hand, other people may insist that the first order and second-order water +bridges shouldn't be mixed together, which can also be achieved by supplying an analysis +function to :meth:`~WaterBridgeAnalysis.count_by_type`. :: + + def analysis(current, output, u): + '''This function defines how the type of water bridge should be specified. + + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds from + selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type of + the water bridge and the value is the weight of this type of water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms. + ''' + + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + # order of the current water bridge is computed + order_of_water_bridge = len(current) - 1 + # and is included in the key + key = (s1_resname, s1_resid, s2_resname, s2_resid, order_of_water_bridge) + # The number of this type of water bridge is incremented by 1. + output[key] += 1 + + w.count_by_type(analysis_func=analysis) + +The extra number 1 precede the 1.0 indicate that this is a first order water bridge :: + + [(('ARG', 1, 'ASP', 3, 1), 1.0),] + +Some people might not be interested in the interactions related to arginine. The undesirable +interactions can be discarded by supplying an analysis function to +:meth:`~WaterBridgeAnalysis.count_by_type`. :: + + def analysis(current, output, u): + '''This function defines how the type of water bridge should be specified. + + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds from + selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type of + the water bridge and the value is the number of this type of water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms. + ''' + + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + if not s1_resname == 'ARG': + key = (s1_resname, s1_resid, s2_resname, s2_resid) + output[key] += 1 + + w.count_by_type(analysis_func=analysis) + +Returns nothing in this case :: + + [,] + +Additional keywords can be supplied to the analysis function by passing through +:meth:`~WaterBridgeAnalysis.count_by_type`. :: + + def analysis(current, output, **kwargs): + ... + w.count_by_type(analysis_func=analysis, **kwargs) + + +.. _wb_count_by_time: + +Use count_by_time +----------------- + +:meth:`~WaterBridgeAnalysis.count_by_type` aggregates data across frames, which +might be desirable in some cases but not the others. :meth:`~WaterBridgeAnalysis.count_by_time` +provides additional functionality for aggregating results for each frame. + +The default behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` is counting the number of +water bridges from selection 1 to selection 2 for each frame. Take the previous ASP, ARG salt +bridge for example: :: + + w.count_by_time() + +As one water bridge is found in both frames, the method returns :: + + [(1.0, 1), (2.0, 1), ] + +Similar to :meth:`~WaterBridgeAnalysis.count_by_type` +The behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` can also be modified by supplying +an analysis function. + +Suppose we want to count + + - the **number** of water molecules involved in bridging selection 1 to selection 2. + - only if water bridge terminates in atom name **OD1 of ASP**. + - only when water bridge is joined by less than **two** water. + +The analysis function can be written as:: + + def analysis(current, output, u, **kwargs): + '''This function defines how the counting of water bridge should be specified. + + Parameters + ---------- + current : list + The current water bridge being analysed is a list of hydrogen bonds from + selection 1 to selection 2. + output : dict + A dictionary which is modified in-place where the key is the type of + the water bridge and the value is the number of this type of water bridge. + u : MDAnalysis.universe + The current Universe for looking up atoms. + ''' + + # decompose the first hydrogen bond. + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + # decompose the last hydrogen bond. + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + # expand the atom index to the resname, resid, atom names + sele1 = u.atoms[sele1_index] + sele2 = u.atoms[sele2_index] + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + + # only the residue name is ASP and the atom name is OD1, + if s2_resname == 'ASP' and s2_name == 'OD1': + # only if the order of water bridge is less than 2 + if len(current) -1 < 2: + # extract all water molecules involved in the water bridge + # extract the first water from selection 1 + s1_index, to_index, (s1_resname, s1_resid, s1_name), + (to_resname, to_resid, to_name), dist, angle = current[0] + key = (to_resname, to_resid) + output[key] = 1 + + # extract all the waters between selection 1 and selection 2 + for hbond in current[1:-1]: + # decompose the hydrogen bond. + from_index, to_index, (from_resname, from_resid, from_name), + (to_resname, to_resid, to_name), dist, angle = hbond + # add first water + key1 = (from_resname, from_resid) + output[key1] = 1 + # add second water + key2 = (to_resname, to_resid) + output[key2] = 1 + + # extract the last water to selection 2 + from_index, s2_index, (from_resname, from_resid, from_name), + (s2_resname, s2_resid, s2_name), dist, angle = current[-1] + key = (from_resname, from_resid) + output[key] = 1 + + w.count_by_time(analysis_func=analysis) + +Returns :: + + [(1.0, 1), (2.0, 0),] + +Classes +------- + +.. autoclass:: WaterBridgeAnalysis + :members: + + .. attribute:: timesteps + + List of the times of each timestep. This can be used together with + :attr:`~WaterBridgeAnalysis.timeseries` to find the specific time point + of a water bridge existence. + +""" +from collections import defaultdict +import logging +import warnings +import numpy as np + +from ..base import AnalysisBase +from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch +from MDAnalysis.lib.distances import capped_distance, calc_angles +from MDAnalysis import NoDataError, MissingDataWarning, SelectionError +from MDAnalysis.lib import distances + +logger = logging.getLogger('MDAnalysis.analysis.WaterBridgeAnalysis') + +class WaterBridgeAnalysis(AnalysisBase): + """Perform a water bridge analysis + + The analysis of the trajectory is performed with the + :meth:`WaterBridgeAnalysis.run` method. The result is stored in + :attr:`WaterBridgeAnalysis.timeseries`. See + :meth:`~WaterBridgeAnalysis.run` for the format. + + + .. versionadded:: 0.17.0 + + + """ + # use tuple(set()) here so that one can just copy&paste names from the + # table; set() takes care for removing duplicates. At the end the + # DEFAULT_DONORS and DEFAULT_ACCEPTORS should simply be tuples. + + #: default heavy atom names whose hydrogens are treated as *donors* + #: (see :ref:`Default atom names for hydrogen bonding analysis`); + #: use the keyword `donors` to add a list of additional donor names. + DEFAULT_DONORS = { + 'CHARMM27': tuple(set([ + 'N', 'OH2', 'OW', 'NE', 'NH1', 'NH2', 'ND2', 'SG', 'NE2', 'ND1', + 'NZ', 'OG', 'OG1', 'NE1', 'OH'])), + 'GLYCAM06': tuple(set(['N', 'NT', 'N3', 'OH', 'OW'])), + 'other': tuple(set([]))} + + #: default atom names that are treated as hydrogen *acceptors* + #: (see :ref:`Default atom names for hydrogen bonding analysis`); + #: use the keyword `acceptors` to add a list of additional acceptor names. + DEFAULT_ACCEPTORS = { + 'CHARMM27': tuple(set([ + 'O', 'OC1', 'OC2', 'OH2', 'OW', 'OD1', 'OD2', 'SG', 'OE1', 'OE1', + 'OE2', 'ND1', 'NE2', 'SD', 'OG', 'OG1', 'OH'])), + 'GLYCAM06': tuple(set(['N', 'NT', 'O', 'O2', 'OH', 'OS', 'OW', 'OY', 'SM'])), + 'other': tuple(set([]))} + + #: A :class:`collections.defaultdict` of covalent radii of common donors + #: (used in :meth`_get_bonded_hydrogens_list` to check if a hydrogen is + #: sufficiently close to its donor heavy atom). Values are stored for + #: N, O, P, and S. Any other heavy atoms are assumed to have hydrogens + #: covalently bound at a maximum distance of 1.5 Å. + r_cov = defaultdict(lambda: 1.5, # default value + N=1.31, O=1.31, P=1.58, S=1.55) + + def __init__(self, universe, selection1='protein', + selection2='not resname SOL', water_selection='resname SOL', order=1, + selection1_type='both', update_selection=False, update_water_selection=True, + filter_first=True, distance_type='hydrogen', distance=3.0, + angle=120.0, forcefield='CHARMM27', donors=None, + acceptors=None, output_format="sele1_sele2", debug=None, verbose=False, + pbc=False, **kwargs): + """Set up the calculation of water bridges between two selections in a + universe. + + The timeseries is accessible as the attribute + :attr:`WaterBridgeAnalysis.timeseries`. + + If no hydrogen bonds are detected or if the initial check fails, look + at the log output (enable with :func:`MDAnalysis.start_logging` and set + `verbose` ``=True``). It is likely that the default names for donors + and acceptors are not suitable (especially for non-standard + ligands). In this case, either change the `forcefield` or use + customized `donors` and/or `acceptors`. + + Parameters + ---------- + universe : Universe + Universe object + selection1 : str (optional) + Selection string for first selection ['protein'] + selection2 : str (optional) + Selection string for second selection ['not resname SOL'] + This string selects everything except water where water is assumed + to have a residue name as SOL. + water_selection : str (optional) + Selection string for bridging water selection ['resname SOL'] + The default selection assumes that the water molecules have residue + name "SOL". Change it to the appropriate selection for your + specific force field. + + However, in theory, this selection can be anything which forms + a hydrogen bond with selection 1 and selection 2. + order : int (optional) + The maximum number of water bridges linking both selections. + if the order is set to 3, then all the residues linked with less than + three water molecules will be detected. [1] + + Computation of high order water bridges can be very time-consuming. + Think carefully before running the calculation, do you really want + to compute the 20th order water bridge between domain A and domain B + or you just want to know the third order water bridge between two residues. + selection1_type : {"donor", "acceptor", "both"} (optional) + Selection 1 can be 'donor', 'acceptor' or 'both'. Note that the + value for `selection1_type` automatically determines how + `selection2` handles donors and acceptors: If `selection1` contains + 'both' then `selection2` will also contain 'both'. If `selection1` + is set to 'donor' then `selection2` is 'acceptor' (and vice versa). + ['both']. + update_selection : bool (optional) + Update selection 1 and 2 at each frame. Setting to ``True`` if the + selection is not static. Selections are filtered first to speed up + performance. Thus, setting to ``True`` is recommended if contact + surface between selection 1 and selection 2 is constantly + changing. [``False``] + update_water_selection : bool (optional) + Update selection of water at each frame. Setting to ``False`` is + **only** recommended when the total amount of water molecules in the + simulation are small and when water molecules remain static across + the simulation. + + However, in normal simulations, only a tiny proportion of water is + engaged in the formation of water bridge. It is recommended to + update the water selection and set keyword `filter_first` to + ``True`` so as to filter out water not residing between the two + selections. [``True``] + filter_first : bool (optional) + Filter the water selection to only include water within 4 Å + `order` * + (2 Å + `distance`) away from `both` selection 1 and selection 2. + Selection 1 and selection 2 are both filtered to only include atoms + with the same distance away from the other selection. [``True``] + distance : float (optional) + Distance cutoff for hydrogen bonds; only interactions with a H-A + distance <= `distance` (and the appropriate D-H-A angle, see + `angle`) are recorded. (Note: `distance_type` can change this to + the D-A distance.) [3.0] + angle : float (optional) + Angle cutoff for hydrogen bonds; an ideal H-bond has an angle of + 180º. A hydrogen bond is only recorded if the D-H-A angle is + >= `angle`. The default of 120º also finds fairly non-specific + hydrogen interactions and possibly better value is 150º. [120.0] + forcefield : {"CHARMM27", "GLYCAM06", "other"} (optional) + Name of the forcefield used. Switches between different + :attr:`~DEFAULT_DONORS` and + :attr:`~DEFAULT_ACCEPTORS` values. + ["CHARMM27"] + donors : sequence (optional) + Extra H donor atom types (in addition to those in + :attr:`~DEFAULT_DONORS`), must be a sequence. + acceptors : sequence (optional) + Extra H acceptor atom types (in addition to those in + :attr:`~DEFAULT_ACCEPTORS`), must be a + sequence. + distance_type : {"hydrogen", "heavy"} (optional) + Measure hydrogen bond lengths between donor and acceptor heavy + atoms ("heavy") or between donor hydrogen and acceptor heavy + atom ("hydrogen"). If using "heavy" then one should set the + *distance* cutoff to a higher value such as 3.5 Å. ["hydrogen"] + output_format: {"sele1_sele2", "donor_acceptor"} (optional) + Setting the output format for timeseries and table. If set to + "sele1_sele2", for each hydrogen bond, the one close to selection 1 + will be placed before selection 2. If set to "donor_acceptor", the + donor will be placed before acceptor. "sele1_sele2"] + debug : bool (optional) + If set to ``True`` enables per-frame debug logging. This is disabled + by default because it generates a very large amount of output in + the log file. (Note that a logger must have been started to see + the output, e.g. using :func:`MDAnalysis.start_logging`.) + verbose : bool (optional) + Toggle progress output. (Can also be given as keyword argument to + :meth:`run`.) + + Notes + ----- + In order to speed up processing, atoms are filtered by a coarse + distance criterion before a detailed hydrogen bonding analysis is + performed (`filter_first` = ``True``). + + If selection 1 and selection 2 are very mobile during the simulation + and the contact surface is constantly changing (i.e. residues are + moving farther than 4 Å + `order` * (2 Å + `distance`)), you might + consider setting the `update_selection` keywords to ``True`` + to ensure correctness. + + .. versionchanged 0.20.0 + The :attr:`WaterBridgeAnalysis.timeseries` has been updated + see :attr:`WaterBridgeAnalysis.timeseries` for detail. + This class is now based on + :class:`~MDAnalysis.analysis.base.AnalysisBase`. + + + """ + super(WaterBridgeAnalysis, self).__init__(universe.trajectory, + **kwargs) + self.water_selection = water_selection + self.update_water_selection = update_water_selection + # per-frame debugging output? + self.debug = debug + + # set the output format + self.output_format = output_format + + self.u = universe + self.selection1 = selection1 + self.selection2 = selection2 + self.selection1_type = selection1_type + + # if the selection 1 and selection 2 are the same + if selection1 == selection2: + # eliminate the duplication + self.selection1_type = "donor" + self.update_selection = update_selection + self.filter_first = filter_first + self.distance = distance + self.distance_type = distance_type # note: everything except 'heavy' will give the default behavior + self.angle = angle + self.pbc = pbc and all(self.u.dimensions[:3]) + self.order = order + + # set up the donors/acceptors lists + if donors is None: + donors = () + if acceptors is None: + acceptors = () + self.forcefield = forcefield + self.donors = tuple(set(self.DEFAULT_DONORS[forcefield]).union(donors)) + self.acceptors = tuple(set(self.DEFAULT_ACCEPTORS[forcefield]).union(acceptors)) + + if self.selection1_type not in ('both', 'donor', 'acceptor'): + raise ValueError('WaterBridgeAnalysis: Invalid selection type {0!s}'.format( + self.selection1_type)) + + self._network = [] # final result accessed as self.network + self.timesteps = None # time for each frame + + self._log_parameters() + + def _log_parameters(self): + """Log important parameters to the logfile.""" + logger.info("WaterBridgeAnalysis: selection = %r (update: %r)", + self.selection2, self.update_selection) + logger.info("WaterBridgeAnalysis: water selection = %r (update: %r)", + self.water_selection, self.update_water_selection) + logger.info("WaterBridgeAnalysis: criterion: donor %s atom and acceptor \ + atom distance <= %.3f A", self.distance_type, + self.distance) + logger.info("WaterBridgeAnalysis: criterion: angle D-H-A >= %.3f degrees", + self.angle) + logger.info("WaterBridgeAnalysis: force field %s to guess donor and \ + acceptor names", self.forcefield) + + def _build_residue_dict(self, selection): + # Build the residue_dict where the key is the residue name + # The content is a dictionary where hydrogen bond donor heavy atom names is the key + # The content is the hydrogen bond donor hydrogen atom names + atom_group = self.u.select_atoms(selection) + for residue in atom_group.residues: + if not residue.resname in self._residue_dict: + self._residue_dict[residue.resname] = defaultdict(set) + for atom in residue.atoms: + if atom.name in self.donors: + self._residue_dict[residue.resname][atom.name].update(self._get_bonded_hydrogens(atom).names) + + def _update_donor_h(self, atom_ix, h_donors, donors_h): + atom = self.u.atoms[atom_ix] + residue = atom.residue + hydrogen_names = self._residue_dict[residue.resname][atom.name] + if hydrogen_names: + hydrogens = residue.atoms.select_atoms('name {0}'.format( + ' '.join(hydrogen_names))).ix + for atom in hydrogens: + h_donors[atom] = atom_ix + donors_h[atom_ix].append(atom) + + def _update_selection(self): + self._s1_donors = [] + self._s1_h_donors = {} + self._s1_donors_h = defaultdict(list) + self._s1_acceptors = [] + + self._s2_donors = [] + self._s2_h_donors = {} + self._s2_donors_h = defaultdict(list) + self._s2_acceptors = [] + + self._s1 = self.u.select_atoms(self.selection1).ix + self._s2 = self.u.select_atoms(self.selection2).ix + + if self.filter_first and len(self._s1): + self.logger_debug('Size of selection 1 before filtering:' + ' {} atoms'.format(len(self._s1))) + ns_selection_1 = AtomNeighborSearch(self.u.atoms[self._s1], box=self.box) + self._s1 = ns_selection_1.search(self.u.atoms[self._s2], self.selection_distance).ix + self.logger_debug("Size of selection 1: {0} atoms".format(len(self._s1))) + + if len(self._s1) == 0: + logger.warning('Selection 1 "{0}" did not select any atoms.' + .format(str(self.selection1)[:80])) + return + + if self.filter_first and len(self._s2): + self.logger_debug('Size of selection 2 before filtering:' + ' {} atoms'.format(len(self._s2))) + ns_selection_2 = AtomNeighborSearch(self.u.atoms[self._s2], box=self.box) + self._s2 = ns_selection_2.search(self.u.atoms[self._s1], self.selection_distance).ix + self.logger_debug('Size of selection 2: {0} atoms'.format(len(self._s2))) + + if len(self._s2) == 0: + logger.warning('Selection 2 "{0}" did not select any atoms.' + .format(str(self.selection2)[:80])) + return + + if self.selection1_type in ('donor', 'both'): + self._s1_donors = self.u.atoms[self._s1].select_atoms( + 'name {0}'.format(' '.join(self.donors))).ix + for atom_ix in self._s1_donors: + self._update_donor_h(atom_ix, self._s1_h_donors, self._s1_donors_h) + self.logger_debug("Selection 1 donors: {0}".format(len(self._s1_donors))) + self.logger_debug("Selection 1 donor hydrogens: {0}".format(len(self._s1_h_donors))) + if self.selection1_type in ('acceptor', 'both'): + self._s1_acceptors = self.u.atoms[self._s1].select_atoms( + 'name {0}'.format(' '.join(self.acceptors))).ix + self.logger_debug("Selection 1 acceptors: {0}".format(len(self._s1_acceptors))) + + if len(self._s2) == 0: + return None + if self.selection1_type in ('donor', 'both'): + self._s2_acceptors = self.u.atoms[self._s2].select_atoms( + 'name {0}'.format(' '.join(self.acceptors))).ix + self.logger_debug("Selection 2 acceptors: {0:d}".format(len(self._s2_acceptors))) + if self.selection1_type in ('acceptor', 'both'): + self._s2_donors = self.u.atoms[self._s2].select_atoms( + 'name {0}'.format(' '.join(self.donors))).ix + for atom_ix in self._s2_donors: + self._update_donor_h(atom_ix, self._s2_h_donors, self._s2_donors_h) + self.logger_debug("Selection 2 donors: {0:d}".format(len(self._s2_donors))) + self.logger_debug("Selection 2 donor hydrogens: {0:d}".format(len(self._s2_h_donors))) + + def _update_water_selection(self): + self._water_donors = [] + self._water_h_donors = {} + self._water_donors_h = defaultdict(list) + self._water_acceptors = [] + + self._water = self.u.select_atoms(self.water_selection).ix + self.logger_debug('Size of water selection before filtering:' + ' {} atoms'.format(len(self._water))) + if len(self._water) and self.filter_first: + filtered_s1 = AtomNeighborSearch(self.u.atoms[self._water], box=self.box).search(self.u.atoms[self._s1], + self.selection_distance) + if filtered_s1: + self._water = AtomNeighborSearch(filtered_s1, box=self.box).search(self.u.atoms[self._s2], + self.selection_distance).ix + + self.logger_debug("Size of water selection: {0} atoms".format(len(self._water))) + + if len(self._water) == 0: + logger.warning("Water selection '{0}' did not select any atoms." + .format(str(self.water_selection)[:80])) + else: + self._water_donors = self.u.atoms[self._water].select_atoms( + 'name {0}'.format(' '.join(self.donors))).ix + for atom_ix in self._water_donors: + self._update_donor_h(atom_ix, self._water_h_donors, self._water_donors_h) + self.logger_debug("Water donors: {0}".format(len(self._water_donors))) + self.logger_debug("Water donor hydrogens: {0}".format(len(self._water_h_donors))) + self._water_acceptors = self.u.atoms[self._water].select_atoms( + 'name {0}'.format(' '.join(self.acceptors))).ix + self.logger_debug("Water acceptors: {0}".format(len(self._water_acceptors))) + + def _get_bonded_hydrogens(self, atom): + """Find hydrogens bonded within cutoff to `atom`. + + Hydrogens are detected by either name ("H*", "[123]H*") or type ("H"); + this is not fool-proof as the atom type is not always a character but + the name pattern should catch most typical occurrences. + + The distance from `atom` is calculated for all hydrogens in the residue + and only those within a cutoff are kept. The cutoff depends on the + heavy atom (more precisely, on its element, which is taken as the first + letter of its name ``atom.name[0]``) and is parameterized in + :attr:`WaterBridgeAnalysis.r_cov`. If no match is found then the + default of 1.5 Å is used. + + + Parameters + ---------- + atom : groups.Atom + heavy atom + + Returns + ------- + hydrogen_atoms : AtomGroup or [] + list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) + or empty list ``[]`` if none were found. + """ + try: + return atom.residue.atoms.select_atoms( + "(name H* 1H* 2H* 3H* or type H) and around {0:f} name {1!s}" + "".format(self.r_cov[atom.name[0]], atom.name)) + except NoDataError: + return [] + + def logger_debug(self, *args): + if self.debug: + logger.debug(*args) + + + def _prepare(self): + # The distance for selection is defined as twice the maximum bond length of an O-H bond (2A) + # plus order of water bridge times the length of OH bond plus hydrogne bond distance + self.selection_distance = (2 * 2 + self.order * (2 + self.distance)) + + self.box = self.u.dimensions if self.pbc else None + self._residue_dict = {} + self._build_residue_dict(self.selection1) + self._build_residue_dict(self.selection2) + self._build_residue_dict(self.water_selection) + + self._update_selection() + + self.timesteps = [] + if len(self._s1) and len(self._s2): + self._update_water_selection() + else: + logger.info("WaterBridgeAnalysis: no atoms found in the selection.") + + logger.info("WaterBridgeAnalysis: initial checks passed.") + + logger.info("WaterBridgeAnalysis: starting") + logger.debug("WaterBridgeAnalysis: donors %r", self.donors) + logger.debug("WaterBridgeAnalysis: acceptors %r", self.acceptors) + logger.debug("WaterBridgeAnalysis: water bridge %r", self.water_selection) + + if self.debug: + logger.debug("Toggling debug to %r", self.debug) + else: + logger.debug("WaterBridgeAnalysis: For full step-by-step debugging output use debug=True") + + logger.info("Starting analysis (frame index start=%d stop=%d, step=%d)", + self.start, self.stop, self.step) + + def _donor2acceptor(self, donors, h_donors, acceptor): + if len(donors) == 0 or len(acceptor) == 0: + return [] + if self.distance_type != 'heavy': + donors_idx = list(h_donors.keys()) + else: + donors_idx = list(donors.keys()) + result = [] + # Code modified from p-j-smith + pairs, distances = capped_distance(self.u.atoms[donors_idx].positions, + self.u.atoms[acceptor].positions, + max_cutoff=self.distance, box=self.box, + return_distances=True) + if self.distance_type != 'heavy': + tmp_donors = [h_donors[donors_idx[idx]] for idx in pairs[:,0]] + tmp_hydrogens = [donors_idx[idx] for idx in pairs[:,0]] + tmp_acceptors = [acceptor[idx] for idx in pairs[:,1]] + else: + tmp_donors = [] + tmp_hydrogens = [] + tmp_acceptors = [] + for idx in range(len(pairs[:,0])): + for h in donors[donors_idx[pairs[idx,0]]]: + tmp_donors.append(donors_idx[pairs[idx,0]]) + tmp_hydrogens.append(h) + tmp_acceptors.append(acceptor[pairs[idx,1]]) + + angles = np.rad2deg( + calc_angles( + self.u.atoms[tmp_donors].positions, + self.u.atoms[tmp_hydrogens].positions, + self.u.atoms[tmp_acceptors].positions, + box=self.box + ) + ) + hbond_indices = np.where(angles > self.angle)[0] + for index in hbond_indices: + h = tmp_hydrogens[index] + d = tmp_donors[index] + a = tmp_acceptors[index] + result.append((h, d, a, self._expand_index(h), self._expand_index(a), + distances[index], angles[index])) + return result + + def _single_frame(self): + self.timesteps.append(self._ts.time) + self.box = self.u.dimensions if self.pbc else None + + if self.update_selection: + self._update_selection() + if len(self._s1) and len(self._s2): + if self.update_water_selection: + self._update_water_selection() + else: + self._network.append(defaultdict(dict)) + return + + selection_1 = [] + water_pool = defaultdict(list) + next_round_water = set([]) + selection_2 = [] + + if self.selection1_type in ('donor', 'both'): + # check for direct hbond from s1 to s2 + self.logger_debug("Selection 1 Donors <-> Selection 2 Acceptors") + results = self._donor2acceptor(self._s1_donors_h, self._s1_h_donors,self._s2_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line + water_pool[(a_resname, a_resid)] = None + selection_1.append((h_index, d_index, a_index, None, dist, angle)) + selection_2.append((a_resname, a_resid)) + if self.order > 0: + self.logger_debug("Selection 1 Donors <-> Water Acceptors") + results = self._donor2acceptor(self._s1_donors_h, self._s1_h_donors, self._water_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), ( + a_resname, a_resid, a_name), dist, angle = line + selection_1.append((h_index, d_index, a_index, None, dist, angle)) + + self.logger_debug("Water Donors <-> Selection 2 Acceptors") + results = self._donor2acceptor(self._water_donors_h, self._water_h_donors, self._s2_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), ( + a_resname, a_resid, a_name), dist, angle = line + water_pool[(h_resname, h_resid)].append((h_index, d_index, a_index, None, dist, angle)) + selection_2.append((a_resname, a_resid)) + + if self.selection1_type in ('acceptor', 'both'): + self.logger_debug("Selection 2 Donors <-> Selection 1 Acceptors") + results = self._donor2acceptor(self._s2_donors_h, self._s2_h_donors, self._s1_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line + water_pool[(h_resname, h_resid)] = None + selection_1.append((a_index, None, h_index, d_index, dist, angle)) + selection_2.append((h_resname, h_resid)) + + if self.order > 0: + self.logger_debug("Selection 2 Donors <-> Water Acceptors") + results = self._donor2acceptor(self._s2_donors_h, self._s2_h_donors, self._water_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), ( + a_resname, a_resid, a_name), dist, angle = line + water_pool[(a_resname, a_resid)].append((a_index, None, h_index, d_index, dist, angle)) + selection_2.append((h_resname, h_resid)) + + self.logger_debug("Selection 1 Acceptors <-> Water Donors") + results = self._donor2acceptor(self._water_donors_h, self._water_h_donors, self._s1_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), ( + a_resname, a_resid, a_name), dist, angle = line + selection_1.append((a_index, None, h_index, d_index, dist, angle)) + + if self.order > 1: + self.logger_debug("Water donor <-> Water Acceptors") + results = self._donor2acceptor(self._water_donors_h, self._water_h_donors, self._water_acceptors) + for line in results: + h_index, d_index, a_index, (h_resname, h_resid, h_name), ( + a_resname, a_resid, a_name), dist, angle = line + water_pool[(a_resname, a_resid)].append((a_index, None, h_index, d_index, dist, angle)) + water_pool[(h_resname, h_resid)].append((h_index, d_index, a_index, None, dist, angle)) + + # solve the connectivity network + # The following code attempt to generate a water network which is formed by the class dict. + # Suppose we have a water bridge connection ARG1 to ASP3 via the two hydrogen bonds. + # [0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180], + # [2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180], + # The resulting network will be + #{(0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180): {(2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180): None}} + # Where the key of the a dict will be all the hydrogen bonds starting from this nodes. + # The corresponding value of a certain key will be a dictionary whose key will be all the hydrogen bonds from + # the destination of in the key. + # If the value of a certain key is None, which means it is reaching selection 2. + + result = {'start': defaultdict(dict), 'water': defaultdict(dict)} + + def add_route(result, route): + if len(route) == 1: + result['start'][route[0]] = None + else: + # exclude the the selection which goes back to itself + if (sorted(route[0][0:3:2]) == sorted(route[-1][0:3:2])): + return + + # selection 2 to water + result['water'][route[-1]] = None + # water to water + for i in range(1, len(route) - 1): + result['water'][route[i]][route[i+1]] = result['water'][route[i+1]] + # selection 1 to water + result['start'][route[0]][route[1]] = result['water'][route[1]] + + def traverse_water_network(graph, node, end, route, maxdepth, result): + if len(route) > self.order + 1: + return + else: + if node in end: + # check if any duplication happens + if len(route) == len(set(route)): + add_route(result, route) + else: + for new_node in graph[node]: + new_route = route[:] + new_route.append(new_node) + new_node = self._expand_timeseries(new_node,'sele1_sele2')[3][:2] + traverse_water_network(graph, new_node, end, new_route, maxdepth, result) + for s1 in selection_1: + route = [s1, ] + next_mol = self._expand_timeseries(s1,'sele1_sele2')[3][:2] + traverse_water_network(water_pool, next_mol, selection_2, route[:], self.order, result) + + self._network.append(result['start']) + + def _traverse_water_network(self, graph, current, analysis_func=None, output=None, link_func=None, **kwargs): + ''' + This function recursively traverses the water network self._network and finds the hydrogen bonds which connect + the current atom to the next atom. The newly found hydrogen bond will be appended to the hydrogen bonds + connecting the selection 1 to the current atom via link_func. When selection 2 is reached, the full list of + hydrogen bonds connecting the selection 1 to selection 2 will be fed into analysis_func, which will then modify + the output in place. + :param graph: The connection network describes the connection between the atoms in the water network. + :param current: The hydrogen bonds from selection 1 until now. + :param analysis_func: The analysis function which is called to analysis the hydrogen bonds. + :param output: where the result is stored. + :param link_func: The new hydrogen bonds will be appended to current. + :param kwargs: the keywords which are passed into the analysis_func. + :return: + ''' + if link_func is None: + # If no link_func is provided, the default link_func will be used + link_func = self._full_link + + if graph is None: + # if selection 2 is reached + if not analysis_func is None: + # the result is analysed by analysis_func which will change the output + analysis_func(current, output, self.u, **kwargs) + else: + # make sure no loop can occur + if len(current) <= self.order: + for node in graph: + # the new hydrogen bond will be added to the existing bonds + new = link_func(current, node) + self._traverse_water_network(graph[node], new, analysis_func, output, link_func, **kwargs) + + def _expand_index(self, index): + ''' + Expand the index into (resname, resid, name). + ''' + atom = self.u.atoms[index] + return (atom.resname, atom.resid, atom.name) + + def _expand_timeseries(self, entry, output_format=None): + ''' + Expand the compact data format into the old timeseries form. + The old is defined as the format for release up to 0.19.2. + As is discussed in Issue #2177, the internal storage of the hydrogen + bond information has been changed to the compact format. + The function takes in the argument `output_format` to see which output format will be chosen. + if `output_format` is not specified, the value will be taken from :attr:`output_format`. + If `output_format` is 'sele1_sele2', the output will be the old water bridge analysis format:: + + # donor from selection 1 to acceptor in selection 2 + [sele1_index, sele2_index, + (sele1_resname, sele1_resid, sele1_name), + (sele2_resname, sele2_resid, sele2_name), dist, angle] + + If `output_format` is 'donor_acceptor', the output will be the old hydrogen bond analysis format:: + + # From donor to acceptor + [donor_index, acceptor_index, + (donor_resname, donor_resid, donor_name), + (acceptor_resname, acceptor_resid, acceptor_name), dist, angle] + ''' + output_format = output_format or self.output_format + # Expand the compact entry into atom1, which is the first index in the output and atom2, which is the second + # entry. + atom1, heavy_atom1, atom2, heavy_atom2, dist, angle = entry + if output_format == 'sele1_sele2': + # If the output format is the sele1_sele2, no change will be executed + atom1, atom2 = atom1, atom2 + elif output_format == 'donor_acceptor': + # If the output format is donor_acceptor, use heavy atom position to check which is donor and which is + # acceptor + if heavy_atom1 is None: + # atom1 is hydrogen bond acceptor and thus, the position of atom1 and atom2 are swapped. + atom1, atom2 = atom2, atom1 + else: + # atom1 is hydrogen bond donor, position not swapped. + atom1, atom2 = atom1, atom2 + else: + raise KeyError("Only 'sele1_sele2' or 'donor_acceptor' are allowed as output format") + + return (atom1, atom2, self._expand_index(atom1), self._expand_index(atom2), dist, angle) + + def _generate_timeseries(self, output_format=None): + r'''Time series of water bridges. + + The output is generated per frame as is explained in :ref:`wb_Analysis_Timeseries`. + The format of output can be changed via the output_format selection. + If ``output_format="sele1_sele2"``, the hydrogen bond forms a directional + link from selection 1 to selection 2. If ``output_format="donor_acceptor"``, + for each hydrogen bond, the donor is always written before the acceptor. + + Note + ---- + To find an acceptor atom in :attr:`Universe.atoms` by + *index* one would use ``u.atoms[acceptor_index]``. + + The :attr:`timeseries` is a managed attribute and it is generated + from the underlying data in :attr:`_network` every time the + attribute is accessed. It is therefore costly to call and if + :attr:`timeseries` is needed repeatedly it is recommended that you + assign to a variable:: + + w = WaterBridgeAnalysis(u) + w.run() + timeseries = w.timeseries + + .. versionchanged 0.20.0 + The :attr:`WaterBridgeAnalysis.timeseries` has been updated where + the donor and acceptor string has been changed to tuple + (resname string, resid, name_string). + + + ''' + output_format = output_format or self.output_format + def analysis(current, output, *args, **kwargs): + output = current + + timeseries = [] + for frame in self._network: + new_frame = [] + self._traverse_water_network(frame, new_frame, analysis_func=analysis, + output=new_frame, link_func=self._compact_link) + timeseries.append([self._expand_timeseries(entry, output_format) for entry in new_frame]) + return timeseries + + timeseries = property(_generate_timeseries) + + def _get_network(self): + r'''Network representation of the water network. + + The output is generated per frame as is explained in :ref:`wb_Analysis_Network`. + Each hydrogen bond has a compact representation of :: + + [sele1_acceptor_idx, None, sele2_donor_idx, donor_heavy_idx, distance, angle] + + or :: + + [sele1_donor_idx, donor_heavy_idx, sele1_acceptor_idx, None, distance, angle] + + The donor_heavy_idx is the heavy atom bonding to the proton and atoms + can be retrived from the universe:: + + atom = u.atoms[idx] + + .. versionadded:: 0.20.0 + + ''' + return self._network + + def set_network(self, network): + self._network = network + + network = property(_get_network, set_network) + + @classmethod + def _full_link(self, output, node): + ''' + A function used in _traverse_water_network to add the new hydrogen bond to the existing bonds. + :param output: The existing hydrogen bonds from selection 1 + :param node: The new hydrogen bond + :return: The hydrogen bonds from selection 1 with the new hydrogen bond added + ''' + result = output[:] + result.append(node) + return result + + @classmethod + def _compact_link(self, output, node): + ''' + A function used in _traverse_water_network to add the new hydrogen bond to the existing bonds. + In this form no new list is created and thus, one bridge will only appear once. + :param output: The existing hydrogen bonds from selection 1 + :param node: The new hydrogen bond + :return: The hydrogen bonds from selection 1 with the new hydrogen bond added + ''' + output.append(node) + return output + + def _count_by_type_analysis(self, current, output, *args, **kwargs): + ''' + Generates the key for count_by_type analysis. + :return: + ''' + + s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ + self._expand_timeseries(current[0]) + from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ + self._expand_timeseries(current[-1]) + key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + output[key] += 1 + + def count_by_type(self, analysis_func=None, **kwargs): + """Counts the frequency of water bridge of a specific type. + + If one atom *A* from *selection 1* is linked to atom *B* from + *selection 2* through one or more bridging waters, an entity will be created and + the proportion of time that this linkage exists in the whole simulation + will be calculated. + + The identification of a specific type of water bridge can be modified by + supplying the analysis_func function. See :ref:`wb_count_by_type` + for detail. + + Returns + ------- + counts : list + Returns a :class:`list` containing atom indices for *A* and + *B*, residue names, residue numbers, atom names (for both A and B) and + the fraction of the total time during which the water bridge was + detected. This method returns None if method + :meth:`WaterBridgeAnalysis.run` was not executed first. + + + """ + output = None + if analysis_func is None: + analysis_func = self._count_by_type_analysis + output = 'combined' + + if self._network: + length = len(self._network) + result_dict = defaultdict(int) + for frame in self._network: + frame_dict = defaultdict(int) + self._traverse_water_network(frame, [], analysis_func=analysis_func, + output=frame_dict, + link_func=self._full_link, **kwargs) + for key, value in frame_dict.items(): + result_dict[key] += frame_dict[key] + + if output == 'combined': + result = [[i for i in key] for key in result_dict] + [result[i].append(result_dict[key]/length) for i, key in enumerate(result_dict)] + else: + result = [(key, result_dict[key]/length) for key in result_dict] + return result + else: + return None + + def _count_by_time_analysis(self, current, output, *args, **kwargs): + s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ + self._expand_timeseries(current[0]) + from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ + self._expand_timeseries(current[-1]) + key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + output[key] += 1 + + def count_by_time(self, analysis_func=None, **kwargs): + """Counts the number of water bridges per timestep. + + The counting behaviour can be adjusted by supplying analysis_func. + See :ref:`wb_count_by_time` for details. + + Returns + ------- + counts : list + Returns a time series ``N(t)`` where ``N`` is the total + number of observed water bridges at time ``t``. + + """ + if analysis_func is None: + analysis_func = self._count_by_time_analysis + if self._network: + result = [] + for time, frame in zip(self.timesteps, self._network): + result_dict = defaultdict(int) + self._traverse_water_network(frame, [], analysis_func=analysis_func, + output=result_dict, + link_func=self._full_link, **kwargs) + result.append((time, sum([result_dict[key] for key in result_dict]))) + return result + else: + return None + + def _timesteps_by_type_analysis(self, current, output, *args, **kwargs): + s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ + self._expand_timeseries(current[0]) + from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ + self._expand_timeseries(current[-1]) + key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + output[key].append(kwargs.pop('time')) + + def timesteps_by_type(self, analysis_func=None, **kwargs): + """Frames during which each water bridges existed, sorted by each water bridges. + + Processes :attr:`WaterBridgeAnalysis._network` and returns a + :class:`list` containing atom indices, residue names, residue + numbers (from selection 1 and selection 2) and each timestep at which the + water bridge was detected. + + Similar to :meth:`~WaterBridgeAnalysis.count_by_type` and + :meth:`~WaterBridgeAnalysis.count_by_time`, the behavior can be adjusted by + supplying an analysis_func. + + Returns + ------- + data : list + + """ + output = None + if analysis_func is None: + analysis_func = self._timesteps_by_type_analysis + output = 'combined' + + if self._network: + result = defaultdict(list) + if self.timesteps is None: + timesteps = range(len(self._network)) + else: + timesteps = self.timesteps + for time, frame in zip(timesteps, self._network): + self._traverse_water_network(frame, [], analysis_func=analysis_func, + output=result, + link_func=self._full_link, time=time, **kwargs) + + result_list = [] + for key, time_list in result.items(): + for time in time_list: + if output == 'combined': + key = list(key) + key.append(time) + result_list.append(key) + else: + result_list.append((key, time)) + return result_list + else: + return None + + def generate_table(self, output_format=None): + """Generate a normalised table of the results. + + The table is stored as a :class:`numpy.recarray` in the + attribute :attr:`~WaterBridgeAnalysis.table`. + + The output format of :attr:`~WaterBridgeAnalysis.table` can also be + changed using output_format in a fashion similar to :attr:`WaterBridgeAnalysis.timeseries` + """ + output_format = output_format or self.output_format + if self._network == []: + msg = "No data computed, do run() first." + warnings.warn(msg, category=MissingDataWarning) + logger.warning(msg) + return None + timeseries = self._generate_timeseries(output_format) + + num_records = np.sum([len(hframe) for hframe in timeseries]) + # build empty output table + if output_format == 'sele1_sele2': + dtype = [ + ("time", float), + ("sele1_index", int), ("sele2_index", int), + ("sele1_resnm", "|U4"), ("sele1_resid", int), ("sele1_atom", "|U4"), + ("sele2_resnm", "|U4"), ("sele2_resid", int), ("sele2_atom", "|U4"), + ("distance", float), ("angle", float)] + elif output_format == 'donor_acceptor': + dtype = [ + ("time", float), + ("donor_index", int), ("acceptor_index", int), + ("donor_resnm", "|U4"), ("donor_resid", int), ("donor_atom", "|U4"), + ("acceptor_resnm", "|U4"), ("acceptor_resid", int), ("acceptor_atom", "|U4"), + ("distance", float), ("angle", float)] + + # according to Lukas' notes below, using a recarray at this stage is ineffective + # and speedups of ~x10 can be achieved by filling a standard array, like this: + out = np.empty((num_records,), dtype=dtype) + cursor = 0 # current row + for t, hframe in zip(self.timesteps, timeseries): + for (donor_index, acceptor_index, donor, + acceptor, distance, angle) in hframe: + # donor|acceptor = (resname, resid, atomid) + out[cursor] = (t, donor_index, acceptor_index) + \ + donor + acceptor + (distance, angle) + cursor += 1 + assert cursor == num_records, "Internal Error: Not all wb records stored" + table = out.view(np.recarray) + logger.debug("WBridge: Stored results as table with %(num_records)d entries.", vars()) + self.table = table diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index 24f076feb55..7de4890aba5 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -7,7 +7,7 @@ import MDAnalysis import MDAnalysis.analysis.hbonds -from MDAnalysis.analysis.hbonds.wbridge_analysis import WaterBridgeAnalysis +from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import WaterBridgeAnalysis def test_import_from_hbonds(): try: From 68584d94b44011ef7ffef9a7417c0249e6518de3 Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Tue, 18 Aug 2020 12:25:26 +0100 Subject: [PATCH 02/26] PEP8 --- .../analysis/hbonds/wbridge_analysis.py | 6 +- .../analysis/hydrogenbonds/__init__.py | 1 + .../hydrogenbonds/wbridge_analysis.py | 762 +++++++++++------- .../MDAnalysisTests/analysis/test_wbridge.py | 3 +- 4 files changed, 477 insertions(+), 295 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py index 5f709cb02b4..1f0138c0ec9 100644 --- a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py @@ -33,6 +33,8 @@ .. _`@xiki-tempula`: https://github.com/xiki-tempula -This module is moved to :class:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis`.""" +This module is moved to +:class:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis`.""" -from MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis import WaterBridgeAnalysis +from MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis import ( + WaterBridgeAnalysis,) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py index 9aa5c1f7fa9..25f6dd66383 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py @@ -26,3 +26,4 @@ ] from .hbond_analysis import HydrogenBondAnalysis +from .wbridge_analysis import WaterBridgeAnalysis diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py index e0fab139133..555c99d2934 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -22,7 +22,8 @@ # # Water Bridge Analysis -r"""Water Bridge analysis --- :mod:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis` +r"""Water Bridge analysis --- +mod:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis` =============================================================================== :Author: Zhiyi Wu @@ -56,7 +57,8 @@ A higher order water bridge is defined as more than one water bridging hydrogen bond acceptor and donor. An example of a second order water bridge: -e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···H−O:···HN- (where H−O is part of **H−O**\ −H) +e.g. -CO\ :sub:`2`\ :sup:`-`:···H−O:···H−O:···HN- +(where H−O is part of **H−O**\ −H) The following keyword arguments are important to control the behaviour of the water bridge analysis: @@ -84,13 +86,13 @@ Output as Network ----------------- -Since the waters connecting the two ends of the selections are by nature a network. -We provide a network representation of the water network. Water bridge data are -returned per frame, which is stored in :attr:`WaterBridgeAnalysis.network`. Each -frame is represented as a dictionary, where the keys are the hydrogen bonds -originating from selection 1 and the values are new dictionaries representing -the hydrogen bonds coming out of the corresponding molecules making hydrogen bonds -with selection 1. +Since the waters connecting the two ends of the selections are by nature a +network. We provide a network representation of the water network. Water bridge +data are returned per frame, which is stored in +:attr:`WaterBridgeAnalysis.network`. Each frame is represented as a dictionary, +where the keys are the hydrogen bonds originating from selection 1 and the +values are new dictionaries representing the hydrogen bonds coming out of the +corresponding molecules making hydrogen bonds with selection 1. As for the hydrogen bonds which reach the selection 2, the values of the corresponding keys are None. One example where selection 1 and selection 2 are @@ -102,9 +104,12 @@ # H·············O-H(water2) # H {(sele1_acceptor, None, water1_donor, water1_donor_heavy, distance, angle): - {(water1_acceptor, None, sele2_donor, sele2_donor_heavy, distance, angle): None}, - {(water1_donor, water1_donor_heavy, water2_acceptor, None, distance, angle): - {(water2_acceptor, None, sele2_donor, sele2_donor_heavy, distance, angle): None} + {(water1_acceptor, None, sele2_donor, sele2_donor_heavy, + distance, angle): None}, + {(water1_donor, water1_donor_heavy, water2_acceptor, None, + distance, angle): + {(water2_acceptor, None, sele2_donor, sele2_donor_heavy, + distance, angle): None} }, } @@ -118,20 +123,20 @@ Output as Timeseries -------------------- -For lower order water bridges, it might be desirable to represent the connections as -:attr:`WaterBridgeAnalysis.timeseries`. The results are returned per frame and -are a list of hydrogen bonds between the selection 1 or selection 2 and the -bridging waters. Due to the complexity of the higher order water bridge and the -fact that one hydrogen bond between two waters can appear in both third and -fourth order water bridge, the hydrogen bonds in the +For lower order water bridges, it might be desirable to represent the +connections as :attr:`WaterBridgeAnalysis.timeseries`. The results are returned +per frame and are a list of hydrogen bonds between the selection 1 or selection +2 and the bridging waters. Due to the complexity of the higher order water +bridge and the fact that one hydrogen bond between two waters can appear in +both third and fourth order water bridge, the hydrogen bonds in the :attr:`WaterBridgeAnalysis.timeseries` attribute are generated in a depth-first search manner to avoid duplication. Example code of how :attr:`WaterBridgeAnalysis.timeseries` is generated:: def network2timeseries(network, timeseries): '''Traverse the network in a depth-first fashion. - expand_timeseries will expand the compact representation to the familiar - timeseries representation.''' + expand_timeseries will expand the compact representation to the + familiar timeseries representation.''' if network is None: return @@ -149,7 +154,8 @@ def network2timeseries(network, timeseries): [ # frame 1 [ , , (, , ), - (, , ), + (, , + ), , ], .... ], @@ -253,8 +259,8 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): DEFAULT_ACCEPTORS = {"OtherFF": tuple(set([...]))} Then simply use the new class instead of the parent class and call it with -```forcefield` = "OtherFF"``. Please also consider contributing the list of heavy -atom names to MDAnalysis. +```forcefield` = "OtherFF"``. Please also consider contributing the list of +heavy atom names to MDAnalysis. How to perform WaterBridgeAnalysis ---------------------------------- @@ -265,13 +271,15 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): import MDAnalysis.analysis.hbonds u = MDAnalysis.Universe('topology', 'trajectory') - w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', 'resname ASP') + w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', + 'resname ASP') w.run() -The maximum number of bridging waters detected can be changed using the order keyword. :: +The maximum number of bridging waters detected can be changed using the order +keyword. :: - w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', 'resname ASP', - order=3) + w = MDAnalysis.analysis.hbonds.WaterBridgeAnalysis(u, 'resname ARG', + 'resname ASP', order=3) Thus, a maximum of three bridging waters will be detected. @@ -279,10 +287,10 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): detecting the percentage of time a certain water bridge exits. Trajectory :code:`u` has two frames, where the first frame contains a water -bridge from the oxygen of the first arginine to one of the oxygens in the carboxylic -group of aspartate (ASP3:OD1). In the second frame, the same water bridge forms but -is between the oxygen of the arginine and the other oxygen in the carboxylic -group (ASP3:OD2). :: +bridge from the oxygen of the first arginine to one of the oxygens in the +carboxylic group of aspartate (ASP3:OD1). In the second frame, the same water +bridge forms but is between the oxygen of the arginine and the other oxygen in +the carboxylic group (ASP3:OD2). :: # index residue id residue name atom name # 0 1 ARG O @@ -324,50 +332,60 @@ class WaterBridgeAnalysis_OtherFF(WaterBridgeAnalysis): [(0, 3, 'ARG', 1, 'O', 'ASP', 3, 'OD1', 0.5), (0, 4, 'ARG', 1, 'O', 'ASP', 3, 'OD2', 0.5),] -You might think that the OD1 and OD2 are the same oxygen and the aspartate has just flipped -and thus, they should be counted as the same type of water bridge. The type of the water -bridge can be customised by supplying an analysis function to -:meth:`~WaterBridgeAnalysis.count_by_type`. - -The analysis function has two parameters. The current and the output. The current is a list -of hydrogen bonds from selection 1 to selection 2, formatted in the same fashion as -:attr:`WaterBridgeAnalysis.network`, and an example will be :: - - [ # sele1 acceptor idx, , water donor index, donor heavy atom idx, dist, ang. - [ 0, None, 2, 1, 3.0,180], - # water donor idx, donor heavy atom idx, sele2 acceptor idx, distance, angle. - [ 3, 1, 4, None, 3.0,180],] - -Where ``current[0]`` is the first hydrogen bond originating from selection 1 and ``current[-1]`` is -the final hydrogen bond ending in selection 2. The output sums up all the information in the -current frame and is a dictionary with a user-defined key and the value is the weight of the -corresponding key. During the analysis phase, the function analysis iterates through all the -water bridges and modify the output in-place. At the end of the analysis, the keys from -all the frames are collected and the corresponding values will be summed up and returned. :: +You might think that the OD1 and OD2 are the same oxygen and the aspartate has +just flipped and thus, they should be counted as the same type of water bridge. +The type of the water bridge can be customised by supplying an analysis +function to :meth:`~WaterBridgeAnalysis.count_by_type`. + +The analysis function has two parameters. The current and the output. The +current is a list of hydrogen bonds from selection 1 to selection 2, formatted +in the same fashion as :attr:`WaterBridgeAnalysis.network`, and an example will +be :: + + [ + # sele1 acceptor idx, , water donor index, donor heavy atom idx, dist, ang. + [ 0, None, 2, 1, 3.0,180], + # water donor idx, donor heavy atom idx, sele2 acceptor idx, distance, angle. + [ 3, 1, 4, None, 3.0,180],] + +Where ``current[0]`` is the first hydrogen bond originating from selection 1 +and ``current[-1]`` is the final hydrogen bond ending in selection 2. The +output sums up all the information in the current frame and is a dictionary +with a user-defined key and the value is the weight of the corresponding key. +During the analysis phase, the function analysis iterates through all the water +bridges and modify the output in-place. At the end of the analysis, the keys +from all the frames are collected and the corresponding values will be summed +up and returned. :: def analysis(current, output, u): - r'''This function defines how the type of water bridge should be specified. + r'''This function defines how the type of water bridge should be + specified. Parameters ---------- current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. + The current water bridge being analysed is a list of hydrogen bonds + from selection 1 to selection 2. output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the weight of this type of water bridge. + A dictionary which is modified in-place where the key is the type + of the water bridge and the value is the weight of this type of + water bridge. u : MDAnalysis.universe The current Universe for looking up atoms.''' # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = + current[0] # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = + current[-1] # expand the atom index to the resname, resid, atom names sele1 = u.atoms[sele1_index] sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, + sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, + sele2.name) # if the residue name is ASP and the atom name is OD2 or OD1, # the atom name is changed to OD if s2_resname == 'ASP' and (s2_name == 'OD1' or s2_name == 'OD2'): @@ -384,43 +402,50 @@ def analysis(current, output, u): [(('ARG', 1, 'O', 'ASP', 3, 'OD'), 1.0),] Note that the result is arranged in the format of -``(key, the proportion of time)``. When no custom analysis function is supplied, -the key is expanded and is formatted as :: +``(key, the proportion of time)``. When no custom analysis function is supplied +, the key is expanded and is formatted as :: [('ARG', 1, 'O', 'ASP', 3, 'OD', 1.0),] -Some people might only interested in contacts between residues and pay no attention -to the details regarding the atom name. However, since multiple water bridges can -exist between two residues, which sometimes can give a result such that the water -bridge between two residues exists 300% of the time. Though this might be a desirable -result for some people, others might want the water bridge between two residues to be -only counted once per frame. This can also be achieved by supplying an analysis function -to :meth:`~WaterBridgeAnalysis.count_by_type`. :: +Some people might only interested in contacts between residues and pay no +attention to the details regarding the atom name. However, since multiple water +bridges can exist between two residues, which sometimes can give a result such +that the water bridge between two residues exists 300% of the time. Though this +might be a desirable result for some people, others might want the water bridge +between two residues to be only counted once per frame. This can also be +achieved by supplying an analysis function to +:meth:`~WaterBridgeAnalysis.count_by_type`. :: def analysis(current, output, u): - '''This function defines how the type of water bridge should be specified. + '''This function defines how the type of water bridge should be specified + . Parameters ---------- current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. + The current water bridge being analysed is a list of hydrogen bonds + from selection 1 to selection 2. output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the weight of this type of water bridge. + A dictionary which is modified in-place where the key is the type + of the water bridge and the value is the weight of this type of + water bridge. u : MDAnalysis.universe The current Universe for looking up atoms. ''' # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = + current[0] # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = + current[-1] # expand the atom index to the resname, resid, atom names sele1 = u.atoms[sele1_index] sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, + sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, + sele2.name) # s1_name and s2_name are not included in the key key = (s1_resname, s1_resid, s2_resname, s2_resid) @@ -433,34 +458,41 @@ def analysis(current, output, u): [(('ARG', 1, 'ASP', 3), 1.0),] -On the other hand, other people may insist that the first order and second-order water -bridges shouldn't be mixed together, which can also be achieved by supplying an analysis -function to :meth:`~WaterBridgeAnalysis.count_by_type`. :: +On the other hand, other people may insist that the first order and +second-order water bridges shouldn't be mixed together, which can also be +achieved by supplying an analysis function to +:meth:`~WaterBridgeAnalysis.count_by_type`. :: def analysis(current, output, u): - '''This function defines how the type of water bridge should be specified. + '''This function defines how the type of water bridge should be specified + . Parameters ---------- current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. + The current water bridge being analysed is a list of hydrogen bonds + from selection 1 to selection 2. output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the weight of this type of water bridge. + A dictionary which is modified in-place where the key is the type + of the water bridge and the value is the weight of this type of + water bridge. u : MDAnalysis.universe The current Universe for looking up atoms. ''' # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = + current[0] # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = + current[-1] # expand the atom index to the resname, resid, atom names sele1 = u.atoms[sele1_index] sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, + sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, + sele2.name) # order of the current water bridge is computed order_of_water_bridge = len(current) - 1 # and is included in the key @@ -470,38 +502,45 @@ def analysis(current, output, u): w.count_by_type(analysis_func=analysis) -The extra number 1 precede the 1.0 indicate that this is a first order water bridge :: +The extra number 1 precede the 1.0 indicate that this is a first order water +bridge :: [(('ARG', 1, 'ASP', 3, 1), 1.0),] -Some people might not be interested in the interactions related to arginine. The undesirable -interactions can be discarded by supplying an analysis function to -:meth:`~WaterBridgeAnalysis.count_by_type`. :: +Some people might not be interested in the interactions related to arginine. +The undesirable interactions can be discarded by supplying an analysis function +to :meth:`~WaterBridgeAnalysis.count_by_type`. :: def analysis(current, output, u): - '''This function defines how the type of water bridge should be specified. + '''This function defines how the type of water bridge should be + specified. Parameters ---------- current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. + The current water bridge being analysed is a list of hydrogen bonds + from selection 1 to selection 2. output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the number of this type of water bridge. + A dictionary which is modified in-place where the key is the type + of the water bridge and the value is the number of this type of + water bridge. u : MDAnalysis.universe The current Universe for looking up atoms. ''' # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = + current[0] # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = + current[-1] # expand the atom index to the resname, resid, atom names sele1 = u.atoms[sele1_index] sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, + sele1.name) + (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, + sele2.name) if not s1_resname == 'ARG': key = (s1_resname, s1_resid, s2_resname, s2_resid) output[key] += 1 @@ -526,12 +565,13 @@ def analysis(current, output, **kwargs): ----------------- :meth:`~WaterBridgeAnalysis.count_by_type` aggregates data across frames, which -might be desirable in some cases but not the others. :meth:`~WaterBridgeAnalysis.count_by_time` -provides additional functionality for aggregating results for each frame. +might be desirable in some cases but not the others. +:meth:`~WaterBridgeAnalysis.count_by_time` provides additional functionality +for aggregating results for each frame. -The default behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` is counting the number of -water bridges from selection 1 to selection 2 for each frame. Take the previous ASP, ARG salt -bridge for example: :: +The default behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` is counting +the number of water bridges from selection 1 to selection 2 for each frame. +Take the previous ASP, ARG salt bridge for example: :: w.count_by_time() @@ -540,41 +580,48 @@ def analysis(current, output, **kwargs): [(1.0, 1), (2.0, 1), ] Similar to :meth:`~WaterBridgeAnalysis.count_by_type` -The behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` can also be modified by supplying -an analysis function. +The behaviour of :meth:`~WaterBridgeAnalysis.count_by_time` can also be +modified by supplying an analysis function. Suppose we want to count - - the **number** of water molecules involved in bridging selection 1 to selection 2. + - the **number** of water molecules involved in bridging selection 1 to + selection 2. - only if water bridge terminates in atom name **OD1 of ASP**. - only when water bridge is joined by less than **two** water. The analysis function can be written as:: def analysis(current, output, u, **kwargs): - '''This function defines how the counting of water bridge should be specified. + '''This function defines how the counting of water bridge should be + specified. Parameters ---------- current : list - The current water bridge being analysed is a list of hydrogen bonds from - selection 1 to selection 2. + The current water bridge being analysed is a list of hydrogen bonds + from selection 1 to selection 2. output : dict - A dictionary which is modified in-place where the key is the type of - the water bridge and the value is the number of this type of water bridge. + A dictionary which is modified in-place where the key is the type + of the water bridge and the value is the number of this type of + water bridge. u : MDAnalysis.universe The current Universe for looking up atoms. ''' # decompose the first hydrogen bond. - sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = current[0] + sele1_index, sele1_heavy_index, atom2, heavy_atom2, dist, angle = + current[0] # decompose the last hydrogen bond. - atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = current[-1] + atom1, heavy_atom1, sele2_index, sele2_heavy_index, dist, angle = + current[-1] # expand the atom index to the resname, resid, atom names sele1 = u.atoms[sele1_index] sele2 = u.atoms[sele2_index] - (s1_resname, s1_resid, s1_name) = (sele1.resname, sele1.resid, sele1.name) - (s2_resname, s2_resid, s2_name) = (sele2.resname, sele2.resid, sele2.name) + (s1_resname, s1_resid, s1_name) = + (sele1.resname, sele1.resid, sele1.name) + (s2_resname, s2_resid, s2_name) = + (sele2.resname, sele2.resid, sele2.name) # only the residue name is ASP and the atom name is OD1, if s2_resname == 'ASP' and s2_name == 'OD1': @@ -658,20 +705,21 @@ class WaterBridgeAnalysis(AnalysisBase): #: (see :ref:`Default atom names for hydrogen bonding analysis`); #: use the keyword `donors` to add a list of additional donor names. DEFAULT_DONORS = { - 'CHARMM27': tuple(set([ - 'N', 'OH2', 'OW', 'NE', 'NH1', 'NH2', 'ND2', 'SG', 'NE2', 'ND1', - 'NZ', 'OG', 'OG1', 'NE1', 'OH'])), - 'GLYCAM06': tuple(set(['N', 'NT', 'N3', 'OH', 'OW'])), + 'CHARMM27': tuple( + {'N', 'OH2', 'OW', 'NE', 'NH1', 'NH2', 'ND2', 'SG', 'NE2', 'ND1', + 'NZ', 'OG', 'OG1', 'NE1', 'OH'}), + 'GLYCAM06': tuple({'N', 'NT', 'N3', 'OH', 'OW'}), 'other': tuple(set([]))} #: default atom names that are treated as hydrogen *acceptors* #: (see :ref:`Default atom names for hydrogen bonding analysis`); #: use the keyword `acceptors` to add a list of additional acceptor names. DEFAULT_ACCEPTORS = { - 'CHARMM27': tuple(set([ - 'O', 'OC1', 'OC2', 'OH2', 'OW', 'OD1', 'OD2', 'SG', 'OE1', 'OE1', - 'OE2', 'ND1', 'NE2', 'SD', 'OG', 'OG1', 'OH'])), - 'GLYCAM06': tuple(set(['N', 'NT', 'O', 'O2', 'OH', 'OS', 'OW', 'OY', 'SM'])), + 'CHARMM27': tuple( + {'O', 'OC1', 'OC2', 'OH2', 'OW', 'OD1', 'OD2', 'SG', 'OE1', 'OE1', + 'OE2', 'ND1', 'NE2', 'SD', 'OG', 'OG1', 'OH'}), + 'GLYCAM06': + tuple({'N', 'NT', 'O', 'O2', 'OH', 'OS', 'OW', 'OY', 'SM'}), 'other': tuple(set([]))} #: A :class:`collections.defaultdict` of covalent radii of common donors @@ -683,11 +731,12 @@ class WaterBridgeAnalysis(AnalysisBase): N=1.31, O=1.31, P=1.58, S=1.55) def __init__(self, universe, selection1='protein', - selection2='not resname SOL', water_selection='resname SOL', order=1, - selection1_type='both', update_selection=False, update_water_selection=True, - filter_first=True, distance_type='hydrogen', distance=3.0, - angle=120.0, forcefield='CHARMM27', donors=None, - acceptors=None, output_format="sele1_sele2", debug=None, verbose=False, + selection2='not resname SOL', water_selection='resname SOL', + order=1, selection1_type='both', update_selection=False, + update_water_selection=True, filter_first=True, + distance_type='hydrogen', distance=3.0, angle=120.0, + forcefield='CHARMM27', donors=None, acceptors=None, + output_format="sele1_sele2", debug=None, pbc=False, **kwargs): """Set up the calculation of water bridges between two selections in a universe. @@ -722,13 +771,14 @@ def __init__(self, universe, selection1='protein', a hydrogen bond with selection 1 and selection 2. order : int (optional) The maximum number of water bridges linking both selections. - if the order is set to 3, then all the residues linked with less than - three water molecules will be detected. [1] + if the order is set to 3, then all the residues linked with less + than three water molecules will be detected. [1] Computation of high order water bridges can be very time-consuming. Think carefully before running the calculation, do you really want - to compute the 20th order water bridge between domain A and domain B - or you just want to know the third order water bridge between two residues. + to compute the 20th order water bridge between domain A and domain + B or you just want to know the third order water bridge between two + residues. selection1_type : {"donor", "acceptor", "both"} (optional) Selection 1 can be 'donor', 'acceptor' or 'both'. Note that the value for `selection1_type` automatically determines how @@ -744,9 +794,9 @@ def __init__(self, universe, selection1='protein', changing. [``False``] update_water_selection : bool (optional) Update selection of water at each frame. Setting to ``False`` is - **only** recommended when the total amount of water molecules in the - simulation are small and when water molecules remain static across - the simulation. + **only** recommended when the total amount of water molecules in + the simulation are small and when water molecules remain static + across the simulation. However, in normal simulations, only a tiny proportion of water is engaged in the formation of water bridge. It is recommended to @@ -754,8 +804,9 @@ def __init__(self, universe, selection1='protein', ``True`` so as to filter out water not residing between the two selections. [``True``] filter_first : bool (optional) - Filter the water selection to only include water within 4 Å + `order` * - (2 Å + `distance`) away from `both` selection 1 and selection 2. + Filter the water selection to only include water within 4 Å + + `order` * (2 Å + `distance`) away from `both` selection 1 and + selection 2. Selection 1 and selection 2 are both filtered to only include atoms with the same distance away from the other selection. [``True``] distance : float (optional) @@ -791,10 +842,10 @@ def __init__(self, universe, selection1='protein', will be placed before selection 2. If set to "donor_acceptor", the donor will be placed before acceptor. "sele1_sele2"] debug : bool (optional) - If set to ``True`` enables per-frame debug logging. This is disabled - by default because it generates a very large amount of output in - the log file. (Note that a logger must have been started to see - the output, e.g. using :func:`MDAnalysis.start_logging`.) + If set to ``True`` enables per-frame debug logging. This is + disabled by default because it generates a very large amount of + output in the log file. (Note that a logger must have been started + to see the output, e.g. using :func:`MDAnalysis.start_logging`.) verbose : bool (optional) Toggle progress output. (Can also be given as keyword argument to :meth:`run`.) @@ -841,7 +892,8 @@ def __init__(self, universe, selection1='protein', self.update_selection = update_selection self.filter_first = filter_first self.distance = distance - self.distance_type = distance_type # note: everything except 'heavy' will give the default behavior + self.distance_type = distance_type # note: everything except 'heavy' + # will give the default behavior self.angle = angle self.pbc = pbc and all(self.u.dimensions[:3]) self.order = order @@ -853,10 +905,12 @@ def __init__(self, universe, selection1='protein', acceptors = () self.forcefield = forcefield self.donors = tuple(set(self.DEFAULT_DONORS[forcefield]).union(donors)) - self.acceptors = tuple(set(self.DEFAULT_ACCEPTORS[forcefield]).union(acceptors)) + self.acceptors = tuple(set(self.DEFAULT_ACCEPTORS[forcefield]).union( + acceptors)) if self.selection1_type not in ('both', 'donor', 'acceptor'): - raise ValueError('WaterBridgeAnalysis: Invalid selection type {0!s}'.format( + raise ValueError('WaterBridgeAnalysis: ' + 'Invalid selection type {0!s}'.format( self.selection1_type)) self._network = [] # final result accessed as self.network @@ -870,17 +924,19 @@ def _log_parameters(self): self.selection2, self.update_selection) logger.info("WaterBridgeAnalysis: water selection = %r (update: %r)", self.water_selection, self.update_water_selection) - logger.info("WaterBridgeAnalysis: criterion: donor %s atom and acceptor \ - atom distance <= %.3f A", self.distance_type, + logger.info("WaterBridgeAnalysis: criterion: donor %s atom and " + "acceptor atom distance <= %.3f A", self.distance_type, self.distance) - logger.info("WaterBridgeAnalysis: criterion: angle D-H-A >= %.3f degrees", + logger.info("WaterBridgeAnalysis: criterion: " + "angle D-H-A >= %.3f degrees", self.angle) logger.info("WaterBridgeAnalysis: force field %s to guess donor and \ acceptor names", self.forcefield) def _build_residue_dict(self, selection): # Build the residue_dict where the key is the residue name - # The content is a dictionary where hydrogen bond donor heavy atom names is the key + # The content is a dictionary where hydrogen bond donor heavy atom + # names is the key # The content is the hydrogen bond donor hydrogen atom names atom_group = self.u.select_atoms(selection) for residue in atom_group.residues: @@ -888,7 +944,8 @@ def _build_residue_dict(self, selection): self._residue_dict[residue.resname] = defaultdict(set) for atom in residue.atoms: if atom.name in self.donors: - self._residue_dict[residue.resname][atom.name].update(self._get_bonded_hydrogens(atom).names) + self._residue_dict[residue.resname][atom.name].update( + self._get_bonded_hydrogens(atom).names) def _update_donor_h(self, atom_ix, h_donors, donors_h): atom = self.u.atoms[atom_ix] @@ -918,9 +975,12 @@ def _update_selection(self): if self.filter_first and len(self._s1): self.logger_debug('Size of selection 1 before filtering:' ' {} atoms'.format(len(self._s1))) - ns_selection_1 = AtomNeighborSearch(self.u.atoms[self._s1], box=self.box) - self._s1 = ns_selection_1.search(self.u.atoms[self._s2], self.selection_distance).ix - self.logger_debug("Size of selection 1: {0} atoms".format(len(self._s1))) + ns_selection_1 = AtomNeighborSearch(self.u.atoms[self._s1], + box=self.box) + self._s1 = ns_selection_1.search(self.u.atoms[self._s2], + self.selection_distance).ix + self.logger_debug("Size of selection 1: {0} atoms".format( + len(self._s1))) if len(self._s1) == 0: logger.warning('Selection 1 "{0}" did not select any atoms.' @@ -930,9 +990,12 @@ def _update_selection(self): if self.filter_first and len(self._s2): self.logger_debug('Size of selection 2 before filtering:' ' {} atoms'.format(len(self._s2))) - ns_selection_2 = AtomNeighborSearch(self.u.atoms[self._s2], box=self.box) - self._s2 = ns_selection_2.search(self.u.atoms[self._s1], self.selection_distance).ix - self.logger_debug('Size of selection 2: {0} atoms'.format(len(self._s2))) + ns_selection_2 = AtomNeighborSearch(self.u.atoms[self._s2], + box=self.box) + self._s2 = ns_selection_2.search(self.u.atoms[self._s1], + self.selection_distance).ix + self.logger_debug('Size of selection 2: {0} atoms'.format( + len(self._s2))) if len(self._s2) == 0: logger.warning('Selection 2 "{0}" did not select any atoms.' @@ -943,27 +1006,35 @@ def _update_selection(self): self._s1_donors = self.u.atoms[self._s1].select_atoms( 'name {0}'.format(' '.join(self.donors))).ix for atom_ix in self._s1_donors: - self._update_donor_h(atom_ix, self._s1_h_donors, self._s1_donors_h) - self.logger_debug("Selection 1 donors: {0}".format(len(self._s1_donors))) - self.logger_debug("Selection 1 donor hydrogens: {0}".format(len(self._s1_h_donors))) + self._update_donor_h(atom_ix, self._s1_h_donors, + self._s1_donors_h) + self.logger_debug("Selection 1 donors: {0}".format( + len(self._s1_donors))) + self.logger_debug("Selection 1 donor hydrogens: {0}".format( + len(self._s1_h_donors))) if self.selection1_type in ('acceptor', 'both'): self._s1_acceptors = self.u.atoms[self._s1].select_atoms( 'name {0}'.format(' '.join(self.acceptors))).ix - self.logger_debug("Selection 1 acceptors: {0}".format(len(self._s1_acceptors))) + self.logger_debug("Selection 1 acceptors: {0}".format( + len(self._s1_acceptors))) if len(self._s2) == 0: return None if self.selection1_type in ('donor', 'both'): self._s2_acceptors = self.u.atoms[self._s2].select_atoms( 'name {0}'.format(' '.join(self.acceptors))).ix - self.logger_debug("Selection 2 acceptors: {0:d}".format(len(self._s2_acceptors))) + self.logger_debug("Selection 2 acceptors: {0:d}".format( + len(self._s2_acceptors))) if self.selection1_type in ('acceptor', 'both'): self._s2_donors = self.u.atoms[self._s2].select_atoms( 'name {0}'.format(' '.join(self.donors))).ix for atom_ix in self._s2_donors: - self._update_donor_h(atom_ix, self._s2_h_donors, self._s2_donors_h) - self.logger_debug("Selection 2 donors: {0:d}".format(len(self._s2_donors))) - self.logger_debug("Selection 2 donor hydrogens: {0:d}".format(len(self._s2_h_donors))) + self._update_donor_h(atom_ix, self._s2_h_donors, + self._s2_donors_h) + self.logger_debug("Selection 2 donors: {0:d}".format( + len(self._s2_donors))) + self.logger_debug("Selection 2 donor hydrogens: {0:d}".format( + len(self._s2_h_donors))) def _update_water_selection(self): self._water_donors = [] @@ -975,13 +1046,16 @@ def _update_water_selection(self): self.logger_debug('Size of water selection before filtering:' ' {} atoms'.format(len(self._water))) if len(self._water) and self.filter_first: - filtered_s1 = AtomNeighborSearch(self.u.atoms[self._water], box=self.box).search(self.u.atoms[self._s1], - self.selection_distance) + filtered_s1 = AtomNeighborSearch(self.u.atoms[self._water], + box=self.box).search( + self.u.atoms[self._s1], self.selection_distance) if filtered_s1: - self._water = AtomNeighborSearch(filtered_s1, box=self.box).search(self.u.atoms[self._s2], - self.selection_distance).ix + self._water = AtomNeighborSearch(filtered_s1, + box=self.box).search( + self.u.atoms[self._s2], self.selection_distance).ix - self.logger_debug("Size of water selection: {0} atoms".format(len(self._water))) + self.logger_debug("Size of water selection: {0} atoms".format( + len(self._water))) if len(self._water) == 0: logger.warning("Water selection '{0}' did not select any atoms." @@ -990,12 +1064,16 @@ def _update_water_selection(self): self._water_donors = self.u.atoms[self._water].select_atoms( 'name {0}'.format(' '.join(self.donors))).ix for atom_ix in self._water_donors: - self._update_donor_h(atom_ix, self._water_h_donors, self._water_donors_h) - self.logger_debug("Water donors: {0}".format(len(self._water_donors))) - self.logger_debug("Water donor hydrogens: {0}".format(len(self._water_h_donors))) + self._update_donor_h(atom_ix, self._water_h_donors, + self._water_donors_h) + self.logger_debug("Water donors: {0}".format( + len(self._water_donors))) + self.logger_debug("Water donor hydrogens: {0}".format( + len(self._water_h_donors))) self._water_acceptors = self.u.atoms[self._water].select_atoms( 'name {0}'.format(' '.join(self.acceptors))).ix - self.logger_debug("Water acceptors: {0}".format(len(self._water_acceptors))) + self.logger_debug("Water acceptors: {0}".format( + len(self._water_acceptors))) def _get_bonded_hydrogens(self, atom): """Find hydrogens bonded within cutoff to `atom`. @@ -1020,7 +1098,8 @@ def _get_bonded_hydrogens(self, atom): Returns ------- hydrogen_atoms : AtomGroup or [] - list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) + list of hydrogens (can be a + :class:`~MDAnalysis.core.groups.AtomGroup`) or empty list ``[]`` if none were found. """ try: @@ -1036,8 +1115,9 @@ def logger_debug(self, *args): def _prepare(self): - # The distance for selection is defined as twice the maximum bond length of an O-H bond (2A) - # plus order of water bridge times the length of OH bond plus hydrogne bond distance + # The distance for selection is defined as twice the maximum bond + # length of an O-H bond (2A) plus order of water bridge times the + # length of OH bond plus hydrogne bond distance self.selection_distance = (2 * 2 + self.order * (2 + self.distance)) self.box = self.u.dimensions if self.pbc else None @@ -1052,21 +1132,25 @@ def _prepare(self): if len(self._s1) and len(self._s2): self._update_water_selection() else: - logger.info("WaterBridgeAnalysis: no atoms found in the selection.") + logger.info("WaterBridgeAnalysis: " + "no atoms found in the selection.") logger.info("WaterBridgeAnalysis: initial checks passed.") logger.info("WaterBridgeAnalysis: starting") logger.debug("WaterBridgeAnalysis: donors %r", self.donors) logger.debug("WaterBridgeAnalysis: acceptors %r", self.acceptors) - logger.debug("WaterBridgeAnalysis: water bridge %r", self.water_selection) + logger.debug("WaterBridgeAnalysis: water bridge %r", + self.water_selection) if self.debug: logger.debug("Toggling debug to %r", self.debug) else: - logger.debug("WaterBridgeAnalysis: For full step-by-step debugging output use debug=True") + logger.debug("WaterBridgeAnalysis: For full step-by-step " + "debugging output use debug=True") - logger.info("Starting analysis (frame index start=%d stop=%d, step=%d)", + logger.info("Starting analysis " + "(frame index start=%d stop=%d, step=%d)", self.start, self.stop, self.step) def _donor2acceptor(self, donors, h_donors, acceptor): @@ -1080,7 +1164,8 @@ def _donor2acceptor(self, donors, h_donors, acceptor): # Code modified from p-j-smith pairs, distances = capped_distance(self.u.atoms[donors_idx].positions, self.u.atoms[acceptor].positions, - max_cutoff=self.distance, box=self.box, + max_cutoff=self.distance, + box=self.box, return_distances=True) if self.distance_type != 'heavy': tmp_donors = [h_donors[donors_idx[idx]] for idx in pairs[:,0]] @@ -1109,8 +1194,9 @@ def _donor2acceptor(self, donors, h_donors, acceptor): h = tmp_hydrogens[index] d = tmp_donors[index] a = tmp_acceptors[index] - result.append((h, d, a, self._expand_index(h), self._expand_index(a), - distances[index], angles[index])) + result.append((h, d, a, self._expand_index(h), + self._expand_index(a), + distances[index], angles[index])) return result def _single_frame(self): @@ -1134,73 +1220,102 @@ def _single_frame(self): if self.selection1_type in ('donor', 'both'): # check for direct hbond from s1 to s2 self.logger_debug("Selection 1 Donors <-> Selection 2 Acceptors") - results = self._donor2acceptor(self._s1_donors_h, self._s1_h_donors,self._s2_acceptors) + results = self._donor2acceptor( + self._s1_donors_h, self._s1_h_donors,self._s2_acceptors) for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line + h_index, d_index, a_index, (h_resname, h_resid, h_name), \ + (a_resname, a_resid, a_name), dist, angle = line water_pool[(a_resname, a_resid)] = None - selection_1.append((h_index, d_index, a_index, None, dist, angle)) + selection_1.append( + (h_index, d_index, a_index, None, dist, angle)) selection_2.append((a_resname, a_resid)) if self.order > 0: self.logger_debug("Selection 1 Donors <-> Water Acceptors") - results = self._donor2acceptor(self._s1_donors_h, self._s1_h_donors, self._water_acceptors) + results = self._donor2acceptor( + self._s1_donors_h, self._s1_h_donors, + self._water_acceptors) for line in results: h_index, d_index, a_index, (h_resname, h_resid, h_name), ( a_resname, a_resid, a_name), dist, angle = line - selection_1.append((h_index, d_index, a_index, None, dist, angle)) + selection_1.append( + (h_index, d_index, a_index, None, dist, angle)) self.logger_debug("Water Donors <-> Selection 2 Acceptors") - results = self._donor2acceptor(self._water_donors_h, self._water_h_donors, self._s2_acceptors) + results = self._donor2acceptor( + self._water_donors_h, self._water_h_donors, + self._s2_acceptors) for line in results: h_index, d_index, a_index, (h_resname, h_resid, h_name), ( a_resname, a_resid, a_name), dist, angle = line - water_pool[(h_resname, h_resid)].append((h_index, d_index, a_index, None, dist, angle)) + water_pool[(h_resname, h_resid)].append( + (h_index, d_index, a_index, None, dist, angle)) selection_2.append((a_resname, a_resid)) if self.selection1_type in ('acceptor', 'both'): self.logger_debug("Selection 2 Donors <-> Selection 1 Acceptors") - results = self._donor2acceptor(self._s2_donors_h, self._s2_h_donors, self._s1_acceptors) + results = self._donor2acceptor(self._s2_donors_h, + self._s2_h_donors, + self._s1_acceptors) for line in results: - h_index, d_index, a_index, (h_resname, h_resid, h_name), (a_resname, a_resid, a_name), dist, angle = line + h_index, d_index, a_index, (h_resname, h_resid, h_name), \ + (a_resname, a_resid, a_name), dist, angle = line water_pool[(h_resname, h_resid)] = None - selection_1.append((a_index, None, h_index, d_index, dist, angle)) + selection_1.append( + (a_index, None, h_index, d_index, dist, angle)) selection_2.append((h_resname, h_resid)) if self.order > 0: self.logger_debug("Selection 2 Donors <-> Water Acceptors") - results = self._donor2acceptor(self._s2_donors_h, self._s2_h_donors, self._water_acceptors) + results = self._donor2acceptor( + self._s2_donors_h, self._s2_h_donors, + self._water_acceptors) for line in results: h_index, d_index, a_index, (h_resname, h_resid, h_name), ( a_resname, a_resid, a_name), dist, angle = line - water_pool[(a_resname, a_resid)].append((a_index, None, h_index, d_index, dist, angle)) + water_pool[(a_resname, a_resid)].append( + (a_index, None, h_index, d_index, dist, angle)) selection_2.append((h_resname, h_resid)) self.logger_debug("Selection 1 Acceptors <-> Water Donors") - results = self._donor2acceptor(self._water_donors_h, self._water_h_donors, self._s1_acceptors) + results = self._donor2acceptor( + self._water_donors_h, self._water_h_donors, + self._s1_acceptors) for line in results: h_index, d_index, a_index, (h_resname, h_resid, h_name), ( a_resname, a_resid, a_name), dist, angle = line - selection_1.append((a_index, None, h_index, d_index, dist, angle)) + selection_1.append( + (a_index, None, h_index, d_index, dist, angle)) if self.order > 1: self.logger_debug("Water donor <-> Water Acceptors") - results = self._donor2acceptor(self._water_donors_h, self._water_h_donors, self._water_acceptors) + results = self._donor2acceptor(self._water_donors_h, + self._water_h_donors, + self._water_acceptors) for line in results: h_index, d_index, a_index, (h_resname, h_resid, h_name), ( a_resname, a_resid, a_name), dist, angle = line - water_pool[(a_resname, a_resid)].append((a_index, None, h_index, d_index, dist, angle)) - water_pool[(h_resname, h_resid)].append((h_index, d_index, a_index, None, dist, angle)) + water_pool[(a_resname, a_resid)].append( + (a_index, None, h_index, d_index, dist, angle)) + water_pool[(h_resname, h_resid)].append( + (h_index, d_index, a_index, None, dist, angle)) # solve the connectivity network - # The following code attempt to generate a water network which is formed by the class dict. - # Suppose we have a water bridge connection ARG1 to ASP3 via the two hydrogen bonds. + # The following code attempt to generate a water network which is + # formed by the class dict. + # Suppose we have a water bridge connection ARG1 to ASP3 via the two + # hydrogen bonds. # [0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180], # [2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180], # The resulting network will be - #{(0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180): {(2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180): None}} - # Where the key of the a dict will be all the hydrogen bonds starting from this nodes. - # The corresponding value of a certain key will be a dictionary whose key will be all the hydrogen bonds from + #{(0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180): + # {(2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180): None}} + # Where the key of the a dict will be all the hydrogen bonds starting + # from this nodes. + # The corresponding value of a certain key will be a dictionary whose + # key will be all the hydrogen bonds from # the destination of in the key. - # If the value of a certain key is None, which means it is reaching selection 2. + # If the value of a certain key is None, which means it is reaching + # selection 2. result = {'start': defaultdict(dict), 'water': defaultdict(dict)} @@ -1216,7 +1331,8 @@ def add_route(result, route): result['water'][route[-1]] = None # water to water for i in range(1, len(route) - 1): - result['water'][route[i]][route[i+1]] = result['water'][route[i+1]] + result['water'][route[i]][route[i+1]] = \ + result['water'][route[i+1]] # selection 1 to water result['start'][route[0]][route[1]] = result['water'][route[1]] @@ -1232,25 +1348,33 @@ def traverse_water_network(graph, node, end, route, maxdepth, result): for new_node in graph[node]: new_route = route[:] new_route.append(new_node) - new_node = self._expand_timeseries(new_node,'sele1_sele2')[3][:2] - traverse_water_network(graph, new_node, end, new_route, maxdepth, result) + new_node = self._expand_timeseries( + new_node,'sele1_sele2')[3][:2] + traverse_water_network(graph, new_node, end, new_route, + maxdepth, result) for s1 in selection_1: route = [s1, ] next_mol = self._expand_timeseries(s1,'sele1_sele2')[3][:2] - traverse_water_network(water_pool, next_mol, selection_2, route[:], self.order, result) + traverse_water_network(water_pool, next_mol, selection_2, route[:], + self.order, result) self._network.append(result['start']) - def _traverse_water_network(self, graph, current, analysis_func=None, output=None, link_func=None, **kwargs): + def _traverse_water_network(self, graph, current, analysis_func=None, + output=None, link_func=None, **kwargs): ''' - This function recursively traverses the water network self._network and finds the hydrogen bonds which connect - the current atom to the next atom. The newly found hydrogen bond will be appended to the hydrogen bonds - connecting the selection 1 to the current atom via link_func. When selection 2 is reached, the full list of - hydrogen bonds connecting the selection 1 to selection 2 will be fed into analysis_func, which will then modify - the output in place. - :param graph: The connection network describes the connection between the atoms in the water network. + This function recursively traverses the water network self._network and + finds the hydrogen bonds which connect the current atom to the next + atom. The newly found hydrogen bond will be appended to the hydrogen + bonds connecting the selection 1 to the current atom via link_func. + When selection 2 is reached, the full list of hydrogen bonds + connecting the selection 1 to selection 2 will be fed into + analysis_func, which will then modify the output in place. + :param graph: The connection network describes the connection between + the atoms in the water network. :param current: The hydrogen bonds from selection 1 until now. - :param analysis_func: The analysis function which is called to analysis the hydrogen bonds. + :param analysis_func: The analysis function which is called to analysis + the hydrogen bonds. :param output: where the result is stored. :param link_func: The new hydrogen bonds will be appended to current. :param kwargs: the keywords which are passed into the analysis_func. @@ -1263,7 +1387,8 @@ def _traverse_water_network(self, graph, current, analysis_func=None, output=Non if graph is None: # if selection 2 is reached if not analysis_func is None: - # the result is analysed by analysis_func which will change the output + # the result is analysed by analysis_func which will change the + # output analysis_func(current, output, self.u, **kwargs) else: # make sure no loop can occur @@ -1271,7 +1396,9 @@ def _traverse_water_network(self, graph, current, analysis_func=None, output=Non for node in graph: # the new hydrogen bond will be added to the existing bonds new = link_func(current, node) - self._traverse_water_network(graph[node], new, analysis_func, output, link_func, **kwargs) + self._traverse_water_network(graph[node], new, + analysis_func, output, + link_func, **kwargs) def _expand_index(self, index): ''' @@ -1286,16 +1413,20 @@ def _expand_timeseries(self, entry, output_format=None): The old is defined as the format for release up to 0.19.2. As is discussed in Issue #2177, the internal storage of the hydrogen bond information has been changed to the compact format. - The function takes in the argument `output_format` to see which output format will be chosen. - if `output_format` is not specified, the value will be taken from :attr:`output_format`. - If `output_format` is 'sele1_sele2', the output will be the old water bridge analysis format:: + The function takes in the argument `output_format` to see which output + format will be chosen. + if `output_format` is not specified, the value will be taken from + :attr:`output_format`. + If `output_format` is 'sele1_sele2', the output will be the old water + bridge analysis format:: # donor from selection 1 to acceptor in selection 2 [sele1_index, sele2_index, (sele1_resname, sele1_resid, sele1_name), (sele2_resname, sele2_resid, sele2_name), dist, angle] - If `output_format` is 'donor_acceptor', the output will be the old hydrogen bond analysis format:: + If `output_format` is 'donor_acceptor', the output will be the old + hydrogen bond analysis format:: # From donor to acceptor [donor_index, acceptor_index, @@ -1303,34 +1434,42 @@ def _expand_timeseries(self, entry, output_format=None): (acceptor_resname, acceptor_resid, acceptor_name), dist, angle] ''' output_format = output_format or self.output_format - # Expand the compact entry into atom1, which is the first index in the output and atom2, which is the second + # Expand the compact entry into atom1, which is the first index in the + # output and atom2, which is the second # entry. atom1, heavy_atom1, atom2, heavy_atom2, dist, angle = entry if output_format == 'sele1_sele2': - # If the output format is the sele1_sele2, no change will be executed + # If the output format is the sele1_sele2, no change will be + # executed atom1, atom2 = atom1, atom2 elif output_format == 'donor_acceptor': - # If the output format is donor_acceptor, use heavy atom position to check which is donor and which is + # If the output format is donor_acceptor, use heavy atom position + # to check which is donor and which is # acceptor if heavy_atom1 is None: - # atom1 is hydrogen bond acceptor and thus, the position of atom1 and atom2 are swapped. + # atom1 is hydrogen bond acceptor and thus, the position of + # atom1 and atom2 are swapped. atom1, atom2 = atom2, atom1 else: # atom1 is hydrogen bond donor, position not swapped. atom1, atom2 = atom1, atom2 else: - raise KeyError("Only 'sele1_sele2' or 'donor_acceptor' are allowed as output format") + raise KeyError( + "Only 'sele1_sele2' or 'donor_acceptor' are allowed as output " + "format") - return (atom1, atom2, self._expand_index(atom1), self._expand_index(atom2), dist, angle) + return (atom1, atom2, self._expand_index(atom1), + self._expand_index(atom2), dist, angle) def _generate_timeseries(self, output_format=None): r'''Time series of water bridges. - The output is generated per frame as is explained in :ref:`wb_Analysis_Timeseries`. - The format of output can be changed via the output_format selection. - If ``output_format="sele1_sele2"``, the hydrogen bond forms a directional - link from selection 1 to selection 2. If ``output_format="donor_acceptor"``, - for each hydrogen bond, the donor is always written before the acceptor. + The output is generated per frame as is explained in + :ref:`wb_Analysis_Timeseries`. The format of output can be changed via + the output_format selection. If ``output_format="sele1_sele2"``, the + hydrogen bond forms a directional link from selection 1 to selection 2. + If ``output_format="donor_acceptor"``, for each hydrogen bond, the + donor is always written before the acceptor. Note ---- @@ -1361,9 +1500,13 @@ def analysis(current, output, *args, **kwargs): timeseries = [] for frame in self._network: new_frame = [] - self._traverse_water_network(frame, new_frame, analysis_func=analysis, - output=new_frame, link_func=self._compact_link) - timeseries.append([self._expand_timeseries(entry, output_format) for entry in new_frame]) + self._traverse_water_network(frame, new_frame, + analysis_func=analysis, + output=new_frame, + link_func=self._compact_link) + timeseries.append([ + self._expand_timeseries(entry, output_format) + for entry in new_frame]) return timeseries timeseries = property(_generate_timeseries) @@ -1371,14 +1514,17 @@ def analysis(current, output, *args, **kwargs): def _get_network(self): r'''Network representation of the water network. - The output is generated per frame as is explained in :ref:`wb_Analysis_Network`. - Each hydrogen bond has a compact representation of :: + The output is generated per frame as is explained in + :ref:`wb_Analysis_Network`. Each hydrogen bond has a compact + representation of :: - [sele1_acceptor_idx, None, sele2_donor_idx, donor_heavy_idx, distance, angle] + [sele1_acceptor_idx, None, sele2_donor_idx, donor_heavy_idx, + distance, angle] or :: - [sele1_donor_idx, donor_heavy_idx, sele1_acceptor_idx, None, distance, angle] + [sele1_donor_idx, donor_heavy_idx, sele1_acceptor_idx, None, + distance, angle] The donor_heavy_idx is the heavy atom bonding to the proton and atoms can be retrived from the universe:: @@ -1398,10 +1544,12 @@ def set_network(self, network): @classmethod def _full_link(self, output, node): ''' - A function used in _traverse_water_network to add the new hydrogen bond to the existing bonds. + A function used in _traverse_water_network to add the new hydrogen + bond to the existing bonds. :param output: The existing hydrogen bonds from selection 1 :param node: The new hydrogen bond - :return: The hydrogen bonds from selection 1 with the new hydrogen bond added + :return: The hydrogen bonds from selection 1 with the new hydrogen + bond added ''' result = output[:] result.append(node) @@ -1410,11 +1558,13 @@ def _full_link(self, output, node): @classmethod def _compact_link(self, output, node): ''' - A function used in _traverse_water_network to add the new hydrogen bond to the existing bonds. - In this form no new list is created and thus, one bridge will only appear once. + A function used in _traverse_water_network to add the new hydrogen + bond to the existing bonds. In this form no new list is created and + thus, one bridge will only appear once. :param output: The existing hydrogen bonds from selection 1 :param node: The new hydrogen bond - :return: The hydrogen bonds from selection 1 with the new hydrogen bond added + :return: The hydrogen bonds from selection 1 with the new hydrogen + bond added ''' output.append(node) return output @@ -1425,32 +1575,35 @@ def _count_by_type_analysis(self, current, output, *args, **kwargs): :return: ''' - s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ + s1_index, to_index, (s1_resname, s1_resid, s1_name), \ + (to_resname, to_resid, to_name), dist, angle = \ self._expand_timeseries(current[0]) - from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ + from_index, s2_index, (from_resname, from_resid, from_name), \ + (s2_resname, s2_resid, s2_name), dist, angle = \ self._expand_timeseries(current[-1]) - key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + key = (s1_index, s2_index, + s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) output[key] += 1 def count_by_type(self, analysis_func=None, **kwargs): """Counts the frequency of water bridge of a specific type. If one atom *A* from *selection 1* is linked to atom *B* from - *selection 2* through one or more bridging waters, an entity will be created and - the proportion of time that this linkage exists in the whole simulation - will be calculated. + *selection 2* through one or more bridging waters, an entity will be + created and the proportion of time that this linkage exists in the + whole simulation will be calculated. - The identification of a specific type of water bridge can be modified by - supplying the analysis_func function. See :ref:`wb_count_by_type` + The identification of a specific type of water bridge can be modified + by supplying the analysis_func function. See :ref:`wb_count_by_type` for detail. Returns ------- counts : list Returns a :class:`list` containing atom indices for *A* and - *B*, residue names, residue numbers, atom names (for both A and B) and - the fraction of the total time during which the water bridge was - detected. This method returns None if method + *B*, residue names, residue numbers, atom names (for both A and B) + and the fraction of the total time during which the water bridge + was detected. This method returns None if method :meth:`WaterBridgeAnalysis.run` was not executed first. @@ -1465,27 +1618,34 @@ def count_by_type(self, analysis_func=None, **kwargs): result_dict = defaultdict(int) for frame in self._network: frame_dict = defaultdict(int) - self._traverse_water_network(frame, [], analysis_func=analysis_func, + self._traverse_water_network(frame, [], + analysis_func=analysis_func, output=frame_dict, - link_func=self._full_link, **kwargs) + link_func=self._full_link, + **kwargs) for key, value in frame_dict.items(): result_dict[key] += frame_dict[key] if output == 'combined': result = [[i for i in key] for key in result_dict] - [result[i].append(result_dict[key]/length) for i, key in enumerate(result_dict)] + [result[i].append(result_dict[key]/length) + for i, key in enumerate(result_dict)] else: - result = [(key, result_dict[key]/length) for key in result_dict] + result = [(key, + result_dict[key]/length) for key in result_dict] return result else: return None def _count_by_time_analysis(self, current, output, *args, **kwargs): - s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ + s1_index, to_index, (s1_resname, s1_resid, s1_name), \ + (to_resname, to_resid, to_name), dist, angle = \ self._expand_timeseries(current[0]) - from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ + from_index, s2_index, (from_resname, from_resid, from_name), \ + (s2_resname, s2_resid, s2_name), dist, angle = \ self._expand_timeseries(current[-1]) - key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + key = (s1_index, s2_index, + s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) output[key] += 1 def count_by_time(self, analysis_func=None, **kwargs): @@ -1507,33 +1667,40 @@ def count_by_time(self, analysis_func=None, **kwargs): result = [] for time, frame in zip(self.timesteps, self._network): result_dict = defaultdict(int) - self._traverse_water_network(frame, [], analysis_func=analysis_func, + self._traverse_water_network(frame, [], + analysis_func=analysis_func, output=result_dict, - link_func=self._full_link, **kwargs) - result.append((time, sum([result_dict[key] for key in result_dict]))) + link_func=self._full_link, + **kwargs) + result.append((time, + sum([result_dict[key] for key in result_dict]))) return result else: return None def _timesteps_by_type_analysis(self, current, output, *args, **kwargs): - s1_index, to_index, (s1_resname, s1_resid, s1_name), (to_resname, to_resid, to_name), dist, angle = \ + s1_index, to_index, (s1_resname, s1_resid, s1_name), \ + (to_resname, to_resid, to_name), dist, angle = \ self._expand_timeseries(current[0]) - from_index, s2_index, (from_resname, from_resid, from_name), (s2_resname, s2_resid, s2_name), dist, angle = \ + from_index, s2_index, (from_resname, from_resid, from_name), \ + (s2_resname, s2_resid, s2_name), dist, angle = \ self._expand_timeseries(current[-1]) - key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) + key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, + s2_resid, s2_name) output[key].append(kwargs.pop('time')) def timesteps_by_type(self, analysis_func=None, **kwargs): - """Frames during which each water bridges existed, sorted by each water bridges. + """Frames during which each water bridges existed, sorted by each water + bridges. Processes :attr:`WaterBridgeAnalysis._network` and returns a :class:`list` containing atom indices, residue names, residue - numbers (from selection 1 and selection 2) and each timestep at which the - water bridge was detected. + numbers (from selection 1 and selection 2) and each timestep at which + the water bridge was detected. Similar to :meth:`~WaterBridgeAnalysis.count_by_type` and - :meth:`~WaterBridgeAnalysis.count_by_time`, the behavior can be adjusted by - supplying an analysis_func. + :meth:`~WaterBridgeAnalysis.count_by_time`, the behavior can be + adjusted by supplying an analysis_func. Returns ------- @@ -1552,9 +1719,11 @@ def timesteps_by_type(self, analysis_func=None, **kwargs): else: timesteps = self.timesteps for time, frame in zip(timesteps, self._network): - self._traverse_water_network(frame, [], analysis_func=analysis_func, + self._traverse_water_network(frame, [], + analysis_func=analysis_func, output=result, - link_func=self._full_link, time=time, **kwargs) + link_func=self._full_link, + time=time, **kwargs) result_list = [] for key, time_list in result.items(): @@ -1576,7 +1745,8 @@ def generate_table(self, output_format=None): attribute :attr:`~WaterBridgeAnalysis.table`. The output format of :attr:`~WaterBridgeAnalysis.table` can also be - changed using output_format in a fashion similar to :attr:`WaterBridgeAnalysis.timeseries` + changed using output_format in a fashion similar to + :attr:`WaterBridgeAnalysis.timeseries` """ output_format = output_format or self.output_format if self._network == []: @@ -1592,19 +1762,24 @@ def generate_table(self, output_format=None): dtype = [ ("time", float), ("sele1_index", int), ("sele2_index", int), - ("sele1_resnm", "|U4"), ("sele1_resid", int), ("sele1_atom", "|U4"), - ("sele2_resnm", "|U4"), ("sele2_resid", int), ("sele2_atom", "|U4"), + ("sele1_resnm", "|U4"), ("sele1_resid", int), + ("sele1_atom", "|U4"), + ("sele2_resnm", "|U4"), ("sele2_resid", int), + ("sele2_atom", "|U4"), ("distance", float), ("angle", float)] elif output_format == 'donor_acceptor': dtype = [ ("time", float), ("donor_index", int), ("acceptor_index", int), - ("donor_resnm", "|U4"), ("donor_resid", int), ("donor_atom", "|U4"), - ("acceptor_resnm", "|U4"), ("acceptor_resid", int), ("acceptor_atom", "|U4"), + ("donor_resnm", "|U4"), ("donor_resid", int), + ("donor_atom", "|U4"), + ("acceptor_resnm", "|U4"), ("acceptor_resid", int), + ("acceptor_atom", "|U4"), ("distance", float), ("angle", float)] - # according to Lukas' notes below, using a recarray at this stage is ineffective - # and speedups of ~x10 can be achieved by filling a standard array, like this: + # according to Lukas' notes below, using a recarray at this stage is + # ineffective and speedups of ~x10 can be achieved by filling a + # standard array, like this: out = np.empty((num_records,), dtype=dtype) cursor = 0 # current row for t, hframe in zip(self.timesteps, timeseries): @@ -1612,9 +1787,12 @@ def generate_table(self, output_format=None): acceptor, distance, angle) in hframe: # donor|acceptor = (resname, resid, atomid) out[cursor] = (t, donor_index, acceptor_index) + \ - donor + acceptor + (distance, angle) + donor + acceptor + (distance, angle) cursor += 1 - assert cursor == num_records, "Internal Error: Not all wb records stored" + assert cursor == num_records, \ + "Internal Error: Not all wb records stored" table = out.view(np.recarray) - logger.debug("WBridge: Stored results as table with %(num_records)d entries.", vars()) + logger.debug( + "WBridge: Stored results as table with %(num_records)d entries.", + vars()) self.table = table diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index 7de4890aba5..fda7bbd6f2c 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -7,7 +7,8 @@ import MDAnalysis import MDAnalysis.analysis.hbonds -from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import WaterBridgeAnalysis +from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import ( + WaterBridgeAnalysis, ) def test_import_from_hbonds(): try: From eeedf71b897dcb7079f71d2af37a7bf4e49aa374 Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Tue, 18 Aug 2020 14:09:24 +0100 Subject: [PATCH 03/26] PEP8 --- .../hydrogenbonds/wbridge_analysis.py | 77 ++++++++++--------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py index 555c99d2934..5797b5d4024 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -684,6 +684,7 @@ def analysis(current, output, u, **kwargs): logger = logging.getLogger('MDAnalysis.analysis.WaterBridgeAnalysis') + class WaterBridgeAnalysis(AnalysisBase): """Perform a water bridge analysis @@ -871,7 +872,7 @@ def __init__(self, universe, selection1='protein', """ super(WaterBridgeAnalysis, self).__init__(universe.trajectory, - **kwargs) + **kwargs) self.water_selection = water_selection self.update_water_selection = update_water_selection # per-frame debugging output? @@ -911,7 +912,7 @@ def __init__(self, universe, selection1='protein', if self.selection1_type not in ('both', 'donor', 'acceptor'): raise ValueError('WaterBridgeAnalysis: ' 'Invalid selection type {0!s}'.format( - self.selection1_type)) + self.selection1_type)) self._network = [] # final result accessed as self.network self.timesteps = None # time for each frame @@ -921,15 +922,15 @@ def __init__(self, universe, selection1='protein', def _log_parameters(self): """Log important parameters to the logfile.""" logger.info("WaterBridgeAnalysis: selection = %r (update: %r)", - self.selection2, self.update_selection) + self.selection2, self.update_selection) logger.info("WaterBridgeAnalysis: water selection = %r (update: %r)", - self.water_selection, self.update_water_selection) + self.water_selection, self.update_water_selection) logger.info("WaterBridgeAnalysis: criterion: donor %s atom and " "acceptor atom distance <= %.3f A", self.distance_type, self.distance) logger.info("WaterBridgeAnalysis: criterion: " "angle D-H-A >= %.3f degrees", - self.angle) + self.angle) logger.info("WaterBridgeAnalysis: force field %s to guess donor and \ acceptor names", self.forcefield) @@ -940,7 +941,7 @@ def _build_residue_dict(self, selection): # The content is the hydrogen bond donor hydrogen atom names atom_group = self.u.select_atoms(selection) for residue in atom_group.residues: - if not residue.resname in self._residue_dict: + if residue.resname not in self._residue_dict: self._residue_dict[residue.resname] = defaultdict(set) for atom in residue.atoms: if atom.name in self.donors: @@ -1113,7 +1114,6 @@ def logger_debug(self, *args): if self.debug: logger.debug(*args) - def _prepare(self): # The distance for selection is defined as twice the maximum bond # length of an O-H bond (2A) plus order of water bridge times the @@ -1168,18 +1168,18 @@ def _donor2acceptor(self, donors, h_donors, acceptor): box=self.box, return_distances=True) if self.distance_type != 'heavy': - tmp_donors = [h_donors[donors_idx[idx]] for idx in pairs[:,0]] - tmp_hydrogens = [donors_idx[idx] for idx in pairs[:,0]] - tmp_acceptors = [acceptor[idx] for idx in pairs[:,1]] + tmp_donors = [h_donors[donors_idx[idx]] for idx in pairs[:, 0]] + tmp_hydrogens = [donors_idx[idx] for idx in pairs[:, 0]] + tmp_acceptors = [acceptor[idx] for idx in pairs[:, 1]] else: tmp_donors = [] tmp_hydrogens = [] tmp_acceptors = [] - for idx in range(len(pairs[:,0])): - for h in donors[donors_idx[pairs[idx,0]]]: - tmp_donors.append(donors_idx[pairs[idx,0]]) + for idx in range(len(pairs[:, 0])): + for h in donors[donors_idx[pairs[idx, 0]]]: + tmp_donors.append(donors_idx[pairs[idx, 0]]) tmp_hydrogens.append(h) - tmp_acceptors.append(acceptor[pairs[idx,1]]) + tmp_acceptors.append(acceptor[pairs[idx, 1]]) angles = np.rad2deg( calc_angles( @@ -1221,10 +1221,10 @@ def _single_frame(self): # check for direct hbond from s1 to s2 self.logger_debug("Selection 1 Donors <-> Selection 2 Acceptors") results = self._donor2acceptor( - self._s1_donors_h, self._s1_h_donors,self._s2_acceptors) + self._s1_donors_h, self._s1_h_donors, self._s2_acceptors) for line in results: h_index, d_index, a_index, (h_resname, h_resid, h_name), \ - (a_resname, a_resid, a_name), dist, angle = line + (a_resname, a_resid, a_name), dist, angle = line water_pool[(a_resname, a_resid)] = None selection_1.append( (h_index, d_index, a_index, None, dist, angle)) @@ -1236,7 +1236,7 @@ def _single_frame(self): self._water_acceptors) for line in results: h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line + a_resname, a_resid, a_name), dist, angle = line selection_1.append( (h_index, d_index, a_index, None, dist, angle)) @@ -1246,7 +1246,7 @@ def _single_frame(self): self._s2_acceptors) for line in results: h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line + a_resname, a_resid, a_name), dist, angle = line water_pool[(h_resname, h_resid)].append( (h_index, d_index, a_index, None, dist, angle)) selection_2.append((a_resname, a_resid)) @@ -1258,7 +1258,7 @@ def _single_frame(self): self._s1_acceptors) for line in results: h_index, d_index, a_index, (h_resname, h_resid, h_name), \ - (a_resname, a_resid, a_name), dist, angle = line + (a_resname, a_resid, a_name), dist, angle = line water_pool[(h_resname, h_resid)] = None selection_1.append( (a_index, None, h_index, d_index, dist, angle)) @@ -1271,7 +1271,7 @@ def _single_frame(self): self._water_acceptors) for line in results: h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line + a_resname, a_resid, a_name), dist, angle = line water_pool[(a_resname, a_resid)].append( (a_index, None, h_index, d_index, dist, angle)) selection_2.append((h_resname, h_resid)) @@ -1282,7 +1282,7 @@ def _single_frame(self): self._s1_acceptors) for line in results: h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line + a_resname, a_resid, a_name), dist, angle = line selection_1.append( (a_index, None, h_index, d_index, dist, angle)) @@ -1293,7 +1293,7 @@ def _single_frame(self): self._water_acceptors) for line in results: h_index, d_index, a_index, (h_resname, h_resid, h_name), ( - a_resname, a_resid, a_name), dist, angle = line + a_resname, a_resid, a_name), dist, angle = line water_pool[(a_resname, a_resid)].append( (a_index, None, h_index, d_index, dist, angle)) water_pool[(h_resname, h_resid)].append( @@ -1307,7 +1307,7 @@ def _single_frame(self): # [0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180], # [2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180], # The resulting network will be - #{(0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180): + # {(0,1,('ARG',1,'O'), ('SOL',2,'HW1'), 3.0,180): # {(2,3,('SOL',2,'HW2'),('ASP',3,'OD1'), 3.0,180): None}} # Where the key of the a dict will be all the hydrogen bonds starting # from this nodes. @@ -1349,12 +1349,12 @@ def traverse_water_network(graph, node, end, route, maxdepth, result): new_route = route[:] new_route.append(new_node) new_node = self._expand_timeseries( - new_node,'sele1_sele2')[3][:2] + new_node, 'sele1_sele2')[3][:2] traverse_water_network(graph, new_node, end, new_route, maxdepth, result) for s1 in selection_1: route = [s1, ] - next_mol = self._expand_timeseries(s1,'sele1_sele2')[3][:2] + next_mol = self._expand_timeseries(s1, 'sele1_sele2')[3][:2] traverse_water_network(water_pool, next_mol, selection_2, route[:], self.order, result) @@ -1386,7 +1386,7 @@ def _traverse_water_network(self, graph, current, analysis_func=None, if graph is None: # if selection 2 is reached - if not analysis_func is None: + if analysis_func is not None: # the result is analysed by analysis_func which will change the # output analysis_func(current, output, self.u, **kwargs) @@ -1494,6 +1494,7 @@ def _generate_timeseries(self, output_format=None): ''' output_format = output_format or self.output_format + def analysis(current, output, *args, **kwargs): output = current @@ -1576,11 +1577,11 @@ def _count_by_type_analysis(self, current, output, *args, **kwargs): ''' s1_index, to_index, (s1_resname, s1_resid, s1_name), \ - (to_resname, to_resid, to_name), dist, angle = \ - self._expand_timeseries(current[0]) + (to_resname, to_resid, to_name), dist, angle = \ + self._expand_timeseries(current[0]) from_index, s2_index, (from_resname, from_resid, from_name), \ - (s2_resname, s2_resid, s2_name), dist, angle = \ - self._expand_timeseries(current[-1]) + (s2_resname, s2_resid, s2_name), dist, angle = \ + self._expand_timeseries(current[-1]) key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) output[key] += 1 @@ -1639,11 +1640,11 @@ def count_by_type(self, analysis_func=None, **kwargs): def _count_by_time_analysis(self, current, output, *args, **kwargs): s1_index, to_index, (s1_resname, s1_resid, s1_name), \ - (to_resname, to_resid, to_name), dist, angle = \ - self._expand_timeseries(current[0]) + (to_resname, to_resid, to_name), dist, angle = \ + self._expand_timeseries(current[0]) from_index, s2_index, (from_resname, from_resid, from_name), \ - (s2_resname, s2_resid, s2_name), dist, angle = \ - self._expand_timeseries(current[-1]) + (s2_resname, s2_resid, s2_name), dist, angle = \ + self._expand_timeseries(current[-1]) key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) output[key] += 1 @@ -1680,11 +1681,11 @@ def count_by_time(self, analysis_func=None, **kwargs): def _timesteps_by_type_analysis(self, current, output, *args, **kwargs): s1_index, to_index, (s1_resname, s1_resid, s1_name), \ - (to_resname, to_resid, to_name), dist, angle = \ - self._expand_timeseries(current[0]) + (to_resname, to_resid, to_name), dist, angle = \ + self._expand_timeseries(current[0]) from_index, s2_index, (from_resname, from_resid, from_name), \ - (s2_resname, s2_resid, s2_name), dist, angle = \ - self._expand_timeseries(current[-1]) + (s2_resname, s2_resid, s2_name), dist, angle = \ + self._expand_timeseries(current[-1]) key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) output[key].append(kwargs.pop('time')) From b007a63e522dce8e3d3c1b0dac8f5c9144ced31a Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Tue, 18 Aug 2020 16:21:06 +0100 Subject: [PATCH 04/26] PEP8 --- .../hydrogenbonds/wbridge_analysis.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py index 5797b5d4024..a4bb31043c5 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -922,9 +922,9 @@ def __init__(self, universe, selection1='protein', def _log_parameters(self): """Log important parameters to the logfile.""" logger.info("WaterBridgeAnalysis: selection = %r (update: %r)", - self.selection2, self.update_selection) + self.selection2, self.update_selection) logger.info("WaterBridgeAnalysis: water selection = %r (update: %r)", - self.water_selection, self.update_water_selection) + self.water_selection, self.update_water_selection) logger.info("WaterBridgeAnalysis: criterion: donor %s atom and " "acceptor atom distance <= %.3f A", self.distance_type, self.distance) @@ -1577,11 +1577,11 @@ def _count_by_type_analysis(self, current, output, *args, **kwargs): ''' s1_index, to_index, (s1_resname, s1_resid, s1_name), \ - (to_resname, to_resid, to_name), dist, angle = \ - self._expand_timeseries(current[0]) + (to_resname, to_resid, to_name), dist, angle = \ + self._expand_timeseries(current[0]) from_index, s2_index, (from_resname, from_resid, from_name), \ - (s2_resname, s2_resid, s2_name), dist, angle = \ - self._expand_timeseries(current[-1]) + (s2_resname, s2_resid, s2_name), dist, angle = \ + self._expand_timeseries(current[-1]) key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) output[key] += 1 @@ -1640,11 +1640,11 @@ def count_by_type(self, analysis_func=None, **kwargs): def _count_by_time_analysis(self, current, output, *args, **kwargs): s1_index, to_index, (s1_resname, s1_resid, s1_name), \ - (to_resname, to_resid, to_name), dist, angle = \ - self._expand_timeseries(current[0]) + (to_resname, to_resid, to_name), dist, angle = \ + self._expand_timeseries(current[0]) from_index, s2_index, (from_resname, from_resid, from_name), \ - (s2_resname, s2_resid, s2_name), dist, angle = \ - self._expand_timeseries(current[-1]) + (s2_resname, s2_resid, s2_name), dist, angle = \ + self._expand_timeseries(current[-1]) key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) output[key] += 1 @@ -1681,11 +1681,11 @@ def count_by_time(self, analysis_func=None, **kwargs): def _timesteps_by_type_analysis(self, current, output, *args, **kwargs): s1_index, to_index, (s1_resname, s1_resid, s1_name), \ - (to_resname, to_resid, to_name), dist, angle = \ - self._expand_timeseries(current[0]) + (to_resname, to_resid, to_name), dist, angle = \ + self._expand_timeseries(current[0]) from_index, s2_index, (from_resname, from_resid, from_name), \ - (s2_resname, s2_resid, s2_name), dist, angle = \ - self._expand_timeseries(current[-1]) + (s2_resname, s2_resid, s2_name), dist, angle = \ + self._expand_timeseries(current[-1]) key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) output[key].append(kwargs.pop('time')) From 267ef7522d9ae6867130bc0f1f75e5d37db452cd Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Tue, 18 Aug 2020 16:26:08 +0100 Subject: [PATCH 05/26] PEP8 --- .../hydrogenbonds/wbridge_analysis.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py index a4bb31043c5..0e45567e42f 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -1576,12 +1576,12 @@ def _count_by_type_analysis(self, current, output, *args, **kwargs): :return: ''' - s1_index, to_index, (s1_resname, s1_resid, s1_name), \ - (to_resname, to_resid, to_name), dist, angle = \ + s1_index, to_index, s1, to_residue, dist, angle = \ self._expand_timeseries(current[0]) - from_index, s2_index, (from_resname, from_resid, from_name), \ - (s2_resname, s2_resid, s2_name), dist, angle = \ + s1_resname, s1_resid, s1_name = s1 + from_index, s2_index, from_residue, s2, dist, angle = \ self._expand_timeseries(current[-1]) + s2_resname, s2_resid, s2_name = s2 key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) output[key] += 1 @@ -1639,12 +1639,12 @@ def count_by_type(self, analysis_func=None, **kwargs): return None def _count_by_time_analysis(self, current, output, *args, **kwargs): - s1_index, to_index, (s1_resname, s1_resid, s1_name), \ - (to_resname, to_resid, to_name), dist, angle = \ + s1_index, to_index, s1, to_residue, dist, angle = \ self._expand_timeseries(current[0]) - from_index, s2_index, (from_resname, from_resid, from_name), \ - (s2_resname, s2_resid, s2_name), dist, angle = \ + s1_resname, s1_resid, s1_name = s1 + from_index, s2_index, from_residue, s2, dist, angle = \ self._expand_timeseries(current[-1]) + s2_resname, s2_resid, s2_name = s2 key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) output[key] += 1 @@ -1680,12 +1680,12 @@ def count_by_time(self, analysis_func=None, **kwargs): return None def _timesteps_by_type_analysis(self, current, output, *args, **kwargs): - s1_index, to_index, (s1_resname, s1_resid, s1_name), \ - (to_resname, to_resid, to_name), dist, angle = \ + s1_index, to_index, s1, to_residue, dist, angle = \ self._expand_timeseries(current[0]) - from_index, s2_index, (from_resname, from_resid, from_name), \ - (s2_resname, s2_resid, s2_name), dist, angle = \ + s1_resname, s1_resid, s1_name = s1 + from_index, s2_index, from_residue, s2, dist, angle = \ self._expand_timeseries(current[-1]) + s2_resname, s2_resid, s2_name = s2 key = (s1_index, s2_index, s1_resname, s1_resid, s1_name, s2_resname, s2_resid, s2_name) output[key].append(kwargs.pop('time')) From 306fde18be48de283ab73c3a8b5ed675656c79c6 Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Tue, 18 Aug 2020 16:32:28 +0100 Subject: [PATCH 06/26] fix import --- package/MDAnalysis/analysis/hbonds/wbridge_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py index 1f0138c0ec9..ce4be0cc412 100644 --- a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py @@ -36,5 +36,5 @@ This module is moved to :class:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis`.""" -from MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis import ( +from ..hydrogenbonds.wbridge_analysis import ( WaterBridgeAnalysis,) From e5214359e9075b1c6349c30f47f531a7bfec02fe Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Tue, 18 Aug 2020 17:23:35 +0100 Subject: [PATCH 07/26] Update CHANGELOG --- package/CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/package/CHANGELOG b/package/CHANGELOG index 81bee146f7d..289409fc3fe 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -102,6 +102,7 @@ Changes * Sets the minimal RDKit version for CI to 2020.03.1 (Issue #2827, PR #2831) * Removes deprecated waterdynamics.HydrogenBondLifetimes (PR #2842) * Make NeighborSearch return empty atomgroup, residue, segments instead of list (Issue #2892, PR #2907) + * Move water bridge analysis from hbonds to hydrogenbonds and deprecate hydrogen bond analysis in water bridge analysis (PR #2913) Deprecations From c33d2dce2b997bac9d4175568d22f8c74b67d762 Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Tue, 18 Aug 2020 17:34:28 +0100 Subject: [PATCH 08/26] DeprecationWarning --- package/MDAnalysis/analysis/hbonds/wbridge_analysis.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py index ce4be0cc412..c81daa6df04 100644 --- a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py @@ -36,5 +36,14 @@ This module is moved to :class:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis`.""" +import warnings + from ..hydrogenbonds.wbridge_analysis import ( WaterBridgeAnalysis,) + +warnings.warn( + "This module has been moved to MDAnalysis.analysis.hydrogenbonds" + "It will be removed in MDAnalysis version 2.0" + "Please use MDAnalysis.analysis.hydrogenbonds.wbridge_analysis instead.", + category=DeprecationWarning + ) From ce5c55236bca0fb03e08d981f6e83d2d7e6fa542 Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Tue, 18 Aug 2020 17:35:40 +0100 Subject: [PATCH 09/26] PEP8 --- package/MDAnalysis/analysis/hbonds/wbridge_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py index c81daa6df04..d5ded4540bd 100644 --- a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py @@ -43,7 +43,7 @@ warnings.warn( "This module has been moved to MDAnalysis.analysis.hydrogenbonds" - "It will be removed in MDAnalysis version 2.0" - "Please use MDAnalysis.analysis.hydrogenbonds.wbridge_analysis instead.", + "It will be removed in MDAnalysis version 2.0. Please use " + "MDAnalysis.analysis.hydrogenbonds.wbridge_analysis instead.", category=DeprecationWarning ) From 54efe5eabf35d4ad2d9e3261206f7deb1cef9105 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Tue, 29 Sep 2020 16:31:01 +0100 Subject: [PATCH 10/26] Update package/CHANGELOG Co-authored-by: Irfan Alibay --- package/CHANGELOG | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 289409fc3fe..ed756bcc999 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -102,7 +102,8 @@ Changes * Sets the minimal RDKit version for CI to 2020.03.1 (Issue #2827, PR #2831) * Removes deprecated waterdynamics.HydrogenBondLifetimes (PR #2842) * Make NeighborSearch return empty atomgroup, residue, segments instead of list (Issue #2892, PR #2907) - * Move water bridge analysis from hbonds to hydrogenbonds and deprecate hydrogen bond analysis in water bridge analysis (PR #2913) + * Move water bridge analysis from hbonds to hydrogenbonds and deprecate + hydrogen bond analysis in water bridge analysis (Issue #2739 PR #2913) Deprecations From e17e7427fbade0c4e4ecaddc70fafe6bea079bd6 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Tue, 29 Sep 2020 16:31:23 +0100 Subject: [PATCH 11/26] Update package/MDAnalysis/analysis/hbonds/wbridge_analysis.py Co-authored-by: Irfan Alibay --- package/MDAnalysis/analysis/hbonds/wbridge_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py index d5ded4540bd..edc1478c727 100644 --- a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py @@ -33,7 +33,7 @@ .. _`@xiki-tempula`: https://github.com/xiki-tempula -This module is moved to +This module has moved to :class:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis`.""" import warnings From 728f154e2dd0a564b15dd74dc337618bc2e38e56 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Tue, 29 Sep 2020 16:33:27 +0100 Subject: [PATCH 12/26] Update package/MDAnalysis/analysis/hydrogenbonds/__init__.py Co-authored-by: Irfan Alibay --- package/MDAnalysis/analysis/hydrogenbonds/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py index 25f6dd66383..1f74c7612cd 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/__init__.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/__init__.py @@ -23,6 +23,7 @@ __all__ = [ 'HydrogenBondAnalysis' + 'WaterBridgeAnalysis' ] from .hbond_analysis import HydrogenBondAnalysis From 8658ab39de923061f36586ac47dbdb421907b952 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Tue, 29 Sep 2020 17:42:33 +0100 Subject: [PATCH 13/26] Update wbridge_analysis.py --- package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py index 0e45567e42f..51d51577d8e 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -696,7 +696,6 @@ class WaterBridgeAnalysis(AnalysisBase): .. versionadded:: 0.17.0 - """ # use tuple(set()) here so that one can just copy&paste names from the # table; set() takes care for removing duplicates. At the end the From 7fad43d3701ab77a3416332de1c36ded8b5612b2 Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Wed, 30 Sep 2020 14:55:12 +0100 Subject: [PATCH 14/26] warning test --- testsuite/MDAnalysisTests/analysis/test_wbridge.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index fda7bbd6f2c..ecc28916c9b 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -18,6 +18,10 @@ def test_import_from_hbonds(): "importing WaterBridgeAnalysis from " "MDAnalysis.analysis.hbonds failed.'") +def test_import_warning(): + with pytest.warns(DeprecationWarning): + from MDAnalysis.analysis.hbonds import WaterBridgeAnalysis + class TestWaterBridgeAnalysis(object): @staticmethod @pytest.fixture(scope='class') From 05ef61545aeabd79d638e146a7e24a39177947af Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Wed, 30 Sep 2020 15:09:43 +0100 Subject: [PATCH 15/26] PEP8 --- testsuite/MDAnalysisTests/analysis/test_wbridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index ecc28916c9b..8b2104e0d43 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -6,7 +6,6 @@ import pytest import MDAnalysis -import MDAnalysis.analysis.hbonds from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import ( WaterBridgeAnalysis, ) @@ -22,6 +21,7 @@ def test_import_warning(): with pytest.warns(DeprecationWarning): from MDAnalysis.analysis.hbonds import WaterBridgeAnalysis + class TestWaterBridgeAnalysis(object): @staticmethod @pytest.fixture(scope='class') From d7305f79da0f1679d38836ddc1f5787fbc3bf98e Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Wed, 30 Sep 2020 15:11:01 +0100 Subject: [PATCH 16/26] PEP8 --- testsuite/MDAnalysisTests/analysis/test_wbridge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index 8b2104e0d43..096ff45129f 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -17,6 +17,7 @@ def test_import_from_hbonds(): "importing WaterBridgeAnalysis from " "MDAnalysis.analysis.hbonds failed.'") + def test_import_warning(): with pytest.warns(DeprecationWarning): from MDAnalysis.analysis.hbonds import WaterBridgeAnalysis From 01f0297b687682d914a853c6dac2aad83c80ed0a Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Wed, 30 Sep 2020 15:17:49 +0100 Subject: [PATCH 17/26] warning --- .../MDAnalysisTests/analysis/test_wbridge.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index 096ff45129f..c7357646559 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -19,8 +19,21 @@ def test_import_from_hbonds(): def test_import_warning(): - with pytest.warns(DeprecationWarning): - from MDAnalysis.analysis.hbonds import WaterBridgeAnalysis + with pytest.warns(DeprecationWarning) as record: + from MDAnalysis.analysis.hbonds.wbridge_analysis import ( + WaterBridgeAnalysis, ) + assert len(record) == 2 + assert record[0].message.args[0] == 'This module is deprecated as of '\ + 'MDAnalysis version 1.0.It will be '\ + 'removed in MDAnalysis version 2.0'\ + 'Please use '\ + 'MDAnalysis.analysis.hydrogenbonds.'\ + 'hbond_analysis instead.' + assert record[1].message.args[0] == 'This module has been moved to '\ + 'MDAnalysis.analysis.hydrogenbondsIt '\ + 'will be removed in MDAnalysis version'\ + ' 2.0. Please use MDAnalysis.analysis.'\ + 'hydrogenbonds.wbridge_analysis instead.' class TestWaterBridgeAnalysis(object): From 6c0f56b0c122ee92f14a0ed77a3c76946bf2b539 Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Wed, 30 Sep 2020 15:19:18 +0100 Subject: [PATCH 18/26] PEP8 --- testsuite/MDAnalysisTests/analysis/test_wbridge.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index c7357646559..7b2db0e44cf 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -31,9 +31,10 @@ def test_import_warning(): 'hbond_analysis instead.' assert record[1].message.args[0] == 'This module has been moved to '\ 'MDAnalysis.analysis.hydrogenbondsIt '\ - 'will be removed in MDAnalysis version'\ - ' 2.0. Please use MDAnalysis.analysis.'\ - 'hydrogenbonds.wbridge_analysis instead.' + 'will be removed in MDAnalysis '\ + 'version 2.0. Please use MDAnalysis.'\ + 'analysis.hydrogenbonds.'\ + 'wbridge_analysis instead.' class TestWaterBridgeAnalysis(object): From e852f7a18a37f1494d233ea84362a1711fb642e4 Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Wed, 30 Sep 2020 15:54:45 +0100 Subject: [PATCH 19/26] catch warning --- testsuite/MDAnalysisTests/analysis/test_wbridge.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index 7b2db0e44cf..2c3ef34aec8 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -6,8 +6,6 @@ import pytest import MDAnalysis -from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import ( - WaterBridgeAnalysis, ) def test_import_from_hbonds(): try: @@ -37,6 +35,9 @@ def test_import_warning(): 'wbridge_analysis instead.' +from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import ( + WaterBridgeAnalysis, ) + class TestWaterBridgeAnalysis(object): @staticmethod @pytest.fixture(scope='class') From 946a367d70eaccbc6dd0b419dfb9d072d20c07db Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Wed, 30 Sep 2020 16:10:28 +0100 Subject: [PATCH 20/26] arrange order --- .../MDAnalysisTests/analysis/test_wbridge.py | 28 ++++++------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index 2c3ef34aec8..61249af28ba 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -7,6 +7,14 @@ import MDAnalysis + +def test_import_warning(): + wmsg = 'Please use MDAnalysis.analysis.hydrogenbonds.wbridge_analysis' + with pytest.warns(DeprecationWarning, match=wmsg): + from MDAnalysis.analysis.hbonds.wbridge_analysis import ( + WaterBridgeAnalysis, ) + + def test_import_from_hbonds(): try: from MDAnalysis.analysis.hbonds import WaterBridgeAnalysis @@ -16,28 +24,10 @@ def test_import_from_hbonds(): "MDAnalysis.analysis.hbonds failed.'") -def test_import_warning(): - with pytest.warns(DeprecationWarning) as record: - from MDAnalysis.analysis.hbonds.wbridge_analysis import ( - WaterBridgeAnalysis, ) - assert len(record) == 2 - assert record[0].message.args[0] == 'This module is deprecated as of '\ - 'MDAnalysis version 1.0.It will be '\ - 'removed in MDAnalysis version 2.0'\ - 'Please use '\ - 'MDAnalysis.analysis.hydrogenbonds.'\ - 'hbond_analysis instead.' - assert record[1].message.args[0] == 'This module has been moved to '\ - 'MDAnalysis.analysis.hydrogenbondsIt '\ - 'will be removed in MDAnalysis '\ - 'version 2.0. Please use MDAnalysis.'\ - 'analysis.hydrogenbonds.'\ - 'wbridge_analysis instead.' - - from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import ( WaterBridgeAnalysis, ) + class TestWaterBridgeAnalysis(object): @staticmethod @pytest.fixture(scope='class') From 6f4517c816a194e8f419a8d92ec9d06cf3e119ef Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Wed, 30 Sep 2020 16:40:17 +0100 Subject: [PATCH 21/26] fix loading issue --- .../MDAnalysisTests/analysis/test_wbridge.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index 61249af28ba..7b5ccd6c839 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -1,18 +1,14 @@ from io import StringIO from collections import defaultdict +from importlib import reload from numpy.testing import ( assert_equal, assert_array_equal,) import pytest import MDAnalysis - - -def test_import_warning(): - wmsg = 'Please use MDAnalysis.analysis.hydrogenbonds.wbridge_analysis' - with pytest.warns(DeprecationWarning, match=wmsg): - from MDAnalysis.analysis.hbonds.wbridge_analysis import ( - WaterBridgeAnalysis, ) +from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import ( + WaterBridgeAnalysis, ) def test_import_from_hbonds(): @@ -24,8 +20,10 @@ def test_import_from_hbonds(): "MDAnalysis.analysis.hbonds failed.'") -from MDAnalysis.analysis.hydrogenbonds.wbridge_analysis import ( - WaterBridgeAnalysis, ) +def test_import_warning(): + wmsg = 'Please use MDAnalysis.analysis.hydrogenbonds.wbridge_analysis' + with pytest.warns(DeprecationWarning, match=wmsg): + reload(MDAnalysis.analysis.hbonds.wbridge_analysis) class TestWaterBridgeAnalysis(object): From da262bf930d2fdf182f265ad4fe0db0dbb15720e Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Sat, 24 Oct 2020 11:02:16 +0100 Subject: [PATCH 22/26] remove the file --- .../analysis/hbonds/wbridge_analysis.py | 49 ------------------- .../MDAnalysisTests/analysis/test_wbridge.py | 16 ------ 2 files changed, 65 deletions(-) delete mode 100644 package/MDAnalysis/analysis/hbonds/wbridge_analysis.py diff --git a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py deleted file mode 100644 index edc1478c727..00000000000 --- a/package/MDAnalysis/analysis/hbonds/wbridge_analysis.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 -# -# MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors -# (see the file AUTHORS for the full list of names) -# -# Released under the GNU Public Licence, v2 or any higher version -# -# Please cite your use of MDAnalysis in published work: -# -# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, -# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. -# MDAnalysis: A Python package for the rapid analysis of molecular dynamics -# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th -# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. -# doi: 10.25080/majora-629e541a-00e -# -# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. -# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. -# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 -# - -# Water Bridge Analysis -r"""Water Bridge analysis --- :mod:`MDAnalysis.analysis.hbonds.WaterBridgeAnalysis` -=============================================================================== - -:Author: Zhiyi Wu -:Year: 2017-2018 -:Copyright: GNU Public License v3 -:Maintainer: Zhiyi Wu , `@xiki-tempula`_ on GitHub - - -.. _`@xiki-tempula`: https://github.com/xiki-tempula - -This module has moved to -:class:`MDAnalysis.analysis.hydrogenbonds.WaterBridgeAnalysis`.""" - -import warnings - -from ..hydrogenbonds.wbridge_analysis import ( - WaterBridgeAnalysis,) - -warnings.warn( - "This module has been moved to MDAnalysis.analysis.hydrogenbonds" - "It will be removed in MDAnalysis version 2.0. Please use " - "MDAnalysis.analysis.hydrogenbonds.wbridge_analysis instead.", - category=DeprecationWarning - ) diff --git a/testsuite/MDAnalysisTests/analysis/test_wbridge.py b/testsuite/MDAnalysisTests/analysis/test_wbridge.py index 7b5ccd6c839..8d0e9ee789e 100644 --- a/testsuite/MDAnalysisTests/analysis/test_wbridge.py +++ b/testsuite/MDAnalysisTests/analysis/test_wbridge.py @@ -1,6 +1,5 @@ from io import StringIO from collections import defaultdict -from importlib import reload from numpy.testing import ( assert_equal, assert_array_equal,) @@ -11,21 +10,6 @@ WaterBridgeAnalysis, ) -def test_import_from_hbonds(): - try: - from MDAnalysis.analysis.hbonds import WaterBridgeAnalysis - except ImportError: - raise AssertionError("Issue #2064 not fixed: " - "importing WaterBridgeAnalysis from " - "MDAnalysis.analysis.hbonds failed.'") - - -def test_import_warning(): - wmsg = 'Please use MDAnalysis.analysis.hydrogenbonds.wbridge_analysis' - with pytest.warns(DeprecationWarning, match=wmsg): - reload(MDAnalysis.analysis.hbonds.wbridge_analysis) - - class TestWaterBridgeAnalysis(object): @staticmethod @pytest.fixture(scope='class') From 53a1913f2b14f2324e43341c470256e2e9699e26 Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Sat, 24 Oct 2020 13:31:54 +0100 Subject: [PATCH 23/26] fix test --- package/CHANGELOG | 3 +-- package/MDAnalysis/analysis/hbonds/__init__.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 28c4db067aa..822afa14801 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -122,8 +122,7 @@ Changes `__call__` (Issue #2860, PR #2859) * deprecated ``analysis.helanal`` module has been removed in favour of ``analysis.helix_analysis`` (PR #2929) - * Move water bridge analysis from hbonds to hydrogenbonds and deprecate - hydrogen bond analysis in water bridge analysis (Issue #2739 PR #2913) + * Move water bridge analysis from hbonds to hydrogenbonds (Issue #2739 PR #2913) Deprecations diff --git a/package/MDAnalysis/analysis/hbonds/__init__.py b/package/MDAnalysis/analysis/hbonds/__init__.py index c9dd578850f..b78399c8b9d 100644 --- a/package/MDAnalysis/analysis/hbonds/__init__.py +++ b/package/MDAnalysis/analysis/hbonds/__init__.py @@ -29,4 +29,3 @@ from .hbond_analysis import HydrogenBondAnalysis from .hbond_autocorrel import HydrogenBondAutoCorrel, find_hydrogen_donors -from .wbridge_analysis import WaterBridgeAnalysis From fa1d1877cb46bf6012c4a4cc76b2ecf3a00bfda8 Mon Sep 17 00:00:00 2001 From: zhiyiwu Date: Sat, 24 Oct 2020 17:27:26 +0100 Subject: [PATCH 24/26] Update wbridge_analysis.rst --- .../source/documentation_pages/analysis/wbridge_analysis.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/doc/sphinx/source/documentation_pages/analysis/wbridge_analysis.rst b/package/doc/sphinx/source/documentation_pages/analysis/wbridge_analysis.rst index b022b17c4a4..ea9ab08ee18 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis/wbridge_analysis.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis/wbridge_analysis.rst @@ -1 +1 @@ -.. automodule:: MDAnalysis.analysis.hbonds.wbridge_analysis +.. automodule:: MDAnalysis.analysis.hydrogenbonds.wbridge_analysis From d7ce7e54e31f2d3320e5c2c5643d5da92b959f66 Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Wed, 27 Jan 2021 21:01:50 +0000 Subject: [PATCH 25/26] fix doc string waring --- package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py index 51d51577d8e..9aa0e6d3963 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -586,7 +586,7 @@ def analysis(current, output, **kwargs): Suppose we want to count - the **number** of water molecules involved in bridging selection 1 to - selection 2. + selection 2. - only if water bridge terminates in atom name **OD1 of ASP**. - only when water bridge is joined by less than **two** water. From e92d8ce219d6f1d6361f10ba36ddf0ee5e4902cf Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Wed, 27 Jan 2021 21:10:56 +0000 Subject: [PATCH 26/26] fix doc --- .../MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py index 9aa0e6d3963..3836246481d 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/wbridge_analysis.py @@ -187,14 +187,14 @@ def network2timeseries(network, timeseries): Donor and acceptor heavy atoms are detected from atom names. The current defaults are appropriate for the CHARMM27 and GLYCAM06 force fields as defined -in Table `Default atom names for hydrogen bonding analysis`_. +in Table `Default atom names for water bridge analysis`_. Hydrogen atoms bonded to a donor are searched based on its distance to the donor. The algorithm searches for all hydrogens (name "H*" or name "[123]H" or type "H") in the same residue as the donor atom within a cut-off distance of 1.2 Å. -.. _Default atom names for hydrogen bonding analysis: +.. _Default atom names for water bridge analysis: .. table:: Default heavy atom names for CHARMM27 force field. @@ -702,7 +702,7 @@ class WaterBridgeAnalysis(AnalysisBase): # DEFAULT_DONORS and DEFAULT_ACCEPTORS should simply be tuples. #: default heavy atom names whose hydrogens are treated as *donors* - #: (see :ref:`Default atom names for hydrogen bonding analysis`); + #: (see :ref:`Default atom names for water bridge analysis`); #: use the keyword `donors` to add a list of additional donor names. DEFAULT_DONORS = { 'CHARMM27': tuple( @@ -712,7 +712,7 @@ class WaterBridgeAnalysis(AnalysisBase): 'other': tuple(set([]))} #: default atom names that are treated as hydrogen *acceptors* - #: (see :ref:`Default atom names for hydrogen bonding analysis`); + #: (see :ref:`Default atom names for water bridge analysis`); #: use the keyword `acceptors` to add a list of additional acceptor names. DEFAULT_ACCEPTORS = { 'CHARMM27': tuple(