From 2a2bcf88ac14f737f7721bfd4b03dfe358912d5f Mon Sep 17 00:00:00 2001 From: thempel Date: Fri, 7 Apr 2017 12:06:34 -0500 Subject: [PATCH 1/8] [docs] added misc section on existing traj import --- docs/examples.rst | 1 + docs/examples/example7.rst | 6 + examples/tutorial/7_example_misc.ipynb | 270 +++++++++++++++++++++++++ 3 files changed, 277 insertions(+) create mode 100644 docs/examples/example7.rst create mode 100755 examples/tutorial/7_example_misc.ipynb diff --git a/docs/examples.rst b/docs/examples.rst index 34d0a48..2943a8a 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -10,3 +10,4 @@ Examples Notebooks examples/example4 examples/example5 examples/example6 + examples/example7 diff --git a/docs/examples/example7.rst b/docs/examples/example7.rst new file mode 100644 index 0000000..236a308 --- /dev/null +++ b/docs/examples/example7.rst @@ -0,0 +1,6 @@ +.. _example7: + +Example 7 - Miscellaneous +========================= + +.. notebook:: examples/tutorial/7_example_misc.ipynb diff --git a/examples/tutorial/7_example_misc.ipynb b/examples/tutorial/7_example_misc.ipynb new file mode 100755 index 0000000..77f8631 --- /dev/null +++ b/examples/tutorial/7_example_misc.ipynb @@ -0,0 +1,270 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Importing existing trajectory data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In many cases, some trajectory data already exists before running an adaptive simulation. It is thus most efficiently to import this data into the framework. This works in principle by creating `Trajectory` objects and adding them to the `Project`. Since all of the trajectory-related data however is stored in the `Engine` object that generated it, this needs to be created as well." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "import sys, os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "from adaptivemd import Project" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Let's open our `tutorial` project by its name. If you completed the previous example this should all work out of the box." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "project = Project('tutorial')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Open all connections to the `MongoDB` and `Session` so we can get started." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an import `Engine`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from adaptivemd import Trajectory\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "pdb_file = File('file://init.pdb').named('initial_pdb').load()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since it is not desired to expand the trajectories at this point, system and integrator files are not given. In principle, if compatible restart files are available, one could create a complete engine and expand existing trajectories." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import_engine = OpenMMEngine(pdb_file=pdb_file,\n", + " system_file=None,\n", + " integrator_file=None,\n", + " args=None\n", + " ).named('openmm-import')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, to use the same `Modeller` as for the trajectories generated with `AdaptiveMD`, we build compatible output types. This means, they should contain the original file names with the respective strides and be named accordly. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import_engine.add_output_type('master', 'old-file-name-full.dcd', \n", + " stride=stride_full)\n", + "import_engine.add_output_type('protein', 'old-file-name-protein.dcd', \n", + " stride=stride_prot, \n", + " selection='protein')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize `Trajectory` objects\n", + "To add the actual file paths, `Trajectory` objects have to be initialized. \n", + "- `Trajectory` locations are folders, not files, and end with '/'.\n", + "- `frame` can be None if the initial frame is not known.\n", + "- `length` as defined by the engine time step, not by the output/save rate of an output type.\n", + "- `engine`: import engine defined above.\n", + "\n", + "The example below uses a list of trajectory folders to import, `existing_trajectory_paths`. The trajectory lengths are known and stored in `existing_trajectory_lengths`.\n", + "\n", + "The `created` variable has to be set a creation time in order to let the database know the trajectory already exists. In the example below, the (arbitrary) import time is used." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "trajs = []\n", + "for traj_path, traj_length in zip(existing_trajectory_paths, \n", + " existing_trajectory_lengths):\n", + " traj = Trajectory('shared://' + traj_path,\n", + " frame=None,\n", + " length=traj_length,\n", + " engine=import_engine)\n", + " traj.created = time.time()\n", + " trajs.append(traj)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Add the trajectories to the project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "map(project.files.add, trajs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's check if the trajectories have been added:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "len(project.trajectories)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "project.close()" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "py27_mar17", + "language": "python", + "name": "py27_mar17" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} From b175d1d8fe52ff7ef22b9a6048b379921e9d5cc4 Mon Sep 17 00:00:00 2001 From: thempel Date: Fri, 7 Apr 2017 12:17:43 -0500 Subject: [PATCH 2/8] [analysis.pyemma] added multiple engine support --- adaptivemd/analysis/pyemma/emma.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git 
a/adaptivemd/analysis/pyemma/emma.py b/adaptivemd/analysis/pyemma/emma.py index 40dede0..9f1969b 100755 --- a/adaptivemd/analysis/pyemma/emma.py +++ b/adaptivemd/analysis/pyemma/emma.py @@ -172,10 +172,26 @@ def execute( ty = trajs[0].types[outtype] + + engines = [] + for traj in trajectories: + if traj.engine not in engines: + engines.append(traj.engine) + + if len(engines) > 1: + trajs = [] + for traj in trajectories: + trajs.append(os.path.join(traj.location, traj.types[outtype].filename)) + trajectory_file_name = '' + else: + trajs = list(trajectories) + trajectory_file_name = ty.filename + + t.call( remote_analysis, trajectories=trajs, - traj_name=ty.filename, # we need the filename in the traj folder + traj_name=trajectory_file_name, # we need the filename in the traj folder selection=ty.selection, # tell pyemma the subsets of atoms features=features, topfile=input_pdb, From d94d9d2fc60456b3ae0504378b185de38211990a Mon Sep 17 00:00:00 2001 From: thempel Date: Fri, 7 Apr 2017 12:18:11 -0500 Subject: [PATCH 3/8] [scheduler] changed shared:// paths to absolute --- adaptivemd/scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptivemd/scheduler.py b/adaptivemd/scheduler.py index 150aac8..7b3c385 100755 --- a/adaptivemd/scheduler.py +++ b/adaptivemd/scheduler.py @@ -362,7 +362,7 @@ def replace_prefix(self, path): path = path.replace('sandbox://', '../..') # the main remote shared FS - path = path.replace('shared://', '../../..') + path = path.replace('shared://', '') path = path.replace('worker://', '') path = path.replace('file://', '') # the specific project folder:// From 71a1b0c281abce8669007fde6042ddd6aab4d619 Mon Sep 17 00:00:00 2001 From: thempel Date: Fri, 7 Apr 2017 14:41:16 -0500 Subject: [PATCH 4/8] [scheduler] undo shared:// modification & update docs --- adaptivemd/scheduler.py | 2 +- examples/tutorial/7_example_misc.ipynb | 65 ++++++++++++++++++++------ 2 files changed, 51 insertions(+), 16 deletions(-) diff --git 
a/adaptivemd/scheduler.py b/adaptivemd/scheduler.py index 7b3c385..150aac8 100755 --- a/adaptivemd/scheduler.py +++ b/adaptivemd/scheduler.py @@ -362,7 +362,7 @@ def replace_prefix(self, path): path = path.replace('sandbox://', '../..') # the main remote shared FS - path = path.replace('shared://', '') + path = path.replace('shared://', '../../..') path = path.replace('worker://', '') path = path.replace('file://', '') # the specific project folder:// diff --git a/examples/tutorial/7_example_misc.ipynb b/examples/tutorial/7_example_misc.ipynb index 77f8631..1e3888c 100755 --- a/examples/tutorial/7_example_misc.ipynb +++ b/examples/tutorial/7_example_misc.ipynb @@ -12,7 +12,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "In many cases, some trajectory data already exists before running an adaptive simulation. It is thus most efficiently to import this data into the framework. This works in principle by creating `Trajectory` objects and adding them to the `Project`. Since all of the trajectory-related data however is stored in the `Engine` object that generated it, this needs to be created as well." 
] @@ -88,7 +91,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "### Create an import `Engine`" ] @@ -97,7 +103,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -109,7 +117,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -118,7 +128,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "Since it is not desired to expand the trajectories at this point, system and integrator files are not given. In principle, if compatible restart files are available, one could create a complete engine and expand existing trajectories." ] @@ -127,7 +140,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -140,7 +155,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "Now, to use the same `Modeller` as for the trajectories generated with `AdaptiveMD`, we build compatible output types. This means, they should contain the original file names with the respective strides and be named accordly. " ] @@ -149,7 +167,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -162,7 +182,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "## Initialize `Trajectory` objects\n", "To add the actual file paths, `Trajectory` objects have to be initialized. 
\n", @@ -171,7 +194,7 @@ "- `length` as defined by the engine time step, not by the output/save rate of an output type.\n", "- `engine`: import engine defined above.\n", "\n", - "The example below uses a list of trajectory folders to import, `existing_trajectory_paths`. The trajectory lengths are known and stored in `existing_trajectory_lengths`.\n", + "The example below uses a list of trajectory folders to import, `existing_trajectory_paths`. Note that when adding this path with the `shared://` prefix, it must be a relative path in the root shared cluster file system. For absolute paths, use `worker://` instead. The trajectory lengths are known and stored in `existing_trajectory_lengths`.\n", "\n", "The `created` variable has to be set a creation time in order to let the database know the trajectory already exists. In the example below, the (arbitrary) import time is used." ] @@ -180,7 +203,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -197,7 +222,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "#### Add the trajectories to the project" ] @@ -206,7 +234,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -215,7 +245,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "Let's check if the trajectories have been added:" ] @@ -224,7 +257,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "deletable": true, + "editable": true }, "outputs": [], "source": [ From bfae29a0bbae152193c38a6812b32435b549f7d3 Mon Sep 17 00:00:00 2001 From: thempel Date: Fri, 7 Apr 2017 14:57:04 -0500 Subject: [PATCH 5/8] 
[analysis.pyemma] multi engine support, pass paths directly --- adaptivemd/analysis/pyemma/_remote.py | 9 ++------- adaptivemd/analysis/pyemma/emma.py | 16 ++-------------- 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/adaptivemd/analysis/pyemma/_remote.py b/adaptivemd/analysis/pyemma/_remote.py index a0e196d..b6a690f 100755 --- a/adaptivemd/analysis/pyemma/_remote.py +++ b/adaptivemd/analysis/pyemma/_remote.py @@ -26,7 +26,6 @@ def remote_analysis( trajectories, - traj_name='output.dcd', selection=None, features=None, topfile='input.pdb', @@ -40,10 +39,7 @@ def remote_analysis( Parameters ---------- - trajectories : Sized of `Trajectory` - a list of `Trajectory` objects - traj_name : str - name of the trajectory file with the trajectory directory given + trajectories : Trajectory file paths selection : str an atom subset selection string as used in mdtraj .select features : dict or list or None @@ -122,8 +118,7 @@ def apply_feat_part(featurizer, parts): print '#trajectories :', len(trajectories) - files = [os.path.join(t, traj_name) for t in trajectories] - inp = pyemma.coordinates.source(files, feat) + inp = pyemma.coordinates.source(trajectories, feat) tica_obj = pyemma.coordinates.tica( inp, lag=tica_lag, dim=tica_dim, kinetic_map=False) diff --git a/adaptivemd/analysis/pyemma/emma.py b/adaptivemd/analysis/pyemma/emma.py index 9f1969b..43e85c4 100755 --- a/adaptivemd/analysis/pyemma/emma.py +++ b/adaptivemd/analysis/pyemma/emma.py @@ -172,26 +172,14 @@ def execute( ty = trajs[0].types[outtype] - - engines = [] + traj_paths = [] for traj in trajectories: - if traj.engine not in engines: - engines.append(traj.engine) - - if len(engines) > 1: - trajs = [] - for traj in trajectories: - trajs.append(os.path.join(traj.location, traj.types[outtype].filename)) - trajectory_file_name = '' - else: - trajs = list(trajectories) - trajectory_file_name = ty.filename + traj_paths.append(os.path.join(traj.location, traj.types[outtype].filename)) t.call( 
remote_analysis, trajectories=trajs, - traj_name=trajectory_file_name, # we need the filename in the traj folder selection=ty.selection, # tell pyemma the subsets of atoms features=features, topfile=input_pdb, From 1cb9f86c15e83d883675470bc622e7a06b3a6376 Mon Sep 17 00:00:00 2001 From: thempel Date: Fri, 7 Apr 2017 15:07:14 -0500 Subject: [PATCH 6/8] [analysis.pyemma] multi engines: check strides & selections --- adaptivemd/analysis/pyemma/emma.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/adaptivemd/analysis/pyemma/emma.py b/adaptivemd/analysis/pyemma/emma.py index 43e85c4..8a0d9a6 100755 --- a/adaptivemd/analysis/pyemma/emma.py +++ b/adaptivemd/analysis/pyemma/emma.py @@ -170,10 +170,18 @@ def execute( # ups, one of the trajectories does not have the required type! return + if len(set(traj.types[outtype].stride for traj in trajs)) > 1: + # using different strides in trajectories + return + + if len(set(traj.types[outtype].selection for traj in trajs)) > 1: + # different selection strings among trajectories + return + ty = trajs[0].types[outtype] traj_paths = [] - for traj in trajectories: + for traj in trajs: traj_paths.append(os.path.join(traj.location, traj.types[outtype].filename)) From ed6365e75a0f958d532dcba25067568ec6c69b32 Mon Sep 17 00:00:00 2001 From: thempel Date: Fri, 7 Apr 2017 15:22:05 -0500 Subject: [PATCH 7/8] [analysis.pyemma] minor bugfix --- adaptivemd/analysis/pyemma/emma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adaptivemd/analysis/pyemma/emma.py b/adaptivemd/analysis/pyemma/emma.py index 8a0d9a6..ae33f58 100755 --- a/adaptivemd/analysis/pyemma/emma.py +++ b/adaptivemd/analysis/pyemma/emma.py @@ -187,7 +187,7 @@ def execute( t.call( remote_analysis, - trajectories=trajs, + trajectories=traj_paths, selection=ty.selection, # tell pyemma the subsets of atoms features=features, topfile=input_pdb, From f72f3d6e1bd15d7511fcfbec187c82d29719b276 Mon Sep 17 00:00:00 2001 From: thempel Date: 
Fri, 7 Apr 2017 15:51:29 -0500 Subject: [PATCH 8/8] [analysis.pyemma] fix file paths --- adaptivemd/analysis/pyemma/emma.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/adaptivemd/analysis/pyemma/emma.py b/adaptivemd/analysis/pyemma/emma.py index ae33f58..3cb5e31 100755 --- a/adaptivemd/analysis/pyemma/emma.py +++ b/adaptivemd/analysis/pyemma/emma.py @@ -182,8 +182,7 @@ def execute( traj_paths = [] for traj in trajs: - traj_paths.append(os.path.join(traj.location, traj.types[outtype].filename)) - + traj_paths.append(traj.file(traj.types[outtype].filename).path) t.call( remote_analysis,