From 2a2bcf88ac14f737f7721bfd4b03dfe358912d5f Mon Sep 17 00:00:00 2001
From: thempel <thempel@zedat.fu-berlin.de>
Date: Fri, 7 Apr 2017 12:06:34 -0500
Subject: [PATCH 1/8] [docs] added misc section on existing traj import

---
 docs/examples.rst                      |   1 +
 docs/examples/example7.rst             |   6 +
 examples/tutorial/7_example_misc.ipynb | 270 +++++++++++++++++++++++++
 3 files changed, 277 insertions(+)
 create mode 100644 docs/examples/example7.rst
 create mode 100755 examples/tutorial/7_example_misc.ipynb

diff --git a/docs/examples.rst b/docs/examples.rst
index 34d0a48..2943a8a 100644
--- a/docs/examples.rst
+++ b/docs/examples.rst
@@ -10,3 +10,4 @@ Examples Notebooks
    examples/example4
    examples/example5
    examples/example6
+   examples/example7
diff --git a/docs/examples/example7.rst b/docs/examples/example7.rst
new file mode 100644
index 0000000..236a308
--- /dev/null
+++ b/docs/examples/example7.rst
@@ -0,0 +1,6 @@
+.. _example7:
+
+Example 7 - Miscellaneous
+=========================
+
+.. notebook:: examples/tutorial/7_example_misc.ipynb
diff --git a/examples/tutorial/7_example_misc.ipynb b/examples/tutorial/7_example_misc.ipynb
new file mode 100755
index 0000000..77f8631
--- /dev/null
+++ b/examples/tutorial/7_example_misc.ipynb
@@ -0,0 +1,270 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "## Importing existing trajectory data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In many cases, some trajectory data already exists before running an adaptive simulation. It is thus most efficiently to import this data into the framework. This works in principle by creating `Trajectory` objects and adding them to the `Project`. Since all of the trajectory-related data however is stored in the `Engine` object that generated it, this needs to be created as well."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "### Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "import sys, os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "from adaptivemd import Project"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "Let's open our `tutorial` project by its name. If you completed the previous example this should all work out of the box."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "project = Project('tutorial')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "Open all connections to the `MongoDB` and `Session` so we can get started."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create an import `Engine`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from adaptivemd import Trajectory\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "pdb_file = File('file://init.pdb').named('initial_pdb').load()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Since it is not desired to expand the trajectories at this point, system and integrator files are not given. In principle, if compatible restart files are available, one could create a complete engine and expand existing trajectories."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import_engine = OpenMMEngine(pdb_file=pdb_file,\n",
+    "                             system_file=None,\n",
+    "                             integrator_file=None,\n",
+    "                             args=None\n",
+    "                             ).named('openmm-import')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, to use the same `Modeller` as for the trajectories generated with `AdaptiveMD`, we build compatible output types. This means, they should contain the original file names with the respective strides and be named accordly. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import_engine.add_output_type('master', 'old-file-name-full.dcd', \n",
+    "                              stride=stride_full)\n",
+    "import_engine.add_output_type('protein', 'old-file-name-protein.dcd', \n",
+    "                              stride=stride_prot, \n",
+    "                              selection='protein')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initialize `Trajectory` objects\n",
+    "To add the actual file paths, `Trajectory` objects have to be initialized. \n",
+    "- `Trajectory` locations are folders, not files, and end with '/'.\n",
+    "- `frame` can be None if the initial frame is not known.\n",
+    "- `length` as defined by the engine time step, not by the output/save rate of an output type.\n",
+    "- `engine`: import engine defined above.\n",
+    "\n",
+    "The example below uses a list of trajectory folders to import, `existing_trajectory_paths`. The trajectory lengths are known and stored in `existing_trajectory_lengths`.\n",
+    "\n",
+    "The `created` variable has to be set a creation time in order to let the database know the trajectory already exists. In the example below, the (arbitrary) import time is used."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "trajs = []\n",
+    "for traj_path, traj_length in zip(existing_trajectory_paths, \n",
+    "                                  existing_trajectory_lengths):\n",
+    "    traj = Trajectory('shared://' + traj_path,\n",
+    "                      frame=None,\n",
+    "                      length=traj_length,\n",
+    "                      engine=import_engine)\n",
+    "    traj.created = time.time()\n",
+    "    trajs.append(traj)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Add the trajectories to the project"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "map(project.files.add, trajs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's check if the trajectories have been added:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "len(project.trajectories)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "project.close()"
+   ]
+  }
+ ],
+ "metadata": {
+  "anaconda-cloud": {},
+  "kernelspec": {
+   "display_name": "py27_mar17",
+   "language": "python",
+   "name": "py27_mar17"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}

From b175d1d8fe52ff7ef22b9a6048b379921e9d5cc4 Mon Sep 17 00:00:00 2001
From: thempel <thempel@zedat.fu-berlin.de>
Date: Fri, 7 Apr 2017 12:17:43 -0500
Subject: [PATCH 2/8] [analysis.pyemma] added multiple engine support

---
 adaptivemd/analysis/pyemma/emma.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/adaptivemd/analysis/pyemma/emma.py b/adaptivemd/analysis/pyemma/emma.py
index 40dede0..9f1969b 100755
--- a/adaptivemd/analysis/pyemma/emma.py
+++ b/adaptivemd/analysis/pyemma/emma.py
@@ -172,10 +172,26 @@ def execute(
 
         ty = trajs[0].types[outtype]
 
+
+        engines = []
+        for traj in trajectories:
+            if traj.engine not in engines:
+                engines.append(traj.engine)
+        
+        if len(engines) > 1:
+            trajs = []
+            for traj in trajectories:
+                trajs.append(os.path.join(traj.location, traj.types[outtype].filename))
+            trajectory_file_name = ''
+        else:
+            trajs = list(trajectories)
+            trajectory_file_name = ty.filename
+
+
         t.call(
             remote_analysis,
             trajectories=trajs,
-            traj_name=ty.filename,  # we need the filename in the traj folder
+            traj_name=trajectory_file_name,  # we need the filename in the traj folder
             selection=ty.selection,  # tell pyemma the subsets of atoms
             features=features,
             topfile=input_pdb,

From d94d9d2fc60456b3ae0504378b185de38211990a Mon Sep 17 00:00:00 2001
From: thempel <thempel@zedat.fu-berlin.de>
Date: Fri, 7 Apr 2017 12:18:11 -0500
Subject: [PATCH 3/8] [scheduler] changed shared:// paths to absolute

---
 adaptivemd/scheduler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/adaptivemd/scheduler.py b/adaptivemd/scheduler.py
index 150aac8..7b3c385 100755
--- a/adaptivemd/scheduler.py
+++ b/adaptivemd/scheduler.py
@@ -362,7 +362,7 @@ def replace_prefix(self, path):
         path = path.replace('sandbox://', '../..')
 
         # the main remote shared FS
-        path = path.replace('shared://', '../../..')
+        path = path.replace('shared://', '')
         path = path.replace('worker://', '')
         path = path.replace('file://', '')
         # the specific project folder://

From 71a1b0c281abce8669007fde6042ddd6aab4d619 Mon Sep 17 00:00:00 2001
From: thempel <thempel@zedat.fu-berlin.de>
Date: Fri, 7 Apr 2017 14:41:16 -0500
Subject: [PATCH 4/8] [scheduler] undo shared:// modification & update docs

---
 adaptivemd/scheduler.py                |  2 +-
 examples/tutorial/7_example_misc.ipynb | 65 ++++++++++++++++++++------
 2 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/adaptivemd/scheduler.py b/adaptivemd/scheduler.py
index 7b3c385..150aac8 100755
--- a/adaptivemd/scheduler.py
+++ b/adaptivemd/scheduler.py
@@ -362,7 +362,7 @@ def replace_prefix(self, path):
         path = path.replace('sandbox://', '../..')
 
         # the main remote shared FS
-        path = path.replace('shared://', '')
+        path = path.replace('shared://', '../../..')
         path = path.replace('worker://', '')
         path = path.replace('file://', '')
         # the specific project folder://
diff --git a/examples/tutorial/7_example_misc.ipynb b/examples/tutorial/7_example_misc.ipynb
index 77f8631..1e3888c 100755
--- a/examples/tutorial/7_example_misc.ipynb
+++ b/examples/tutorial/7_example_misc.ipynb
@@ -12,7 +12,10 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
    "source": [
     "In many cases, some trajectory data already exists before running an adaptive simulation. It is thus most efficiently to import this data into the framework. This works in principle by creating `Trajectory` objects and adding them to the `Project`. Since all of the trajectory-related data however is stored in the `Engine` object that generated it, this needs to be created as well."
    ]
@@ -88,7 +91,10 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
    "source": [
     "### Create an import `Engine`"
    ]
@@ -97,7 +103,9 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
    },
    "outputs": [],
    "source": [
@@ -109,7 +117,9 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
    },
    "outputs": [],
    "source": [
@@ -118,7 +128,10 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
    "source": [
     "Since it is not desired to expand the trajectories at this point, system and integrator files are not given. In principle, if compatible restart files are available, one could create a complete engine and expand existing trajectories."
    ]
@@ -127,7 +140,9 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
    },
    "outputs": [],
    "source": [
@@ -140,7 +155,10 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
    "source": [
     "Now, to use the same `Modeller` as for the trajectories generated with `AdaptiveMD`, we build compatible output types. This means, they should contain the original file names with the respective strides and be named accordly. "
    ]
@@ -149,7 +167,9 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
    },
    "outputs": [],
    "source": [
@@ -162,7 +182,10 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
    "source": [
     "## Initialize `Trajectory` objects\n",
     "To add the actual file paths, `Trajectory` objects have to be initialized. \n",
@@ -171,7 +194,7 @@
     "- `length` as defined by the engine time step, not by the output/save rate of an output type.\n",
     "- `engine`: import engine defined above.\n",
     "\n",
-    "The example below uses a list of trajectory folders to import, `existing_trajectory_paths`. The trajectory lengths are known and stored in `existing_trajectory_lengths`.\n",
+    "The example below uses a list of trajectory folders to import, `existing_trajectory_paths`. Note that a path added with the `shared://` prefix must be given relative to the root of the shared cluster file system; for absolute paths, use the `worker://` prefix instead. The trajectory lengths are known and stored in `existing_trajectory_lengths`.\n",
     "\n",
     "The `created` variable has to be set a creation time in order to let the database know the trajectory already exists. In the example below, the (arbitrary) import time is used."
    ]
@@ -180,7 +203,9 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
    },
    "outputs": [],
    "source": [
@@ -197,7 +222,10 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
    "source": [
     "#### Add the trajectories to the project"
    ]
@@ -206,7 +234,9 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
    },
    "outputs": [],
    "source": [
@@ -215,7 +245,10 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
    "source": [
     "Let's check if the trajectories have been added:"
    ]
@@ -224,7 +257,9 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
    },
    "outputs": [],
    "source": [

From bfae29a0bbae152193c38a6812b32435b549f7d3 Mon Sep 17 00:00:00 2001
From: thempel <thempel@zedat.fu-berlin.de>
Date: Fri, 7 Apr 2017 14:57:04 -0500
Subject: [PATCH 5/8] [analysis.pyemma] multi engine support, pass paths
 directly

---
 adaptivemd/analysis/pyemma/_remote.py |  9 ++-------
 adaptivemd/analysis/pyemma/emma.py    | 16 ++--------------
 2 files changed, 4 insertions(+), 21 deletions(-)

diff --git a/adaptivemd/analysis/pyemma/_remote.py b/adaptivemd/analysis/pyemma/_remote.py
index a0e196d..b6a690f 100755
--- a/adaptivemd/analysis/pyemma/_remote.py
+++ b/adaptivemd/analysis/pyemma/_remote.py
@@ -26,7 +26,6 @@
 
 def remote_analysis(
         trajectories,
-        traj_name='output.dcd',
         selection=None,
         features=None,
         topfile='input.pdb',
@@ -40,10 +39,7 @@ def remote_analysis(
 
     Parameters
     ----------
-    trajectories : Sized of `Trajectory`
-        a list of `Trajectory` objects
-    traj_name : str
-        name of the trajectory file with the trajectory directory given
+    trajectories : Trajectory file paths
     selection : str
         an atom subset selection string as used in mdtraj .select
     features : dict or list or None
@@ -122,8 +118,7 @@ def apply_feat_part(featurizer, parts):
 
     print '#trajectories :', len(trajectories)
 
-    files = [os.path.join(t, traj_name) for t in trajectories]
-    inp = pyemma.coordinates.source(files, feat)
+    inp = pyemma.coordinates.source(trajectories, feat)
 
     tica_obj = pyemma.coordinates.tica(
         inp, lag=tica_lag, dim=tica_dim, kinetic_map=False)
diff --git a/adaptivemd/analysis/pyemma/emma.py b/adaptivemd/analysis/pyemma/emma.py
index 9f1969b..43e85c4 100755
--- a/adaptivemd/analysis/pyemma/emma.py
+++ b/adaptivemd/analysis/pyemma/emma.py
@@ -172,26 +172,14 @@ def execute(
 
         ty = trajs[0].types[outtype]
 
-
-        engines = []
+        traj_paths = []
         for traj in trajectories:
-            if traj.engine not in engines:
-                engines.append(traj.engine)
-        
-        if len(engines) > 1:
-            trajs = []
-            for traj in trajectories:
-                trajs.append(os.path.join(traj.location, traj.types[outtype].filename))
-            trajectory_file_name = ''
-        else:
-            trajs = list(trajectories)
-            trajectory_file_name = ty.filename
+            traj_paths.append(os.path.join(traj.location, traj.types[outtype].filename))
 
 
         t.call(
             remote_analysis,
             trajectories=trajs,
-            traj_name=trajectory_file_name,  # we need the filename in the traj folder
             selection=ty.selection,  # tell pyemma the subsets of atoms
             features=features,
             topfile=input_pdb,

From 1cb9f86c15e83d883675470bc622e7a06b3a6376 Mon Sep 17 00:00:00 2001
From: thempel <thempel@zedat.fu-berlin.de>
Date: Fri, 7 Apr 2017 15:07:14 -0500
Subject: [PATCH 6/8] [analysis.pyemma] multi engines: check strides &
 selections

---
 adaptivemd/analysis/pyemma/emma.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/adaptivemd/analysis/pyemma/emma.py b/adaptivemd/analysis/pyemma/emma.py
index 43e85c4..8a0d9a6 100755
--- a/adaptivemd/analysis/pyemma/emma.py
+++ b/adaptivemd/analysis/pyemma/emma.py
@@ -170,10 +170,18 @@ def execute(
                 # ups, one of the trajectories does not have the required type!
                 return
 
+        if len(set(traj.types[outtype].stride for traj in trajs)) > 1:
+            # using different strides in trajectories
+            return
+
+        if len(set(traj.types[outtype].selection for traj in trajs)) > 1:
+            # different selection strings among trajectories
+            return
+
         ty = trajs[0].types[outtype]
 
         traj_paths = []
-        for traj in trajectories:
+        for traj in trajs:
             traj_paths.append(os.path.join(traj.location, traj.types[outtype].filename))
 
 

From ed6365e75a0f958d532dcba25067568ec6c69b32 Mon Sep 17 00:00:00 2001
From: thempel <thempel@zedat.fu-berlin.de>
Date: Fri, 7 Apr 2017 15:22:05 -0500
Subject: [PATCH 7/8] [analysis.pyemma] minor bugfix

---
 adaptivemd/analysis/pyemma/emma.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/adaptivemd/analysis/pyemma/emma.py b/adaptivemd/analysis/pyemma/emma.py
index 8a0d9a6..ae33f58 100755
--- a/adaptivemd/analysis/pyemma/emma.py
+++ b/adaptivemd/analysis/pyemma/emma.py
@@ -187,7 +187,7 @@ def execute(
 
         t.call(
             remote_analysis,
-            trajectories=trajs,
+            trajectories=traj_paths,
             selection=ty.selection,  # tell pyemma the subsets of atoms
             features=features,
             topfile=input_pdb,

From f72f3d6e1bd15d7511fcfbec187c82d29719b276 Mon Sep 17 00:00:00 2001
From: thempel <thempel@zedat.fu-berlin.de>
Date: Fri, 7 Apr 2017 15:51:29 -0500
Subject: [PATCH 8/8] [analysis.pyemma] fix file paths

---
 adaptivemd/analysis/pyemma/emma.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/adaptivemd/analysis/pyemma/emma.py b/adaptivemd/analysis/pyemma/emma.py
index ae33f58..3cb5e31 100755
--- a/adaptivemd/analysis/pyemma/emma.py
+++ b/adaptivemd/analysis/pyemma/emma.py
@@ -182,8 +182,7 @@ def execute(
 
         traj_paths = []
         for traj in trajs:
-            traj_paths.append(os.path.join(traj.location, traj.types[outtype].filename))
-
+            traj_paths.append(traj.file(traj.types[outtype].filename).path)
 
         t.call(
             remote_analysis,