From 698354bf2655e362679210323d93aff0f26bc3e6 Mon Sep 17 00:00:00 2001
From: "David W.H. Swenson" <dwhs@hyperblazer.net>
Date: Mon, 9 Aug 2021 09:03:15 -0400
Subject: [PATCH] Start on init_conds_from_trajs tutorial

---
 .../1_init_conds_from_trajs.ipynb             | 685 ++++++++++++++++++
 init_conds_from_trajs/README.md               |  23 +
 init_conds_from_trajs/plotting.py             |  50 ++
 3 files changed, 758 insertions(+)
 create mode 100644 init_conds_from_trajs/1_init_conds_from_trajs.ipynb
 create mode 100644 init_conds_from_trajs/README.md
 create mode 100644 init_conds_from_trajs/plotting.py

diff --git a/init_conds_from_trajs/1_init_conds_from_trajs.ipynb b/init_conds_from_trajs/1_init_conds_from_trajs.ipynb
new file mode 100644
index 0000000..ce43d23
--- /dev/null
+++ b/init_conds_from_trajs/1_init_conds_from_trajs.ipynb
@@ -0,0 +1,685 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "b95b55fc",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# Setting up our toy system"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4e7e5e9f",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import openpathsampling as paths\n",
+    "from plotting import plot\n",
+    "from openpathsampling.tests.test_helpers import make_1d_traj"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9c9a787f",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "cv = paths.CoordinateFunctionCV('CV', lambda s: s.xyz[0][0])\n",
+    "state_A = paths.CVDefinedVolume(cv, float('-inf'), 0.0).named(\"A\")\n",
+    "state_B = paths.CVDefinedVolume(cv, 1.0, float('inf')).named(\"B\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1dba70ea",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "tps_network = paths.TPSNetwork(state_A, state_B)\n",
+    "assert len(tps_network.sampling_ensembles) == 1\n",
+    "ensemble = tps_network.sampling_ensembles[0].named(\"TPS ensemble\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "36c9f7cd",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "scheme = paths.OneWayShootingMoveScheme(tps_network)\n",
+    "# NOTE: no engine given here -- currently allowed, but will cause errors if you use it!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ed484040",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "source": [
+    "## What does this system look like?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cb19459a",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "plot([])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ab10df31",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "source": [
+    "## A fake transition trajectory"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "debd9ee3",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "transition = make_1d_traj([i*0.1 - 0.05 for i in range(12)])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f23ac1db",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "plot(transition)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "29ce91f3",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "ic = scheme.initial_conditions_from_trajectories(transition)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd2ffb5e",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "plot(transition, ic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "85f3a845",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# What is `init_conds`?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e6142dfd",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "notes"
+    }
+   },
+   "source": [
+    "`initial_conditions_from_trajectories` returns a `SampleSet`. A `SampleSet` is a container for samples, which acts like a dictionary where keys can be either the replica ID for a sample or the ensemble for a sample."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2ffd2981",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "type(ic)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a3fc4e14",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "len(ic)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "32da977c",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "ic[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d6a9c776",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "ic[ensemble]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "18ecb719",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "sample = ic[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e02ed8c3",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "sample.ensemble is ensemble"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b0753867",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "sample.replica == 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dbd569ac",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "sample.trajectory is transition"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cfbbd097",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# Extracting the trajectory from the input\n",
+    "\n",
+    "`initial_conditions_from_trajectories` does whatever it can to find the trajectories you need from the input you give it."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1a50d122",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "source": [
+    "## What if the desired trajectory is a subtrajectory of the input?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "66e0967f",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "traj = transition + transition.reversed\n",
+    "ic3 = scheme.initial_conditions_from_trajectories(traj)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b5c085b2",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "plot(traj, ic3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "99539683",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "source": [
+    "## What if the trajectory is backward?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7fae4922",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rev = transition.reversed\n",
+    "ic4 = scheme.initial_conditions_from_trajectories(rev)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e922aed5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot(rev, ic4)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6718476e",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "source": [
+    "## What if you give it a list of trajectories?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e55885f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t1 = make_1d_traj([0.25, -0.06, 0.35, -0.04])\n",
+    "t2 = make_1d_traj([0.2, -0.05, 0.3, 0.7, 1.05, 0.65])\n",
+    "ic5 = scheme.initial_conditions_from_trajectories([t1, t2])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "11447422",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot([t1, t2], ic5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4e78d439",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# What does the output message mean?\n",
+    "\n",
+    "You keep seeing the text:\n",
+    "\n",
+    "```\n",
+    "No missing ensembles.\n",
+    "No extra ensembles.\n",
+    "```\n",
+    "\n",
+    "What would be missing? What would be extra?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b73123ce",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "source": [
+    "## An example with missing ensembles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1c16576",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "interfaces = paths.VolumeInterfaceSet(cv, float(\"-inf\"), [0.2, 0.6])\n",
+    "tis_network = paths.MISTISNetwork([(state_A, interfaces, state_B)])\n",
+    "tis_scheme = paths.DefaultScheme(tis_network)\n",
+    "# again, no engine, which will cause errors on dynamics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2fa3907",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot([], interfaces=[0.2, 0.6])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "60370964",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "t1 = make_1d_traj([0.25, -0.06, 0.35, -0.04])\n",
+    "ic6 = tis_scheme.initial_conditions_from_trajectories(t1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8f1d82af",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "plot(t1, ic6, interfaces=[0.2, 0.6])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "58a61a3d",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "source": [
+    "## An example with extra ensembles"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "14080f19",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
+   "source": [
+    "The only way to get extra ensembles is to start with an existing sample set (using the `sample_set` argument) which contains an ensemble that won't be used by the move scheme.\n",
+    "\n",
+    "This would be extremely unusual in most workflows. Below, we will use the original TPS scheme with `t2`, which satisfies the scheme, but the existing sample set from the TIS scheme, which does not."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7f92db42",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t2 = make_1d_traj([0.2, -0.05, 0.3, 0.7, 1.05, 0.65])\n",
+    "ic7 = scheme.initial_conditions_from_trajectories(t2, sample_set=ic6)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7469371b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ic7 is ic6  # well, this is probably a bug.... I should revisit that"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6b778398",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "source": [
+    "## Why no error?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ba7882ac",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "notes"
+    }
+   },
+   "source": [
+    "You might wonder why this issue is only raised in text -- not as an error. This is because it is completely reasonable to use workflows where you build up a sample set a bit at a time."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "92318d2e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# build on an existing sample set"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cb70bd2d",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "source": [
+    "***Programmers***: If you want to generate an error, check out these methods:\n",
+    "\n",
+    "* `scheme.assert_initial_conditions(sample_set)`: raises `AssertionError`\n",
+    "* `scheme.check_initial_conditions(sample_set)`: returns a tuple of `(missing, extra)` so you can handle it yourself"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2eee8bc7",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# Advanced functionality"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2e069d19",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
+   "source": [
+    "There are many advanced approaches possible with `initial_conditions_from_trajectories`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "496bb745",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "source": [
+    "* change the order of trajectories to prefer longest/shortest etc.\n",
+    "* change whether reversed trajectories are considered\n",
+    "* attempt to run dynamics to generate trajectories that satisfy the ensemble\n",
+    "\n",
+    "For details, check the docs on `initial_conditions_from_trajectories`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b343e37c",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# Future warnings\n",
+    "\n",
+    "Some of the details, especially around the advanced strategies, are likely to change in future versions of OPS. It is also possible that this method will be renamed to something considerably shorter than `initial_conditions_from_trajectories`."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:demo]",
+   "language": "python",
+   "name": "conda-env-demo-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/init_conds_from_trajs/README.md b/init_conds_from_trajs/README.md
new file mode 100644
index 0000000..aa4b6c8
--- /dev/null
+++ b/init_conds_from_trajs/README.md
@@ -0,0 +1,23 @@
+# `initial_conditions_from_trajectories`
+
+This tutorial shows the wide range of functionality available in the
+`initial_conditions_from_trajectories` method of a `MoveScheme`. This method
+helps create the input sample set required for any path sampling move
+scheme. As this tutorial illustrates, the method can extract the required
+trajectories from a wide range of inputs.
+
+In most cases, `initial_conditions_from_trajectories` is all you need to
+prepare your trajectories for path sampling.
+
+## Videos
+
+There is a [video walkthrough of this tutorial]() on YouTube.
+
+## Requirements
+
+This tutorial should work with any release in the OpenPathSampling 1.x cycle.
+
+## Running the tutorial
+
+This tutorial consists of a single Jupyter notebook. It can be run either in a
+local installation or via the Binder link.
diff --git a/init_conds_from_trajs/plotting.py b/init_conds_from_trajs/plotting.py
new file mode 100644
index 0000000..e8d4fd1
--- /dev/null
+++ b/init_conds_from_trajs/plotting.py
@@ -0,0 +1,50 @@
+"""
+Custom plotting methods for this tutorial.
+"""
+
+from matplotlib import pyplot as plt
+import openpathsampling as paths
+
+cv = paths.CoordinateFunctionCV('CV', lambda s: s.xyz[0][0])  # same definition as the notebook's cv; used for plot data and the y-axis label
+
+def _plot_background(ax):  # common decoration applied to every panel: reference lines, labels, y-range
+    ax.axhline(1.0, color='r', zorder=-10)  # red line at CV = 1.0 (the notebook's state B starts here); low zorder
+    ax.axhline(0.0, color='r', zorder=-10)  # red line at CV = 0.0 (the notebook's state A ends here); low zorder
+    ax.set_ylabel(cv.name)  # uses the name of the module-level cv
+    ax.set_xlabel('time (frame number)')
+    ax.set_ylim(-0.1, 1.1)  # fixed y-range, slightly wider than the 0.0-1.0 reference lines
+
+def _plot_traj(ax, traj):  # plot the module-level cv along one paths.Sample or paths.Trajectory on ax
+    if isinstance(traj, paths.Sample):
+        traj, label = traj.trajectory, traj.ensemble.name  # unwrap the sample; legend label is its ensemble's name
+    elif isinstance(traj, paths.Trajectory):
+        traj, label = traj, 'Input trajectory'  # bare trajectories all share this legend label
+    else:
+        raise RuntimeError("Something went wrong")  # reached when traj is neither a Sample nor a Trajectory
+
+    ax.plot(cv(traj), 'o-', label=label)  # only y-values are passed, so x is the position within cv(traj)
+
+def add_interfaces(ax, interfaces):  # draw one horizontal line on ax per value in interfaces
+    for iface in interfaces:
+        ax.axhline(iface, ls=':', color='r', lw=0.5)  # thin dotted red line, visually distinct from the solid 0.0/1.0 lines
+
+def plot(inputs, outputs=None, interfaces=None):  # one panel for inputs only; two side-by-side panels when outputs is given
+    if outputs is None:
+        fig, ax = plt.subplots(1, 1, figsize=(5, 4))
+        axs = [ax]  # wrap the single axes so the loop below handles both layouts
+    else:
+        fig, axs = plt.subplots(1, 2, figsize=(11, 4))
+
+    if isinstance(inputs, paths.Trajectory):
+        inputs = [inputs]  # accept a single trajectory as well as an iterable of them
+
+    for ax, inp in zip(axs, [inputs, outputs]):  # zip stops at len(axs), so a None outputs is never iterated
+        _plot_background(ax)
+        for traj in inp:  # items must be Trajectory or Sample objects (see _plot_traj)
+            _plot_traj(ax, traj)
+        if interfaces is not None:
+            add_interfaces(ax, interfaces)
+        if inputs:  # NOTE(review): tests inputs, not inp, so both panels take the same branch -- confirm intended
+            ax.legend()
+        else:
+            ax.set_xlim(0, 10)  # nothing plotted from inputs: set an explicit x-range