From d4434ee8801ff4c6bbc11cf7fa300104b8a5c75e Mon Sep 17 00:00:00 2001
From: Dean Malmgren <dean.malmgren@datascopeanalytics.com>
Date: Mon, 12 May 2014 12:41:08 -0500
Subject: [PATCH] got rid of pseudotasks

---
 docs/changelog.rst                   |  2 ++
 docs/quick_start.rst                 |  4 +---
 docs/yaml_specification.rst          | 21 +++++----------------
 examples/model-correlations/flo.yaml | 10 ----------
 flo/resources/__init__.py            |  3 +--
 flo/tasks/graph.py                   | 20 +++++++++-----------
 flo/tasks/task.py                    | 21 ++++++++-------------
 tests/run_functional_tests.sh        |  2 +-
 8 files changed, 27 insertions(+), 56 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 4678b52..f7d406c 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -7,6 +7,8 @@ Backwards incompatible features are highlighted in **bold**.
 latest
 ------
 
+* **removed pseudotask creation** (every task must now define a ``command`` key)
+
 * specifying alternative yaml configuration (#62)
 
 * incorporated deterministic ordering in a predictable and explainable
diff --git a/docs/quick_start.rst b/docs/quick_start.rst
index 138ed8c..21a5061 100644
--- a/docs/quick_start.rst
+++ b/docs/quick_start.rst
@@ -23,9 +23,7 @@ quick start
    create the resource defined in ``creates``. You can optionally
    define a ``depends`` key that lists resources, either filenames on
    disk or other task ``creates`` targets, to quickly set up
-   dependency chains. You can optionally omit the ``command`` key to
-   create pseudotasks that are collections of other tasks for quickly
-   running a subcomponent of the analysis.
+   dependency chains.
 
 3. *Execute your workflow.* From the same directory as the
    ``flo.yaml`` file (or any subdirectory), execute ``flo run`` and
diff --git a/docs/yaml_specification.rst b/docs/yaml_specification.rst
index 38dd2e6..291fb50 100644
--- a/docs/yaml_specification.rst
+++ b/docs/yaml_specification.rst
@@ -59,10 +59,11 @@ dependencies when it constructs the task graph but always runs in a
 command
 '''''''
 
-The ``command`` key defines the command(s) that should be executed to
-produce the resource specified by the ``creates`` key. Like the
-``depends`` key, multiple steps can be defined in a `YAML
-list <http://en.wikipedia.org/wiki/YAML#Lists>`__ like this:
+The ``command`` key is required for every task. It defines the
+command(s) that are executed to produce the resource specified by the
+``creates`` key. Like the ``depends`` key, multiple steps can be
+defined in a `YAML list <http://en.wikipedia.org/wiki/YAML#Lists>`__
+like this:
 
 .. code-block:: yaml
 
@@ -70,18 +71,6 @@ list <http://en.wikipedia.org/wiki/YAML#Lists>`__ like this:
       - "mkdir -p $(dirname {{creates}})"
       - "python {{depends}} > {{creates}}"
 
-If the ``command`` key is omitted, this task is treated like a
-pseudotask to make it easy to group together a collection of other tasks
-like this:
-
-.. code-block:: yaml
-
-    creates: "figures"         # name of pseudotask
-    depends:
-      - "path/to/figure/a.png" # refers to another task in flo.yaml
-      - "path/to/figure/b.png" # refers to another task in flo.yaml
-      - "path/to/figure/c.png" # refers to another task in flo.yaml
-
 .. _yaml-templating-variables:
 
 templating variables
diff --git a/examples/model-correlations/flo.yaml b/examples/model-correlations/flo.yaml
index d9c857d..5b599ab 100644
--- a/examples/model-correlations/flo.yaml
+++ b/examples/model-correlations/flo.yaml
@@ -51,13 +51,3 @@ tasks:
       - data/x_y.dat
       - src/loaders.py
     command: python {{depends[:2]|join(' ')}} {{y_col}} > {{creates}}
-  
-  # this is an example of a pseudotask that is a convenient alias for
-  # several subcommands. pseudotasks are tasks that have no `command`
-  # associated with them.
-  -
-    creates: analysis
-    depends:
-      - data/x_cdf.dat
-      - data/y_cdf.dat
-      - data/x_y_correlation.dat
diff --git a/flo/resources/__init__.py b/flo/resources/__init__.py
index ea78edf..920cde8 100644
--- a/flo/resources/__init__.py
+++ b/flo/resources/__init__.py
@@ -36,5 +36,4 @@ def add_to_task(task):
     # instantiate the resources associated with this task here
     # to make sure we can resolve aliases if they exist.
     get_or_create(task, task.depends_list, 'depends')
-    if not task.is_pseudotask():
-        get_or_create(task, task.creates_list, 'creates')
+    get_or_create(task, task.creates_list, 'creates')
diff --git a/flo/tasks/graph.py b/flo/tasks/graph.py
index 9186a9e..905279c 100644
--- a/flo/tasks/graph.py
+++ b/flo/tasks/graph.py
@@ -149,7 +149,7 @@ def get_sink_tasks(self):
     def get_out_of_sync_tasks(self):
         out_of_sync_tasks = []
         for task in self.iter_tasks():
-            if not task.is_pseudotask() and not task.in_sync():
+            if not task.in_sync():
                 out_of_sync_tasks.append(task)
         return out_of_sync_tasks
 
@@ -273,8 +273,7 @@ def get_user_clean_confirmation(self, task_list=None,
         if include_internals:
             self.logger.info(green(self.internals_path))
         for task in task_list:
-            if not task.is_pseudotask():
-                self.logger.info(task.creates_message())
+            self.logger.info(task.creates_message())
         yesno = raw_input(colors.red("Delete aforementioned files? [Y/n] "))
         if yesno == '':
             yesno = 'y'
@@ -335,12 +334,11 @@ def duration_message(self, tasks, color=colors.blue):
             min_duration += self.task_durations.get(task.id, 0.0)
         max_duration, n_unknown, n_tasks = 0.0, 0, 0
         for task in self.iter_tasks(tasks):
-            if not task.is_pseudotask():
-                n_tasks += 1
-                try:
-                    max_duration += self.task_durations[task.id]
-                except KeyError:
-                    n_unknown += 1
+            n_tasks += 1
+            try:
+                max_duration += self.task_durations[task.id]
+            except KeyError:
+                n_unknown += 1
         msg = ''
         if n_unknown > 0:
             msg += "There are %d new tasks with unknown durations.\n" % (
@@ -393,14 +391,14 @@ def run_all(self, mock_run=False):
         in sync or not.
         """
         def do_run_func(task):
-            return not task.is_pseudotask()
+            return True
         self._run_helper(None, do_run_func, mock_run)
 
     def run_all_out_of_sync(self, mock_run=False):
         """Execute all tasks in the workflow that are out of sync at runtime.
         """
         def do_run_func(task):
-            return not task.is_pseudotask() and not task.in_sync()
+            return not task.in_sync()
         self._run_helper(self.get_out_of_sync_tasks(), do_run_func, mock_run)
 
     @property
diff --git a/flo/tasks/task.py b/flo/tasks/task.py
index e68dfd6..98ae764 100644
--- a/flo/tasks/task.py
+++ b/flo/tasks/task.py
@@ -38,6 +38,10 @@ def __init__(self, graph, creates=None, depends=None,
             raise InvalidTaskDefinition(
                 "every task must define a `creates`"
             )
+        if self._command is None:
+            raise InvalidTaskDefinition(
+                "every task must define a `command`"
+            )
 
         # remember other attributes of this Task for rendering
         # purposes below
@@ -122,9 +126,8 @@ def root_directory(self):
         return self.graph.root_directory
 
     def iter_resources(self):
-        if not self.is_pseudotask():
-            for resource in self.creates_resources:
-                yield resource
+        for resource in self.creates_resources:
+            yield resource
         for resource in self.depends_resources:
             yield resource
 
@@ -186,11 +189,6 @@ def get_current_state(self):
             msg += k + str(self.attrs[k])
         return self.get_stream_state(StringIO.StringIO(msg))
 
-    def is_pseudotask(self):
-        """Check to see if this task is a pseudotask.
-        """
-        return self._command is None
-
     def in_sync(self):
         """Test whether this task is in sync with the stored state and
         needs to be executed
@@ -219,9 +217,8 @@ def clean_command(self):
 
     def clean(self):
         """Remove the specified target"""
-        if not self.is_pseudotask():
-            self.run(self.clean_command())
-            self.graph.logger.info("removed %s" % self.creates_message())
+        self.run(self.clean_command())
+        self.graph.logger.info("removed %s" % self.creates_message())
 
     def mock_run(self):
         """Mock run this task by displaying output as if it were run"""
@@ -276,8 +273,6 @@ def render_command_template(self):
         """Uses jinja template syntax to render the command from the other
         data specified in the YAML file
         """
-        if self.is_pseudotask():
-            return None
         return self.render_template(self._command)
 
     def duration_message(self, color=colors.blue):
diff --git a/tests/run_functional_tests.sh b/tests/run_functional_tests.sh
index dc9a753..5a95f97 100755
--- a/tests/run_functional_tests.sh
+++ b/tests/run_functional_tests.sh
@@ -69,7 +69,7 @@ validate_example () {
 # checksum by just running this script and determining what the
 # correct checksum is
 validate_example hello-world 040bf35be21ac0a3d6aa9ff4ff25df24
-validate_example model-correlations 14ba1ffc4c37cd306bf415107d6edfd1
+validate_example model-correlations c2e4ae57ff2d970a076b364bab87a87f
 
 # this runs specific tests for the --start-at option
 cd $BASEDIR