From d4434ee8801ff4c6bbc11cf7fa300104b8a5c75e Mon Sep 17 00:00:00 2001 From: Dean Malmgren Date: Mon, 12 May 2014 12:41:08 -0500 Subject: [PATCH] got rid of pseudotasks --- docs/changelog.rst | 2 ++ docs/quick_start.rst | 4 +--- docs/yaml_specification.rst | 21 +++++---------------- examples/model-correlations/flo.yaml | 10 ---------- flo/resources/__init__.py | 3 +-- flo/tasks/graph.py | 20 +++++++++----------- flo/tasks/task.py | 21 ++++++++------------- tests/run_functional_tests.sh | 2 +- 8 files changed, 27 insertions(+), 56 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 4678b52..f7d406c 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,6 +7,8 @@ Backwards incompatible features are highlighted in **bold**. latest ------ +* **removed pseudotask creation** (every task must have a ``command`` key) + * specifying alternative yaml configuration (#62) * incorporated deterministic ordering in a predictable and explainable diff --git a/docs/quick_start.rst b/docs/quick_start.rst index 138ed8c..21a5061 100644 --- a/docs/quick_start.rst +++ b/docs/quick_start.rst @@ -23,9 +23,7 @@ quick start create the resource defined in ``creates``. You can optionally define a ``depends`` key that lists resources, either filenames on disk or other task ``creates`` targets, to quickly set up - dependency chains. You can optionally omit the ``command`` key to - create pseudotasks that are collections of other tasks for quickly - running a subcomponent of the analysis. + dependency chains. 3. *Execute your workflow.* From the same directory as the ``flo.yaml`` file (or any subdirectory), execute ``flo run`` and diff --git a/docs/yaml_specification.rst b/docs/yaml_specification.rst index 38dd2e6..291fb50 100644 --- a/docs/yaml_specification.rst +++ b/docs/yaml_specification.rst @@ -59,10 +59,11 @@ dependencies when it constructs the task graph but always runs in a command ''''''' -The ``command`` key defines the command(s) that should be executed to -produce the resource specified by the ``creates`` key. Like the -``depends`` key, multiple steps can be defined in a `YAML -list `__ like this: +The ``command`` key is mandatory and it defines the command(s) that +should be executed to produce the resource specified by the +``creates`` key. Like the ``depends`` key, multiple steps can be +defined in a `YAML list `__ +like this: .. code-block:: yaml @@ -70,18 +71,6 @@ list `__ like this: - "mkdir -p $(dirname {{creates}})" - "python {{depends}} > {{creates}}" -If the ``command`` key is omitted, this task is treated like a -pseudotask to make it easy to group together a collection of other tasks -like this: - -.. code-block:: yaml - - creates: "figures" # name of pseudotask - depends: - - "path/to/figure/a.png" # refers to another task in flo.yaml - - "path/to/figure/b.png" # refers to another task in flo.yaml - - "path/to/figure/c.png" # refers to another task in flo.yaml - .. _yaml-templating-variables: templating variables diff --git a/examples/model-correlations/flo.yaml b/examples/model-correlations/flo.yaml index d9c857d..5b599ab 100644 --- a/examples/model-correlations/flo.yaml +++ b/examples/model-correlations/flo.yaml @@ -51,13 +51,3 @@ tasks: - data/x_y.dat - src/loaders.py command: python {{depends[:2]|join(' ')}} {{y_col}} > {{creates}} - - # this is an example of a pseudotask that is a convenient alias for - # several subcommands. pseudotasks are tasks that have no `command` - # associated with them. - - - creates: analysis - depends: - - data/x_cdf.dat - - data/y_cdf.dat - - data/x_y_correlation.dat diff --git a/flo/resources/__init__.py b/flo/resources/__init__.py index ea78edf..920cde8 100644 --- a/flo/resources/__init__.py +++ b/flo/resources/__init__.py @@ -36,5 +36,4 @@ def add_to_task(task): # instantiate the resources associated with this task here # to make sure we can resolve aliases if they exist. get_or_create(task, task.depends_list, 'depends') - if not task.is_pseudotask(): - get_or_create(task, task.creates_list, 'creates') + get_or_create(task, task.creates_list, 'creates') diff --git a/flo/tasks/graph.py b/flo/tasks/graph.py index 9186a9e..905279c 100644 --- a/flo/tasks/graph.py +++ b/flo/tasks/graph.py @@ -149,7 +149,7 @@ def get_sink_tasks(self): def get_out_of_sync_tasks(self): out_of_sync_tasks = [] for task in self.iter_tasks(): - if not task.is_pseudotask() and not task.in_sync(): + if not task.in_sync(): out_of_sync_tasks.append(task) return out_of_sync_tasks @@ -273,8 +273,7 @@ def get_user_clean_confirmation(self, task_list=None, if include_internals: self.logger.info(green(self.internals_path)) for task in task_list: - if not task.is_pseudotask(): - self.logger.info(task.creates_message()) + self.logger.info(task.creates_message()) yesno = raw_input(colors.red("Delete aforementioned files? [Y/n] ")) if yesno == '': yesno = 'y' @@ -335,12 +334,11 @@ def duration_message(self, tasks, color=colors.blue): min_duration += self.task_durations.get(task.id, 0.0) max_duration, n_unknown, n_tasks = 0.0, 0, 0 for task in self.iter_tasks(tasks): - if not task.is_pseudotask(): - n_tasks += 1 - try: - max_duration += self.task_durations[task.id] - except KeyError: - n_unknown += 1 + n_tasks += 1 + try: + max_duration += self.task_durations[task.id] + except KeyError: + n_unknown += 1 msg = '' if n_unknown > 0: msg += "There are %d new tasks with unknown durations.\n" % ( @@ -393,14 +391,14 @@ def run_all(self, mock_run=False): in sync or not. """ def do_run_func(task): - return not task.is_pseudotask() + return True self._run_helper(None, do_run_func, mock_run) def run_all_out_of_sync(self, mock_run=False): """Execute all tasks in the workflow that are out of sync at runtime. """ def do_run_func(task): - return not task.is_pseudotask() and not task.in_sync() + return not task.in_sync() self._run_helper(self.get_out_of_sync_tasks(), do_run_func, mock_run) @property diff --git a/flo/tasks/task.py b/flo/tasks/task.py index e68dfd6..98ae764 100644 --- a/flo/tasks/task.py +++ b/flo/tasks/task.py @@ -38,6 +38,10 @@ def __init__(self, graph, creates=None, depends=None, raise InvalidTaskDefinition( "every task must define a `creates`" ) + if self._command is None: + raise InvalidTaskDefinition( + "every task must define a `command`" + ) # remember other attributes of this Task for rendering # purposes below @@ -122,9 +126,8 @@ def root_directory(self): return self.graph.root_directory def iter_resources(self): - if not self.is_pseudotask(): - for resource in self.creates_resources: - yield resource + for resource in self.creates_resources: + yield resource for resource in self.depends_resources: yield resource @@ -186,11 +189,6 @@ def get_current_state(self): msg += k + str(self.attrs[k]) return self.get_stream_state(StringIO.StringIO(msg)) - def is_pseudotask(self): - """Check to see if this task is a pseudotask. - """ - return self._command is None - def in_sync(self): """Test whether this task is in sync with the stored state and needs to be executed @@ -219,9 +217,8 @@ def clean_command(self): def clean(self): """Remove the specified target""" - if not self.is_pseudotask(): - self.run(self.clean_command()) - self.graph.logger.info("removed %s" % self.creates_message()) + self.run(self.clean_command()) + self.graph.logger.info("removed %s" % self.creates_message()) def mock_run(self): """Mock run this task by displaying output as if it were run""" @@ -276,8 +273,6 @@ def render_command_template(self): """Uses jinja template syntax to render the command from the other data specified in the YAML file """ - if self.is_pseudotask(): - return None return self.render_template(self._command) def duration_message(self, color=colors.blue): diff --git a/tests/run_functional_tests.sh b/tests/run_functional_tests.sh index dc9a753..5a95f97 100755 --- a/tests/run_functional_tests.sh +++ b/tests/run_functional_tests.sh @@ -69,7 +69,7 @@ validate_example () { # checksum by just running this script and determining what the # correct checksum is validate_example hello-world 040bf35be21ac0a3d6aa9ff4ff25df24 -validate_example model-correlations 14ba1ffc4c37cd306bf415107d6edfd1 +validate_example model-correlations c2e4ae57ff2d970a076b364bab87a87f # this runs specific tests for the --start-at option cd $BASEDIR