Skip to content

Commit

Permalink
got rid of pseudotasks
Browse files Browse the repository at this point in the history
  • Loading branch information
Dean Malmgren committed May 12, 2014
1 parent a0d28d1 commit d4434ee
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 56 deletions.
2 changes: 2 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Backwards incompatible features are highlighted in **bold**.
latest
------

* **removed pseudotask creation** (every task must have a ``command`` key)

* specifying alternative yaml configuration (#62)

* incorporated deterministic ordering in a predictable and explainable
Expand Down
4 changes: 1 addition & 3 deletions docs/quick_start.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ quick start
create the resource defined in ``creates``. You can optionally
define a ``depends`` key that lists resources, either filenames on
disk or other task ``creates`` targets, to quickly set up
dependency chains. You can optionally omit the ``command`` key to
create pseudotasks that are collections of other tasks for quickly
running a subcomponent of the analysis.
dependency chains.

3. *Execute your workflow.* From the same directory as the
``flo.yaml`` file (or any subdirectory), execute ``flo run`` and
Expand Down
21 changes: 5 additions & 16 deletions docs/yaml_specification.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,29 +59,18 @@ dependencies when it constructs the task graph but always runs in a
command
'''''''

The ``command`` key defines the command(s) that should be executed to
produce the resource specified by the ``creates`` key. Like the
``depends`` key, multiple steps can be defined in a `YAML
list <http://en.wikipedia.org/wiki/YAML#Lists>`__ like this:
The ``command`` key is mandatory. It defines the command(s) that
should be executed to produce the resource specified by the
``creates`` key. Like the ``depends`` key, multiple steps can be
defined in a `YAML list <http://en.wikipedia.org/wiki/YAML#Lists>`__
like this:

.. code-block:: yaml
command:
- "mkdir -p $(dirname {{creates}})"
- "python {{depends}} > {{creates}}"
If the ``command`` key is omitted, this task is treated like a
pseudotask to make it easy to group together a collection of other tasks
like this:

.. code-block:: yaml
creates: "figures" # name of pseudotask
depends:
- "path/to/figure/a.png" # refers to another task in flo.yaml
- "path/to/figure/b.png" # refers to another task in flo.yaml
- "path/to/figure/c.png" # refers to another task in flo.yaml
.. _yaml-templating-variables:

templating variables
Expand Down
10 changes: 0 additions & 10 deletions examples/model-correlations/flo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,3 @@ tasks:
- data/x_y.dat
- src/loaders.py
command: python {{depends[:2]|join(' ')}} {{y_col}} > {{creates}}

# this is an example of a pseudotask that is a convenient alias for
# several subcommands. pseudotasks are tasks that have no `command`
# associated with them.
-
creates: analysis
depends:
- data/x_cdf.dat
- data/y_cdf.dat
- data/x_y_correlation.dat
3 changes: 1 addition & 2 deletions flo/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,4 @@ def add_to_task(task):
# instantiate the resources associated with this task here
# to make sure we can resolve aliases if they exist.
get_or_create(task, task.depends_list, 'depends')
if not task.is_pseudotask():
get_or_create(task, task.creates_list, 'creates')
get_or_create(task, task.creates_list, 'creates')
20 changes: 9 additions & 11 deletions flo/tasks/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def get_sink_tasks(self):
def get_out_of_sync_tasks(self):
out_of_sync_tasks = []
for task in self.iter_tasks():
if not task.is_pseudotask() and not task.in_sync():
if not task.in_sync():
out_of_sync_tasks.append(task)
return out_of_sync_tasks

Expand Down Expand Up @@ -273,8 +273,7 @@ def get_user_clean_confirmation(self, task_list=None,
if include_internals:
self.logger.info(green(self.internals_path))
for task in task_list:
if not task.is_pseudotask():
self.logger.info(task.creates_message())
self.logger.info(task.creates_message())
yesno = raw_input(colors.red("Delete aforementioned files? [Y/n] "))
if yesno == '':
yesno = 'y'
Expand Down Expand Up @@ -335,12 +334,11 @@ def duration_message(self, tasks, color=colors.blue):
min_duration += self.task_durations.get(task.id, 0.0)
max_duration, n_unknown, n_tasks = 0.0, 0, 0
for task in self.iter_tasks(tasks):
if not task.is_pseudotask():
n_tasks += 1
try:
max_duration += self.task_durations[task.id]
except KeyError:
n_unknown += 1
n_tasks += 1
try:
max_duration += self.task_durations[task.id]
except KeyError:
n_unknown += 1
msg = ''
if n_unknown > 0:
msg += "There are %d new tasks with unknown durations.\n" % (
Expand Down Expand Up @@ -393,14 +391,14 @@ def run_all(self, mock_run=False):
in sync or not.
"""
def do_run_func(task):
return not task.is_pseudotask()
return True
self._run_helper(None, do_run_func, mock_run)

def run_all_out_of_sync(self, mock_run=False):
"""Execute all tasks in the workflow that are out of sync at runtime.
"""
def do_run_func(task):
return not task.is_pseudotask() and not task.in_sync()
return not task.in_sync()
self._run_helper(self.get_out_of_sync_tasks(), do_run_func, mock_run)

@property
Expand Down
21 changes: 8 additions & 13 deletions flo/tasks/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ def __init__(self, graph, creates=None, depends=None,
raise InvalidTaskDefinition(
"every task must define a `creates`"
)
if self._command is None:
raise InvalidTaskDefinition(
"every task must define a `command`"
)

# remember other attributes of this Task for rendering
# purposes below
Expand Down Expand Up @@ -122,9 +126,8 @@ def root_directory(self):
return self.graph.root_directory

def iter_resources(self):
if not self.is_pseudotask():
for resource in self.creates_resources:
yield resource
for resource in self.creates_resources:
yield resource
for resource in self.depends_resources:
yield resource

Expand Down Expand Up @@ -186,11 +189,6 @@ def get_current_state(self):
msg += k + str(self.attrs[k])
return self.get_stream_state(StringIO.StringIO(msg))

def is_pseudotask(self):
"""Check to see if this task is a pseudotask.
"""
return self._command is None

def in_sync(self):
"""Test whether this task is in sync with the stored state and
needs to be executed
Expand Down Expand Up @@ -219,9 +217,8 @@ def clean_command(self):

def clean(self):
"""Remove the specified target"""
if not self.is_pseudotask():
self.run(self.clean_command())
self.graph.logger.info("removed %s" % self.creates_message())
self.run(self.clean_command())
self.graph.logger.info("removed %s" % self.creates_message())

def mock_run(self):
"""Mock run this task by displaying output as if it were run"""
Expand Down Expand Up @@ -276,8 +273,6 @@ def render_command_template(self):
"""Uses jinja template syntax to render the command from the other
data specified in the YAML file
"""
if self.is_pseudotask():
return None
return self.render_template(self._command)

def duration_message(self, color=colors.blue):
Expand Down
2 changes: 1 addition & 1 deletion tests/run_functional_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ validate_example () {
# checksum by just running this script and determining what the
# correct checksum is
validate_example hello-world 040bf35be21ac0a3d6aa9ff4ff25df24
validate_example model-correlations 14ba1ffc4c37cd306bf415107d6edfd1
validate_example model-correlations c2e4ae57ff2d970a076b364bab87a87f

# this runs specific tests for the --start-at option
cd $BASEDIR
Expand Down

0 comments on commit d4434ee

Please sign in to comment.