Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accommodating pipes in programs.py #387

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 65 additions & 16 deletions src/toil_scripts/lib/programs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import subprocess
import logging
from bd2k.util.exceptions import panic
from toil_scripts.lib import require

_log = logging.getLogger(__name__)

Expand All @@ -15,34 +16,55 @@ def mock_mode():
return True if int(os.environ.get('TOIL_SCRIPTS_MOCK_MODE', '0')) else False


def docker_call(tool,
def docker_call(tool=None,
tools=None,
parameters=None,
work_dir='.',
rm=True,
env=None,
outfile=None,
errfile=None,
inputs=None,
outputs=None,
docker_parameters=None,
check_output=False,
return_stderr=False,
mock=None):
"""
Calls Docker, passing along parameters and tool.
:param str tool: Name of the Docker image to be used (e.g. quay.io/ucsc_cgl/samtools)
:param (str tool | str tools): str tool name of the Docker image to be used (e.g. tool='quay.io/ucsc_cgl/samtools')
OR str tools of the Docker images and order to be used when piping commands to
Docker. (e.g. 'quay.io/ucsc_cgl/samtools'). Both tool and tools are mutually
exclusive parameters to docker_call.
:param list[str] parameters: Command line arguments to be passed to the tool
:param str work_dir: Directory to mount into the container via `-v`. Destination convention is /data
:param bool rm: Set to True to pass `--rm` flag.
:param dict[str,str] env: Environment variables to be added (e.g. dict(JAVA_OPTS='-Xmx15G'))
:param bool sudo: If True, prepends `sudo` to the docker call
:param file outfile: Pipe output of Docker call to file handle
:param file outfile: Pipe stdout of Docker call to file handle
:param file errfile: Pipe stderr of Docker call to file handle
:param list[str] inputs: A list of the input files.
:param dict[str,str] outputs: A dictionary containing the outputs files as keys with either None
or a url. The value is only used if mock=True
:param dict[str,str] docker_parameters: Parameters to pass to docker
:param bool check_output: When True, this function returns docker's output
:param bool return_stderr: When True, this function includes stderr in docker's output
:param bool mock: Whether to run in mock mode. If this variable is unset, its value will be determined by
the environment variable.
Pipes in docker commands:
Running a pipe in docker in 'pipe-in-single-container' mode produces command structure
docker '... | ... | ...' where each '...' command corresponds to each element in the 'parameters'
argument that uses a docker container. This is the most efficient method if you want to run a pipe of
commands where each command uses the same docker container.
Example for running command 'head -c 1M /dev/urandom | gzip | gunzip | md5sum 1>&2':
Running 'pipe-in-single-container' mode:
command= ['head -c 1M /dev/urandom', 'gzip', 'gunzip', 'md5sum 1>&2']
docker_work_dir=curr_work_dir
docker_tools='ubuntu'
stdout = docker_call(work_dir=docker_work_dir, parameters=command, tools=docker_tools, check_output=True)
"""
from toil_scripts.lib.urls import download_url

Expand Down Expand Up @@ -83,37 +105,56 @@ def docker_call(tool,
if env:
for e, v in env.iteritems():
base_docker_call.extend(['-e', '{}={}'.format(e, v)])

if docker_parameters:
base_docker_call += docker_parameters

docker_call = []

require(bool(tools) != bool(tool), 'Either "tool" or "tools" must contain a value, but not both')

# Pipe functionality
# each element in the parameters list must represent a sub-pipe command
if bool(tools):
# If tools is set then format the docker call in the 'pipe-in-single-container' mode
docker_call = " ".join(base_docker_call + ['--entrypoint /bin/bash', tools, '-c \'{}\''.format(" | ".join(parameters))])
_log.debug("Calling docker with %s." % docker_call)

else:
docker_call = " ".join(base_docker_call + [tool] + parameters)
_log.debug("Calling docker with %s." % docker_call)

_log.debug("Calling docker with %s." % " ".join(base_docker_call + [tool] + parameters))

docker_call = base_docker_call + [tool] + parameters


try:
if outfile:
subprocess.check_call(docker_call, stdout=outfile)
if errfile:
subprocess.check_call(docker_call, stdout=outfile, stderr=errfile, shell=True)
else:
subprocess.check_call(docker_call, stdout=outfile, shell=True)
else:
if check_output:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

… or return_stderr

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And can you add a ticket reminding us to rename check_output to return_stdout ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So essentially can you make it so that check_output=True yields only stdout, return_stderr=True yields only stderr, while check_output=True, return_stderr=True yields both?

return subprocess.check_output(docker_call)
if return_stderr:
return subprocess.check_output(docker_call, shell=True, stderr=subprocess.STDOUT)
else:
return subprocess.check_output(docker_call, shell=True)
else:
subprocess.check_call(docker_call)
subprocess.check_call(docker_call, shell=True)
# Fix root ownership of output files
except:
# Panic avoids hiding the exception raised in the try block
with panic():
_fix_permissions(base_docker_call, tool, work_dir)
_fix_permissions(base_docker_call, tool, tools, work_dir)
else:
_fix_permissions(base_docker_call, tool, work_dir)
_fix_permissions(base_docker_call, tool, tools, work_dir)

for filename in outputs.keys():
if not os.path.isabs(filename):
filename = os.path.join(work_dir, filename)
assert(os.path.isfile(filename))


def _fix_permissions(base_docker_call, tool, work_dir):
"""
def _fix_permissions(base_docker_call, tool, tools, work_dir):
"""
Fix permission of a mounted Docker directory by reusing the tool
:param list base_docker_call: Docker run parameters
Expand All @@ -122,5 +163,13 @@ def _fix_permissions(base_docker_call, tool, work_dir):
"""
base_docker_call.append('--entrypoint=chown')
stat = os.stat(work_dir)
command = base_docker_call + [tool] + ['-R', '{}:{}'.format(stat.st_uid, stat.st_gid), '/data']
subprocess.check_call(command)
if tools:
command = base_docker_call + [tools] + ['-R', '{}:{}'.format(stat.st_uid, stat.st_gid), '/data']
subprocess.check_call(command)
else:
command = base_docker_call + [tool] + ['-R', '{}:{}'.format(stat.st_uid, stat.st_gid), '/data']
subprocess.check_call(command)




14 changes: 13 additions & 1 deletion src/toil_scripts/lib/test/test_programs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os

import re

def test_docker_call(tmpdir):
from toil_scripts.lib.programs import docker_call
Expand All @@ -12,3 +12,15 @@ def test_docker_call(tmpdir):
with open(fpath, 'w') as f:
docker_call(tool='ubuntu', env=dict(foo='bar'), parameters=['printenv', 'foo'], outfile=f)
assert open(fpath).read() == 'bar\n'

# Test pipe functionality
# download ubuntu docker image
docker_call(work_dir=work_dir, tool="ubuntu")
command1 = ['head -c 1G /dev/urandom | tee /data/first', 'gzip', 'gunzip', 'md5sum 1>&2']
command2 = ['md5sum /data/first 1>&2']
# Test 'pipe-in-single-container' mode
stdout1 = docker_call(work_dir=work_dir, parameters=command1, tools='ubuntu', check_output=True, return_stderr=True)
stdout2 = docker_call(work_dir=work_dir, parameters=command2, tool='ubuntu', check_output=True, return_stderr=True)
test1 = re.findall(r"([a-fA-F\d]{32})", stdout1)
test2 = re.findall(r"([a-fA-F\d]{32})", stdout2)
assert test1[0] == test2[0]