Skip to content

Commit

Permalink
Accommodating pipes in programs.py
Browse files Browse the repository at this point in the history
Adding in docker pipes unit test

Added docs to docker_call and fixed docker_call unit test

Changed datasize for docker_call pipes unit test to 1GB

Polished docker_call functionality

Added stderr to file handle support in docker_call

Factored out pipe_of_containers in docker_call
  • Loading branch information
cmarkello committed Aug 31, 2016
1 parent 9060b0c commit 32ce17d
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 17 deletions.
81 changes: 65 additions & 16 deletions src/toil_scripts/lib/programs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import subprocess
import logging
from bd2k.util.exceptions import panic
from toil_scripts.lib import require

_log = logging.getLogger(__name__)

Expand All @@ -15,34 +16,55 @@ def mock_mode():
return True if int(os.environ.get('TOIL_SCRIPTS_MOCK_MODE', '0')) else False


def docker_call(tool,
def docker_call(tool=None,
tools=None,
parameters=None,
work_dir='.',
rm=True,
env=None,
outfile=None,
errfile=None,
inputs=None,
outputs=None,
docker_parameters=None,
check_output=False,
return_stderr=False,
mock=None):
"""
Calls Docker, passing along parameters and tool.
:param str tool: Name of the Docker image to be used (e.g. quay.io/ucsc_cgl/samtools)
:param (str tool | str tools): str tool name of the Docker image to be used (e.g. tool='quay.io/ucsc_cgl/samtools')
OR str tools of the Docker images and order to be used when piping commands to
Docker. (e.g. 'quay.io/ucsc_cgl/samtools'). Both tool and tools are mutually
exclusive parameters to docker_call.
:param list[str] parameters: Command line arguments to be passed to the tool
:param str work_dir: Directory to mount into the container via `-v`. Destination convention is /data
:param bool rm: Set to True to pass `--rm` flag.
:param dict[str,str] env: Environment variables to be added (e.g. dict(JAVA_OPTS='-Xmx15G'))
:param bool sudo: If True, prepends `sudo` to the docker call
:param file outfile: Pipe output of Docker call to file handle
:param file outfile: Pipe stdout of Docker call to file handle
:param file errfile: Pipe stderr of Docker call to file handle
:param list[str] inputs: A list of the input files.
:param dict[str,str] outputs: A dictionary containing the outputs files as keys with either None
or a url. The value is only used if mock=True
:param dict[str,str] docker_parameters: Parameters to pass to docker
:param bool check_output: When True, this function returns docker's output
:param bool return_stderr: When True, this function includes stderr in docker's output
:param bool mock: Whether to run in mock mode. If this variable is unset, its value will be determined by
the environment variable.
Pipes in docker commands:
Running a pipe in docker in 'pipe-in-single-container' mode produces command structure
docker '... | ... | ...' where each '...' command corresponds to each element in the 'parameters'
argument that uses a docker container. This is the most efficient method if you want to run a pipe of
commands where each command uses the same docker container.
Example for running command 'head -c 1M /dev/urandom | gzip | gunzip | md5sum 1>&2':
Running 'pipe-in-single-container' mode:
command= ['head -c 1M /dev/urandom', 'gzip', 'gunzip', 'md5sum 1>&2']
docker_work_dir=curr_work_dir
docker_tools='ubuntu'
stdout = docker_call(work_dir=docker_work_dir, parameters=command, tools=docker_tools, check_output=True)
"""
from toil_scripts.lib.urls import download_url

Expand Down Expand Up @@ -83,37 +105,56 @@ def docker_call(tool,
if env:
for e, v in env.iteritems():
base_docker_call.extend(['-e', '{}={}'.format(e, v)])

if docker_parameters:
base_docker_call += docker_parameters

docker_call = []

require(bool(tools) != bool(tool), 'Either "tool" or "tools" must contain a value, but not both')

# Pipe functionality
# each element in the parameters list must represent a sub-pipe command
if bool(tools):
# If tools is set then format the docker call in the 'pipe-in-single-container' mode
docker_call = " ".join(base_docker_call + ['--entrypoint /bin/bash', tools, '-c \'{}\''.format(" | ".join(parameters))])
_log.debug("Calling docker with %s." % docker_call)

else:
docker_call = " ".join(base_docker_call + [tool] + parameters)
_log.debug("Calling docker with %s." % docker_call)

_log.debug("Calling docker with %s." % " ".join(base_docker_call + [tool] + parameters))

docker_call = base_docker_call + [tool] + parameters


try:
if outfile:
subprocess.check_call(docker_call, stdout=outfile)
if errfile:
subprocess.check_call(docker_call, stdout=outfile, stderr=errfile, shell=True)
else:
subprocess.check_call(docker_call, stdout=outfile, shell=True)
else:
if check_output:
return subprocess.check_output(docker_call)
if return_stderr:
return subprocess.check_output(docker_call, shell=True, stderr=subprocess.STDOUT)
else:
return subprocess.check_output(docker_call, shell=True)
else:
subprocess.check_call(docker_call)
subprocess.check_call(docker_call, shell=True)
# Fix root ownership of output files
except:
# Panic avoids hiding the exception raised in the try block
with panic():
_fix_permissions(base_docker_call, tool, work_dir)
_fix_permissions(base_docker_call, tool, tools, work_dir)
else:
_fix_permissions(base_docker_call, tool, work_dir)
_fix_permissions(base_docker_call, tool, tools, work_dir)

for filename in outputs.keys():
if not os.path.isabs(filename):
filename = os.path.join(work_dir, filename)
assert(os.path.isfile(filename))


def _fix_permissions(base_docker_call, tool, work_dir):
"""
def _fix_permissions(base_docker_call, tool, tools, work_dir):
"""
Fix permission of a mounted Docker directory by reusing the tool
:param list base_docker_call: Docker run parameters
Expand All @@ -122,5 +163,13 @@ def _fix_permissions(base_docker_call, tool, work_dir):
"""
base_docker_call.append('--entrypoint=chown')
stat = os.stat(work_dir)
command = base_docker_call + [tool] + ['-R', '{}:{}'.format(stat.st_uid, stat.st_gid), '/data']
subprocess.check_call(command)
if tools:
command = base_docker_call + [tools] + ['-R', '{}:{}'.format(stat.st_uid, stat.st_gid), '/data']
subprocess.check_call(command)
else:
command = base_docker_call + [tool] + ['-R', '{}:{}'.format(stat.st_uid, stat.st_gid), '/data']
subprocess.check_call(command)




14 changes: 13 additions & 1 deletion src/toil_scripts/lib/test/test_programs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os

import re

def test_docker_call(tmpdir):
from toil_scripts.lib.programs import docker_call
Expand All @@ -12,3 +12,15 @@ def test_docker_call(tmpdir):
with open(fpath, 'w') as f:
docker_call(tool='ubuntu', env=dict(foo='bar'), parameters=['printenv', 'foo'], outfile=f)
assert open(fpath).read() == 'bar\n'

# Test pipe functionality
# download ubuntu docker image
docker_call(work_dir=work_dir, tool="ubuntu")
command1 = ['head -c 1G /dev/urandom | tee /data/first', 'gzip', 'gunzip', 'md5sum 1>&2']
command2 = ['md5sum /data/first 1>&2']
# Test 'pipe-in-single-container' mode
stdout1 = docker_call(work_dir=work_dir, parameters=command1, tools='ubuntu', check_output=True, return_stderr=True)
stdout2 = docker_call(work_dir=work_dir, parameters=command2, tool='ubuntu', check_output=True, return_stderr=True)
test1 = re.findall(r"([a-fA-F\d]{32})", stdout1)
test2 = re.findall(r"([a-fA-F\d]{32})", stdout2)
assert test1[0] == test2[0]

0 comments on commit 32ce17d

Please sign in to comment.