Skip to content

Commit

Permalink
test: adding batch-cos (#29)
Browse files Browse the repository at this point in the history
I have been able to add batch COS as suggested to run a hello world
workflow, but now the original workflows are no longer running, and
there is not sufficient error message in the log beyond WorkflowError to
understand what is happening.

---------

Signed-off-by: vsoch <[email protected]>
Co-authored-by: vsoch <[email protected]>
Co-authored-by: Cade Mirchandani <[email protected]>
  • Loading branch information
3 people authored Apr 25, 2024
1 parent 364107d commit 3dcfc8c
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 72 deletions.
12 changes: 12 additions & 0 deletions docs/further.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ $ snakemake --jobs 1 --executor googlebatch --googlebatch-bucket snakemake-cache
The following environment variables are available within any Google batch run:

- `BATCH_TASK_INDEX`: The index of the workflow step (which Google Batch calls a "task")
- `GOOGLEBATCH_DOCKER_PASSWORD`: your docker registry password if using the container operating system (COS) and your container requires credentials
- `GOOGLEBATCH_DOCKER_USERNAME`: your docker registry username if using the container operating system (COS) and your container requires credentials

### GPU

Expand Down Expand Up @@ -142,6 +144,15 @@ rule hello_world:
"..."
```

Note that the way to get updated names is to run:

```bash
gcloud compute images list \
--project=batch-custom-image \
--no-standard-images
```

And see [this page](https://cloud.google.com/batch/docs/view-os-images) for more details.

#### googlebatch_image_project

Expand Down Expand Up @@ -373,6 +384,7 @@ rule hello_world:
"..."
```


#### googlebatch_snippets

One or more named (or file-derived) snippets to add to setup.
Expand Down
11 changes: 2 additions & 9 deletions example/hello-world-cos/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,12 @@ gcloud compute images list \
--no-standard-images
```

Here is how to use the debian base:
Here is an example command:

```bash
GOOGLE_PROJECT=myproject
snakemake --jobs 1 --executor googlebatch --googlebatch-image-family batch-debian-11-official --googlebatch-region us-central1 --googlebatch-image-project batch-custom-image --googlebatch-project ${GOOGLE_PROJECT} --default-storage-provider s3 --default-storage-prefix s3://my-snakemake-testing
snakemake --jobs 1 --executor googlebatch --googlebatch-image-family batch-cos-stable-official --googlebatch-region us-central1 --googlebatch-image-project batch-custom-image --googlebatch-project ${GOOGLE_PROJECT} --default-storage-provider s3 --default-storage-prefix s3://my-snakemake-testing
```

And a centos.

```bash
snakemake --jobs 1 --executor googlebatch --googlebatch-image-family batch-centos-7-official --googlebatch-region us-central1 --googlebatch-image-project batch-custom-image --googlebatch-project ${GOOGLE_PROJECT} --default-storage-provider s3 --default-storage-prefix s3://my-snakemake-testing
```


See [this link](https://cloud.google.com/batch/docs/vm-os-environment-overview#supported_vm_os_images) for how to find a compatible COS image project and family.
You can also see information [here](https://cloud.google.com/batch/docs/view-os-images),
27 changes: 27 additions & 0 deletions snakemake_executor_plugin_googlebatch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,33 @@ class ExecutorSettings(ExecutorSettingsBase):
},
)

container: Optional[str] = field(
default=None,
metadata={
"help": "A custom container for use with Google Batch COS",
"env_var": False,
"required": False,
},
)

docker_password: Optional[str] = field(
default=None,
metadata={
"help": "A docker registry password for COS if credentials are required",
"env_var": True,
"required": False,
},
)

docker_username: Optional[str] = field(
default=None,
metadata={
"help": "A docker registry username for COS if credentials are required",
"env_var": True,
"required": False,
},
)

# mpitune configurations are validated on c2 and c2d instances only.
machine_type: Optional[str] = field(
default="c2-standard-4",
Expand Down
79 changes: 45 additions & 34 deletions snakemake_executor_plugin_googlebatch/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,31 @@

import snakemake_executor_plugin_googlebatch.snippet as sniputil

write_snakefile = """cat <<EOF > ./Snakefile
write_snakefile = """
#!/bin/bash
snakefile_path=$(realpath %s)
snakefile_dir=$(dirname $snakefile_path)
mkdir -p $snakefile_dir || true
cat <<EOF > $snakefile_path
%s
EOF
cat ./Snakefile
echo "Snakefile is at $snakefile_path"
cat $snakefile_path
"""

write_entrypoint = """
#!/bin/bash
mkdir -p /tmp/workdir
cat <<EOF > /tmp/workdir/entrypoint.sh
%s
# https://github.com/boto/botocore/issues/3111
python3 -m pip install boto3==1.33.11
python3 -m pip install urllib3==1.26.17
EOF
chmod +x /tmp/workdir/entrypoint.sh
cat /tmp/workdir/entrypoint.sh
"""

snakemake_base_environment = """export HOME=/root
Expand All @@ -32,6 +53,7 @@

install_snakemake = """
echo "I am batch index ${BATCH_TASK_INDEX}"
export PATH=/opt/conda/bin:${PATH}
repo=https://raw.githubusercontent.com/snakemake/snakemake-executor-plugin-googlebatch
path=main/scripts/install-snek.sh
Expand Down Expand Up @@ -73,19 +95,19 @@ class CommandWriter:
def __init__(
self,
command=None,
container=None,
snakefile=None,
snippets=None,
settings=None,
resources=None,
snakefile_path=None,
):
self.command = command
self.container = container

# This is the contents of the snakefile and not the path
# This is the contents of the snakefile
self.snakefile = snakefile
self.resources = resources
self.settings = settings
self.snakefile_path = snakefile_path

# Prepare (and validate) any provided snippets for the job
self.load_snippets(snippets)
Expand All @@ -100,23 +122,20 @@ def load_snippets(self, spec):
self.snippets = sniputil.SnippetGroup(spec, self.settings, self.resources)
self.snippets.validate()

def run(self, pre_commands=None):
def run(self):
"""
Write the command script. This is likely shared.
We allow one or more pre-commands (e.g., to download artifacts)
"""
pre_commands = pre_commands or []
command = ""
for pre_command in pre_commands:
command += pre_command + "\n"
command = "\n"

# Ensure we check for snakemake
command += "\n" + check_for_snakemake
command += check_for_snakemake

# If we have a snippet group, add snippets before installing snakemake
if self.snippets:
command += self.snippets.render_run(self.command, self.container)
command += self.snippets.render_run(self.command)

# Don't include the main command twice
if self.snippets.has_run_command_snippet:
Expand All @@ -127,18 +146,24 @@ def setup(self):
"""
Derive the correct setup command based on the family.
"""
raise NotImplementedError(f"Setup is not implemented for {self}.")
pass

def write_snakefile(self):
"""
Return templated snakefile. We do this in a separate step so
a later container step can use it.
"""
return write_snakefile % (self.snakefile_path, self.snakefile)

def _template_setup(self, template, use_container=False):
"""
Shared logic to template the setup command.
"""
command = template
command += write_snakefile % self.snakefile

# If we have a snippet group, add snippets before installing snakemake
if self.snippets:
command += self.snippets.render_setup(self.command, self.container)
command += self.snippets.render_setup(self.command)

# If we don't use a container, install snakemake to the VM
if not use_container:
Expand All @@ -153,26 +178,12 @@ class COSWriter(CommandWriter):

def setup(self):
"""
We pre-pull the container so they start at the same time.
"""
command = f"docker pull {self.container}"
return self._template_setup(command, use_container=True)

def run(self, pre_commands=None):
Setup for the container operating system means writing
the entrypoint. We do not use any snippets here, using
a container assumes what the user needs is in the
container.
"""
Write the run command script for cos.
For this command we assume the container has python as python3
"""
pre_commands = pre_commands or []
command = ""
for pre_command in pre_commands:
command += pre_command + "\n"
command += write_snakefile % self.snakefile
volume = "$PWD/Snakefile:./Snakefile"
docker = f"docker run -it -v {volume} {self.container} {self.command}"
command += docker
return command
return write_entrypoint % self.command


class DebianWriter(CommandWriter):
Expand Down
Loading

0 comments on commit 3dcfc8c

Please sign in to comment.