Skip to content

Commit

Permalink
Merge branch 'main' of github.com:bihealth/StemCNV-check
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicolai-vKuegelgen committed Nov 1, 2024
2 parents 91f8fd0 + 36ed378 commit de7b829
Show file tree
Hide file tree
Showing 9 changed files with 32 additions and 27 deletions.
4 changes: 4 additions & 0 deletions stemcnv_check/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ def setup_argparse():
group_snake.add_argument('--no-cache', action='store_true',
help="Do not use a chache directory. The cache is used for workflow created metadata "
"(conda envs, singularity images, and VEP data). The default cache path is defined in the conifg file.")
group_snake.add_argument('--bind-points',
help="Additional bind points for apptainer containers, intended for expter users. "
"Use i.e. '/path' to make it availbale in apptainer, useful in case local directory "
"contains symlinks that won't resolve in the container.")

group_snake.add_argument('--target', '-t', default='complete',
choices=('complete', 'report', 'collate-summary', 'summary-tables', 'collate-cnv-calls',
Expand Down
20 changes: 13 additions & 7 deletions stemcnv_check/app/make_staticdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,15 @@ def run_staticdata_workflow(args, array_name):
logging.info('All static files are present')
return 0

# Check if vcf file is present, generate one if none are
sample_data = read_sample_table(args.sample_table, args.column_remove_regex)
# Check if vcf file _for the selected array_ is present, generate one if none are
sample_data_df = read_sample_table(args.sample_table, args.column_remove_regex, return_type='dataframe')
sample_data_df = sample_data_df[sample_data_df['Array_Name'] == array_name]
datapath = config['data_path']
filter_settings = config['settings']['default-filter-set']
#FIXME (future): check if annotation is enabled/disabled
vcf_files = [os.path.join(datapath, f"{sample_id}", f"{sample_id}.annotated-SNP-data.{filter_settings}-filter.vcf.gz") for
sample_id, _, _, _, _, _ in sample_data]
vcf_files = [
os.path.join(datapath, f"{sample_id}", f"{sample_id}.annotated-SNP-data.{filter_settings}-filter.vcf.gz")
for sample_id in sample_data_df['Sample_ID']
]
vcf_present = [vcf for vcf in vcf_files if os.path.exists(vcf)]

if vcf_present:
Expand Down Expand Up @@ -151,7 +153,9 @@ def run_staticdata_workflow(args, array_name):
deployment_method=DeploymentMethod.parse_choices_set(['conda', 'apptainer']),
conda_prefix=cache_path,
apptainer_prefix=cache_path,
apptainer_args=make_apptainer_args(config, cache_path, not_existing_ok=True),
apptainer_args=make_apptainer_args(
config, cache_path, not_existing_ok=True, extra_bind_args=args.bind_points
),
),
)
.dag(
Expand Down Expand Up @@ -189,7 +193,9 @@ def run_staticdata_workflow(args, array_name):
deployment_method=DeploymentMethod.parse_choices_set(['conda', 'apptainer']),
conda_prefix=cache_path,
apptainer_prefix=cache_path,
apptainer_args=make_apptainer_args(config, cache_path, tmpdir=tmpdir, not_existing_ok=True),
apptainer_args=make_apptainer_args(
config, cache_path, tmpdir=tmpdir, not_existing_ok=True, extra_bind_args=args.bind_points
),
)
)
.dag(
Expand Down
2 changes: 1 addition & 1 deletion stemcnv_check/app/run_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def run_stemcnv_check_workflow(args):
"-s", str(importlib.resources.files(STEM_CNV_CHECK).joinpath('rules', 'StemCNV-check.smk')),
"--printshellcmds", "--rerun-incomplete",
"--sdm", "conda", "apptainer",
"--apptainer-args", make_apptainer_args(config, cache_path),
"--apptainer-args", make_apptainer_args(config, cache_path, extra_bind_args=args.bind_points),
#"--conda-frontend", args.conda_frontend,
]
if args.directory:
Expand Down
2 changes: 1 addition & 1 deletion stemcnv_check/control_files/default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ tools:
SNV_analysis:
threads: 2
memory: 20000 # "2000MB"
runtime: "1h"
runtime: "4h"
knitr:
memory: 10000 # "10000MB"
runtime: "1h"
8 changes: 7 additions & 1 deletion stemcnv_check/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def rename_func(name): return name
return sample_tb.set_index('Sample_ID', drop=False)


def make_apptainer_args(config, cache_path, tmpdir=None, not_existing_ok=False):
def make_apptainer_args(config, cache_path, tmpdir=None, not_existing_ok=False, extra_bind_args=None):
"""Collect all outside filepaths that need to be bound inside container"""

bind_points = [
Expand Down Expand Up @@ -106,6 +106,12 @@ def make_apptainer_args(config, cache_path, tmpdir=None, not_existing_ok=False):

# Sort bind_points to make testing easier (loops over config_dict add them in a non-deterministic order)
bind_point_str = "-B " + ','.join(f"'{host}':'{cont}'" for host, cont in sorted(bind_points, key=lambda x: x[1]))
# allow additional arguments to be passed
if extra_bind_args:
if isinstance(extra_bind_args, str):
extra_bind_args = [extra_bind_args]
bind_point_str = bind_point_str + ',' + ','.join(list(extra_bind_args))

logging.debug("Binding points for apptainer: " + str(bind_point_str))

return bind_point_str
Expand Down
3 changes: 1 addition & 2 deletions stemcnv_check/rules/StemCNV-check.smk
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ localrules:

sample_data = read_sample_table(SAMPLETABLE, str(config['column_remove_regex']))
sample_data_df = read_sample_table(SAMPLETABLE, str(config['column_remove_regex']), return_type='dataframe')
sample_data_full = read_sample_table(SAMPLETABLE, str(config['column_remove_regex']), return_type='list_withopt')


include: "common.smk"
Expand All @@ -79,7 +78,7 @@ include: "report_generation.smk"

def get_target_files(target=TARGET):
# Target options: ('report', 'combined-cnv-calls', 'PennCNV', 'CBS', 'SNP-data'),
all_samples = [sample_id for sample_id, _, _, _, _, _ in sample_data]
all_samples = sample_data_df['Sample_ID']

# complete
if target == "complete":
Expand Down
16 changes: 1 addition & 15 deletions stemcnv_check/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,12 @@ from stemcnv_check import STEM_CNV_CHECK, VEP_version
from stemcnv_check.helpers import config_extract, get_global_file
from stemcnv_check.exceptions import SampleConstraintError

# Makestatic Data :
# def fix_container_path(path_in, bound_to):
#
# path_in = Path(path_in)
# if bound_to == 'static':
# rel_path = path_in.name
# else:
# local_target = {
# 'snakedir': Path(importlib.resources.files(STEM_CNV_CHECK)),
# 'tmp': Path(DOWNLOAD_DIR)
# }[bound_to].absolute()
# rel_path = path_in.absolute().relative_to(local_target)
#
# return Path('/outside/') / bound_to / rel_path

def get_sample_info(wildcards):
return sample_data_df.loc[wildcards.sample_id].to_dict()


def get_ref_id(wildcards, get_sex=False):
#FIXME: switch to using sample_data_df
sample_id = wildcards.sample_id
sex, ref_id = [(s, r) for sid, _, _, _, s, r in sample_data if sid == sample_id][0]
sex = sex[0].lower()
Expand Down
1 change: 1 addition & 0 deletions stemcnv_check/rules/illumina_raw_processing.smk
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ rule run_gencall:
def get_chip(wildcards, outtype="dir_path"):
"""Get the chip name from a sample_id
Values for outtype: 'dir_path' | 'file'"""
#FIXME: switch to using sample_data_df
chip_name, chip_pos = [
(n, p) for sid, n, p, _, _, _ in sample_data if sid == wildcards.sample_id
][0]
Expand Down
3 changes: 3 additions & 0 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ def get_expected(extra=[]):
expected = get_expected(expected_extra + ["'relative/bpm_manifest.bpm':'/outside/ExampleArray/bpm_manifest.bpm'"])
assert expected == helpers.make_apptainer_args(config, None)

# test with direct addition
assert expected+',/abcdef' == helpers.make_apptainer_args(config, None, extra_bind_args='/abcdef')

# test with cache_path & auto-creation of global paths
cache_path = '/path/to/cache'
config['global_settings'] = {
Expand Down

0 comments on commit de7b829

Please sign in to comment.