Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/alsmith151/SeqNado into feat-add-transcript-counting
Browse files Browse the repository at this point in the history
  • Loading branch information
CChahrour committed Mar 27, 2024
2 parents 35ef12d + bdff71b commit b585e1b
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: '3.x'
- run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
- uses: actions/cache@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ repos:
# supported by your project here, or alternatively use
# pre-commit's default_language_version, see
# https://pre-commit.com/#top_level-default_language_version
language_version: python3.10
language_version: python3.12

- repo: https://github.com/snakemake/snakefmt
rev: v0.8.4 # Replace by any tag/version ≥0.2.4 : https://github.com/snakemake/snakefmt/releases
Expand Down
2 changes: 1 addition & 1 deletion seqnado/workflow/config/config.yaml.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ genome:

fastq_screen: "{{fastq_screen}}"
fastq_screen_config: "{{fastq_screen_config}}"

library_complexity: "{{library_complexity}}"
remove_blacklist: "{{remove_blacklist}}"
blacklist: "{{blacklist}}"

Expand Down
2 changes: 1 addition & 1 deletion seqnado/workflow/rules/exogenous_norm.smk
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ NORM_GROUPS = NormGroups.from_design(DESIGN)

use rule align_paired as align_paired_spikein with:
params:
index=config["genome"]["indices"],
options="--no-mixed --no-discordant",
index=config["genome"]["indices"],
output:
bam=temp("seqnado_output/aligned/spikein/raw/{sample}.bam"),
resources:
Expand Down
56 changes: 37 additions & 19 deletions tests/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@
import requests


@pytest.fixture(
    scope="function", params=["atac", "chip", "chip-rx", "rna", "rna-rx"], autouse=True
)
def assay(request):
    """Return the assay name the current test is parametrised with.

    The fixture is auto-used and parametrised, so every test in this module
    runs once per entry in ``params``.

    Args:
        request: pytest's built-in request object; ``request.param`` holds
            the parameter selected for this run.

    Returns:
        One of "atac", "chip", "chip-rx", "rna" or "rna-rx".
    """
    # Exactly one decorator: pytest raises if the fixture marker is applied
    # to the same function more than once.
    return request.param


@pytest.fixture(scope="function")
def repo_path() -> pathlib.Path:
    """Return the directory two levels above this test file (resolved)."""
    this_file = pathlib.Path(__file__).resolve()
    # parent -> directory holding this file; parent.parent -> one level up.
    return this_file.parent.parent
Expand Down Expand Up @@ -48,7 +51,7 @@ def config_path(workflow_path):

@pytest.fixture(scope="function")
def genome_path(test_data_path):
    """Return the ``genome`` directory under the test data path.

    The directory (and any missing parents) is created if it does not
    already exist.
    """
    genome_dir = test_data_path / "genome"
    genome_dir.mkdir(parents=True, exist_ok=True)
    return genome_dir

Expand All @@ -62,8 +65,8 @@ def genome_indices_path(genome_path, assay) -> pathlib.Path:
return genome_path / "STAR_chr21_rna_spikein"


@pytest.fixture(scope="function", autouse=True)
def indicies(genome_indices_path, genome_path):
@pytest.fixture(scope="function")
def indicies(genome_indices_path, genome_path) -> pathlib.Path:

download_indices = True if not genome_indices_path.exists() else False
suffix = genome_indices_path.with_suffix(".tar.gz").name
Expand All @@ -73,7 +76,7 @@ def indicies(genome_indices_path, genome_path):
indicies_path = genome_indices_path / "bt2_chr21_dm6_chr2L"
else:
indicies_path = genome_indices_path

if download_indices:

r = requests.get(url, stream=True)
Expand All @@ -85,7 +88,9 @@ def indicies(genome_indices_path, genome_path):

tar = tarfile.open(tar_index)

if "bt2" in str(genome_indices_path): # These are individual files so need to extract to the indicies folder
if "bt2" in str(
genome_indices_path
): # These are individual files so need to extract to the indicies folder
genome_indices_path.mkdir(parents=True, exist_ok=True)
tar.extractall(path=genome_indices_path, filter="data")
else:
Expand All @@ -106,7 +111,7 @@ def chromsizes(genome_path):
url = f"https://userweb.molbiol.ox.ac.uk/public/project/milne_group/asmith/ngs_pipeline/{suffix}"
r = requests.get(url, stream=True)
with open(genome_path / suffix, "wb") as f:
f.write(r.content)
f.write(r.content)

return genome_path / suffix

Expand All @@ -127,6 +132,7 @@ def gtf(genome_path, assay, indicies):

return gtf_path


@pytest.fixture(scope="function")
def blacklist(genome_path):

Expand Down Expand Up @@ -157,16 +163,15 @@ def fastqs(test_data_path, assay) -> list[pathlib.Path]:
if not path.exists():
url = f"https://userweb.molbiol.ox.ac.uk/public/project/milne_group/asmith/ngs_pipeline/fastq.tar.gz"
r = requests.get(url, stream=True)

tar_path = path.with_suffix(".tar.gz")

with open(tar_path, "wb") as f:
f.write(r.content)

with tarfile.open(tar_path) as tar:
tar.extractall(path=path.parent, filter="data")


match assay:
case "atac":
files = list(path.glob("atac*.fastq.gz"))
Expand All @@ -190,12 +195,14 @@ def run_directory(tmpdir_factory, assay):


@pytest.fixture(scope="function")
def user_inputs(test_data_path, indicies, chromsizes, assay, assay_type, gtf, blacklist):
def user_inputs(
test_data_path, indicies, chromsizes, assay, assay_type, gtf, blacklist
):

defaults = {
"project_name": "test",
"genome_name": "hg38",
"indices": indicies,
"indices": str(indicies),
"chromsizes": str(chromsizes),
"gtf": str(gtf),
"blacklist": str(blacklist),
Expand Down Expand Up @@ -298,7 +305,6 @@ def config_yaml(run_directory, user_inputs, assay_type):

stdout, stderr = process.communicate(input=user_inputs)


project_name = "test"
date = datetime.now().strftime("%Y-%m-%d")
config_file_path = (
Expand All @@ -307,21 +313,24 @@ def config_yaml(run_directory, user_inputs, assay_type):
assert config_file_path.exists(), f"{assay_type} config file not created."
return config_file_path


@pytest.fixture(scope="function")
def config_yaml_for_testing(config_yaml, assay):
    """Rewrite the generated config file in place and return its path.

    The YAML file at ``config_yaml`` is loaded, adjusted for the "chip"
    assay (several pileup and peak-calling methods are listed), and then
    written back to the same location.

    Returns:
        ``config_yaml`` as a ``pathlib.Path``.
    """
    import yaml

    with open(config_yaml, "r") as handle:
        settings = yaml.safe_load(handle)

    if assay == "chip":
        settings.update(
            {
                "pileup_method": ["deeptools", "homer"],
                "peak_calling_method": ["lanceotron", "macs", "homer"],
            }
        )

    with open(config_yaml, "w") as handle:
        yaml.dump(settings, handle)

    return pathlib.Path(config_yaml)


@pytest.fixture(scope="function")
def seqnado_run_dir(config_yaml_for_testing):
    """Return the directory that contains the testing config file."""
    config_file = pathlib.Path(config_yaml_for_testing)
    return config_file.parent
Expand All @@ -336,18 +345,17 @@ def design(seqnado_run_dir, assay_type, assay):
if assay == "rna-rx":
# Add deseq2 column to design file
import pandas as pd

df = pd.read_csv(seqnado_run_dir / "design.csv", index_col=0)
df["deseq2"] = df.index.str.split("-").str[-2]
df.to_csv(seqnado_run_dir / "design.csv")


return seqnado_run_dir / "design.csv"


@pytest.fixture(scope="function", autouse=True)
def set_up(seqnado_run_dir, fastqs):


cwd = pathlib.Path(os.getcwd())
os.chdir(seqnado_run_dir)

Expand All @@ -370,15 +378,25 @@ def apptainer_args(indicies, test_data_path):
indicies_mount = indicies.parent if not indicies.is_dir() else indicies
tmpdir = pathlib.Path(os.environ.get("TMPDIR", "/tmp") or "/tmp")
wd = pathlib.Path(os.getcwd()).resolve()
os.environ["APPTAINER_BINDPATH"]= f"{wd}:{wd}, {test_data_path}:{test_data_path}, {indicies_mount}:{indicies_mount}"
os.environ["APPTAINER_BINDPATH"] = (
f"{wd}:{wd}, {test_data_path}:{test_data_path}, {indicies_mount}:{indicies_mount}"
)


@pytest.fixture(scope="function")
def test_profile_path(workflow_path):
    """Return ``envs/profiles/profile_test`` beneath the workflow path."""
    profiles_dir = workflow_path / "envs" / "profiles"
    return profiles_dir / "profile_test"


def test_pipeline(
assay_type, config_yaml_for_testing, apptainer_args, cores, design, indicies, test_data_path, test_profile_path
assay_type,
config_yaml_for_testing,
apptainer_args,
cores,
design,
indicies,
test_data_path,
test_profile_path,
):

subprocess.run(
Expand Down

0 comments on commit b585e1b

Please sign in to comment.