Merge branch 'master' of https://github.com/alsmith151/SeqNado into feat-add-transcript-counting
alsmith151 · Mar 27, 2024 · b585e1b
2 parents 35ef12d + bdff71b
commit b585e1b
Show file tree

Hide file tree

Showing 5 changed files with 41 additions and 23 deletions.
diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml
@@ -13,7 +13,7 @@ jobs:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v4
         with:
-          python-version: "3.10"
+          python-version: '3.x'
       - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
       - uses: actions/cache@v3
         with:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -26,7 +26,7 @@ repos:
         # supported by your project here, or alternatively use
         # pre-commit's default_language_version, see
         # https://pre-commit.com/#top_level-default_language_version
-        language_version: python3.10
+        language_version: python3.12
 
   - repo: https://github.com/snakemake/snakefmt
     rev: v0.8.4 # Replace by any tag/version ≥0.2.4 : https://github.com/snakemake/snakefmt/releases

diff --git a/seqnado/workflow/config/config.yaml.jinja b/seqnado/workflow/config/config.yaml.jinja
@@ -16,7 +16,7 @@ genome:
 
 fastq_screen: "{{fastq_screen}}"
 fastq_screen_config: "{{fastq_screen_config}}"
-
+library_complexity: "{{library_complexity}}"
 remove_blacklist: "{{remove_blacklist}}"
 blacklist: "{{blacklist}}"
 

diff --git a/seqnado/workflow/rules/exogenous_norm.smk b/seqnado/workflow/rules/exogenous_norm.smk
@@ -4,8 +4,8 @@ NORM_GROUPS = NormGroups.from_design(DESIGN)
 
 use rule align_paired as align_paired_spikein with:
     params:
-        index=config["genome"]["indices"],
         options="--no-mixed --no-discordant",
+        index=config["genome"]["indices"],
     output:
         bam=temp("seqnado_output/aligned/spikein/raw/{sample}.bam"),
     resources:

diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
@@ -12,10 +12,13 @@
 import requests
 
 
-@pytest.fixture(scope="function", params=["atac", "chip", "chip-rx", "rna", "rna-rx"], autouse=True)
+@pytest.fixture(
+    scope="function", params=["atac", "chip", "chip-rx", "rna", "rna-rx"], autouse=True
+)
 def assay(request):
     return request.param
 
+
 @pytest.fixture(scope="function")
 def repo_path() -> pathlib.Path:
     return pathlib.Path(__file__).resolve().parents[1]
@@ -48,7 +51,7 @@ def config_path(workflow_path):
 
 @pytest.fixture(scope="function")
 def genome_path(test_data_path):
-    p =  test_data_path / "genome"
+    p = test_data_path / "genome"
     p.mkdir(parents=True, exist_ok=True)
     return p
 
@@ -62,8 +65,8 @@ def genome_indices_path(genome_path, assay) -> pathlib.Path:
         return genome_path / "STAR_chr21_rna_spikein"
 
 
-@pytest.fixture(scope="function", autouse=True)
-def indicies(genome_indices_path, genome_path):
+@pytest.fixture(scope="function")
+def indicies(genome_indices_path, genome_path) -> pathlib.Path:
 
     download_indices = True if not genome_indices_path.exists() else False
     suffix = genome_indices_path.with_suffix(".tar.gz").name
@@ -73,7 +76,7 @@ def indicies(genome_indices_path, genome_path):
         indicies_path = genome_indices_path / "bt2_chr21_dm6_chr2L"
     else:
         indicies_path = genome_indices_path
-    
+
     if download_indices:
 
         r = requests.get(url, stream=True)
@@ -85,7 +88,9 @@ def indicies(genome_indices_path, genome_path):
 
         tar = tarfile.open(tar_index)
 
-        if "bt2" in str(genome_indices_path): # These are individual files so need to extract to the indicies folder
+        if "bt2" in str(
+            genome_indices_path
+        ):  # These are individual files so need to extract to the indicies folder
             genome_indices_path.mkdir(parents=True, exist_ok=True)
             tar.extractall(path=genome_indices_path, filter="data")
         else:
@@ -106,7 +111,7 @@ def chromsizes(genome_path):
         url = f"https://userweb.molbiol.ox.ac.uk/public/project/milne_group/asmith/ngs_pipeline/{suffix}"
         r = requests.get(url, stream=True)
         with open(genome_path / suffix, "wb") as f:
-            f.write(r.content)    
+            f.write(r.content)
 
     return genome_path / suffix
 
@@ -127,6 +132,7 @@ def gtf(genome_path, assay, indicies):
 
     return gtf_path
 
+
 @pytest.fixture(scope="function")
 def blacklist(genome_path):
 
@@ -157,16 +163,15 @@ def fastqs(test_data_path, assay) -> list[pathlib.Path]:
     if not path.exists():
         url = f"https://userweb.molbiol.ox.ac.uk/public/project/milne_group/asmith/ngs_pipeline/fastq.tar.gz"
         r = requests.get(url, stream=True)
-        
+
         tar_path = path.with_suffix(".tar.gz")
-        
+
         with open(tar_path, "wb") as f:
             f.write(r.content)
-        
+
         with tarfile.open(tar_path) as tar:
             tar.extractall(path=path.parent, filter="data")
 
-
     match assay:
         case "atac":
             files = list(path.glob("atac*.fastq.gz"))
@@ -190,12 +195,14 @@ def run_directory(tmpdir_factory, assay):
 
 
 @pytest.fixture(scope="function")
-def user_inputs(test_data_path, indicies, chromsizes, assay, assay_type, gtf, blacklist):
+def user_inputs(
+    test_data_path, indicies, chromsizes, assay, assay_type, gtf, blacklist
+):
 
     defaults = {
         "project_name": "test",
         "genome_name": "hg38",
-        "indices": indicies,
+        "indices": str(indicies),
         "chromsizes": str(chromsizes),
         "gtf": str(gtf),
         "blacklist": str(blacklist),
@@ -298,7 +305,6 @@ def config_yaml(run_directory, user_inputs, assay_type):
 
     stdout, stderr = process.communicate(input=user_inputs)
 
-
     project_name = "test"
     date = datetime.now().strftime("%Y-%m-%d")
     config_file_path = (
@@ -307,21 +313,24 @@ def config_yaml(run_directory, user_inputs, assay_type):
     assert config_file_path.exists(), f"{assay_type} config file not created."
     return config_file_path
 
+
 @pytest.fixture(scope="function")
 def config_yaml_for_testing(config_yaml, assay):
     import yaml
+
     with open(config_yaml, "r") as f:
         config = yaml.safe_load(f)
 
     if assay == "chip":
         config["pileup_method"] = ["deeptools", "homer"]
-        config['peak_calling_method'] = ["lanceotron", "macs", "homer"]
+        config["peak_calling_method"] = ["lanceotron", "macs", "homer"]
 
     with open(config_yaml, "w") as f:
         yaml.dump(config, f)
 
     return pathlib.Path(config_yaml)
 
+
 @pytest.fixture(scope="function")
 def seqnado_run_dir(config_yaml_for_testing):
     return pathlib.Path(config_yaml_for_testing).parent
@@ -336,18 +345,17 @@ def design(seqnado_run_dir, assay_type, assay):
     if assay == "rna-rx":
         # Add deseq2 column to design file
         import pandas as pd
+
         df = pd.read_csv(seqnado_run_dir / "design.csv", index_col=0)
         df["deseq2"] = df.index.str.split("-").str[-2]
         df.to_csv(seqnado_run_dir / "design.csv")
 
-
     return seqnado_run_dir / "design.csv"
 
 
 @pytest.fixture(scope="function", autouse=True)
 def set_up(seqnado_run_dir, fastqs):
 
-
     cwd = pathlib.Path(os.getcwd())
     os.chdir(seqnado_run_dir)
 
@@ -370,15 +378,25 @@ def apptainer_args(indicies, test_data_path):
     indicies_mount = indicies.parent if not indicies.is_dir() else indicies
     tmpdir = pathlib.Path(os.environ.get("TMPDIR", "/tmp") or "/tmp")
     wd = pathlib.Path(os.getcwd()).resolve()
-    os.environ["APPTAINER_BINDPATH"]= f"{wd}:{wd}, {test_data_path}:{test_data_path}, {indicies_mount}:{indicies_mount}"
+    os.environ["APPTAINER_BINDPATH"] = (
+        f"{wd}:{wd}, {test_data_path}:{test_data_path}, {indicies_mount}:{indicies_mount}"
+    )
+
 
 @pytest.fixture(scope="function")
 def test_profile_path(workflow_path):
     return workflow_path / "envs" / "profiles" / "profile_test"
 
 
 def test_pipeline(
-    assay_type, config_yaml_for_testing, apptainer_args, cores, design, indicies, test_data_path, test_profile_path
+    assay_type,
+    config_yaml_for_testing,
+    apptainer_args,
+    cores,
+    design,
+    indicies,
+    test_data_path,
+    test_profile_path,
 ):
 
     subprocess.run(