|
1 | 1 | import os
|
2 | 2 | import re
|
| 3 | +import glob |
3 | 4 | import pandas as pd
|
4 | 5 | from snakemake.shell import shell
|
5 | 6 |
|
|
16 | 17 | out_dir = snakemake.params["outdir"]
|
17 | 18 |
|
18 | 19 | #dam = os.path.join(data_dir, "Dam.bam")
|
19 |
| -name = re.sub(".bam$", "", os.path.basename(bam)) |
| 20 | +name = re.sub(r"\.sorted\.bam$", "", os.path.basename(bam))  # sample name: strip only a literal ".sorted.bam" suffix (dots escaped) |
20 | 21 |
|
21 |
| -# Load sample table |
22 |
| -csv = pd.read_csv("config/samples.csv") |
23 |
| - |
24 |
| -# Check if treatment column contains any NaN values, if so replace with "none" |
25 |
| -if csv["treatment"].isnull().values.any(): |
26 |
| - csv.fillna({"treatment": "none"}, inplace=True) |
27 |
| - |
28 |
| -# Combine genotypes and treatments into one condition column |
29 |
| -csv["condition"] = csv["genotype"] + "_" + csv["treatment"] |
30 |
| - |
31 |
| -# Get condition for name |
32 |
| -condition = csv[csv["sample"] == name]["condition"].tolist()[0] |
33 |
| - |
34 |
| -# Get dam sample that matches bam sample (name) condition in csv |
35 |
| -dam = csv[csv["sample"].str.contains("Dam")] |
36 |
| - |
37 |
| -if len(dam) == 1: |
38 |
| - dam = dam["sample"].tolist()[0] |
| 22 | +# Get Dam only bam file: match "dam" (case-insensitive) against the file name only, so a data_dir path that contains "dam" does not make every bam file match |
| 23 | +dam = [x for x in glob.glob(os.path.join(data_dir, "*.sorted.bam")) if "dam" in os.path.basename(x).lower()] |
| 24 | +if len(dam) == 0: |
| 25 | + raise ValueError("No Dam only bam file found...") |
| 26 | +elif len(dam) > 1: |
| 27 | + raise ValueError("Too many Dam only bam files found...") |
39 | 28 | else:
|
40 |
| - dam = dam[csv["condition"].str.contains(condition)]["sample"].tolist()[0] |
41 |
| -dam = os.path.join(data_dir, dam + ".bam") |
| 29 | + dam = dam[0] |
42 | 30 |
|
43 | 31 | # Construct MACS2 arguments
|
44 | 32 | if paired_end:
|
|
0 commit comments