Skip to content

Commit

Permalink
format w snakefmt
Browse files Browse the repository at this point in the history
  • Loading branch information
jbloom committed Apr 11, 2024
1 parent 5153af5 commit 6e72204
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 14 deletions.
24 changes: 12 additions & 12 deletions funcs.smk
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def process_miscellaneous_plates(misc_plates_d):
for plate, plate_dict in misc_plates_d.items():
misc_plates[plate] = {}
if not req_keys.issubset(plate_dict):
raise ValueError(f"miscellaneous_plate {plate} lacks {req_keys=}")
raise ValueError(f"miscellaneous_plate {plate} lacks {req_keys =}")
misc_plates[plate]["viral_library"] = plate_dict["viral_library"]
misc_plates[plate]["neut_standard_set"] = plate_dict["neut_standard_set"]
samples = pd.read_csv(plate_dict["samples_csv"])
Expand Down Expand Up @@ -48,29 +48,29 @@ def process_plate(plate, plate_params):
"curvefit_qc",
}
if not req_plate_params.issubset(plate_params):
raise ValueError(f"{plate=} {plate_params=} lacks {req_plate_params=}")
raise ValueError(f"{plate =} {plate_params =} lacks {req_plate_params =}")
if plate_params["viral_library"] not in viral_libraries:
raise ValueError(
f"{plate=} {plate_params['viral_library']=} not in {viral_libraries=}"
f"{plate =} {plate_params['viral_library'] =} not in {viral_libraries =}"
)
if plate_params["neut_standard_set"] not in neut_standard_sets:
raise ValueError(
f"{plate=} {plate_params['neut_standard_set']=} not in {neut_standard_sets=}"
f"{plate =} {plate_params['neut_standard_set'] =} not in {neut_standard_sets =}"
)
plate_d = copy.deepcopy(plate_params)
plate_d["group"] = str(plate_d["group"])
plate_d["date"] = str(plate_d["date"])
if not re.fullmatch("\d{4}\-\d{2}\-\d{2}", str(plate_d["date"])):
raise ValueError(f"{plate=} {plate_d['date']=} not in YYYY-MM-DD format")
raise ValueError(f"{plate =} {plate_d['date'] =} not in YYYY-MM-DD format")

# Process samples_csv to create the sample data frame
req_sample_cols = ["well", "serum", "dilution_factor", "replicate", "fastq"]
samples_df = pd.read_csv(plate_params["samples_csv"], comment="#")
if not set(req_sample_cols).issubset(samples_df.columns):
raise ValueError(f"{plate=} {samples_df.columns=} lacks {req_sample_cols=}")
raise ValueError(f"{plate =} {samples_df.columns =} lacks {req_sample_cols =}")

if samples_df["serum"].isnull().any():
raise ValueError(f"{plate=} 'samples_csv' has null values in 'serum' column")
raise ValueError(f"{plate =} 'samples_csv' has null values in 'serum' column")

# try to turn columns of ints and NAs into Int64 to avoid ints appearing as floats
for col in ["replicate", "dilution_factor"]:
Expand Down Expand Up @@ -113,7 +113,7 @@ def process_plate(plate, plate_params):
plate_replicate=lambda x: x.apply(
lambda row: (
plate
+ ("" if row["one_serum_replicate"] else f"{-row['replicate']}")
+ ("" if row["one_serum_replicate"] else f"{- row['replicate']}")
),
axis=1,
),
Expand All @@ -136,17 +136,17 @@ def process_plate(plate, plate_params):
.drop(columns="duplicates")
)
if len(dup_rows):
raise ValueError(f"{plate=} has duplicated serum / replicates:\n{dup_rows}")
raise ValueError(f"{plate =} has duplicated serum / replicates:\n{dup_rows}")

# make sure dilution_factor is valid
if not (
(samples_df["dilution_factor"] >= 1) | (samples_df["serum"] == "none")
).all():
raise ValueError(f"{plate=} has dilution factors not >= 1 for non-none serum")
raise ValueError(f"{plate =} has dilution factors not >= 1 for non-none serum")

# make sure there is at least one "none" sample
if "none" not in set(samples_df["serum"]):
raise ValueError(f"{plate=} has no samples with serum set to 'none'")
raise ValueError(f"{plate =} has no samples with serum set to 'none'")

# make sure fastqs are unique
dup_fastqs = (
Expand All @@ -157,7 +157,7 @@ def process_plate(plate, plate_params):
.drop(columns="duplicates")
)
if len(dup_fastqs):
raise ValueError(f"{plate=} has duplicate FASTQs:\n{dup_fastqs}")
raise ValueError(f"{plate =} has duplicate FASTQs:\n{dup_fastqs}")

plate_d["samples"] = samples_df

Expand Down
4 changes: 2 additions & 2 deletions seqneut-pipeline.smk
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ plates = {
groups = sorted(set(plate_params["group"] for plate_params in plates.values()))
groups_cannot_contain = ["|", "_"] # wildcard problems if group contains these
if any(s in group for s in groups_cannot_contain for group in groups):
raise ValueError(f"found {groups_cannot_contain=} character in {groups=}")
raise ValueError(f"found {groups_cannot_contain =} character in {groups =}")


wildcard_constraints:
group="|".join(groups),


if not set(config["sera_override_defaults"]).issubset(groups):
raise ValueError(f"{config['sera_override_defaults']=} keyed by invalid groups")
raise ValueError(f"{config['sera_override_defaults'] =} keyed by invalid groups")


samples = pd.concat(
Expand Down

0 comments on commit 6e72204

Please sign in to comment.