diff --git a/docs/pipeline.md b/docs/pipeline.md index 364a5fb3..1e887fd3 100644 --- a/docs/pipeline.md +++ b/docs/pipeline.md @@ -14,6 +14,7 @@ seqnado-config chip # options -r, --rerun # Re-run the config -g, --genome [dm6|hg19|hg38|hg38_dm6|hg38_mm39|hg38_spikein|mm10|mm39|other] # Genome to use if genome preset is configured + ``` You should get somthing like this: diff --git a/seqnado/utils.py b/seqnado/utils.py index b3e40f68..88acabee 100644 --- a/seqnado/utils.py +++ b/seqnado/utils.py @@ -145,15 +145,16 @@ def sample_name(self) -> str: @property def sample_base(self) -> str: to_sub = { - r"_\S\d+_": "_", + r"_S\d+_": "_", r"_L00\d_": "_", - r"_R?[12](_001)?$": "", + r"_R?[12](_001)?$": "_", + r"__": "_", + r"_$": "", } base = self.sample_name for pattern, rep in to_sub.items(): base = re.sub(pattern, rep, base) - return base @computed_field @@ -230,13 +231,6 @@ def predict_ip(self) -> Optional[str]: logger.warning(f"Could not predict IP for {self.sample_base}") return None - def sample_name_without_antibody(self) -> str: - """ - Return the sample name without the antibody name. - - """ - return re.sub(f"_{self.antibody}_", "_", self.sample_name) - def predict_is_control(self) -> bool: """ Return True if the fastq file is an input. @@ -253,12 +247,15 @@ def sample_base_without_ip(self) -> str: Return the sample base without the antibody name. """ - return re.sub( - f"(_{self.ip})?(_S\\d+)?(_L00\\d)?(_R?[12])?(_001)?", + pattern = f"(_{self.ip})?(_S\\d+)?(_L00\\d)?(_R?[12])?(_001)?" + base = re.sub( + pattern, "", self.sample_name, ) + return base + class AssayNonIP(BaseModel): name: str = Field(default=None, description="Name of the assay")