From 16b2e987ae4cd065ddd6b9885d4ae81f6a533dbf Mon Sep 17 00:00:00 2001 From: "Jinlong Ru@hms" Date: Sun, 4 Sep 2022 20:08:12 +0200 Subject: [PATCH] add feature_prefix --- bin/add_prophage_to_gbk.py | 38 +------------------------------------ bin/extract_CDS_from_gbk.py | 13 +++++++------ 2 files changed, 8 insertions(+), 43 deletions(-) mode change 100755 => 120000 bin/add_prophage_to_gbk.py diff --git a/bin/add_prophage_to_gbk.py b/bin/add_prophage_to_gbk.py deleted file mode 100755 index 7b0e357..0000000 --- a/bin/add_prophage_to_gbk.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python - -import click -from Bio import SeqFeature, SeqIO -import pandas as pd - - -def add_prophage(record, prophage_loc): - start_pos = SeqFeature.ExactPosition(prophage_loc['start']) - end_pos = SeqFeature.ExactPosition(prophage_loc['end']) - feature_location = SeqFeature.FeatureLocation(start_pos-1, end_pos, strand=1) - qualifiers = {"ID": prophage_loc['prophage_id'], "note": "Prophage"} - - new_feature = SeqFeature.SeqFeature(feature_location, type="misc_feature", qualifiers=qualifiers) - record.features.append(new_feature) - return record - - -@click.command() -@click.option("--gbk", '-g', help="input file") -@click.option("--prophage_coords", '-p', help="Prophage coordinates file") -@click.option("--fout", '-o', help="output file name") -def main(gbk, prophage_coords, fout): - records = list(SeqIO.parse(gbk, format="genbank")) - prophages = pd.read_csv(prophage_coords, sep='\t', names = ['ref_id', 'start', 'end', 'prophage_id']).to_dict(orient="records") - - for rec in records: - for pg_loc in prophages: - if rec.id == pg_loc['ref_id']: - rec = add_prophage(rec, pg_loc) - - with open(fout, "w") as fh: - SeqIO.write(records, fh, "genbank") - - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/bin/add_prophage_to_gbk.py b/bin/add_prophage_to_gbk.py new file mode 120000 index 0000000..5dfc829 --- /dev/null +++ b/bin/add_prophage_to_gbk.py @@ -0,0 +1 @@ +/home/viro/jinlong.ru/github/VIRO_pipelines/script/add_prophage_to_gbk.py \ No newline at end of file diff --git a/bin/extract_CDS_from_gbk.py b/bin/extract_CDS_from_gbk.py index a884daf..d6e9b54 100755 --- a/bin/extract_CDS_from_gbk.py +++ b/bin/extract_CDS_from_gbk.py @@ -4,7 +4,7 @@ from Bio import SeqIO, SeqRecord, Seq -def extract_CDSs(gbks, fout_DNA, fout_AA): +def extract_CDSs(gbks, fout_DNA, fout_AA, feature_prefix): cds_DNAs = [] cds_AAs = [] for gbk in gbks: @@ -15,11 +15,11 @@ def extract_CDSs(gbks, fout_DNA, fout_AA): protein_seq = cds_feature.qualifiers.get('translation') if protein_seq: protein_seq = protein_seq[0] - cds_AA = SeqRecord.SeqRecord(seq=Seq.Seq(protein_seq), id=locus_tag, description="") + cds_AA = SeqRecord.SeqRecord(seq=Seq.Seq(protein_seq), id="{}{}".format(feature_prefix, locus_tag), description="") cds_AAs.append(cds_AA) cds_DNA = cds_feature.location.extract(gbk) - cds_DNA.id=locus_tag + cds_DNA.id="{}{}".format(feature_prefix, locus_tag) cds_DNA.description = "" cds_DNAs.append(cds_DNA) @@ -38,7 +38,8 @@ def extract_genome(gbks, fout): @click.command() @click.option("--fgbk", '-i', help="Genbank file") @click.option("--fout_prefix", '-o', help="Genbank file") -def main(fgbk, fout_prefix): +@click.option("--feature_prefix", '-p', default="", required=False, help="Genbank file") +def main(fgbk, fout_prefix, feature_prefix): gbks = list(SeqIO.parse(fgbk, format="genbank")) # Output @@ -47,8 +48,8 @@ def main(fgbk, fout_prefix): fout_cds_AA = "{}_protein.faa".format(fout_prefix) extract_genome(gbks, fout_genome) - extract_CDSs(gbks, fout_cds_DNA, fout_cds_AA) + extract_CDSs(gbks, fout_cds_DNA, fout_cds_AA, feature_prefix) if __name__ == '__main__': - main() \ No newline at end of file + main()