Skip to content

Commit

Permalink
Condition added to deal with MGYGXXX.1 accessions in human-gut catalogue
Browse files: browse the repository at this point in the history
  • Loading branch information
Alejandra Escobar committed Apr 17, 2024
1 parent 7115c89 commit 7183741
Showing 1 changed file with 8 additions and 19 deletions.
27 changes: 8 additions & 19 deletions bin/pangenomeDB_builder_codon.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -51,7 +51,10 @@ def metadata_parser(catalogue_metadata):
def accessory_writer(reps_clusters, loc_prefix):
for rep in reps_clusters:
if len(reps_clusters[rep]) > 1:
- rep_prefix = rep[:-2]
+ if rep.endswith('.1'):
+ rep_prefix = rep[:-4]
+ else:
+ rep_prefix = rep[:-2]
pan_loc = (
loc_prefix
+ "/species_catalogue/"
Expand Down Expand Up @@ -85,7 +88,10 @@ def accessory_writer(reps_clusters, loc_prefix):
def annot_writer(reps_clusters, loc_prefix, pfam_desc):
for rep in reps_clusters:
core_list, core_mgygs = [], []
- rep_prefix = rep[:-2]
+ if rep.endswith('.1'):
+ rep_prefix = rep[:-4]
+ else:
+ rep_prefix = rep[:-2]
rep_loc = (
loc_prefix + "/species_catalogue/" + rep_prefix + "/" + rep + "/genome/"
)
Expand Down Expand Up @@ -205,23 +211,6 @@ def gff_parser(gff_file):
att_l = attr.split(";")
gene_id = att_l[0].replace("ID=", "")
gff_dict[gene_id] = [contig, start, end, strand]
- """
- kegg_flag, pfam_flag = 0, 0
- for attribute in att_l:
- att_key,att_val = attribute.split('=')
- if att_key == 'KEGG':
- ko = att_val.replace('ko:','')
- kegg_flag = 1
- if att_key == 'Pfam':
- pfam = att_val
- pfam_flag = 1
- if kegg_flag == 0:
- ko = '-'
- if pfam_flag == 0:
- pfam = '-'
- gff_dict[gene_id].append(ko)
- gff_dict[gene_id].append(pfam)
- """
return gff_dict


Expand Down

0 comments on commit 7183741

Please sign in to comment.