Skip to content

Commit

Permalink
Support alternative FTP directory structure for NCBI assemblies (#15)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mihailefter committed Jan 30, 2024
1 parent 9d4031d commit 8e6584a
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 17 deletions.
10 changes: 6 additions & 4 deletions mutalyzer_retriever/cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -118,10 +118,12 @@ def _parse_args(args):

def _write_model(model, args):
    """Write a retrieved model to disk.

    When ``args.split`` is truthy, the annotations are written as JSON to
    ``<args.output>/<args.id>.annotations`` and the sequence string to
    ``<args.output>/<args.id>.sequence``. A part that is missing from
    ``model`` (or empty) is skipped, so no file is created for it.
    Otherwise the complete model is written as JSON to
    ``<args.output>/<args.id>``.

    ``model`` is a dict that may hold ``"annotations"`` and ``"sequence"``
    (the latter with a ``"seq"`` string). ``args`` must provide ``split``,
    ``output``, ``id`` and ``indent``.
    """
    base = f"{args.output}/{args.id}"
    if args.split:
        # Each part is guarded on its own: indexing a missing key directly
        # would raise KeyError before anything useful is written.
        if model.get("annotations"):
            with open(f"{base}.annotations", "w", encoding="utf-8") as f:
                f.write(json.dumps(model["annotations"], indent=args.indent))
        if model.get("sequence"):
            with open(f"{base}.sequence", "w", encoding="utf-8") as f:
                f.write(model["sequence"]["seq"])
    else:
        with open(base, "w", encoding="utf-8") as f:
            f.write(json.dumps(model, indent=args.indent))
Expand Down
33 changes: 20 additions & 13 deletions mutalyzer_retriever/sources/ncbi_assemblies.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -180,18 +180,25 @@ def _get_ftp_locations(self):
for d_d in ftp.nlst():
if d_d.endswith("annotation_report.xml"):
annotation["annotation_report"] = d_d
if d_d.startswith("GCF_") and "GRCh" in d_d:
annotation["dir"] = d_d
try:
ftp.cwd(d_d)
except error_perm:
continue
for d_f in ftp.nlst():
if d_f.endswith("_genomic.gff.gz"):
annotation["file_gff"] = d_f
elif d_f.endswith("_genomic.fna.gz"):
annotation["file_fasta"] = d_f
ftp.cwd("..")
if d_a.startswith("GCF"):
annotation["dir"] = ""
if d_d.endswith("_genomic.gff.gz"):
annotation["file_gff"] = d_d
elif d_d.endswith("_genomic.fna.gz"):
annotation["file_fasta"] = d_d
else:
if d_d.startswith("GCF_") and "GRCh" in d_d:
annotation["dir"] = d_d
try:
ftp.cwd(d_d)
except error_perm:
continue
for d_f in ftp.nlst():
if d_f.endswith("_genomic.gff.gz"):
annotation["file_gff"] = d_f
elif d_f.endswith("_genomic.fna.gz"):
annotation["file_fasta"] = d_f
ftp.cwd("..")
ftp.cwd("..")
locations.append(annotation)
print(" done")
Expand Down Expand Up @@ -354,7 +361,7 @@ def annotations_summary(models_directory, ref_id_start=None):
def _per_model():
output = {}
for file in Path(models_directory).glob(glob):
model = json.load(open(file))["annotations"]
model = json.load(open(file))
summary = {"genes": 0, "transcripts": 0, "added": 0}
if model.get("features"):
summary["genes"] += len(model["features"])
Expand Down

0 comments on commit 8e6584a

Please sign in to comment.