Skip to content

Commit

Permalink
change in kraken parser to stop bug where names with non-alphanumeric…
Browse files Browse the repository at this point in the history
… chars don't get read
  • Loading branch information
WhalleyT committed Sep 23, 2024
1 parent c459c8f commit a916a3b
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions bin/parse_kraken_report2.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ def read_kraken_report(input, pct_threshold, num_threshold):
num_frags_direct = line[2] # defined as "number of fragments assigned directly to this taxon"
rank_code = line[3] # defined as "rank code, indicating (U)nclassified, (R)oot, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies". Takes the form of one letter, optionally followed by one number.
ncbi_taxon_id = line[4] # defined as "NCBI taxonomic ID number"
name = line[5] # defined as "scientific name"

name = re.sub(r'\W+', '', line[5]) # defined as "scientific name"; non-word characters (including spaces and punctuation) are stripped because they break the other scripts
# convert strings to float or int
pc_frags = float(pc_frags.strip())
num_frags_rooted = int(num_frags_rooted.strip())
Expand Down Expand Up @@ -257,4 +256,4 @@ def process_requirements(args):

# CREATE OUTPUT FILE
with open(out_file, 'w') as f:
json.dump(out, f, indent = 4)
json.dump(out, f, indent = 4)

0 comments on commit a916a3b

Please sign in to comment.