Skip to content

Commit

Permalink
summarise: Add --output-taxonomic-profile-with-extras.
Browse files Browse the repository at this point in the history
  • Loading branch information
wwood committed Apr 23, 2024
1 parent f89ced2 commit 3a77ca0
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 1 deletion.
7 changes: 7 additions & 0 deletions bin/singlem
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ if __name__ == '__main__':
summarise_output_args.add_argument('--output-species-by-site-level', help="Output site by species level to this file", choices=['species','genus','family','order','class','phylum','domain'], default='species')
summarise_output_args.add_argument('--output-taxonomic-level-coverage', help="Output summary of how much coverage has been assigned to each taxonomic level in a taxonomic profile to a TSV file")
summarise_output_args.add_argument('--output-filled-taxonomic-profile', help="Output a taxonomic profile where the coverage of each taxon includes the coverage of each of its descendent taxons e.g. the d__Bacteria entry includes the p__Patescibacteria entry. Requires --input-taxonomic-profiles.")
summarise_output_args.add_argument('--output-taxonomic-profile-with-extras', help="Output a taxonomic profile with extra information (coverage, 'filled' coverage, relative abundance, taxonomy level). Requires --input-taxonomic-profiles.")

read_fraction_description = 'Estimate the fraction of reads from a metagenome that are assigned to Bacteria and Archaea compared to e.g. eukaryote or phage.'
read_fraction_parser = bird_argparser.new_subparser('microbial_fraction', read_fraction_description, parser_group='Tools')
Expand Down Expand Up @@ -776,6 +777,7 @@ if __name__ == '__main__':
if args.output_species_by_site_relative_abundance: num_output_types += 1
if args.output_taxonomic_level_coverage: num_output_types += 1
if args.output_filled_taxonomic_profile: num_output_types += 1
if args.output_taxonomic_profile_with_extras: num_output_types += 1
if num_output_types != 1:
raise Exception("Exactly 1 output type must be specified, sorry, %i were provided" % num_output_types)
if not args.input_otu_tables and \
Expand Down Expand Up @@ -982,6 +984,11 @@ if __name__ == '__main__':
Summariser.write_filled_taxonomic_profile(
input_taxonomic_profile_files = args.input_taxonomic_profiles,
output_filled_taxonomic_profile_io = f)
elif args.output_taxonomic_profile_with_extras:
with open(args.output_taxonomic_profile_with_extras, 'w') as f:
Summariser.write_taxonomic_profile_with_extras(
input_taxonomic_profile_files = args.input_taxonomic_profiles,
output_taxonomic_profile_extras_io = f)
else:
raise Exception("Expected --output-taxonomic-profile-krona or --output-site-by-species-relative-abundance or --output-taxonomic-level-coverage to be defined, since --input-taxonomic-profiles was defined")

Expand Down
37 changes: 36 additions & 1 deletion singlem/summariser.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,4 +586,39 @@ def write_filled_taxonomic_profile(**kwargs):
file=output_io)

logging.info("Wrote {} filled taxonomic profiles".format(
len(input_taxonomic_profile_files)))
len(input_taxonomic_profile_files)))

@staticmethod
def write_taxonomic_profile_with_extras(**kwargs):
input_taxonomic_profile_files = kwargs.pop('input_taxonomic_profile_files')
output_io = kwargs.pop('output_taxonomic_profile_extras_io')
if len(kwargs) > 0:
raise Exception("Unexpected arguments detected: %s" % kwargs)

logging.info("Writing taxonomic profile with extras")

print("\t".join(["sample", "coverage", "full_coverage", "relative_abundance", "level", "taxonomy"]), file=output_io)

# For each profile
num_printed = 0
for profile_file in input_taxonomic_profile_files:
with open(profile_file) as f:
for profile in CondensedCommunityProfile.each_sample_wise(f):
# First get the total coverage of each taxonomic level, to act as the numerator
total_coverage = profile.tree.get_full_coverage()

# Now write out the profile
for wn in profile.breadth_first_iter():
level = wn.calculate_level()
full_coverage = wn.get_full_coverage()
print("\t".join([
profile.sample,
str(wn.coverage),
str(round(full_coverage, 2)),
str(round(full_coverage / total_coverage * 100, 2)),
str(level),
'; '.join(wn.get_taxonomy())
]), file=output_io)
num_printed += 1

logging.info("Wrote {} lines of taxonomic profile with extras".format(num_printed))
19 changes: 19 additions & 0 deletions test/test_summariser.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,25 @@ def test_fill_condensed(self):
land 2.17 Root; d__Bacteria; p__Proteobacteria
""")
self.assertEqual(expected, stdout)

def test_output_taxonomic_profile_with_extras(self):
    """Check --output-taxonomic-profile-with-extras on two small profiles.

    The same 5-line profile is supplied twice, the second time with the
    sample renamed to 'land', so both samples should report identical
    numbers.
    """
    cmd = f'bin/singlem summarise --input-taxonomic-profile <(head -5 {path_to_data}/read_fraction/marine0.profile) <(head -5 {path_to_data}/read_fraction/marine0.profile |sed s/marine0.1/land/) '\
        '--output-taxonomic-profile-with-extras /dev/stdout'
    stdout = extern.run(cmd)
    # Columns in the table below are separated by two or more spaces so
    # that the single space inside the "; " taxonomy separator is not
    # converted to a tab along with the column gaps.
    expected = re.compile(r'  +').sub('\t', """sample     coverage  full_coverage  relative_abundance  level  taxonomy
marine0.1  0         7.17           100.0               0      Root
marine0.1  3.64      4.2            58.58               1      Root; d__Archaea
marine0.1  0         2.97           41.42               1      Root; d__Bacteria
marine0.1  0.56      0.56           7.81                2      Root; d__Archaea; p__Thermoproteota
marine0.1  0.8       0.8            11.16               2      Root; d__Bacteria; p__Desulfobacterota
marine0.1  2.17      2.17           30.26               2      Root; d__Bacteria; p__Proteobacteria
land       0         7.17           100.0               0      Root
land       3.64      4.2            58.58               1      Root; d__Archaea
land       0         2.97           41.42               1      Root; d__Bacteria
land       0.56      0.56           7.81                2      Root; d__Archaea; p__Thermoproteota
land       0.8       0.8            11.16               2      Root; d__Bacteria; p__Desulfobacterota
land       2.17      2.17           30.26               2      Root; d__Bacteria; p__Proteobacteria
""")
    # Without this comparison the test could never fail.
    self.assertEqual(expected, stdout)


if __name__ == "__main__":
Expand Down

0 comments on commit 3a77ca0

Please sign in to comment.