Skip to content

Commit

Permalink
Humann profiles (#59)
Browse files Browse the repository at this point in the history
* add demo data from humann

* add humann_profile function

* add humann_profiles function

* add tests, fix some stuff
  • Loading branch information
kescobo committed Oct 4, 2021
1 parent da6e28e commit 3788420
Show file tree
Hide file tree
Showing 7 changed files with 4,136 additions and 0 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Microbiome = "3bd8f0ae-a0f2-5238-a5af-e1b399a4940c"
RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
CSV = "0.8"
Expand Down
6 changes: 6 additions & 0 deletions src/BiobakeryUtils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ export
clean_abundance_tables,
metaphlan_profile,
metaphlan_profiles,
humann_profile,
humann_profiles,
taxfilter,
taxfilter!,
parsetaxa,
Expand All @@ -29,4 +31,8 @@ include("general.jl")
include("metaphlan.jl")
include("humann.jl")

# Module initialization hook, run when the module is loaded.
# Intentionally a no-op: there is nothing to set up at load time yet.
function __init__()
    return nothing
end

end
61 changes: 61 additions & 0 deletions src/humann.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,64 @@
"""
    humann_profile(path::AbstractString; sample=basename(first(splitext(path))), stratified=false)

Read the two-column HUMAnN table at `path` (function name, abundance) into a
single-sample `CommunityProfile` whose features are `GeneFunction`s.

The first line of the file is skipped as a header.

# Keywords
- `sample`: name of the sample, or an existing `Microbiome.AbstractSample`.
  Defaults to the file name with its last extension removed. Anything that is
  not already an `AbstractSample` is wrapped in a `MicrobiomeSample`.
- `stratified`: stratified import is currently non-functional. With `false`
  (the default), rows whose function name contains `'|'` (taxon-stratified
  entries) are skipped. With `true`, such a row raises an `ArgumentError`.

# Throws
- `ArgumentError` if `stratified=true` and the file contains a stratified row.
"""
function humann_profile(
    path::AbstractString;
    sample=basename(first(splitext(path))),
    stratified=false,
)
    gfs = GeneFunction[]
    abundances = Float64[]

    for (gf, abundance) in CSV.File(path, datarow=2, header=["function", "abundance"])
        if occursin('|', gf) # indicates a taxon-stratified entry
            # Previously a stratified row with `stratified=true` pushed an abundance
            # without a matching feature, leaving `gfs` and `abundances` with
            # different lengths. Fail loudly until stratified import exists.
            #
            # TODO: implement stratified import. Planned approach: split `gf` on '|'
            # into (function, taxon); map "unclassified" to Taxon("unclassified"),
            # otherwise parse the `s__`/`g__` prefixed name into a Taxon with
            # :species/:genus, then push GeneFunction(function, taxon).
            stratified && throw(
                ArgumentError(
                    "Stratified import is not yet supported, but found stratified entry: $gf",
                ),
            )
            continue
        end
        push!(gfs, GeneFunction(gf))
        push!(abundances, abundance)
    end

    # One column (the single sample), one row per retained gene function.
    mat = sparse(reshape(abundances, length(abundances), 1))
    sample = sample isa Microbiome.AbstractSample ? sample : MicrobiomeSample(sample)

    return CommunityProfile(mat, gfs, [sample])
end

"""
    humann_profiles(path::AbstractString; samples=nothing, stratified=false)

Read the multi-sample HUMAnN table at `path` into a `CommunityProfile`. The
first column is taken as the function name and every remaining column as one
sample's abundances.

Rows whose function name contains `'|'` (taxon-stratified entries) are always
dropped, because stratified import is currently non-functional.

# Keywords
- `samples`: sample names or `MicrobiomeSample`s, one per abundance column.
  When `nothing` (the default) the column names of the table are used. Unless
  the element type is `MicrobiomeSample`, entries are converted with
  `MicrobiomeSample.(string.(samples))`.
- `stratified`: not yet supported; passing `true` emits a warning and the
  stratified rows are still dropped.

# Throws
- `ArgumentError` if the table has no data rows, or if the number of `samples`
  passed does not match the number of abundance columns.
"""
function humann_profiles(path::AbstractString; samples=nothing, stratified=false)
    tbl = CSV.File(path)
    # The column names are read from the first row, so an empty table cannot be
    # inspected; report that explicitly instead of failing inside `first`.
    isempty(tbl) && throw(ArgumentError("Table at $path has no data rows"))

    if stratified
        @warn "Stratified import is not yet supported; taxon-stratified rows will be dropped"
    end

    colnames = keys(first(tbl))
    ncols = length(colnames) - 1 # first column holds the function names
    if isnothing(samples)
        samples = colnames[2:end]
    elseif length(samples) != ncols
        throw(ArgumentError("Passed $(length(samples)) samples, but table has $ncols"))
    end

    # Need to add code to deal with stratified input
    rows = filter(row -> !occursin('|', row[1]), tbl)
    nsamples = length(samples)

    gfs = GeneFunction[]
    sizehint!(gfs, length(rows))

    # Collect (row, column, value) triplets and build the sparse matrix in one
    # step; inserting element by element into a CSC matrix shifts its storage on
    # every new entry. Zeros are skipped so they are not stored explicitly.
    rowidx = Int[]
    colidx = Int[]
    vals = Float64[]
    for (i, row) in enumerate(rows)
        push!(gfs, GeneFunction(row[1]))
        for j in 1:nsamples
            abundance = row[j + 1]
            iszero(abundance) && continue
            push!(rowidx, i)
            push!(colidx, j)
            push!(vals, abundance)
        end
    end
    mat = sparse(rowidx, colidx, vals, length(rows), nsamples)

    samples = eltype(samples) == MicrobiomeSample ? samples : MicrobiomeSample.(string.(samples))
    return CommunityProfile(mat, gfs, samples)
end

"""
function humann_regroup(df::AbstractDataFrame; inkind="uniref90", outkind::String="ec")
Expand Down
1,359 changes: 1,359 additions & 0 deletions test/files/humann_joined.tsv

Large diffs are not rendered by default.

Loading

0 comments on commit 3788420

Please sign in to comment.