Skip to content

Commit

Permalink
Define AlphabetExtractor type
Browse files Browse the repository at this point in the history
soldatmat committed Feb 28, 2024
1 parent f34a4cc commit 4a6b770
Showing 8 changed files with 83 additions and 31 deletions.
1 change: 1 addition & 0 deletions src/DESilico.jl
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@ export de!, de_evaluation
include("types/include.jl")
include("de.jl")
include("de_evaluation.jl")
include("alphabet_extractor/include.jl")
include("selection_strategy/include.jl")
include("mutagenesis/include.jl")
include("screening/include.jl")
20 changes: 20 additions & 0 deletions src/alphabet_extractor/alphabet_extractor.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
    AlphabetExctractor()

Construct an `AlphabetExctractor`.

Calling the extractor on a collection of sequences creates the complete alphabet of all
symbols seen at each position in the provided sequences.
"""
struct AlphabetExctractor <: AbstractAlphabetExtractor end

# Collect, for every position, the set of symbols that occur at that position in any of
# `sequences`. The first sequence determines the number of positions, so every other
# sequence has to be at least as long as the first one.
# Returns a `Vector{Set{Char}}` with one entry per position; it is empty when `sequences`
# is empty.
function (::AlphabetExctractor)(sequences::AbstractVector{Vector{Char}})
    # No sequences means no positions; guard before touching the first sequence, which
    # would otherwise throw a `BoundsError`.
    isempty(sequences) && return Set{Char}[]
    # The typed comprehension keeps the element type `Set{Char}` even when the first
    # sequence has zero positions.
    return Set{Char}[
        Set{Char}(sequence[position] for sequence in sequences) for
        position in eachindex(first(sequences))
    ]
end
1 change: 1 addition & 0 deletions src/alphabet_extractor/include.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include("alphabet_extractor.jl")
34 changes: 18 additions & 16 deletions src/mutagenesis/recombination.jl
Original file line number Diff line number Diff line change
@@ -1,30 +1,32 @@
"""
Creates all recombinations of parents sequences.
Recombination{T}(alphabet_extractor::T)
Recombination(alphabet_extractor::T)
Constructs `Recombination{T}`.
# Arguments
- `alphabet_extractor::T`: Structure called to obtain positional alphabets from parent sequences.
Recombination()
Constructs `Recombination{AlphabetExctractor}`.
"""
struct Recombination <: Mutagenesis end
struct Recombination{T<:AbstractAlphabetExtractor} <: Mutagenesis
    # The bound on `T` is declared in the parameter list on purpose: a `where` clause
    # written after the supertype attaches to `Mutagenesis` and leaves `T` unconstrained.
    # Callable that maps the parent sequences to one symbol set per position.
    alphabet_extractor::T
end

function Recombination()
    # Zero-argument convenience constructor: fall back to the default extractor.
    default_extractor = AlphabetExctractor()
    return Recombination(default_extractor)
end

function (m::Recombination)(parents::AbstractVector{Vector{Char}})
@assert DESilico.same_length_sequences(parents)
length(parents) == 0 && return Vector{Vector{Char}}([])
alphabets = get_alphabets(parents)
mutant_library = recombine_symbols(alphabets, parents[1])
end

function get_alphabets(parents::AbstractVector{Vector{Char}})
alphabets = Vector{Set{Char}}(undef, length(parents[1]))
for position in 1:length(parents[1])
symbols = Vector{Char}(undef, length(parents))
for (p, parent) in enumerate(parents)
symbols[p] = parent[position]
end
alphabets[position] = Set(symbols)
end
return alphabets
alphabets = m.alphabet_extractor(parents)
mutant_library = _recombine_symbols(alphabets, parents[1])
end

function recombine_symbols(
function _recombine_symbols(
alphabets::Vector{Set{Char}},
first_parent::AbstractVector{Char},
)
10 changes: 10 additions & 0 deletions src/types/types.jl
Original file line number Diff line number Diff line change
@@ -35,3 +35,13 @@ Structures derived from this type have to implement the following method:
This method should return the sequences' fitness values as a subtype of `AbstractVector{Float64}`.
"""
abstract type Screening end

"""
    AbstractAlphabetExtractor

Supertype of structures that extract an alphabet for each position of sequences with the
same length.

Structures derived from this type have to implement the following method:

    (::CustomAlphabetExtractor)(sequences::AbstractVector{Vector{Char}})

This method should return an alphabet for each position in `sequences` as a subtype of
`AbstractVector{Set{Char}}`.
This method can assume that `sequences` have the same length.
"""
abstract type AbstractAlphabetExtractor end
14 changes: 14 additions & 0 deletions test/unit/alphabet_extractor/alphabet_extractor.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@testset "alphabet_extractor.jl" begin
    # Construction yields an instance of the expected concrete type.
    extractor = DESilico.AlphabetExctractor()
    @test extractor isa DESilico.AlphabetExctractor

    # Two sequences that agree at the first position only.
    sequences = [collect("AAA"), collect("ABC")]
    positional_alphabets = extractor(sequences)

    # One alphabet per position, each holding exactly the symbols seen there.
    @test length(positional_alphabets) == 3
    @test positional_alphabets[1] == Set(['A'])
    @test positional_alphabets[2] == Set(['A', 'B'])
    @test positional_alphabets[3] == Set(['A', 'C'])
end
33 changes: 18 additions & 15 deletions test/unit/mutagenesis/recombination.jl
Original file line number Diff line number Diff line change
@@ -1,24 +1,12 @@
@testset "recombination.jl" begin
@testset "get_alphabets" begin
parents = [
['A', 'A', 'A'],
['A', 'B', 'C'],
]
alphabets = DESilico.get_alphabets(parents)
@test length(alphabets) == 3
@test alphabets[1] == Set(['A'])
@test alphabets[2] == Set(['A', 'B'])
@test alphabets[3] == Set(['A', 'C'])
end

@testset "recombine_symbols" begin
@testset "_recombine_symbols" begin
alphabets = [
Set(['A', 'C']),
Set(['A']),
Set(['B', 'D']),
]
first_parent = ['A', 'A', 'B']
mutants = DESilico.recombine_symbols(alphabets, first_parent)
mutants = DESilico._recombine_symbols(alphabets, first_parent)
@test length(mutants) == 4
@test Set(mutants) == Set([
['A', 'A', 'B'],
@@ -29,9 +17,24 @@
@test first_parent == ['A', 'A', 'B']
end

@testset "Recomination call" begin
@testset "Constructors" begin
m = DESilico.Recombination()
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExctractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExctractor

ae = DESilico.AlphabetExctractor()
m = DESilico.Recombination(ae)
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExctractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExctractor

ae = DESilico.AlphabetExctractor()
m = DESilico.Recombination{DESilico.AlphabetExctractor}(ae)
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExctractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExctractor
end

@testset "Recombination call" begin
m = DESilico.Recombination()
parents = [
['A', 'A', 'B'],
['C', 'A', 'D'],
1 change: 1 addition & 0 deletions test/unit/runtests.jl
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@
include("types/variant.jl")
include("types/sequence_space.jl")

include("alphabet_extractor/alphabet_extractor.jl")
include("screening/dict_screening.jl")
include("selection_strategy/top_k.jl")
include("selection_strategy/sampling_select.jl")

0 comments on commit 4a6b770

Please sign in to comment.