diff --git a/CHANGELOG.md b/CHANGELOG.md index c000d42..32e9290 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added +- `Base.parse` and `Base.tryparse` for `DNA` and `RNA` ## [4.0.0] ### Added diff --git a/docs/src/nucleicacids.md b/docs/src/nucleicacids.md index 0d67509..ee2714b 100644 --- a/docs/src/nucleicacids.md +++ b/docs/src/nucleicacids.md @@ -89,6 +89,25 @@ true ``` +`parse` and `tryparse` also accept characters and strings, ignoring letter case and, for strings, surrounding whitespace: +```jldoctest +julia> parse(DNA, "T") +DNA_T + +julia> parse(RNA, "U") +RNA_U + +julia> parse(RNA, "U") == parse(RNA, "u") +true + +julia> tryparse(DNA, "A") # tryparse returns either a DNA nucleotide or nothing +DNA_A + +julia> tryparse(DNA, "SD") + +``` + + `print` and `show` methods are defined to output the text representation of a symbol: ```jldoctest julia> print(DNA_A) # un-decorated text diff --git a/docs/src/sequences.md b/docs/src/sequences.md index 710dee6..9977d25 100644 --- a/docs/src/sequences.md +++ b/docs/src/sequences.md @@ -15,14 +15,14 @@ A quick way to create a DNA/RNA sequence is storing symbols in a vector. 
```jldoctest julia> seq = [DNA_A, DNA_C, DNA_G, DNA_T] -4-element Array{DNA,1}: +4-element Vector{DNA}: DNA_A DNA_C DNA_G DNA_T julia> [convert(DNA, x) for x in "ACGT"] # from a string -4-element Array{DNA,1}: +4-element Vector{DNA}: DNA_A DNA_C DNA_G diff --git a/src/nucleicacid.jl b/src/nucleicacid.jl index b3aa74e..b8fbaa3 100644 --- a/src/nucleicacid.jl +++ b/src/nucleicacid.jl @@ -57,41 +57,62 @@ RNA(nt::DNA) = convert(RNA, nt) # Conversion from/to characters # ----------------------------- - -function Base.convert(::Type{DNA}, c::Char) - if c > '\uff' - throw(InexactError(:convert, DNA, c)) +function Base.convert(t::Union{Type{DNA},Type{RNA}}, c::Char) + nt = tryparse(t, c) + if nt === nothing + throw(InexactError(:convert, t, c)) end - @inbounds dna = char_to_dna[convert(Int, c) + 1] + return nt +end +DNA(c::Char) = convert(DNA, c) +RNA(c::Char) = convert(RNA, c) + +function Base.convert(::Type{Char}, nt::DNA) + return dna_to_char[encoded_data(nt)+1] +end +Char(nt::DNA) = convert(Char, nt) + +function Base.convert(::Type{Char}, nt::RNA) + return rna_to_char[encoded_data(nt)+1] +end +Char(nt::RNA) = convert(Char, nt) + +# `tryparse` returns the matching symbol, or `nothing` when the input is not a valid symbol of the requested type. +function Base.tryparse(::Type{DNA}, c::Char) + c > '\uff' && return nothing + @inbounds dna = char_to_dna[convert(Int, c)+1] if !isvalid(DNA, dna) - throw(InexactError(:convert, DNA, c)) + return nothing end return encode(DNA, dna) end -DNA(c::Char) = convert(DNA, c) -function Base.convert(::Type{RNA}, c::Char) - if c > '\uff' - throw(InexactError(:convert, RNA, c)) - end - @inbounds rna = char_to_rna[convert(Int, c) + 1] +function Base.tryparse(::Type{RNA}, c::Char) + c > '\uff' && return nothing + @inbounds rna = char_to_rna[convert(Int, c)+1] if !isvalid(RNA, rna) - throw(InexactError(:convert, RNA, c)) + return nothing end return encode(RNA, rna) -end -RNA(c::Char) = convert(RNA, c) -function Base.convert(::Type{Char}, nt::DNA) - return dna_to_char[encoded_data(nt) + 1] end -Char(nt::DNA) = convert(Char, nt) -function 
Base.convert(::Type{Char}, nt::RNA) - return rna_to_char[encoded_data(nt) + 1] +# A string parses only when it holds a single one-byte character once surrounding whitespace is stripped. +function Base.tryparse(t::Union{Type{DNA},Type{RNA}}, s::AbstractString) + sizeof(s) == 1 && return tryparse(t, first(s)) + stripped = strip(s) + sizeof(stripped) == 1 && return tryparse(t, first(stripped)) + return nothing end -Char(nt::RNA) = convert(Char, nt) +# Throwing counterpart of `tryparse`: raises `ArgumentError` instead of returning `nothing`. +function Base.parse(t::Union{Type{DNA},Type{RNA}}, c::Union{AbstractString,Char}) + nt = tryparse(t, c) + if nt === nothing + throw(ArgumentError("invalid $(t) nucleotide: $(repr(c))")) + end + return nt +end # Encoding of DNA and RNA NucleicAcids # ------------------------------------ diff --git a/test/runtests.jl b/test/runtests.jl index 79d0f76..6e8a959 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -90,13 +90,13 @@ end @test encoded_data(RNA_N) === 0b1111 end - @testset "stringbyte" begin - for T in (DNA, RNA) - @test all(alphabet(DNA)) do i - UInt8(Char(i)) == stringbyte(i) - end - end - end + @testset "stringbyte" begin + for T in (DNA, RNA) + @test all(alphabet(T)) do i + UInt8(Char(i)) == stringbyte(i) + end + end + end end @testset "Char" begin @@ -290,11 +290,11 @@ end end @testset "Broadcasting" begin - v = DNA[DNA_A, DNA_C, DNA_G, DNA_C] - v[2:3] .= DNA_A - @test v == [DNA_A, DNA_A, DNA_A, DNA_C] - v .= DNA_T - @test v == fill(DNA_T, 4) + v = DNA[DNA_A, DNA_C, DNA_G, DNA_C] + v[2:3] .= DNA_A + @test v == [DNA_A, DNA_A, DNA_A, DNA_C] + v .= DNA_T + @test v == fill(DNA_T, 4) end @testset "Show DNA" begin @@ -384,6 +384,44 @@ end @test collect(ACGUN) == [RNA_A, RNA_C, RNA_G, RNA_U, RNA_N] end + @testset "Parsers" begin + @testset "Valid Cases" begin + fromto = [('a', DNA_A), ('c', RNA_C), ('s', DNA_S), ('s', RNA_S)] + + for (from, to) in fromto + @test parse(typeof(to), from) === tryparse(typeof(to), from) === to + # Strings also work + str_from = string(from) + @test parse(typeof(to), str_from) === tryparse(typeof(to), str_from) === to + # Case doesn't matter + @test parse(typeof(to), uppercase(from)) === parse(typeof(to), 
lowercase(from)) === to + # Whitespace doesn't matter + whitespace_from = "\t" * from * " \n" + @test parse(typeof(to), whitespace_from) === tryparse(typeof(to), whitespace_from) === to + end + end + + @testset "Invalid Cases" begin + @test_throws ArgumentError parse(DNA, "") + @test_throws ArgumentError parse(RNA, "") + @test_throws ArgumentError parse(DNA, "U") + @test_throws ArgumentError parse(RNA, "T") + @test_throws ArgumentError parse(DNA, "AL") + @test_throws ArgumentError parse(RNA, "LA") + @test_throws ArgumentError parse(RNA, '\0') + @test_throws ArgumentError parse(DNA, '@') + @test_throws ArgumentError parse(DNA, '亜') + @test tryparse(DNA, "U") === tryparse(RNA, "T") === nothing + @test tryparse(DNA, "") === tryparse(RNA, "") === nothing + @test tryparse(DNA, "AL") === tryparse(RNA, "AL") === nothing + @test tryparse(DNA, "LA") === tryparse(RNA, "LA") === nothing + @test tryparse(DNA, "ALAA") === tryparse(RNA, "ALAA") === nothing + @test tryparse(DNA, '\0') === tryparse(RNA, '\0') === nothing + @test tryparse(DNA, '@') === tryparse(RNA, '@') === nothing + @test tryparse(DNA, '亜') === tryparse(RNA, '亜') === nothing + end + end + @testset "Hashing" begin @test hash(DNA_A) != hash(RNA_A) @test hash(DNA_A) != hash(DNA_G) @@ -424,9 +462,9 @@ end end @testset "stringbyte" begin - @test all(alphabet(AminoAcid)) do i - UInt8(Char(i)) == stringbyte(i) - end + @test all(alphabet(AminoAcid)) do i + UInt8(Char(i)) == stringbyte(i) + end end @testset "isvalid" begin