Skip to content

Commit c5b94b3

Browse files
committed
use appropriate optimizations in regex_dna
The multi-replace method is being added in JuliaLang/julia#40484
1 parent dd25659 commit c5b94b3

File tree

1 file changed

+40
-30
lines changed

1 file changed

+40
-30
lines changed

src/shootout/regex_dna.jl

Lines changed: 40 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -5,30 +5,30 @@
5 5
# Fix from David Campbell
6 6

7 7
const variants = [
8-
"agggtaaa|tttaccct",
9-
"[cgt]gggtaaa|tttaccc[acg]",
10-
"a[act]ggtaaa|tttacc[agt]t",
11-
"ag[act]gtaaa|tttac[agt]ct",
12-
"agg[act]taaa|ttta[agt]cct",
13-
"aggg[acg]aaa|ttt[cgt]ccct",
14-
"agggt[cgt]aa|tt[acg]accct",
15-
"agggta[cgt]a|t[acg]taccct",
16-
"agggtaa[cgt]|[acg]ttaccct"
8+
r"agggtaaa|tttaccct",
9+
r"[cgt]gggtaaa|tttaccc[acg]",
10+
r"a[act]ggtaaa|tttacc[agt]t",
11+
r"ag[act]gtaaa|tttac[agt]ct",
12+
r"agg[act]taaa|ttta[agt]cct",
13+
r"aggg[acg]aaa|ttt[cgt]ccct",
14+
r"agggt[cgt]aa|tt[acg]accct",
15+
r"agggta[cgt]a|t[acg]taccct",
16+
r"agggtaa[cgt]|[acg]ttaccct"
17 17
]
18 18

19-
const subs = [
20-
(r"B", "(c|g|t)"),
21-
(r"D", "(a|g|t)"),
22-
(r"H", "(a|c|t)"),
23-
(r"K", "(g|t)"),
24-
(r"M", "(a|c)"),
25-
(r"N", "(a|c|g|t)"),
26-
(r"R", "(a|g)"),
27-
(r"S", "(c|g)"),
28-
(r"V", "(a|c|g)"),
29-
(r"W", "(a|t)"),
30-
(r"Y", "(c|t)")
31-
]
19+
const subs = (
20+
("B" => "(c|g|t)"),
21+
("D" => "(a|g|t)"),
22+
("H" => "(a|c|t)"),
23+
("K" => "(g|t)"),
24+
("M" => "(a|c)"),
25+
("N" => "(a|c|g|t)"),
26+
("R" => "(a|g)"),
27+
("S" => "(c|g)"),
28+
("V" => "(a|c|g)"),
29+
("W" => "(a|t)"),
30+
("Y" => "(c|t)")
31+
)
32 32

33 33
function perf_regex_dna()
34 34
infile = joinpath(SHOOTOUT_DATA_PATH, "regexdna-input.txt")
@@ -38,20 +38,30 @@ function perf_regex_dna()
38 38
seq = replace(seq, r">.*\n|\n" => "")
39 39
l2 = length(seq)
40 40

41+
kk = 0
41 42
for v in variants
42 43
k = 0
43-
for m in eachmatch(Regex(v), seq)
44+
for m in eachmatch(v, seq)
44 45
k += 1
45 46
end
46-
# @printf("%s %d\n", v, k)
47+
kk += k
47 48
end
48 49

49-
for (u, v) in subs
50-
seq = replace(seq, u => v)
50+
if applicable(replace, seq, subs...)
51+
# VERSION > 1.7-dev
52+
seq = replace(seq, subs...)
53+
elseif false
54+
# semi-optimized regex
55+
seq = replace(seq, subs...)
56+
r = Regex(join(first.(subs), "|"))
57+
repl = Dict(subs)
58+
seq = replace(seq, r => (r -> repl[r]))
59+
else
60+
# multiple passes
61+
for sub in subs
62+
seq = replace(seq, sub)
63+
end
51 64
end
52 65

53-
# println()
54-
# println(l1)
55-
# println(l2)
56-
# println(length(seq))
66+
seq, kk
57 67
end

0 commit comments

Comments
 (0)