Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
roy-ht authored Feb 10, 2024
2 parents e36676a + 84396f3 commit 759dec0
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 4 deletions.
7 changes: 4 additions & 3 deletions editdistance/_editdistance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,14 @@ unsigned int edit_distance_bpv(T &cmap, int64_t const *vec, size_t const &vecsiz


/// c.f. http://handasse.blogspot.com/2009/04/c_29.html
template<typename T>
unsigned int edit_distance_dp(T const *str1, size_t const size1, T const *str2, size_t const size2) {
unsigned int edit_distance_dp(int64_t const *str1, size_t const size1, int64_t const *str2, size_t const size2) {
// vectorより固定長配列の方が速いが、文字列が長い時の保険でのみ呼ばれるのでサイズを決め打ちできない
vector< vector<uint32_t> > d(2, vector<uint32_t>(size2 + 1));
d[0][0] = 0;
d[1][0] = 1;
for (size_t i = 0; i < size2 + 1; i++) d[0][i] = i;
for (size_t i = 1; i < size1 + 1; i++) {
d[i&1][0] = d[(i-1)&1][0] + 1;
for (size_t j = 1; j < size2 + 1; j++) {
d[i&1][j] = min(min(d[(i-1)&1][j], d[i&1][j-1]) + 1, d[(i-1)&1][j-1] + (str1[i-1] == str2[j-1] ? 0 : 1));
}
Expand All @@ -83,6 +83,7 @@ bool edit_distancec_dp(T const *str1, size_t const size1, T const *str2, size_t
d[1][0] = 1;
for (size_t i = 0; i < size2 + 1; i++) d[0][i] = i;
for (size_t i = 1; i < size1 + 1; i++) {
d[i&1][0] = d[(i-1)&1][0] + 1;
bool below_thr = false;
for (size_t j = 1; j < size2 + 1; j++) {
d[i&1][j] = min(min(d[(i-1)&1][j], d[i&1][j-1]) + 1, d[(i-1)&1][j-1] + (str1[i-1] == str2[j-1] ? 0 : 1));
Expand Down Expand Up @@ -148,7 +149,7 @@ unsigned int edit_distance(const int64_t *a, const unsigned int asize, const int
else if(vsize == 8) return edit_distance_map_<8>(ap, *asizep, bp, *bsizep);
else if(vsize == 9) return edit_distance_map_<9>(ap, *asizep, bp, *bsizep);
else if(vsize == 10) return edit_distance_map_<10>(ap, *asizep, bp, *bsizep);
return edit_distance_dp<int64_t>(ap, *asizep, bp, *bsizep); // dynamic programmingに任せる
return edit_distance_dp(ap, *asizep, bp, *bsizep); // dynamic programmingに任せる
}

bool edit_distance_criterion(const int64_t *a, const unsigned int asize, const int64_t *b, const unsigned int bsize, const unsigned int thr) {
Expand Down
1 change: 1 addition & 0 deletions editdistance/_editdistance.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ extern "C" {

unsigned int edit_distance(const int64_t *a, const unsigned int asize, const int64_t *b, const unsigned int bsize);
bool edit_distance_criterion(const int64_t *a, const unsigned int asize, const int64_t *b, const unsigned int bsize, const unsigned int thr);
unsigned int edit_distance_dp(int64_t const *str1, size_t const size1, int64_t const *str2, size_t const size2);

#ifdef __cplusplus
}
Expand Down
16 changes: 15 additions & 1 deletion editdistance/bycython.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ cdef extern from "./_editdistance.h":
ctypedef int int64_t
unsigned int edit_distance(const int64_t *a, const unsigned int asize, const int64_t *b, const unsigned int bsize)
bool edit_distance_criterion(const int64_t *a, const unsigned int asize, const int64_t *b, const unsigned int bsize, const unsigned int thr)
unsigned int edit_distance_dp(const int64_t *str1, const size_t size1, const int64_t *str2, const size_t size2)


cpdef unsigned int eval(object a, object b) except 0xffffffffffffffff:
Expand All @@ -23,7 +24,7 @@ cpdef unsigned int eval(object a, object b) except 0xffffffffffffffff:
free(al)
free(bl)
return dist

cpdef bint eval_criterion(object a, object b, const unsigned int thr) except 0xffffffffffffffff:
cdef unsigned int i
cdef bint ret
Expand All @@ -37,3 +38,16 @@ cpdef bint eval_criterion(object a, object b, const unsigned int thr) except 0xf
free(al)
free(bl)
return ret

cpdef unsigned int eval_dp(object a, object b) except 0xffffffffffffffff:
    """Return the edit distance between ``a`` and ``b`` using ``edit_distance_dp``.

    Each element of ``a`` and ``b`` is reduced to its ``hash()`` value and the
    two resulting integer arrays are handed to the C routine.

    Both arguments must support ``len()`` and integer indexing, and every
    element must be hashable. Any exception raised while reading the inputs
    propagates to the caller after the temporary buffers have been released.

    Raises ``MemoryError`` if a temporary buffer cannot be allocated.
    """
    cdef size_t i
    cdef size_t asize = len(a)
    cdef size_t bsize = len(b)
    cdef unsigned int dist
    # Start from NULL so the ``finally`` clause can free unconditionally:
    # free(NULL) is a no-op.
    cdef int64_t *al = NULL
    cdef int64_t *bl = NULL
    try:
        al = <int64_t *>malloc(asize * sizeof(int64_t))
        bl = <int64_t *>malloc(bsize * sizeof(int64_t))
        # malloc(0) is allowed to return NULL, so only a NULL result for a
        # non-empty request counts as an allocation failure.
        if (asize > 0 and al == NULL) or (bsize > 0 and bl == NULL):
            raise MemoryError()
        for i in range(asize):
            al[i] = hash(a[i])
        for i in range(bsize):
            bl[i] = hash(b[i])
        dist = edit_distance_dp(al, asize, bl, bsize)
    finally:
        # Runs on success and on any exception (e.g. an unhashable element),
        # so the buffers never leak.
        free(al)
        free(bl)
    return dist
17 changes: 17 additions & 0 deletions test/test_editdistance.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import unittest
import random


class TestEditDistance(unittest.TestCase):
def test_editdistance(self):
Expand All @@ -9,6 +11,21 @@ def test_editdistance_criterion(self):
import editdistance
self.assertEqual(False, editdistance.eval_criterion('abcb', 'aeca', 1))
self.assertEqual(True, editdistance.eval_criterion('abc', 'aec', 1))

def test_dp_editdistance(self):
    """eval_dp reports a distance of 3 for 'bbb' vs 'a', in either argument order."""
    from editdistance import bycython

    for left, right in (('bbb', 'a'), ('a', 'bbb')):
        self.assertEqual(3, bycython.eval_dp(left, right))

def test_dp_vs_default(self):
    """Check that eval_dp agrees with editdistance.eval on random sequences.

    Ten pairs of sequences over the alphabet {0, 1, 2}, each 10 to 50
    elements long, are generated and both functions must return the same
    distance for every pair.
    """
    # Import once; the modules do not change between iterations.
    import editdistance
    from editdistance.bycython import eval_dp

    for _ in range(10):
        seq1 = random.choices([0, 1, 2], k=random.randint(10, 50))
        seq2 = random.choices([0, 1, 2], k=random.randint(10, 50))

        # The inputs are random, so attach them to the sub-test: a failure
        # report then shows the exact pair needed to reproduce it.
        with self.subTest(seq1=seq1, seq2=seq2):
            self.assertEqual(editdistance.eval(seq1, seq2), eval_dp(seq1, seq2))


if __name__ == '__main__':
unittest.main()

0 comments on commit 759dec0

Please sign in to comment.