Skip to content

Commit 21b49e9

Browse files
Merge pull request #30 from intuit/nn_multiple_match_fix
fixed NEAREST_NEIGHBORS MatchTypes to allow returning Multiple tokens
2 parents 66ea3fc + 56481c9 commit 21b49e9

File tree

5 files changed

+21
-105
lines changed

5 files changed

+21
-105
lines changed

src/main/java/com/intuit/fuzzymatcher/component/TokenRepo.java

+18-26
Original file line number | Diff line number | Diff line change
@@ -44,30 +44,26 @@ private class Repo {
4444

4545
Map<Object, Set<Element>> tokenElementSet;
4646

47-
TreeSet<Token> tokenBinaryTree;
47+
TreeSet<Object> tokenBinaryTree;
4848

4949
Repo(MatchType matchType) {
5050
this.matchType = matchType;
5151
switch (matchType) {
52+
case NEAREST_NEIGHBORS:
53+
tokenBinaryTree = new TreeSet<>();
5254
case EQUALITY:
5355
tokenElementSet = new ConcurrentHashMap<>();
54-
break;
55-
case NEAREST_NEIGHBORS:
56-
tokenBinaryTree = new TreeSet<>(Token.byValue);
57-
break;
5856
}
5957
}
6058

6159
void put(Token token, Element element) {
6260
switch (matchType) {
61+
case NEAREST_NEIGHBORS:
62+
tokenBinaryTree.add(token.getValue());
6363
case EQUALITY:
6464
Set<Element> elements = tokenElementSet.getOrDefault(token.getValue(), new HashSet<>());
6565
elements.add(element);
6666
tokenElementSet.put(token.getValue(), elements);
67-
break;
68-
case NEAREST_NEIGHBORS:
69-
tokenBinaryTree.add(token);
70-
7167
}
7268
}
7369

@@ -79,7 +75,7 @@ Set<Element> get(Token token) {
7975
TokenRange tokenRange = new TokenRange(token, token.getElement().getNeighborhoodRange());
8076
return tokenBinaryTree.subSet(tokenRange.lower, true, tokenRange.higher, true)
8177
.stream()
82-
.map(Token::getElement).collect(Collectors.toSet());
78+
.flatMap(val -> tokenElementSet.get(val).stream()).collect(Collectors.toSet());
8379

8480
}
8581
return null;
@@ -88,37 +84,33 @@ Set<Element> get(Token token) {
8884

8985
private class TokenRange {
9086

91-
private final Token lower;
92-
private final Token higher;
87+
private final Object lower;
88+
private final Object higher;
9389
private static final double DATE_SCALE_FACTOR = 1.1;
9490

9591

9692
TokenRange(Token token, double pct) {
9793
Object value = token.getValue();
9894
if (value instanceof Double) {
99-
this.lower = new Token(getLower((Double) value, pct).doubleValue(), token.getElement());
100-
this.higher = new Token(getHigher((Double) value, pct).doubleValue(), token.getElement());
95+
this.lower = getLower((Double) value, pct).doubleValue();
96+
this.higher = getHigher((Double) value, pct).doubleValue();
10197
} else if (value instanceof Integer) {
102-
this.lower = new Token(getLower((Integer) value, pct).intValue(), token.getElement());
103-
this.higher = new Token(getHigher((Integer) value, pct).intValue(), token.getElement());
98+
this.lower = getLower((Integer) value, pct).intValue();
99+
this.higher = getHigher((Integer) value, pct).intValue();
104100
} else if (value instanceof Long) {
105-
this.lower = new Token(getLower((Long) value, pct).longValue(), token.getElement());
106-
this.higher = new Token(getHigher((Long) value, pct).longValue(), token.getElement());
101+
this.lower = getLower((Long) value, pct).longValue();
102+
this.higher = getHigher((Long) value, pct).longValue();
107103
} else if (value instanceof Float) {
108-
this.lower = new Token(getLower((Float) value, pct).floatValue(), token.getElement());
109-
this.higher = new Token(getHigher((Float) value, pct).floatValue(), token.getElement());
104+
this.lower = getLower((Float) value, pct).floatValue();
105+
this.higher = getHigher((Float) value, pct).floatValue();
110106
} else if (value instanceof Date) {
111-
this.lower = getDateToken(getLower(((Date) value).getTime(), pct * DATE_SCALE_FACTOR), token);
112-
this.higher = getDateToken(getHigher(((Date) value).getTime(), pct * DATE_SCALE_FACTOR), token);
107+
this.lower = new Date(getLower(((Date) value).getTime(), pct * DATE_SCALE_FACTOR).longValue());
108+
this.higher = new Date(getHigher(((Date) value).getTime(), pct * DATE_SCALE_FACTOR).longValue());
113109
} else {
114110
throw new MatchException("Data Type not supported");
115111
}
116112
}
117113

118-
private Token getDateToken(Number number, Token token) {
119-
return new Token(new Date(number.longValue()), token.getElement());
120-
}
121-
122114
private Number getLower(Number number, double pct) {
123115
Double dnum = number.doubleValue();
124116
Double pctVal = Math.abs(dnum * (1.0 - pct));

src/main/java/com/intuit/fuzzymatcher/domain/Token.java

-14
Original file line number | Diff line number | Diff line change
@@ -45,18 +45,4 @@ public int hashCode() {
4545

4646
return Objects.hash(value, element);
4747
}
48-
49-
public static Comparator<Token> byValue = (Token t1, Token t2) -> {
50-
if (t2 == null) {
51-
return -1;
52-
}
53-
if (t1 == null) {
54-
return 1;
55-
}
56-
57-
if (t1.getValue() instanceof Comparable) {
58-
return ((Comparable) t1.getValue()).compareTo((Comparable) t2.getValue());
59-
}
60-
return -1;
61-
};
6248
}

src/test/java/com/intuit/fuzzymatcher/component/MatchServiceTest.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -482,7 +482,7 @@ public void itShouldApplyMatchForBalancedEmptyElements() throws FileNotFoundExce
482482
}
483483

484484
@Test
485-
public void itShouldApplyMatchForUnBalancedEmptyElements() throws FileNotFoundException {
485+
public void itShouldApplyMatchForUnBalancedEmptyElements() {
486486
List<Document> inputData = new ArrayList<>();
487487
inputData.add(new Document.Builder("1")
488488
.addElement(new Element.Builder().setType(NAME).setValue("James Parker").createElement())

src/test/java/com/intuit/fuzzymatcher/component/TokenRepoTest.java

+2-2
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@
1919

2020
public class TokenRepoTest {
2121

22+
private final AtomicInteger ai = new AtomicInteger(0);
23+
2224
@Test
2325
public void shouldGetForNameWithEquality() {
2426
List<Object> names = Arrays.asList("Amy Doe", "Brian Doe", "Jane Amy", "Michael Wane");
@@ -169,7 +171,6 @@ private Date getDate(String val) {
169171
}
170172

171173
@Test
172-
@Ignore
173174
public void shouldGetMultipleMatchedWithNearestNeighbour() {
174175
List<Object> numbers = Arrays.asList(100, 100);
175176

@@ -195,7 +196,6 @@ private List<Element> getElements(List<Object> values, ElementType elementType,
195196
.map(value -> getElement(value, elementType, matchType)).collect(Collectors.toList());
196197
}
197198

198-
AtomicInteger ai = new AtomicInteger(0);
199199
private Element getElement(Object value, ElementType elementType, MatchType matchType) {
200200
Document.Builder documentBuilder = new Document.Builder(ai.incrementAndGet()+"");
201201
Element.Builder elementBuilder = new Element.Builder().setType(elementType).setValue(value);

src/test/java/com/intuit/fuzzymatcher/domain/TokenTest.java

-62
This file was deleted.

0 commit comments

Comments
 (0)