Skip to content

Commit 2e3793c

Browse files
Merge pull request #13 from intuit/perf-improvements
- Memory usage improvements by dropping storage of "childScore" in Ma…
2 parents 39fe9c6 + 1d0eb51 commit 2e3793c

File tree

9 files changed

+3304
-89
lines changed

9 files changed

+3304
-89
lines changed

README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ be easily configured by passing a lambda expression.
5454
* _Levenshtein_: Gets the Levenshtein distance score using apache commons similarity library
5555
* _Jaccard_: Gets the Jaccard score using apache commons similarity library
5656

57-
* __Scoring__ : Expects a ```Function<Match, Double>```, this defines functions on how to accumulate scores from Tokens into Elements and from Elements into Documents
57+
* __Scoring__ : Expects a ```BiFunction<Match, List<Score>, Score>```, which defines how to accumulate scores
58+
from Tokens into Elements and from Elements into Documents.
5859
* _Simple Average_: Sums the scores of all child matches and divides by the total number of children. This is the default scoring for Elements.
5960
* _Weighted Average_: This is useful for Document Scoring, where users can input weights on elements.
6061
For example, a phone number or email could be considered an important element for identifying a match between two User objects, so we can add weights to such elements.

src/main/java/com/intuit/fuzzymatcher/domain/Document.java

+7-6
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import java.util.List;
1111
import java.util.Set;
1212
import java.util.concurrent.ConcurrentHashMap;
13+
import java.util.function.BiFunction;
1314
import java.util.function.Function;
1415
import java.util.function.Predicate;
1516
import java.util.stream.Collectors;
@@ -27,7 +28,7 @@
2728
* </ul>
2829
*/
2930
public class Document implements Matchable {
30-
private Document(String key, Set<Element> elements, double threshold, Function<Match, Score> scoringFunction) {
31+
private Document(String key, Set<Element> elements, double threshold, BiFunction<Match, List<Score>, Score> scoringFunction) {
3132
this.key = key;
3233
this.elements = elements;
3334
this.threshold = threshold;
@@ -38,11 +39,11 @@ private Document(String key, Set<Element> elements, double threshold, Function<M
3839
private Set<Element> elements;
3940
private Set<Element> preProcessedElement;
4041
private double threshold;
41-
private Function<Match, Score> scoringFunction;
42+
private BiFunction<Match, List<Score>, Score> scoringFunction;
4243
private Boolean source;
4344
private Set<Document> matchedWith = new HashSet<>();
4445

45-
private static final Function<Match, Score> DEFAULT_DOCUMENT_SCORING = ScoringFunction.getExponentialWeightedAverageScore();
46+
private static final BiFunction<Match, List<Score>, Score> DEFAULT_DOCUMENT_SCORING = ScoringFunction.getExponentialWeightedAverageScore();
4647

4748
public String getKey() {
4849
return key;
@@ -111,7 +112,7 @@ public long getUnmatchedChildCount(Matchable other) {
111112
}
112113

113114
@Override
114-
public Function<Match, Score> getScoringFunction() {
115+
public BiFunction<Match, List<Score>, Score> getScoringFunction() {
115116
return this.scoringFunction != null ? this.scoringFunction : DEFAULT_DOCUMENT_SCORING;
116117
}
117118

@@ -140,7 +141,7 @@ public static class Builder {
140141
private String key;
141142
private Set<Element> elements;
142143
private double threshold = 0.5;
143-
private Function<Match, Score> scoringFunction;
144+
private BiFunction<Match, List<Score>, Score> scoringFunction;
144145

145146
public Builder(String key) {
146147
this.key = key;
@@ -159,7 +160,7 @@ public Builder addElement(Element element) {
159160
return this;
160161
}
161162

162-
public Builder setScoringFunction(Function<Match, Score> scoringFunction) {
163+
public Builder setScoringFunction(BiFunction<Match, List<Score>, Score> scoringFunction) {
163164
this.scoringFunction = scoringFunction;
164165
return this;
165166
}

src/main/java/com/intuit/fuzzymatcher/domain/Element.java

+11-6
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,17 @@ public class Element implements Matchable {
4040
private Function<Object, Object> preProcessFunction;
4141
private Function<Element, Stream<Token>> tokenizerFunction;
4242
private BiFunction<Token, Token, Double> similarityMatchFunction;
43-
private Function<Match, Score> scoringFunction;
43+
private BiFunction<Match, List<Score>, Score> scoringFunction;
4444
private List<Token> tokens;
4545

4646
private Object preProcessedValue;
4747

48-
private static final Function<Match, Score> DEFAULT_ELEMENT_SCORING = ScoringFunction.getSimpleAverageScore();
48+
private static final BiFunction<Match, List<Score>, Score> DEFAULT_ELEMENT_SCORING = ScoringFunction.getSimpleAverageScore();
4949

5050
public Element(ElementType type, String variance, Object value, double weight, double threshold,
5151
Function<Object, Object> preProcessFunction,
5252
Function<Element, Stream<Token>> tokenizerFunction,
53-
BiFunction<Token, Token, Double> similarityMatchFunction, Function<Match, Score> scoringFunction,
53+
BiFunction<Token, Token, Double> similarityMatchFunction, BiFunction<Match, List<Score>, Score> scoringFunction,
5454
Function<List<Token>, Stream<Match<Token>>> matchOptimizerFunction) {
5555
this.weight = weight;
5656
this.elementClassification = new ElementClassification(type, variance,
@@ -153,7 +153,7 @@ public long getUnmatchedChildCount(Matchable other) {
153153
}
154154

155155
@Override
156-
public Function<Match, Score> getScoringFunction() {
156+
public BiFunction<Match, List<Score>, Score> getScoringFunction() {
157157
return this.scoringFunction;
158158
}
159159

@@ -167,7 +167,7 @@ public static class Builder {
167167

168168
private Function<Element, Stream<Token>> tokenizerFunction;
169169
private BiFunction<Token, Token, Double> similarityMatchFunction;
170-
private Function<Match, Score> scoringFunction;
170+
private BiFunction<Match, List<Score>, Score> scoringFunction;
171171
private Function<List<Token>, Stream<Match<Token>>> matchOptimizerFunction;
172172

173173
public Builder setType(ElementType type) {
@@ -180,6 +180,11 @@ public Builder setVariance(String variance) {
180180
return this;
181181
}
182182

183+
public Builder setValue(Object value) {
184+
this.value = value;
185+
return this;
186+
}
187+
183188
public Builder setValue(String value) {
184189
this.value = value;
185190
return this;
@@ -221,7 +226,7 @@ public Builder setSimilarityMatchFunction(BiFunction<Token, Token, Double> simil
221226
return this;
222227
}
223228

224-
public Builder setScoringFunction(Function<Match, Score> scoringFunction) {
229+
public Builder setScoringFunction(BiFunction<Match, List<Score>, Score> scoringFunction) {
225230
this.scoringFunction = scoringFunction;
226231
return this;
227232
}

src/main/java/com/intuit/fuzzymatcher/domain/Match.java

+13-16
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,19 @@
1616
*/
1717
public class Match<T extends Matchable> {
1818

19-
public Match(T t, T matchedWith, List<Score> childScores) {
19+
20+
public Match(T t, T matchedWith) {
2021
this.data = t;
2122
this.matchedWith = matchedWith;
22-
this.childScores = childScores;
23+
}
24+
public Match(T t, T matchedWith, List<Score> childScores) {
25+
this(t, matchedWith);
26+
List<Score> maxDistinctChildScores = getMaxDistinctScores(childScores);
27+
setScore(maxDistinctChildScores);
2328
}
2429

2530
public Match(T t, T matchedWith, double result) {
26-
this.data = t;
27-
this.matchedWith = matchedWith;
31+
this(t, matchedWith);
2832
this.score = new Score(result, this);
2933
}
3034

@@ -34,8 +38,6 @@ public Match(T t, T matchedWith, double result) {
3438

3539
private Score score;
3640

37-
private List<Score> childScores;
38-
3941
public T getData() {
4042
return this.data;
4143
}
@@ -44,23 +46,18 @@ public T getMatchedWith() {
4446
return matchedWith;
4547
}
4648

47-
public void setMatchedWith(T matchedWith) {
48-
this.matchedWith = matchedWith;
49-
}
50-
5149
public double getResult() {
52-
return this.getScore().getResult();
50+
return this.score.getResult();
5351
}
5452

5553
public Score getScore() {
56-
if (this.score == null) {
57-
this.score = this.data.getScoringFunction().apply(this);
58-
}
5954
return this.score;
6055
}
6156

62-
public List<Score> getChildScores() {
63-
return getMaxDistinctScores(this.childScores);
57+
public void setScore(List<Score> childScores) {
58+
if (this.score == null) {
59+
this.score = this.data.getScoringFunction().apply(this, childScores);
60+
}
6461
}
6562

6663
private List<Score> getMaxDistinctScores(List<Score> scoreList) {

src/main/java/com/intuit/fuzzymatcher/domain/Matchable.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.intuit.fuzzymatcher.domain;
22

3-
import java.util.function.Function;
3+
import java.util.List;
4+
import java.util.function.BiFunction;
45

56
/**
67
*
@@ -10,7 +11,7 @@ public interface Matchable {
1011

1112
public long getChildCount(Matchable other);
1213

13-
public Function<Match, Score> getScoringFunction();
14+
public BiFunction<Match, List<Score>, Score> getScoringFunction();
1415

1516
public double getWeight();
1617

src/main/java/com/intuit/fuzzymatcher/function/ScoringFunction.java

+19-23
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
import com.intuit.fuzzymatcher.domain.Score;
55

66
import java.util.List;
7+
import java.util.function.BiFunction;
78
import java.util.function.Function;
89
import java.util.stream.Collectors;
910

1011
/**
1112
* A functional interface that computes the Score of a Match from the list of its child Scores
1213
*/
13-
public interface ScoringFunction extends Function<Match, Score> {
14+
public interface ScoringFunction extends BiFunction<Match, List<Score>, Score> {
1415

1516
double EXPONENT = 1.5;
1617
double EXPONENTIAL_INCREASE_THRESHOLD = 0.9;
@@ -24,8 +25,7 @@ public interface ScoringFunction extends Function<Match, Score> {
2425
* @return the scoring function for Average
2526
*/
2627
static ScoringFunction getAverageScore() {
27-
return match -> {
28-
List<Score> childScores = match.getChildScores();
28+
return (match, childScores) -> {
2929
double numerator = getSumOfResult(childScores) + getUnmatchedChildScore(match);
3030
double denominator = getChildCount(match);
3131
return new Score(numerator / denominator, match);
@@ -39,8 +39,7 @@ static ScoringFunction getAverageScore() {
3939
* @return the scoring function for Simple Average
4040
*/
4141
static ScoringFunction getSimpleAverageScore() {
42-
return match -> {
43-
List<Score> childScores = match.getChildScores();
42+
return (match, childScores) -> {
4443
double numerator = getSumOfResult(childScores);
4544
double denominator = getChildCount(match);
4645
return new Score(numerator / denominator, match);
@@ -54,13 +53,12 @@ static ScoringFunction getSimpleAverageScore() {
5453
* @return the scoring function for WeightedAverage
5554
*/
5655
static ScoringFunction getWeightedAverageScore() {
57-
return match -> {
58-
List<Score> childScoreList = match.getChildScores();
59-
double numerator = getSumOfWeightedResult(childScoreList)
56+
return (match, childScores) -> {
57+
double numerator = getSumOfWeightedResult(childScores)
6058
+ getUnmatchedChildScore(match);
61-
double denominator = getSumOfWeights(childScoreList)
59+
double denominator = getSumOfWeights(childScores)
6260
+ getChildCount(match)
63-
- childScoreList.size();
61+
- childScores.size();
6462
return new Score(numerator / denominator, match);
6563
};
6664
}
@@ -72,21 +70,20 @@ static ScoringFunction getWeightedAverageScore() {
7270
* @return the scoring function for ExponentialAverage
7371
*/
7472
static ScoringFunction getExponentialAverageScore() {
75-
return match -> {
76-
List<Score> childScoreList = match.getChildScores();
77-
List<Score> perfectMatchedElements = getPerfectMatchedElement(childScoreList);
73+
return (match, childScores) -> {
74+
List<Score> perfectMatchedElements = getPerfectMatchedElement(childScores);
7875

7976
if (perfectMatchedElements.size() > 1 && getSumOfResult(perfectMatchedElements) > 1) {
8077
double numerator = getExponentiallyIncreasedValue(getSumOfResult(perfectMatchedElements))
81-
+ getSumOfResult(getNonPerfectMatchedElement(childScoreList))
78+
+ getSumOfResult(getNonPerfectMatchedElement(childScores))
8279
+ getUnmatchedChildScore(match);
8380

8481
double denominator = getExponentiallyIncreasedValue(perfectMatchedElements.size())
8582
+ getChildCount(match)
8683
- perfectMatchedElements.size();
8784
return new Score(numerator / denominator, match);
8885
} else
89-
return getAverageScore().apply(match);
86+
return getAverageScore().apply(match, childScores);
9087
};
9188
}
9289

@@ -97,24 +94,23 @@ static ScoringFunction getExponentialAverageScore() {
9794
* @return the scoring function for ExponentialWeightedAverage
9895
*/
9996
static ScoringFunction getExponentialWeightedAverageScore() {
100-
return match -> {
101-
List<Score> childScoreList = match.getChildScores();
102-
List<Score> perfectMatchedElements = getPerfectMatchedElement(childScoreList);
97+
return (match, childScores) -> {
98+
List<Score> perfectMatchedElements = getPerfectMatchedElement(childScores);
10399

104100
// Apply Exponent if match elements > 1
105101
if (perfectMatchedElements.size() > 1 && getSumOfWeightedResult(perfectMatchedElements) > 1) {
106-
List<Score> notPerfectMachedElements = getNonPerfectMatchedElement(childScoreList);
102+
List<Score> notPerfectMachedElements = getNonPerfectMatchedElement(childScores);
107103
double numerator = getExponentiallyIncreasedValue(getSumOfWeightedResult(perfectMatchedElements))
108104
+ getSumOfWeightedResult(notPerfectMachedElements)
109105
+ getUnmatchedChildScore(match);
110106

111107
double denominator = getExponentiallyIncreasedValue(getSumOfWeights(perfectMatchedElements))
112108
+ getSumOfWeights(notPerfectMachedElements)
113109
+ getChildCount(match)
114-
- childScoreList.size();
110+
- childScores.size();
115111
return new Score(numerator / denominator, match);
116112
} else
117-
return getWeightedAverageScore().apply(match);
113+
return getWeightedAverageScore().apply(match, childScores);
118114
};
119115
}
120116

@@ -125,8 +121,8 @@ static ScoringFunction getExponentialWeightedAverageScore() {
125121
* @return the scoring function for Jaccard
126122
*/
127123
static ScoringFunction getJaccardScore() {
128-
return match ->
129-
new Score((double) match.getChildScores().size() /
124+
return (match, childScores) ->
125+
new Score((double) childScores.size() /
130126
((match.getData().getChildCount(match.getMatchedWith()))), match);
131127
}
132128

0 commit comments

Comments
 (0)