Skip to content

Commit f74fa45

Browse files
authored
Merge pull request Catrobat#3797 from david-loe/CATROID-721
CATROID-721 Extended Keyword search
2 parents a1f0517 + b89fda1 commit f74fa45

File tree

2 files changed

+169
-7
lines changed

2 files changed

+169
-7
lines changed

catroid/src/main/java/org/catrobat/catroid/utils/HtmlRegexExtractor.java

+58-7
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,70 @@
2727

2828
import org.catrobat.catroid.R;
2929

30+
import java.util.regex.Matcher;
31+
import java.util.regex.Pattern;
32+
33+
import androidx.annotation.VisibleForTesting;
34+
3035
public class HtmlRegexExtractor {
36+
3137
private Context context;
38+
3239
public HtmlRegexExtractor(Context context) {
3340
this.context = context;
3441
}
35-
public void searchKeyword(String search, String text) {
36-
int index = text.indexOf(search);
37-
if (index >= 0) {
38-
ToastUtil.showSuccess(context,
39-
R.string.formula_editor_function_regex_html_extractor_found);
42+
43+
public void searchKeyword(String keyword, String text) {
44+
if (findKeyword(keyword, text) == null) {
45+
showError();
4046
} else {
41-
ToastUtil.showError(context,
42-
R.string.formula_editor_function_regex_html_extractor_not_found);
47+
showSuccess();
48+
}
49+
}
50+
51+
private void showSuccess() {
52+
ToastUtil.showSuccess(context,
53+
R.string.formula_editor_function_regex_html_extractor_found);
54+
}
55+
private void showError() {
56+
ToastUtil.showError(context,
57+
R.string.formula_editor_function_regex_html_extractor_not_found);
58+
}
59+
60+
@VisibleForTesting
61+
public String findKeyword(String keyword, String text) {
62+
if (keyword.equals("")) {
63+
throw new IllegalArgumentException("No empty keywords allowed");
64+
}
65+
if (text.indexOf(keyword) >= 0) {
66+
return keyword;
67+
} else {
68+
return findKeywordWithHtmlBetweenWordsInText(keyword, text);
69+
}
70+
}
71+
72+
private String findKeywordWithHtmlBetweenWordsInText(String keyword, String text) {
73+
String[] splittedKeyword = keyword.split(" ");
74+
String regex = Pattern.quote(splittedKeyword[0]);
75+
76+
for (int i = 1; i < splittedKeyword.length; i++) {
77+
regex += ".*?" + Pattern.quote(splittedKeyword[i]);
78+
}
79+
return findShortestOccurrenceInText(regex, text);
80+
}
81+
82+
private String findShortestOccurrenceInText(String regex, String text) {
83+
Matcher m = Pattern.compile(regex).matcher(text);
84+
85+
String shortestOccurrence = null;
86+
int lastIndex = -1;
87+
while (m.find(lastIndex + 1)) {
88+
String found = m.group();
89+
if (shortestOccurrence == null || shortestOccurrence.length() > found.length()) {
90+
shortestOccurrence = found;
91+
lastIndex = m.start();
92+
}
4393
}
94+
return shortestOccurrence;
4495
}
4596
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/*
2+
* Catroid: An on-device visual programming system for Android devices
3+
* Copyright (C) 2010-2020 The Catrobat Team
4+
* (<http://developer.catrobat.org/credits>)
5+
*
6+
* This program is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU Affero General Public License as
8+
* published by the Free Software Foundation, either version 3 of the
9+
* License, or (at your option) any later version.
10+
*
11+
* An additional term exception under section 7 of the GNU Affero
12+
* General Public License, version 3, is available at
13+
* http://developer.catrobat.org/license_additional_term
14+
*
15+
* This program is distributed in the hope that it will be useful,
16+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
* GNU Affero General Public License for more details.
19+
*
20+
* You should have received a copy of the GNU Affero General Public License
21+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
22+
*/
23+
24+
package org.catrobat.catroid.test.ui.regexassistant;
25+
26+
import android.app.Activity;
27+
28+
import org.catrobat.catroid.utils.HtmlRegexExtractor;
29+
import org.junit.Before;
30+
import org.junit.Test;
31+
32+
import static org.junit.Assert.assertEquals;
33+
import static org.junit.Assert.assertNull;
34+
35+
public class HtmlRegexExtractorTest {
36+
37+
@Before
38+
public void setUp() {
39+
Activity context = new Activity();
40+
htmlExtractor = new HtmlRegexExtractor(context);
41+
}
42+
private HtmlRegexExtractor htmlExtractor;
43+
44+
@Test(expected = IllegalArgumentException.class)
45+
public void testFindKeywordWithEmptyKeyword() {
46+
htmlExtractor.findKeyword("", "abc");
47+
}
48+
49+
@Test
50+
public void testFindKeywordWithWrongKeyword() {
51+
assertNull(htmlExtractor.findKeyword("def", "abc"));
52+
}
53+
54+
@Test
55+
public void testFindOneKeywordInLongWord() {
56+
assertEquals("abc", htmlExtractor.findKeyword("abc", "abcdefg"));
57+
}
58+
59+
@Test
60+
public void testFindKeywordInSentence() {
61+
assertEquals("abc", htmlExtractor.findKeyword("abc", "Wer are looking for the abc "
62+
+ "statement in long text"));
63+
}
64+
65+
@Test
66+
public void testFindKeywordInSentenceWithSpaceInKeyword() {
67+
assertEquals("ab c", htmlExtractor.findKeyword("ab c", "Wer are looking for the "
68+
+ "ab c statement in long text"));
69+
}
70+
71+
@Test
72+
public void testFindKeywordInSentenceWithSpaceInKeywordAndTextWithNBSP() {
73+
assertEquals("ab&nbsp;c", htmlExtractor.findKeyword("ab c", "Wer are looking for "
74+
+ "the "
75+
+ "ab&nbsp;c statement in long text"));
76+
}
77+
78+
@Test
79+
public void testKeywordWithNonBreakingSpace() {
80+
assertEquals("Key&nbsp;Word", htmlExtractor.findKeyword("Key Word", "Key&nbsp;Word"));
81+
}
82+
83+
@Test
84+
public void testKeywordWithTag() {
85+
assertEquals("Key <i>Word", htmlExtractor.findKeyword("Key Word", "Key "
86+
+ "<i>Word</i>"));
87+
}
88+
89+
@Test
90+
public void testFalseKeywordOrder() {
91+
assertNull(htmlExtractor.findKeyword("Key Word", "Word Key"));
92+
}
93+
94+
@Test
95+
public void testRegexAsKeyword() {
96+
assertNull("A regular expression should not be found inside a html text that "
97+
+ "doesn't contain it literally",
98+
htmlExtractor.findKeyword("[A-Z]", "ABCDE"));
99+
}
100+
101+
@Test
102+
public void testRegexAsKeywordAndInText() {
103+
assertEquals("[A-Z]", htmlExtractor.findKeyword("[A-Z]", "[A-Z]+.*"));
104+
}
105+
106+
@Test
107+
public void testFindKeywordSmallestOccurrence() {
108+
assertEquals("Hello&nbsp;World", htmlExtractor.findKeyword("Hello World",
109+
"Hello Banana Animal Text Hello&nbsp;World Ape"));
110+
}
111+
}

0 commit comments

Comments
 (0)