Skip to content

Commit 67f66f3

Browse files
committed
CATROID-762 - Regex. Assistant Output
- Add a regex creator that builds a regular expression from a given keyword in HTML text. - Add unit tests.
1 parent b89fda1 commit 67f66f3

File tree

2 files changed

+102
-10
lines changed

2 files changed

+102
-10
lines changed

catroid/src/main/java/org/catrobat/catroid/utils/HtmlRegexExtractor.java

+51-2
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ public HtmlRegexExtractor(Context context) {
4141
}
4242

4343
public void searchKeyword(String keyword, String text) {
44-
if (findKeyword(keyword, text) == null) {
44+
String keywordFound = findKeyword(keyword, text);
45+
String regexFound = htmlToRegexConverter(keywordFound, text);
46+
if (regexFound == null) {
4547
showError();
4648
} else {
4749
showSuccess();
@@ -60,7 +62,7 @@ private void showError() {
6062
@VisibleForTesting
6163
public String findKeyword(String keyword, String text) {
6264
if (keyword.equals("")) {
63-
throw new IllegalArgumentException("No empty keywords allowed");
65+
return null;
6466
}
6567
if (text.indexOf(keyword) >= 0) {
6668
return keyword;
@@ -93,4 +95,51 @@ private String findShortestOccurrenceInText(String regex, String text) {
9395
}
9496
return shortestOccurrence;
9597
}
98+
99+
public String htmlToRegexConverter(String keyword, String htmlText) {
100+
int keywordIndex;
101+
String regex;
102+
103+
if (keyword != null) {
104+
keywordIndex = htmlText.indexOf(keyword);
105+
if (keyword.equals(htmlText)) {
106+
regex = "(.*)";
107+
} else {
108+
regex = "(.*)";
109+
int distance = 0;
110+
do {
111+
distance++;
112+
113+
String beforeKeyword = "";
114+
int beforeKeywordIndex = keywordIndex - distance;
115+
if (beforeKeywordIndex >= 0) {
116+
beforeKeyword = String.valueOf(htmlText.charAt(beforeKeywordIndex));
117+
}
118+
119+
String afterKeyword = "";
120+
int afterKeywordIndex = keywordIndex + keyword.length() + distance - 1;
121+
if (afterKeywordIndex < htmlText.length()) {
122+
afterKeyword =
123+
String.valueOf(htmlText.charAt(afterKeywordIndex));
124+
}
125+
126+
regex = beforeKeyword + regex + afterKeyword;
127+
} while (!matchesUniquely(regex, htmlText, keyword));
128+
}
129+
} else {
130+
regex = null;
131+
}
132+
return regex;
133+
}
134+
private boolean matchesUniquely(String pattern, String text, String expectedMatch) {
135+
int counter = 0;
136+
Matcher matcher = Pattern.compile(pattern).matcher(text);
137+
138+
String matched = null;
139+
while (matcher.find()) {
140+
matched = matcher.group(1);
141+
counter++;
142+
}
143+
return counter == 1 && expectedMatch.equals(matched);
144+
}
96145
}

catroid/src/test/java/org/catrobat/catroid/test/ui/regexassistant/HtmlRegexExtractorTest.java

+51-8
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@ public void setUp() {
4141
}
4242
private HtmlRegexExtractor htmlExtractor;
4343

44-
@Test(expected = IllegalArgumentException.class)
44+
@Test
4545
public void testFindKeywordWithEmptyKeyword() {
46-
htmlExtractor.findKeyword("", "abc");
46+
assertNull(htmlExtractor.findKeyword("", "abc"));
4747
}
4848

4949
@Test
@@ -62,12 +62,6 @@ public void testFindKeywordInSentence() {
6262
+ "statement in long text"));
6363
}
6464

65-
@Test
66-
public void testFindKeywordInSentenceWithSpaceInKeyword() {
67-
assertEquals("ab c", htmlExtractor.findKeyword("ab c", "Wer are looking for the "
68-
+ "ab c statement in long text"));
69-
}
70-
7165
@Test
7266
public void testFindKeywordInSentenceWithSpaceInKeywordAndTextWithNBSP() {
7367
assertEquals("ab&nbsp;c", htmlExtractor.findKeyword("ab c", "Wer are looking for "
@@ -108,4 +102,53 @@ public void testFindKeywordSmallestOccurrence() {
108102
assertEquals("Hello&nbsp;World", htmlExtractor.findKeyword("Hello World",
109103
"Hello Banana Animal Text Hello&nbsp;World Ape"));
110104
}
105+
106+
@Test
107+
public void testCreateRegexWithOneCharContext() {
108+
assertEquals("b(.*)e", htmlExtractor.htmlToRegexConverter("cd", "abcdefg"));
109+
}
110+
111+
@Test
112+
public void testCreateRegexWithKeywordAtStart() {
113+
assertEquals("(.*)c", htmlExtractor.htmlToRegexConverter("ab", "abcdefg"));
114+
}
115+
116+
@Test
117+
public void testCreateRegexWithKeywordAtEnd() {
118+
assertEquals("d(.*)", htmlExtractor.htmlToRegexConverter("efg", "abcdefg"));
119+
}
120+
121+
@Test
122+
public void testCreateRegexWithDuplicateKeywordFirstOccurrence1CharContext() {
123+
assertEquals("ab(.*)defga", htmlExtractor.htmlToRegexConverter("KEY", "abKEYdefgadKEYdefg"));
124+
}
125+
126+
@Test
127+
public void testCreateRegexWith2CharContext() {
128+
assertEquals("yb(.*)de", htmlExtractor.htmlToRegexConverter("KEY", "abcdefg ybKEYdefg"));
129+
}
130+
131+
@Test
132+
public void testCreateRegexWhereKeywordEqualsHtmlText() {
133+
assertEquals("(.*)", htmlExtractor.htmlToRegexConverter("abcdefg", "abcdefg"));
134+
}
135+
136+
@Test
137+
public void testCreateRegexPostfixInKeyword() {
138+
assertEquals("(.*)b", htmlExtractor.htmlToRegexConverter("abc", "abcbc"));
139+
}
140+
141+
@Test
142+
public void testCreateRegexOutOfBoundsAfter2CharContext() {
143+
assertEquals("b(.*)ba", htmlExtractor.htmlToRegexConverter("abc", "babcbabcb"));
144+
}
145+
@Test
146+
public void testFirstKeyBordersOnSecondKey() {
147+
assertEquals("baaaa(.*)aaaaK", htmlExtractor.htmlToRegexConverter("KEY",
148+
"baaaaKEYaaaaKEYaaaa"));
149+
}
150+
@Test
151+
public void testCreateRegexWhereTextOnlyKeywords() {
152+
assertEquals("(.*)aa", htmlExtractor.htmlToRegexConverter("a", "aaa"));
153+
}
111154
}

0 commit comments

Comments
 (0)