Skip to content

Commit

Permalink
Performance improvements of Find recipe (#4758)
Browse files Browse the repository at this point in the history
* Implement some performance improvements on Find recipe

* Add extra tests

* Modify last test

* Restore linked list

* More performance gains

---------

Co-authored-by: Tim te Beek <[email protected]>
  • Loading branch information
nielsdebruin and timtebeek authored Dec 11, 2024
1 parent 2bcf394 commit d789bcb
Show file tree
Hide file tree
Showing 2 changed files with 157 additions and 8 deletions.
48 changes: 40 additions & 8 deletions rewrite-core/src/main/java/org/openrewrite/text/Find.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@
import org.openrewrite.remote.Remote;
import org.openrewrite.table.TextMatches;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -91,6 +89,22 @@ public String getDescription() {
@Nullable
String filePattern;

/**
 * Collects the newline positions needed to work out on which line a match at or after
 * {@code offset} starts.
 * <p>
 * The returned deque holds, in ascending order and without duplicates:
 * the index of the last {@code '\n'} strictly before {@code offset} (when there is one),
 * followed by the index of every {@code '\n'} at or after {@code offset}.
 * Newlines further back than the one immediately preceding {@code offset} are not recorded.
 *
 * @param input  the text to scan
 * @param offset the position from which newline positions are of interest (the caller passes
 *               the start of the first match)
 * @return the relevant newline indexes, head first; empty when {@code input} has no such newline
 */
private static Deque<Integer> findAllNewLineIndexes(String input, int offset) {
    Deque<Integer> indexes = new ArrayDeque<>();

    // Look backwards from offset - 1 so that a newline sitting exactly at offset is not mistaken
    // for the line break preceding it. A negative start index simply yields -1.
    int index = input.lastIndexOf('\n', offset - 1);
    if (index != -1) {
        indexes.add(index);
    }

    // Record every newline from offset onwards, in ascending order.
    index = input.indexOf('\n', offset);
    while (index != -1) {
        indexes.add(index);
        index = input.indexOf('\n', index + 1);
    }

    return indexes;
}

@Override
public TreeVisitor<?, ExecutionContext> getVisitor() {

Expand Down Expand Up @@ -123,24 +137,42 @@ public Tree visit(@Nullable Tree tree, ExecutionContext ctx) {
return sourceFile;
}
matcher.reset();

String sourceFilePath = sourceFile.getSourcePath().toString();

List<PlainText.Snippet> snippets = new ArrayList<>();
int previousEnd = 0;

Deque<Integer> newlineIndexes = null;
int lastNewLineIndex = -1;

while (matcher.find()) {
if (newlineIndexes == null) {
newlineIndexes = findAllNewLineIndexes(rawText, matcher.start());
}

int matchStart = matcher.start();
snippets.add(snippet(rawText.substring(previousEnd, matchStart)));
snippets.add(SearchResult.found(snippet(rawText.substring(matchStart, matcher.end()))));
previousEnd = matcher.end();

int startLine = Math.max(0, rawText.substring(0, matchStart).lastIndexOf('\n') + 1);
while (!newlineIndexes.isEmpty() && newlineIndexes.peek() < matchStart) {
lastNewLineIndex = newlineIndexes.pop();
}
int startLine = Math.max(0, lastNewLineIndex + 1);

int endLine = rawText.indexOf('\n', matcher.end());
if (endLine == -1) {
endLine = rawText.length();
}

textMatches.insertRow(ctx, new TextMatches.Row(
sourceFile.getSourcePath().toString(),
rawText.substring(startLine, matcher.start()) + "~~>" +
rawText.substring(matcher.start(), endLine)
sourceFilePath,
new StringBuilder(endLine - startLine + 3)
.append(rawText, startLine, matcher.start())
.append("~~>")
.append(rawText, matcher.start(), endLine)
.toString()
));
}
snippets.add(snippet(rawText.substring(previousEnd)));
Expand All @@ -160,8 +192,8 @@ public Tree visit(@Nullable Tree tree, ExecutionContext ctx) {
return visitor;
}


/**
 * Builds a {@link PlainText.Snippet} holding the given text, using an id obtained from
 * {@code Tree.randomId()} and {@code Markers.EMPTY} as its markers.
 *
 * @param text the text the snippet should carry
 * @return a new snippet wrapping {@code text}
 */
private static PlainText.Snippet snippet(String text) {
    return new PlainText.Snippet(
            Tree.randomId(),
            Markers.EMPTY,
            text
    );
}

}
117 changes: 117 additions & 0 deletions rewrite-core/src/test/java/org/openrewrite/text/FindTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,121 @@ void caseInsensitive() {
)
);
}

// Runs Find with the character-class pattern [T\s] against a two-line input.
// The expected text carries a ~~> marker in front of every 'T', every space, every tab,
// and in front of the newline that separates the two lines.
// NOTE(review): the boolean arguments look like (regex, caseSensitive, multiline) and the two
// nulls like (dotAll, filePattern) — confirm against the Find constructor.
@Test
void regexBasicMultiLine() {
rewriteRun(
spec -> spec.recipe(new Find("[T\\s]", true, true, true, null, null)),
text(
"""
This is\ttext.
This is\ttext.
""",
"""
~~>This~~> is~~>\ttext.~~>
~~>This~~> is~~>\ttext.
"""
)
);
}

// Runs the anchored pattern ^This.*below\.$ with the 4th and 5th constructor arguments both
// false (going by the test name, these are the multiline and dotall switches).
// Only a single text is passed to text(...), i.e. no separate expected result is given.
// NOTE(review): presumably this asserts that the recipe leaves the input unchanged — confirm
// against the test harness.
@Test
void regexWithoutMultilineAndDotall() {
rewriteRun(
spec -> spec.recipe(new Find("^This.*below\\.$", true, true, false, false, null)),
text(
"""
This is text.
This is a line below.
This is a line above.
This is text.
This is a line below.
"""
)
);
}

// Runs the pattern One.Two$ with the 4th constructor argument false and the 5th true
// (going by the test name: multiline off, dotall on) against a three-line input in which
// "One Two" is the middle line.
// Only a single text is passed to text(...), i.e. no separate expected result is given.
// NOTE(review): presumably this asserts that nothing is marked, since "One Two" is not at the
// end of the input — confirm against the test harness.
@Test
void regexMatchingWhitespaceWithoutMultilineWithDotall() {
rewriteRun(
spec -> spec.recipe(new Find("One.Two$", true, true, false, true, null)),
//language=csv
text( // the `.` above matches the space character on the same line
"""
Zero
One Two
Three
"""
)
);
}

// Runs the anchored pattern ^This.*below\.$ with the 4th constructor argument false and the
// 5th true (going by the test name: multiline off, dotall on).
// The expected text has exactly one ~~> marker, placed at the very start of the input.
// NOTE(review): presumably the whole five-line input is one match because '.' may cross line
// breaks while ^ and $ only anchor at the input boundaries — confirm.
@Test
void regexWithoutMultilineAndWithDotAll() {
rewriteRun(
spec -> spec.recipe(new Find("^This.*below\\.$", true, true, false, true, null)),
text(
"""
This is text.
This is a line below.
This is a line above.
This is text.
This is a line below.
""",
"""
~~>This is text.
This is a line below.
This is a line above.
This is text.
This is a line below.
"""
)
);
}

// Runs the anchored pattern ^This.*below\.$ with the 4th constructor argument true and the
// 5th false (going by the test name: multiline on, dotall off).
// The expected text has a ~~> marker in front of each of the two "This is a line below."
// lines and nowhere else, i.e. each of those lines is reported as its own match.
@Test
void regexWithMultilineAndWithoutDotall() {
rewriteRun(
spec -> spec.recipe(new Find("^This.*below\\.$", true, true, true, false, null)),
text(
"""
This is text.
This is a line below.
This is a line above.
This is text.
This is a line below.
""",
"""
This is text.
~~>This is a line below.
This is a line above.
This is text.
~~>This is a line below.
"""
)
);
}

// Runs the anchored pattern ^This.*below\.$ with the 4th and 5th constructor arguments both
// true (going by the test name: multiline and dotall both on). The input's first line does
// not start with "This".
// The expected text has a single ~~> marker, in front of the first "This is a line below."
// line; the second such line carries no marker of its own.
// NOTE(review): presumably the one match extends from the second line through the final
// "below." because '.' may cross line breaks — confirm.
@Test
void regexWithBothMultilineAndDotAll() {
rewriteRun(
spec -> spec.recipe(new Find("^This.*below\\.$", true, true, true, true, null)),
text(
"""
The first line.
This is a line below.
This is a line above.
This is text.
This is a line below.
""",
"""
The first line.
~~>This is a line below.
This is a line above.
This is text.
This is a line below.
"""
)
);
}
}

0 comments on commit d789bcb

Please sign in to comment.