Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test support for error recovery #2034

Merged
merged 23 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0c8bc49
Started working on decent test support
PieterOlivier Sep 24, 2024
6f27c75
Merge branch 'error-recovery' into recovery/testing
PieterOlivier Sep 24, 2024
89d07fe
Added Java and C recovery tests
PieterOlivier Sep 24, 2024
09de7e0
Removed obsolete tests, replaced by tests in LanguageRecoveryTests.rsc
PieterOlivier Sep 25, 2024
0401f72
Added "delete until end of line" test
PieterOlivier Sep 25, 2024
0464d89
Improved performance of `findBestError`, improved statistics reporting
PieterOlivier Sep 26, 2024
6c1efe3
Added some more languages to test and the output of a baseline run
PieterOlivier Sep 26, 2024
c93de3e
Removed space before %
PieterOlivier Sep 26, 2024
5fbc8cd
Reintroduced 'private'
PieterOlivier Sep 26, 2024
11c705e
Added removed compass direction
PieterOlivier Sep 27, 2024
480eb12
Added Rascal batch testing support
PieterOlivier Sep 27, 2024
01fef03
Merge branch 'error-recovery' into recovery/testing
PieterOlivier Sep 27, 2024
9bbc1e0
Fixed issue with infinite recursion during parsing
PieterOlivier Sep 28, 2024
ed91821
Merge branch 'error-recovery' into recovery/testing
PieterOlivier Sep 28, 2024
080c4f6
Readded missing line
PieterOlivier Sep 29, 2024
781a260
Improved benchmarking, both in speed and stat quality
PieterOlivier Sep 30, 2024
f95fba1
Fixed cumulative stats display
PieterOlivier Sep 30, 2024
b9d991b
Fixed division-by-zero when reference parse time happens to be 0
PieterOlivier Sep 30, 2024
88a3647
Added bug tests and some support for selective testing
PieterOlivier Oct 2, 2024
b17dd42
Removed unnecessary changes
PieterOlivier Oct 2, 2024
6103116
Removed unnecessary changes
PieterOlivier Oct 2, 2024
2b00b16
Printing a newline after each line of input that is processed
PieterOlivier Oct 2, 2024
e093964
Fixed off-by-one error in newline printing
PieterOlivier Oct 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.dot diff=-astextplain
48 changes: 10 additions & 38 deletions src/org/rascalmpl/library/ParseTree.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ extend Message;
extend List;

import String;
import Node;
import Set;

@synopsis{The Tree data type as produced by the parser.}
@description{
Expand Down Expand Up @@ -807,20 +807,19 @@ str getErrorText(appl(error(_, _, _), [*_, appl(skipped(_), chars)])) = stringCh
This filter removes error trees until no ambiguities caused by error recovery are left.
Note that regular ambiguous trees remain in the parse forest.
}
Tree defaultErrorDisambiguationFilter(t: appl(Production prod, args)) {
Tree result = appl(prod, [defaultErrorDisambiguationFilter(arg) | arg <- args]);
return setKeywordParameters(result, getKeywordParameters(t));
Tree defaultErrorDisambiguationFilter(Tree t) {
return visit(t) {
case a:amb(_) => ambDisambiguation(a)
};
}

Tree defaultErrorDisambiguationFilter(amb(set[Tree] alternatives)) {
private Tree ambDisambiguation(amb(set[Tree] alternatives)) {
// Go depth-first
set[Tree] disambiguatedAlts = { defaultErrorDisambiguationFilter(alt) | Tree alt <- alternatives };

set[Tree] errorTrees = { alt | Tree alt <- disambiguatedAlts, /appl(error(_,_,_), _) := alt };
set[Tree] nonErrorTrees = { alt | Tree alt <- disambiguatedAlts, /appl(error(_,_,_), _) !:= alt };
rel[int score, Tree alt] scoredErrorTrees = { <scoreErrors(alt), alt> | Tree alt <- alternatives };
set[Tree] nonErrorTrees = scoredErrorTrees[0];

if (nonErrorTrees == {}) {
return getBestErrorTree(errorTrees);
return (getFirstFrom(scoredErrorTrees) | it.score > c.score ? c : it | c <- scoredErrorTrees).alt;
}

if ({Tree single} := nonErrorTrees) {
Expand All @@ -832,34 +831,7 @@ Tree defaultErrorDisambiguationFilter(amb(set[Tree] alternatives)) {
return amb(nonErrorTrees);
}

private Tree getBestErrorTree(set[Tree] trees) {
Tree best = char(0);
int bestErrorCount = -1;
int bestErrorLength = 0;

for (tree <- trees) {
list[Tree] errors = findAllErrors(tree);
int errorCount = size(errors);
int errorLength = 0;

for (err <- errors) {
errorLength += getSkipped(err).src.length;
}

if (bestErrorCount == -1 || errorCount < bestErrorCount || (errorCount == bestErrorCount && errorLength < bestErrorLength)) {
best = tree;
bestErrorCount = errorCount;
bestErrorLength = errorLength;
}
}

if (bestErrorCount != -1) {
return best;
}

// trees must have been empty
fail;
}
// Error score of a tree: starting from 0, adds `getSkipped(e).src.length` for every
// error application `e` found anywhere inside `t` (deep match).
// A tree that contains no error application therefore scores 0.
private int scoreErrors(Tree t) = (0 | it + getSkipped(e).src.length | /e:appl(error(_,_,_),_) := t);

// Handle char and cycle nodes
default Tree defaultErrorDisambiguationFilter(Tree t) = t;
19 changes: 19 additions & 0 deletions src/org/rascalmpl/library/lang/c90/examples/hello-world.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

/* Prototype only: print() is declared here but not defined in this file. */
int print(const char *text);

void printHello(char *name) {
print("Hello ");
print(name);
print("!");
}

/*
 * Entry point: greets argv[1] when at least one command-line argument is
 * present (argc > 1), otherwise greets "World".
 *
 * Returns 0 explicitly: in C90 the termination status is undefined when
 * control reaches the closing brace of main without a return statement.
 */
int main(int argc, char *argv[]) {
    char *name;
    if (argc > 1) {
        name = argv[1];
    } else {
        name = "World";
    }

    printHello(name);
    return 0;
}
2 changes: 2 additions & 0 deletions src/org/rascalmpl/library/lang/diff/unified/UnifiedDiff.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
@contributor{Tijs van der Storm - [email protected] (CWI)}
module lang::diff::unified::UnifiedDiff

// Start nonterminal: a DiffFile consists of exactly one Diff.
start syntax DiffFile = Diff;

// A Diff is two headers (labelled `old` and `new`) followed by zero or more chunks.
syntax Diff
= Header old Header new Chunk* chunks
;
Expand Down
39 changes: 39 additions & 0 deletions src/org/rascalmpl/library/lang/diff/unified/examples/example.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
--- a/src/org/rascalmpl/parser/uptr/UPTRNodeFactory.java
+++ b/src/org/rascalmpl/parser/uptr/UPTRNodeFactory.java
@@ -1,6 +1,7 @@
package org.rascalmpl.parser.uptr;

import java.net.URI;
+import java.util.Arrays;
import java.util.IdentityHashMap;
import java.util.Map;

@@ -21,7 +22,9 @@ import org.rascalmpl.values.parsetrees.ProductionAdapter;
import org.rascalmpl.values.parsetrees.TreeAdapter;

public class UPTRNodeFactory implements INodeConstructorFactory<ITree, ISourceLocation>{
- private final static RascalValueFactory VF = (RascalValueFactory) ValueFactoryFactory.getValueFactory();
+ private static final RascalValueFactory VF = (RascalValueFactory) ValueFactoryFactory.getValueFactory();
+ private static final IConstructor SKIPPED = VF.constructor(RascalValueFactory.Production_Skipped, VF.constructor(RascalValueFactory.Symbol_IterStar, VF.constructor(RascalValueFactory.Symbol_CharClass, VF.list(VF.constructor(RascalValueFactory.CharRange_Range, VF.integer(1), VF.integer(Character.MAX_CODE_POINT))))));
+
private boolean allowAmb;

public UPTRNodeFactory(boolean allowAmbiguity){
@@ -141,7 +144,14 @@ public class UPTRNodeFactory implements INodeConstructorFactory<ITree, ISourceLo
}

@Override
- public ITree createRecoveryNode(int[] characters) {
- throw new UnsupportedOperationException();
- }
+ public ITree createSkippedNode(int[] characters) {
+ return createLiteralNode(characters, SKIPPED);
+ }
+
+ public ITree createErrorNode(ArrayList<ITree> children, Object production) {
+ IConstructor prod = (IConstructor) production;
+ IConstructor errorProd = VF.constructor(RascalValueFactory.Production_Error, prod.get(0), prod, VF.integer(children.size()-1));
+ return buildAppl(children, errorProd);
+ }
+
}
65 changes: 65 additions & 0 deletions src/org/rascalmpl/library/lang/dot/examples/parser-state.dot
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
digraph Parser {
"Parser"["label"="Parser\nInput: \"void f(){if(1){}}\"\nLocation: 0 ('v')\nStep 5: Reducing terminals"];
"todo-1"["label"="<0> 0", "shape"="record"];
"-2"["label"="Epsilon: \n.0@0 ,matchable,end\n?\nin: 'lex(\"LAYOUT\") -> regular(\iter-star(lex(\"LAYOUT\")))'"];
"7226"["label"="List: 7226\n.0@0 ,expandable,end\n7226\nin: 'LAYOUTLIST -> \iter-star(lex(\"LAYOUT\"))'"];
"12860"["label"="NonTerminal: LAYOUTLIST\n.1@0 \nlayouts_LAYOUTLIST\nin: Tags Visibility Signature '=' Expression 'when' 12878 ';'"];
"-1"["label"="NonTerminal: FunctionDeclaration\n.0@-1 \nFunctionDeclaration"];
"12860" -> "-1";
"7226" -> "12860";
"-2" -> "7226";
"todo-1":"0":sw -> "-2"["label"="Stack"];
"46484886"["shape"="octagon", "label"="Epsilon"];
"todo-1":"0":se -> "46484886"["label"="Node"];
"todoLists":"1" -> "todo-1";
"todoLists"["label"="<0> 0 | <1> 1 | <2> 2 | <3> 3 | <4> 4 | <5> 5 | <6> 6 | <7> 7 | <8> 8 | <9> 9 | <10> 10 | <11> 11 | <12> 12 | <13> 13 | <14> 14 | <15> 15", "shape"="record"];
"Parser" -> "todoLists"["label"="todo lists"];
"stacksToExpand"["label"="", "shape"="record"];
"Parser" -> "stacksToExpand"["label"="stacks to expand"];
"terminalsToReduce"["label"="<0> 0", "shape"="record", "color"="red"];
"terminalsToReduce":"0":sw -> "-2"["label"="Stack"];
"terminalsToReduce":"0":se -> "46484886"["label"="Node"];
"Parser" -> "terminalsToReduce"["label"="terminals to reduce"];
"nonTerminalsToReduce"["label"="", "shape"="record"];
"Parser" -> "nonTerminalsToReduce"["label"="non-terminals to reduce"];
"122"["label"="NonTerminal: Tag\n.0@0 ,end\nTag\nin: 'sort(\"Tag\") -> regular(\iter-star-seps(sort(\"Tag\"),[layouts(\"LAYOUTLIST\")]))'"];
"124"["label"="SeparatedList: 124\n.0@0 ,expandable,end\n124\nin: 'default -> tags'"];
"12858"["label"="NonTerminal: Tags\n.0@0 \nTags\nin: Tags Visibility Signature '=' Expression 'when' 12878 ';'"];
"12858" -> "-1";
"124" -> "12858";
"122" -> "124";
"unexpandableNodes":"0" -> "122";
"13120"["label"="NonTerminal: Comment\n.0@0 ,end\nComment\nin: 'LAYOUT -> Comment'"];
"7221"["label"="NonTerminal: LAYOUT\n.0@0 ,end\nLAYOUT\nin: 'lex(\"LAYOUT\") -> regular(\iter-star(lex(\"LAYOUT\")))'"];
"7221" -> "7226";
"13120" -> "7221";
"unexpandableNodes":"1" -> "13120";
"unexpandableNodes"["label"="<0> 0 | <1> 1", "shape"="record"];
"12824"["label"="Char: \n.0@-1 ,matchable\n0\nin: 0 'sort(\"FunctionDeclaration\")' ':' 12828 0"];
"unmatchableLeafNodes":"0" -> "12824";
"128"["label"="Char: \n.0@-1 ,matchable\n0\nin: 0 'sort(\"Tags\")' ':' 132 0"];
"unmatchableLeafNodes":"1" -> "128";
"2043"["label"="Literal: \n.0@-1 ,matchable\n'@'\nin: '@' Name '=' Expression"];
"unmatchableLeafNodes":"2" -> "2043";
"2065"["label"="Char: \n.0@-1 ,matchable\n0\nin: 0 '\iter-star(sort(\"Tag\"))' ':' 2069 0"];
"unmatchableLeafNodes":"3" -> "2065";
"13122"["label"="Char: \n.0@-1 ,matchable,end\n9-13,32,133,160,5760,6158,8192-8202,8232-8233,8239,8287,12288\nin: 'LAYOUT -> [range(9,13),range(32,32),range(133,133),range(160,160),range(5760,5760),range(6158,6158),range(8192,8202),range(8232,8233),range(8239,8239),range(8287,8287),range(12288,12288)]'"];
"unmatchableLeafNodes":"4" -> "13122";
"13125"["label"="Char: \n.0@-1 ,matchable\n0\nin: 0 '\iter-star(sort(\"LAYOUT\"))' ':' 13129 0"];
"unmatchableLeafNodes":"5" -> "13125";
"7373"["label"="Literal: \n.0@-1 ,matchable\n'/*'\nin: '/*' 7379 '*/'"];
"unmatchableLeafNodes":"6" -> "7373";
"7382"["label"="Literal: \n.0@-1 ,matchable\n'//'\nin: '//' 7386"];
"unmatchableLeafNodes":"7" -> "7382";
"7389"["label"="Char: \n.0@-1 ,matchable\n0\nin: 0 'sort(\"Comment\")' ':' 7393 0"];
"unmatchableLeafNodes":"8" -> "7389";
"unmatchableLeafNodes"["label"="<0> 0 | <1> 1 | <2> 2 | <3> 3 | <4> 4 | <5> 5 | <6> 6 | <7> 7 | <8> 8", "shape"="record"];
"unmatchableMidProductionNodes"["shape"="record", "label"=""];
"filteredNodes"["label"="", "shape"="record"];
"error"["label"="Errors"];
"Parser" -> "error"["label"="error tracking"];
"error" -> "unexpandableNodes"["label"="unexpandable"];
"error" -> "unmatchableLeafNodes"["label"="unmatchable leafs"];
"error" -> "unmatchableMidProductionNodes"["label"="unmatchable mid-prod"];
"error" -> "filteredNodes"["label"="filtered"];
}
2 changes: 1 addition & 1 deletion src/org/rascalmpl/library/lang/dot/syntax/Dot.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ syntax NodeId
| Id Port
;

syntax Port = ":" Id Id?
syntax Port = ":" Id (":" Id)?
// | ":" Id
// | ":" CompassPt
;
Expand Down
18 changes: 18 additions & 0 deletions src/org/rascalmpl/library/lang/pico/examples/fac.pico
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
begin declare input : natural,
output : natural,
repnr : natural,
rep : natural,
s1 : string,
s2 : string;
%% s1 and s2 are declared but not used below.
%% Start values: input is the counter that is stepped down, output accumulates the result.
input := 14;
output := 1;
%% Outer loop, controlled by input - 1; input is decreased by one per iteration.
%% NOTE(review): presumably a loop runs while its expression is non-zero - confirm against the Pico semantics.
while input - 1 do
rep := output;
repnr := input;
%% Inner loop, controlled by repnr - 1: adds rep (the old output) to output once per iteration.
%% Under the assumption above this multiplies output by input using repeated addition.
while repnr - 1 do
output := output + rep;
repnr := repnr - 1
od;
input := input - 1
od
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
module lang::rascal::tests::concrete::recovery::ErrorRecoveryBenchmark

import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;

import IO;
import util::Benchmark;
import String;
import List;

// Void entry points, one per language: each calls the matching testRecovery*
// function and discards the FileStats value it returns.
void runTestC() {
    testRecoveryC();
}

void runTestDiff() {
    testRecoveryDiff();
}

void runTestDot() {
    testRecoveryDot();
}

void runTestJava() {
    testRecoveryJava();
}

void runTestJson() {
    testRecoveryJson();
}

void runTestPico() {
    testRecoveryPico();
}

void runTestRascal() {
    testRecoveryRascal();
}

// Per-language recovery tests. Each one hands testErrorRecovery the location of a
// syntax module (.rsc), a nonterminal name and one example input, and returns the
// resulting FileStats unchanged.
FileStats testRecoveryC() {
    return testErrorRecovery(|std:///lang/c90/syntax/C.rsc|, "TranslationUnit", |std:///lang/c90/examples/hello-world.c|);
}

FileStats testRecoveryDiff() {
    return testErrorRecovery(|std:///lang/diff/unified/UnifiedDiff.rsc|, "DiffFile", |std:///lang/diff/unified/examples/example.diff|);
}

FileStats testRecoveryDot() {
    return testErrorRecovery(|std:///lang/dot/syntax/Dot.rsc|, "DOT", |std:///lang/dot/examples/parser-state.dot|);
}

// The Java input is taken from inside a zip archive via zippedFile.
FileStats testRecoveryJava() {
    return testErrorRecovery(|std:///lang/java/syntax/Java15.rsc|, "CompilationUnit", zippedFile("m3/snakes-and-ladders-project-source.zip", "src/snakes/LastSquare.java"));
}

FileStats testRecoveryJson() {
    return testErrorRecovery(|std:///lang/json/syntax/JSON.rsc|, "JSONText", |std:///lang/json/examples/ex01.json|);
}

FileStats testRecoveryPico() {
    return testErrorRecovery(|std:///lang/pico/syntax/Main.rsc|, "Program", |std:///lang/pico/examples/fac.pico|);
}

FileStats testRecoveryRascal() {
    return testErrorRecovery(|std:///lang/rascal/syntax/Rascal.rsc|, "Module", |std:///lang/rascal/vis/ImportGraph.rsc|);
}

// Runs the single-file recovery test of every language in a fixed order
// (C, Diff, Dot, Java, Json, Pico, Rascal). The per-test statistics are not
// collected; each runTest* wrapper calls the matching testRecovery* function
// and drops its result.
void runLanguageTests() {
    runTestC();
    runTestDiff();
    runTestDot();
    runTestJava();
    runTestJson();
    runTestPico();
    runTestRascal();
}

// Runs batchRecoveryTest with the Rascal syntax module and the "Module" nonterminal
// over |std:///| for files with extension ".rsc", forwarding maxFiles and maxFileSize.
// Afterwards prints a separator, the elapsed time (realTime difference divided by
// 1000, reported as seconds) and the accumulated statistics via printStats.
//
// maxFiles / maxFileSize: passed through unchanged to batchRecoveryTest.
void runRascalBatchTest(int maxFiles=1000, int maxFileSize=4000) {
    int startTime = realTime();
    TestStats stats = batchRecoveryTest(|std:///lang/rascal/syntax/Rascal.rsc|, "Module", |std:///|, ".rsc", maxFiles, maxFileSize);
    int duration = realTime() - startTime;
    println();
    // Separator line; the stray "im" that was embedded in it has been removed.
    println("==================================================================");
    println("Rascal batch test done in <duration/1000> seconds, total result:");
    printStats(stats);
}

// Command-line entry point for the Rascal batch recovery test.
//
// args: either empty (defaults maxFiles=1000, maxFileSize=4000 are used) or exactly
//       two elements, <max-files> and <max-file-size>, both converted with toInt.
// Returns 0 after a batch run; returns 1 without running anything when the number
// of arguments is neither 0 nor 2.
int main(list[str] args) {
    int maxFiles = 1000;
    int maxFileSize = 4000;
    if (size(args) == 2) {
        maxFiles = toInt(args[0]);
        maxFileSize = toInt(args[1]);
    } else if (size(args) != 0) {
        // '<' and '>' must be escaped: unescaped they start a string interpolation,
        // so "<max-files>" would be evaluated as the expression `max - files`.
        println("Usage: ErrorRecoveryBenchmark \<max-files\> \<max-file-size\>");
        // Wrong invocation: report failure instead of silently running with defaults.
        return 1;
    }

    runRascalBatchTest(maxFiles=maxFiles, maxFileSize=maxFileSize);
    return 0;
}
Loading