Skip to content

Commit

Permalink
Merge pull request #2035 from usethesource/error-disambiguation-in-java
Browse files Browse the repository at this point in the history
Reimplemented error tree disambiguation in Java
  • Loading branch information
PieterOlivier authored and toinehartman committed Oct 8, 2024
2 parents c039de5 + 9827f5a commit b0e4514
Show file tree
Hide file tree
Showing 17 changed files with 432 additions and 123 deletions.
54 changes: 23 additions & 31 deletions src/org/rascalmpl/library/ParseTree.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ extend Message;
extend List;
import String;
import Set;
import util::Maybe;
@synopsis{The Tree data type as produced by the parser.}
@description{
Expand Down Expand Up @@ -355,6 +355,16 @@ The latter option terminates much faster, i.e. always in cubic time, and always
while constructing ambiguous parse forests may grow to O(n^p+1), where p is the length of the longest production rule and n
is the length of the input.
The `allowRecovery` option can be set to `true` to enable error recovery. This is an experimental feature.
When error recovery is enabled, the parser will attempt to recover from parse errors and continue parsing.
If successful, a parse tree with error and skipped productions is returned (see the definition of `Production` above).
A number of functions are provided to analyze trees with errors, for example `hasErrors`, `getSkipped`, and `getErrorText`.
Note that the resulting parse forest can contain a lot of error nodes. `disambiguateErrors` can be used to prune the forest
and leave a tree with a single error (or even none) based on simple heuristics.
When `allowAmbiguity` is set to false, `allowRecovery` is set to true, and `filters` is empty, this disambiguation is done
automatically so you should end up with a tree with no error ambiguities. Regular ambiguities can still occur
and will result in an error.
The `filters` set contains functions which may be called optionally after the parse algorithm has finished and just before
the Tree representation is built. The set of functions contains alternative functions, only one of which is successfully applied
to each node in a tree. If such a function fails to apply, the other ones are tried. There is no fixed-point computation, so
Expand Down Expand Up @@ -784,7 +794,15 @@ list[Tree] findAllErrors(Tree tree) = [err | /err:appl(error(_, _, _), _) := tr
Tree findFirstError(/err:appl(error(_, _, _), _)) = err;
@synopsis{Find the best error in a tree containing errors. Returns `just(error)` for the error that remains after error disambiguation, or `nothing()` if no error is left.}
Tree findBestError(Tree tree) = findFirstError(defaultErrorDisambiguationFilter(tree));
Maybe[Tree] findBestError(Tree tree) {
    // Disambiguate the error trees first, then look for an error node that survived.
    if (/remaining:appl(error(_, _, _), _) := disambiguateErrors(tree)) {
        return just(remaining);
    }

    // Disambiguation left no error nodes behind.
    return nothing();
}
@synopsis{Get the symbol (sort) of the failing production}
Symbol getErrorSymbol(appl(error(Symbol sym, _, _), _)) = sym;
Expand All @@ -803,35 +821,9 @@ If you want the text of the whole error tree, you can just use string interpolat
}
str getErrorText(appl(error(_, _, _), [*_, appl(skipped(_), chars)])) = stringChars([c | char(c) <- chars]);
@javaClass{org.rascalmpl.parser.gtd.recovery.ParseErrorDisambiguator}
@synopsis{Error recovery often produces ambiguous trees where errors can be recovered in multiple ways.
This filter removes error trees until no ambiguities caused by error recovery are left.
Note that regular ambiguous trees remain in the parse forest.
}
Tree defaultErrorDisambiguationFilter(Tree t) {
return visit(t) {
case a:amb(_) => ambDisambiguation(a)
};
}
private Tree ambDisambiguation(amb(set[Tree] alternatives)) {
// Go depth-first
rel[int score, Tree alt] scoredErrorTrees = { <scoreErrors(alt), alt> | Tree alt <- alternatives };
set[Tree] nonErrorTrees = scoredErrorTrees[0];
if (nonErrorTrees == {}) {
return (getFirstFrom(scoredErrorTrees) | it.score > c.score ? c : it | c <- scoredErrorTrees).alt;
}
if ({Tree single} := nonErrorTrees) {
// One ambiguity left, no ambiguity concerns here
return single;
}
// Multiple non-error trees left, return an ambiguity node with just the non-error trees
return amb(nonErrorTrees);
Note that regular ambiguous trees remain in the parse forest unless `allowAmbiguity` is set to false in which case an error is thrown.
}
private int scoreErrors(Tree t) = (0 | it + getSkipped(e).src.length | /e:appl(error(_,_,_),_) := t);
// Handle char and cycle nodes
default Tree defaultErrorDisambiguationFilter(Tree t) = t;
java Tree disambiguateErrors(Tree t, bool allowAmbiguity=true);
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
module lang::rascal::tests::concrete::recovery::BasicRecoveryTests

import ParseTree;
import util::Maybe;

layout Layout = [\ ]* !>> [\ ];

Expand All @@ -33,15 +34,23 @@ test bool basicOk() {

test bool abx() {
Tree t = parseS("a b x $");
return getErrorText(findBestError(t)) == "x ";
return getErrorText(findBestError(t).val) == "x ";
}

test bool axc() {
Tree t = parseS("a x c $");
return getErrorText(findBestError(t)) == "x c";
return getErrorText(findBestError(t).val) == "x c";
}

test bool ax() {
Tree t = parseS("a x $");
return getErrorText(findBestError(t)) == "x ";
str input = "a x $";

Tree t = parseS(input);
assert size(findAllErrors(t)) == 3;
assert getErrorText(findBestError(t).val) == "x ";

Tree autoDisambiguated = parser(#S, allowRecovery=true, allowAmbiguity=false)(input, |unknown:///|);
assert size(findAllErrors(autoDisambiguated)) == 1;

return getErrorText(findFirstError(autoDisambiguated)) == getErrorText(findBestError(t).val);
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,24 +33,35 @@ void runLanguageTests() {
testRecoveryRascal();
}

void runRascalBatchTest(int maxFiles=1000, int maxFileSize=4000) {
void runRascalBatchTest(int maxFiles=1000, int minFileSize=0, int maxFileSize=4000, int fromFile=0) {
int startTime = realTime();
TestStats stats = batchRecoveryTest(|std:///lang/rascal/syntax/Rascal.rsc|, "Module", |std:///|, ".rsc", maxFiles, maxFileSize);
TestStats stats = batchRecoveryTest(|std:///lang/rascal/syntax/Rascal.rsc|, "Module", |std:///|, ".rsc", maxFiles, minFileSize, maxFileSize, fromFile, |cwd:///rascal-recovery-stats.csv|);
int duration = realTime() - startTime;
println();
println("========================im========================================");
println("================================================================");
println("Rascal batch test done in <duration/1000> seconds, total result:");
printStats(stats);
}int main(list[str] args) {
int maxFiles = 1000;
int maxFileSize = 4000;
if (size(args) == 2) {
int maxFileSize = 1000000;
int minFileSize = 0;
int fromFile = 0;
if (size(args) > 0) {
maxFiles = toInt(args[0]);
maxFileSize = toInt(args[1]);
} else if (size(args) != 0) {
println("Usage: ErrorRecoveryBenchmark <max-files> <max-file-size>");
}
if (size(args) > 1) {
minFileSize = toInt(args[1]);
}
if (size(args) > 2) {
maxFileSize = toInt(args[2]);
}
if (size(args) > 3) {
fromFile = toInt(args[3]);
} else {
println("Usage: ErrorRecoveryBenchmark [\<max-files\> [\<min-file-size\> [\<max-file-size\> [\<from-file\>]]]]");
}
runRascalBatchTest(maxFiles=maxFiles, minFileSize=minFileSize, maxFileSize=maxFileSize, fromFile=fromFile);
runRascalBatchTest(maxFiles=maxFiles, maxFileSize=maxFileSize);
return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
module lang::rascal::tests::concrete::recovery::NestedRecoveryTests

import ParseTree;
import util::Maybe;

layout Layout = [\ ]* !>> [\ ];

Expand All @@ -35,5 +36,5 @@ test bool nestedOk() {

test bool nestedTypo() {
Tree t = parseS("a b x c");
return getErrorText(findFirstError(defaultErrorDisambiguationFilter(t))) == "x ";
return getErrorText(findBestError(t).val) == "x ";
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import ParseTree;

import IO;
import String;
import util::Maybe;

Tree parsePico(str input, bool visualize=false)
= parser(#Program, allowRecovery=true, allowAmbiguity=true)(input, |unknown:///?visualize=<"<visualize>">|);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ import lang::rascal::\syntax::Rascal;

import ParseTree;
import IO;
import util::Maybe;

bool debugging = false;

bool debugging = false;

Expand All @@ -31,8 +34,8 @@ Tree parseRascal(type[&T] t, str input, bool visualize=false) {
println("- <getErrorText(error)>");
}

Tree disambiguated = defaultErrorDisambiguationFilter(result);
println("Best error: <getErrorText(findFirstError(disambiguated))>");
println("Best error: <getErrorText(findBestError(result).val)>");
}
}
}

Expand Down Expand Up @@ -96,7 +99,7 @@ test bool rascalMissingCloseParen() {
Tree t = parseRascal("module A void f({} void g(){}");

assert getErrorText(findFirstError(t)) == "void g(";
assert getErrorText(findFirstError(defaultErrorDisambiguationFilter(t))) == "(";
assert getErrorText(findBestError(t).val) == "(";

return true;
}
Expand All @@ -106,7 +109,7 @@ test bool rascalFunctionDeclarationMissingCloseParen() {

assert getErrorText(findFirstError(t)) == "void g(";

Tree error = findFirstError(defaultErrorDisambiguationFilter(t));
Tree error = findBestError(t).val;
assert getErrorText(error) == "(";
loc location = getSkipped(error).src;
assert location.begin.column == 16 && location.length == 1;
Expand All @@ -116,14 +119,14 @@ test bool rascalFunctionDeclarationMissingCloseParen() {

test bool rascalIfMissingExpr() {
Tree t = parseFunctionDeclaration("void f(){if(){1;}}", visualize=false);
return getErrorText(findFirstError(t)) == ")";
return getErrorText(findBestError(t).val) == ")";
}

test bool rascalIfBodyEmpty() {
Tree t = parseRascal("module A void f(){1;} void g(){if(1){}} void h(){1;}");

println("error: <getErrorText(findFirstError(t))>");
assert getErrorText(findBestError(t)) == "} void h(){1";
assert getErrorText(findBestError(t).val) == "} void h(){1";

return true;
}
Expand Down
Loading

0 comments on commit b0e4514

Please sign in to comment.