Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reimplemented error tree disambiguation in Java #2035

Merged
merged 15 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 178 additions & 0 deletions rascal-recovery-stats.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
source,size,result,duration,disambiguationDuration,errorSize
PieterOlivier marked this conversation as resolved.
Show resolved Hide resolved
|std:///analysis/grammars/Dependency.rsc?deletedChar=0|,1070,recovery,0,2,341
|std:///analysis/grammars/Dependency.rsc?deletedChar=1|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=2|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=3|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=4|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=5|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=6|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=7|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=8|,1070,recovery,0,2,272
|std:///analysis/grammars/Dependency.rsc?deletedChar=9|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=10|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=11|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=12|,1070,success,7,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=13|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=14|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=15|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=16|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=17|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=18|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=19|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=20|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=21|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=22|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=23|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=24|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=25|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=26|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=27|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=28|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=29|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=30|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=31|,1070,success,7,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=32|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=33|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=34|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=35|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=36|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=37|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=38|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=39|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=40|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=41|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=42|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=43|,1070,success,10,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=44|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=45|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=46|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=47|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=48|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=49|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=50|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=51|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=52|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=53|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=54|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=55|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=56|,1070,success,7,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=57|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=58|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=59|,1070,success,7,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=60|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=61|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=62|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=63|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=64|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=65|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=66|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=67|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=68|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=69|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=70|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=71|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=72|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=73|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=74|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=75|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=76|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=77|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=78|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=79|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=80|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=81|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=82|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=83|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=84|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=85|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=86|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=87|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=88|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=89|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=90|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=91|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=92|,1070,success,3,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=93|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=94|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=95|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=96|,1070,success,11,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=97|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=98|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=99|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=100|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=101|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=102|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=103|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=104|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=105|,1070,success,7,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=106|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=107|,1070,success,7,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=108|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=109|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=110|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=111|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=112|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=113|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=114|,1070,success,3,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=115|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=116|,1070,success,3,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=117|,1070,success,3,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=118|,1070,success,3,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=119|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=120|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=121|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=122|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=123|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=124|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=125|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=126|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=127|,1070,success,3,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=128|,1070,success,3,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=129|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=130|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=131|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=132|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=133|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=134|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=135|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=136|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=137|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=138|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=139|,1070,success,7,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=140|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=141|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=142|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=143|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=144|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=145|,1070,success,7,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=146|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=147|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=148|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=149|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=150|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=151|,1070,success,10,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=152|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=153|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=154|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=155|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=156|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=157|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=158|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=159|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=160|,1070,success,3,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=161|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=162|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=163|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=164|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=165|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=166|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=167|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=168|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=169|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=170|,1070,success,6,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=171|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=172|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=173|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=174|,1070,success,5,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=175|,1070,success,4,-1,0
|std:///analysis/grammars/Dependency.rsc?deletedChar=176|,1070,success,5,-1,0
54 changes: 23 additions & 31 deletions src/org/rascalmpl/library/ParseTree.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ extend Message;
extend List;

import String;
import Set;
import util::Maybe;

@synopsis{The Tree data type as produced by the parser.}
@description{
Expand Down Expand Up @@ -355,6 +355,16 @@ The latter option terminates much faster, i.e. always in cubic time, and always
while constructing ambiguous parse forests may grow to O(n^p+1), where p is the length of the longest production rule and n
is the length of the input.

The `allowRecovery` option can be set to `true` to enable error recovery. This is an experimental feature.
When error recovery is enabled, the parser will attempt to recover from parse errors and continue parsing.
If successful, a parse tree with error and skipped productions is returned (see the definition of `Production` above).
A number of functions are provided to analyze trees with errors, for example `hasErrors`, `getSkipped`, and `getErrorText`.
Note that the resulting parse forest can contain a lot of error nodes. `disambiguateErrors` can be used to prune the forest
and leave a tree with a single error (or even none) based on simple heuristics.
When `allowAmbiguity` is set to false, `allowRecovery` is set to true, and `filters` is empty, this disambiguation is done
automatically, so you should end up with a tree with no error ambiguities. Regular ambiguities can still occur
and will result in an error.

The `filters` set contains functions which may be called optionally after the parse algorithm has finished and just before
the Tree representation is built. The set of functions contains alternative functions; only one of them is successfully applied
to each node in a tree. If such a function fails to apply, the other ones are tried. There is no fixed-point computation, so
Expand Down Expand Up @@ -784,7 +794,15 @@ list[Tree] findAllErrors(Tree tree) = [err | /err:appl(error(_, _, _), _) := tr
Tree findFirstError(/err:appl(error(_, _, _), _)) = err;

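@synopsis{Find the best error from a tree containing errors. The tree is first disambiguated with `disambiguateErrors`; the result is `just(err)` for an error found in the disambiguated tree, or `nothing()` if no error is left.}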
Tree findBestError(Tree tree) = findFirstError(defaultErrorDisambiguationFilter(tree));
Maybe[Tree] findBestError(Tree tree) {
Tree disambiguated = disambiguateErrors(tree);
if (/err:appl(error(_, _, _), _) := disambiguated) {
return just(err);
}

// All errors have disappeared
return nothing();
}

@synopsis{Get the symbol (sort) of the failing production}
Symbol getErrorSymbol(appl(error(Symbol sym, _, _), _)) = sym;
Expand All @@ -803,35 +821,9 @@ If you want the text of the whole error tree, you can just use string interpolat
}
str getErrorText(appl(error(_, _, _), [*_, appl(skipped(_), chars)])) = stringChars([c | char(c) <- chars]);

@javaClass{org.rascalmpl.parser.gtd.recovery.ParseErrorDisambiguator}
@synopsis{Error recovery often produces ambiguous trees where errors can be recovered in multiple ways.
This filter removes error trees until no ambiguities caused by error recovery are left.
Note that regular ambiguous trees remain in the parse forest.
Note that regular ambiguous trees remain in the parse forest unless `allowAmbiguity` is set to false in which case an error is thrown.
}
Tree defaultErrorDisambiguationFilter(Tree t) {
return visit(t) {
case a:amb(_) => ambDisambiguation(a)
};
}

private Tree ambDisambiguation(amb(set[Tree] alternatives)) {
// Go depth-first
rel[int score, Tree alt] scoredErrorTrees = { <scoreErrors(alt), alt> | Tree alt <- alternatives };
set[Tree] nonErrorTrees = scoredErrorTrees[0];

if (nonErrorTrees == {}) {
return (getFirstFrom(scoredErrorTrees) | it.score > c.score ? c : it | c <- scoredErrorTrees).alt;
}

if ({Tree single} := nonErrorTrees) {
// One ambiguity left, no ambiguity concerns here
return single;
}

// Multiple non-error trees left, return an ambiguity node with just the non-error trees
return amb(nonErrorTrees);
}

private int scoreErrors(Tree t) = (0 | it + getSkipped(e).src.length | /e:appl(error(_,_,_),_) := t);

// Handle char and cycle nodes
default Tree defaultErrorDisambiguationFilter(Tree t) = t;
java Tree disambiguateErrors(Tree t, bool allowAmbiguity=true);
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
module lang::rascal::tests::concrete::recovery::BasicRecoveryTests

import ParseTree;
import util::Maybe;

layout Layout = [\ ]* !>> [\ ];

Expand All @@ -33,15 +34,23 @@ test bool basicOk() {

test bool abx() {
Tree t = parseS("a b x $");
return getErrorText(findBestError(t)) == "x ";
return getErrorText(findBestError(t).val) == "x ";
}

test bool axc() {
Tree t = parseS("a x c $");
return getErrorText(findBestError(t)) == "x c";
return getErrorText(findBestError(t).val) == "x c";
}

test bool ax() {
Tree t = parseS("a x $");
return getErrorText(findBestError(t)) == "x ";
str input = "a x $";

Tree t = parseS(input);
assert size(findAllErrors(t)) == 3;
assert getErrorText(findBestError(t).val) == "x ";

Tree autoDisambiguated = parser(#S, allowRecovery=true, allowAmbiguity=false)(input, |unknown:///|);
assert size(findAllErrors(autoDisambiguated)) == 1;

return getErrorText(findFirstError(autoDisambiguated)) == getErrorText(findBestError(t).val);
}
Loading
Loading