Skip to content

Commit

Permalink
Merge pull request #2020 from usethesource/to-token-recoverer
Browse files Browse the repository at this point in the history
Implement basic error recovery
  • Loading branch information
PieterOlivier authored and toinehartman committed Oct 16, 2024
2 parents 6d3d58d + 2d31585 commit 2f5c58b
Show file tree
Hide file tree
Showing 79 changed files with 1,994 additions and 1,461 deletions.
16 changes: 0 additions & 16 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "java",
"name": "Simple2",
"request": "launch",
"mainClass": "org.rascalmpl.test.parser.Simple2",
"projectName": "rascal"
},
{
"type": "java",
"name": "Launch DocRunner",
Expand Down Expand Up @@ -39,15 +32,6 @@
"projectName": "rascal",
"vmArgs": "-Xss80m -Xmx2g -ea"
},
{
"type": "java",
"name": "Recovery tests",
"request": "launch",
"mainClass": "org.rascalmpl.shell.RascalShell",
"projectName": "rascal",
"vmArgs": "-Xss80m -Xmx2g -ea",
"args": "lang::rascal::tests::recovery::RunRecoveryTests"
},
{
"type": "java",
"name": "Launch RascalShell Tutor",
Expand Down
2 changes: 2 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@
<goal>package</goal>
</goals>
</execution>
<!--
<execution>
<id>default-cli</id>
<phase>compile</phase>
Expand All @@ -177,6 +178,7 @@
</ignores>
</configuration>
</execution>
-->
</executions>
</plugin>
<plugin>
Expand Down
41 changes: 20 additions & 21 deletions src/org/rascalmpl/library/ParseTree.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,8 @@ module ParseTree
extend Type;
extend Message;
extend List;
extend Set;
import String;
import IO;
import Node;
@synopsis{The Tree data type as produced by the parser.}
Expand Down Expand Up @@ -195,8 +193,9 @@ data Production
| \reference(Symbol def, str cons) // <5>
;
data Production = error(Symbol def, Production prod, int dot)
| skipped(Symbol symbol);
data Production
= \error(Symbol def, Production prod, int dot)
| \skipped(Symbol symbol);
@synopsis{Attributes in productions.}
@description{
Expand Down Expand Up @@ -400,6 +399,7 @@ catch ParseError(loc l): {
}
```
}
&T<:Tree parse(type[&T<:Tree] begin, str input, bool allowAmbiguity=false, bool allowRecovery=false, bool hasSideEffects=false, set[Tree(Tree)] filters={})
= parser(begin, allowAmbiguity=allowAmbiguity, allowRecovery=allowRecovery, hasSideEffects=hasSideEffects, filters=filters)(input, |unknown:///|);
Expand All @@ -426,8 +426,8 @@ The parse function behaves differently depending of the given keyword parameters
* `allowAmbiguity`: if true then no exception is thrown in case of ambiguity and a parse forest is returned. If false,
the parser throws an exception during tree building and produces only the first ambiguous subtree in its message.
If set to `false`, the parser constructs trees in linear time. If set to `true`, the parser constructs trees in polynomial time.
* 'allowRecovery`: ***experimental*** if true, the parser tries to recover on a parse error. if a parse error is encountered that can be recovered from, special `skipped` nodes
are included in the resulting parse tree. More documentation will be added here when this feature matures.
* `allowRecovery`: ***experimental*** if true, the parser tries to recover when it encounters a parse error. If a parse error is encountered that can be recovered from,
special `error` and `skipped` nodes are included in the resulting parse tree. More documentation will be added here when this feature matures.
* `hasSideEffects`: if false then the parser is a lot faster when constructing trees, since it does not execute the parse _actions_ in an
interpreted environment to make side effects (like a symbol table) and it can share more intermediate results as a result.
}
Expand Down Expand Up @@ -774,17 +774,17 @@ bool isNonTerminalType(Symbol::\parameterized-lex(str _, list[Symbol] _)) = true
bool isNonTerminalType(Symbol::\start(Symbol s)) = isNonTerminalType(s);
default bool isNonTerminalType(Symbol s) = false;
@synopsis{Check if a parse tree contains any skipped nodes, the result of error recovery.}
@synopsis{Check if a parse tree contains any error nodes, the result of error recovery.}
bool hasErrors(Tree tree) = /appl(error(_, _, _), _) := tree;
@synopsis{Find all error productions in a parse tree.}
list[Tree] findAllErrors(Tree tree) = [err | /err:appl(error(_, _, _), _) := tree];
@synopsis{Find the first production containing an error.}
Tree findFirstError(Tree tree) {
if (/err:appl(error(_, _, _), _) := tree) return err;
fail;
}
Tree findFirstError(/err:appl(error(_, _, _), _)) = err;
@synopsis{Find the best error from a tree containing errors. This function will fail if `tree` does not contain an error.}
Tree findBestError(Tree tree) = findFirstError(defaultErrorDisambiguationFilter(tree));
@synopsis{Get the symbol (sort) of the failing production}
Symbol getErrorSymbol(appl(error(Symbol sym, _, _), _)) = sym;
Expand All @@ -796,20 +796,16 @@ Production getErrorProduction(appl(error(_, Production prod, _), _)) = prod;
int getErrorDot(appl(error(_, _, int dot), _)) = dot;
@synopsis{Get the skipped tree}
Tree getSkipped(appl(error(_, _, _), [*_, skip:appl(skipped(_), _)])) {
return skip;
}
Tree getSkipped(appl(error(_, _, _), [*_, skip:appl(skipped(_), _)])) = skip;
@synopsis{Get the text that failed to parse. This is only the text of the part that has been skipped to be able to continue parsing.
If you want the text of the whole error tree, you can just use string interpolation: "<error>".
}
str getErrorText(appl(error(_, _, _), [*_, appl(skipped(_), chars)])) {
return stringChars([c | ch <- chars, char(c) := ch]);
}
str getErrorText(appl(error(_, _, _), [*_, appl(skipped(_), chars)])) = stringChars([c | char(c) <- chars]);
@synopsis{Error recovery often produces ambiguous trees where errors can be recovered in multiple ways.
This filter removes error trees until no ambiguities caused by error recovery are left.
Note that regular ambiguous trees remain in the parse tree.
Note that regular ambiguous trees remain in the parse forest.
}
Tree defaultErrorDisambiguationFilter(t: appl(Production prod, args)) {
Tree result = appl(prod, [defaultErrorDisambiguationFilter(arg) | arg <- args]);
Expand All @@ -820,12 +816,14 @@ Tree defaultErrorDisambiguationFilter(amb(set[Tree] alternatives)) {
// Go depth-first
set[Tree] disambiguatedAlts = { defaultErrorDisambiguationFilter(alt) | Tree alt <- alternatives };
set[Tree] errorTrees = { alt | Tree alt <- disambiguatedAlts, appl(error(_,_,_), _) := alt };
set[Tree] nonErrorTrees = { alt | Tree alt <- disambiguatedAlts, appl(error(_,_,_), _) !:= alt };
set[Tree] errorTrees = { alt | Tree alt <- disambiguatedAlts, /appl(error(_,_,_), _) := alt };
set[Tree] nonErrorTrees = { alt | Tree alt <- disambiguatedAlts, /appl(error(_,_,_), _) !:= alt };
if (nonErrorTrees == {}) {
return getBestErrorTree(errorTrees);
} else if ({Tree single} := nonErrorTrees) {
}
if ({Tree single} := nonErrorTrees) {
// One ambiguity left, no ambiguity concerns here
return single;
}
Expand All @@ -843,6 +841,7 @@ private Tree getBestErrorTree(set[Tree] trees) {
list[Tree] errors = findAllErrors(tree);
int errorCount = size(errors);
int errorLength = 0;
for (err <- errors) {
errorLength += getSkipped(err).src.length;
}
Expand Down
4 changes: 4 additions & 0 deletions src/org/rascalmpl/library/Prelude.java
Original file line number Diff line number Diff line change
Expand Up @@ -2381,6 +2381,10 @@ public IFunction parser(IValue start, IBool allowAmbiguity, IBool allowRecovery
return rascalValues.parser(start, allowAmbiguity, allowRecovery, hasSideEffects, values.bool(false), filters);
}

/**
 * Overload of {@code parser} that takes no {@code allowRecovery} argument.
 * Delegates to {@code rascalValues.parser} with a false value in the
 * recovery position.
 */
public IFunction parser(IValue start, IBool allowAmbiguity, IBool hasSideEffects, ISet filters) {
    final IBool recoveryOff = values.bool(false);
    // NOTE(review): the fifth argument is true only in firstAmbiguityFinder, so it is
    // presumably the "first ambiguity" switch - confirm against rascalValues.parser.
    final IBool fifthFlagOff = values.bool(false);
    return rascalValues.parser(start, allowAmbiguity, recoveryOff, hasSideEffects, fifthFlagOff, filters);
}

/**
 * Builds a parser function through {@code rascalValues.parser} with the
 * ambiguity argument and the fifth argument both fixed to true; recovery,
 * side effects and filters are passed through from the caller.
 */
public IFunction firstAmbiguityFinder(IValue start, IBool allowRecovery, IBool hasSideEffects, ISet filters) {
    final IBool allowAmbiguity = values.bool(true);
    // NOTE(review): presumably selects "first ambiguity" mode - confirm against rascalValues.parser.
    final IBool fifthFlagOn = values.bool(true);
    return rascalValues.parser(start, allowAmbiguity, allowRecovery, hasSideEffects, fifthFlagOn, filters);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/**
* Copyright (c) 2024, NWO-I Centrum Wiskunde & Informatica (CWI)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**/

module lang::rascal::tests::concrete::recovery::BasicRecoveryTests

import ParseTree;

layout Layout = [\ ]* !>> [\ ];

syntax S = T;

syntax T = ABC End;
syntax ABC = 'a' 'b' 'c';
syntax End = "$";

// Parses `input` as an S with both error recovery and ambiguity enabled.
// The `visualize` flag is only forwarded through the query part of the origin location.
private Tree parseS(str input, bool visualize=false) {
    loc origin = |unknown:///?visualize=<"<visualize>">|;
    return parser(#S, allowRecovery=true, allowAmbiguity=true)(input, origin);
}

// A sentence that matches the grammar exactly must not contain any error nodes.
test bool basicOk() {
    Tree t = parseS("a b c $");
    return !hasErrors(t);
}

// 'x' appears where ABC requires 'c'; after error disambiguation the first
// error is expected to have skipped exactly "x ".
test bool abx() {
    Tree filtered = defaultErrorDisambiguationFilter(parseS("a b x $"));
    str skippedText = getErrorText(findFirstError(filtered));
    return skippedText == "x ";
}

// 'x' appears where ABC requires 'b'; the first error is expected to have skipped "x c".
// NOTE(review): unlike abx, this inspects the unfiltered tree (no
// defaultErrorDisambiguationFilter) - confirm that is intentional.
test bool axc() {
    Tree firstError = findFirstError(parseS("a x c $"));
    str skippedText = getErrorText(firstError);
    return skippedText == "x c";
}

// 'x' appears where ABC requires 'b' and no 'c' follows; the first error is
// expected to have skipped "x ".
// NOTE(review): unlike abx, this inspects the unfiltered tree (no
// defaultErrorDisambiguationFilter) - confirm that is intentional.
test bool ax() {
    Tree firstError = findFirstError(parseS("a x $"));
    str skippedText = getErrorText(firstError);
    return skippedText == "x ";
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/**
* Copyright (c) 2024, NWO-I Centrum Wiskunde & Informatica (CWI)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**/

module lang::rascal::tests::concrete::recovery::ListRecoveryTests

import ParseTree;

layout Layout = [\ ]* !>> [\ ];

syntax S = T End;

syntax T = { AB "," }*;
syntax AB = "a" "b";
syntax End = "$";

// Parses `s` as an S with both error recovery and ambiguity enabled.
// The `visualize` flag is only forwarded through the query part of the origin location.
Tree parseList(str s, bool visualize=false)
    = parser(#S, allowRecovery=true, allowAmbiguity=true)(s, |unknown:///?visualize=<"<visualize>">|);

// A well-formed separated list must parse without any error nodes.
// `visualize` is left at parseList's default (false): the previous
// `visualize=true` looked like a debugging leftover and does not belong in an
// automated test run.
test bool listOk() {
    Tree t = parseList("a b , a b , a b $");
    return !hasErrors(t);
}

// The second list element contains 'x' instead of 'b'; recovery must yield a
// tree that contains at least one error node.
// `visualize` is left at parseList's default (false): the previous
// `visualize=true` looked like a debugging leftover and does not belong in an
// automated test run.
test bool listTypo() {
    Tree t = parseList("a b, a x, ab $");
    return hasErrors(t);
}

// Same typo as listTypo, but with spaces around every separator; recovery
// must yield a tree that contains at least one error node.
// `visualize` is left at parseList's default (false): the previous
// `visualize=true` looked like a debugging leftover and does not belong in an
// automated test run.
test bool listTypoWs() {
    Tree t = parseList("a b , a x , a b $");
    return hasErrors(t);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
* Copyright (c) 2024, NWO-I Centrum Wiskunde & Informatica (CWI)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**/

module lang::rascal::tests::concrete::recovery::NestedRecoveryTests

import ParseTree;

layout Layout = [\ ]* !>> [\ ];

syntax S = T;

syntax T = A B C;

syntax A = "a";
syntax B = "b" "b";
syntax C = "c";

// Parses `input` as an S with both error recovery and ambiguity enabled.
// The `visualize` flag is only forwarded through the query part of the origin location.
private Tree parseS(str input, bool visualize=false) {
    loc origin = |unknown:///?visualize=<"<visualize>">|;
    return parser(#S, allowRecovery=true, allowAmbiguity=true)(input, origin);
}

// A sentence that matches A B C exactly must not contain any error nodes.
test bool nestedOk() {
    Tree t = parseS("a b b c");
    return !hasErrors(t);
}

// The second "b" of B is replaced by 'x'; after error disambiguation the
// first error is expected to have skipped exactly "x ".
test bool nestedTypo() {
    Tree filtered = defaultErrorDisambiguationFilter(parseS("a b x c"));
    str skippedText = getErrorText(findFirstError(filtered));
    return skippedText == "x ";
}
Loading

0 comments on commit 2f5c58b

Please sign in to comment.