- * "Tabs are replaced (from left to right) by one to eight spaces such that the total number of characters up to and - * including the replacement is a multiple of eight [...]" - *
- * -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation - **/ + // Calculates the indentation of the provided spaces, taking the + // following rules into account: + // + // "Tabs are replaced (from left to right) by one to eight spaces + // such that the total number of characters up to and including + // the replacement is a multiple of eight [...]" + // + // -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation static int getIndentationCount(String spaces) { int count = 0; for (char ch : spaces.toCharArray()) { - if (ch == '\t') { - count += 8 - (count % 8); - } else { - count++; + switch (ch) { + case '\t': + count += 8 - (count % 8); + break; + default: + // A normal space char. + count++; } } @@ -104,21 +111,26 @@ void onNewLine() { String newLine = getText().replaceAll("[^\r\n\f]+", ""); String spaces = getText().replaceAll("[\r\n\f]+", ""); + // Strip newlines inside open clauses except if we are near EOF. We keep NEWLINEs near EOF to + // satisfy the final newline needed by the single_input rule used by the REPL. int next = _input.LA(1); int nextnext = _input.LA(2); if (opened > 0 || (nextnext != -1 && (next == '\r' || next == '\n' || next == '\f' || next == '#'))) { + // If we're inside a list or on a blank line, ignore all indents, + // dedents and line breaks. skip(); } else { emit(commonToken(Python3Lexer.NEWLINE, newLine)); int indent = getIndentationCount(spaces); int previous = indents.isEmpty() ? 0 : indents.peek(); - if (indent == previous) { + // skip indents of the same size as the present indent-size skip(); } else if (indent > previous) { indents.push(indent); emit(commonToken(Python3Lexer.INDENT, spaces)); } else { + // Possibly emit more than 1 DEDENT token. 
while (!indents.isEmpty() && indents.peek() > indent) { this.emit(createDedent()); indents.pop(); @@ -129,10 +141,10 @@ void onNewLine() { @Override public void reset() { - tokens = new LinkedList<>(); - indents = new LinkedList<>(); + tokens = new java.util.LinkedList<>(); + indents = new ArrayDeque<>(); opened = 0; lastToken = null; super.reset(); } -} +} \ No newline at end of file diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java index 44b5926a4..713af92c1 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java +++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java @@ -1,18 +1,17 @@ package de.jplag.python3.grammar; -import org.antlr.v4.runtime.Parser; -import org.antlr.v4.runtime.TokenStream; +import org.antlr.v4.runtime.*; public abstract class Python3ParserBase extends Parser { protected Python3ParserBase(TokenStream input) { super(input); } - public boolean cannotBePlusMinus() { + public boolean CannotBePlusMinus() { return true; } - public boolean cannotBeDotLpEq() { + public boolean CannotBeDotLpEq() { return true; } -} +} \ No newline at end of file diff --git a/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py b/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py index 34d92252c..bfd3e8bf4 100644 --- a/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py +++ b/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py @@ -500,4 +500,9 @@ def force_legacy_ssl_support(): def switchWithBreak(): while True: - break \ No newline at end of file + break + +async def x(): + return "" + +x = await x() \ No newline at end of file