From 378d38b80cb64a2600648b219d1ab9d2e0037aa7 Mon Sep 17 00:00:00 2001 From: surister Date: Wed, 30 Oct 2024 17:08:08 +0100 Subject: [PATCH] Improve error matching on missing EOF --- CHANGES.md | 10 +++++-- .../cratedb_sqlparse/parser.js | 4 +-- cratedb_sqlparse_js/tests/exceptions.test.js | 27 ++++++++++++++++--- .../cratedb_sqlparse/parser.py | 10 +++---- cratedb_sqlparse_py/tests/test_exceptions.py | 27 +++++++++++++++++++ 5 files changed, 66 insertions(+), 12 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d9d7b67..a4a1531 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,13 +1,19 @@ # Changelog ## Unreleased +- Fix error matching issue when missing EOF + +## 2024/10/25 v0.0.9 +- Set CrateDB version to 5.8.3 +- Fix error matching issue due to wrong error.query trimming in find_suitable_error + +## 2024/10/16 v0.0.8 - Export `Statement` in both Python and Javascript target - Fixed query parsing when expression includes special characters like `\n`, `\r`, or `\t` - Fixed sqlparse crash on missing error context -- Set CrateDB version to 5.8.3 ## 2024/09/18 v0.0.7 -- Improve error matching on single statement +- Improve error matching on single statements ## v0.0.6 skipped diff --git a/cratedb_sqlparse_js/cratedb_sqlparse/parser.js b/cratedb_sqlparse_js/cratedb_sqlparse/parser.js index 3f4bcbc..9ea50d0 100644 --- a/cratedb_sqlparse_js/cratedb_sqlparse/parser.js +++ b/cratedb_sqlparse_js/cratedb_sqlparse/parser.js @@ -135,7 +135,7 @@ class ExceptionCollectorListener extends ErrorListener { } else { const min_to_check = Math.max(1, offendingSymbol.tokenIndex - 2) - const tokens = recognizer.getTokenStream().tokens.slice(min_to_check, offendingSymbol.tokenIndex + 1) + const tokens = recognizer.getTokenStream().tokens.slice(min_to_check, offendingSymbol.tokenIndex) query = tokens.map((el) => el.text).join("") } @@ -192,7 +192,7 @@ function findSuitableError(statement, errors) { errorQuery = errorQuery.trimStart().trimEnd() // If a good match error_query contains statement.query - if (errorQuery.includes(statement.query)) { + if (statement.query.includes(errorQuery)) { statement.exception = error; errors.splice(errors.indexOf(error), 1); } diff --git a/cratedb_sqlparse_js/tests/exceptions.test.js b/cratedb_sqlparse_js/tests/exceptions.test.js index d332dc6..39ffbbb 100644 --- a/cratedb_sqlparse_js/tests/exceptions.test.js +++ b/cratedb_sqlparse_js/tests/exceptions.test.js @@ -114,7 +114,7 @@ test('Special characters should not avoid exception catching', () => { ] for (const stmt in stmts) { let r = sqlparse(stmt) - expect(r[0].exception).toBeDefined(); + expect(r[0].exception).not.toBeNull(); } }) @@ -146,8 +146,29 @@ test('Special query with several errors should correctly be matched regardless o ] for (const stmt of stmts) { const r = sqlparse(stmt) - expect(r[0].exception).toBeDefined() + expect(r[0].exception).not.toBeNull() expect(r[1].exception).toBeNull() - expect(r[2].exception).toBeDefined() + expect(r[2].exception).not.toBeNull() + } +}) + +test('Missing EOF should not block error catching', () => { + const stmts = [ + ` + select 1; + select 2 + select 3; + `, + ` + select 1; + select 1 I can put anything here + select 3 + ` + ] + + for (const stmt of stmts) { + const r = sqlparse(stmt) + expect(r[0].exception).toBeNull() + expect(r[1].exception).not.toBeNull() } }) \ No newline at end of file diff --git a/cratedb_sqlparse_py/cratedb_sqlparse/parser.py b/cratedb_sqlparse_py/cratedb_sqlparse/parser.py index 83dc77e..452abd3 100644 --- a/cratedb_sqlparse_py/cratedb_sqlparse/parser.py +++ b/cratedb_sqlparse_py/cratedb_sqlparse/parser.py @@ -128,7 +128,9 @@ def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e): # The newly generated query will be either the offendingToken + one token to the left # or offendingToken + two tokens to the left, if the second is possible it takes precedence. min_token_to_check = max(1, offendingSymbol.tokenIndex - 2) - tokens = recognizer.getTokenStream().tokens[min_token_to_check : offendingSymbol.tokenIndex + 1] + tokens = recognizer.getTokenStream().tokens + + tokens = tokens[min_token_to_check : offendingSymbol.tokenIndex] query = "".join(token.text for token in tokens) error = ParsingException( @@ -184,16 +186,14 @@ def __repr__(self): def find_suitable_error(statement, errors): for error in errors[:]: - # We clean the error_query of ';' and spaces because ironically, - # we can get the full query in the error but not in the parsed statement. error_query = error.query if error_query.endswith(";"): error_query = error_query[: len(error_query) - 1] error_query = error_query.lstrip().rstrip() - # If a good match error_query contains statement.query - if statement.query in error_query: + # If a good match, error_query contains statement.query + if error_query in statement.query: statement.exception = error errors.pop(errors.index(error)) diff --git a/cratedb_sqlparse_py/tests/test_exceptions.py b/cratedb_sqlparse_py/tests/test_exceptions.py index d5a96b7..c327f05 100644 --- a/cratedb_sqlparse_py/tests/test_exceptions.py +++ b/cratedb_sqlparse_py/tests/test_exceptions.py @@ -156,3 +156,30 @@ def test_sqlparse_match_exceptions_spaces(): assert r[0] assert r[1] assert r[2] + + +def test_sqlparse_match_exception_missing_eof(): + """ + Statements that miss an eof should be detected as one, and catch the appropriate exception + + See https://github.com/crate/cratedb-sqlparse/issues/113 + """ + from cratedb_sqlparse import sqlparse + + stmts = [ + """ + select 1; + select 2 + select 3; + """, + """ + select 1; + select 1 I can put anything here + select 3 + """, + ] + for stmt in stmts: + r = sqlparse(stmt) + assert len(r) == 2 + assert not r[0].exception + assert r[1].exception