Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve error matching on missing EOF #114

Merged
merged 1 commit into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
# Changelog

## Unreleased
- Fix error matching issue when missing EOF

## 2024/10/25 v0.0.9
- Set CrateDB version to 5.8.3
- Fix error matching issue due to wrong error.query trimming in find_suitable_error

## 2024/10/16 v0.0.8
- Export `Statement` in both Python and Javascript target
- Fixed query parsing when expression includes special characters like `\n`, `\r`, or `\t`
- Fixed sqlparse crash on missing error context
- Set CrateDB version to 5.8.3

## 2024/09/18 v0.0.7
- Improve error matching on single statement
- Improve error matching on single statements

## v0.0.6 skipped

Expand Down
4 changes: 2 additions & 2 deletions cratedb_sqlparse_js/cratedb_sqlparse/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class ExceptionCollectorListener extends ErrorListener {

} else {
const min_to_check = Math.max(1, offendingSymbol.tokenIndex - 2)
const tokens = recognizer.getTokenStream().tokens.slice(min_to_check, offendingSymbol.tokenIndex + 1)
const tokens = recognizer.getTokenStream().tokens.slice(min_to_check, offendingSymbol.tokenIndex)
query = tokens.map((el) => el.text).join("")
}

Expand Down Expand Up @@ -192,7 +192,7 @@ function findSuitableError(statement, errors) {
errorQuery = errorQuery.trimStart().trimEnd()

// If a good match error_query contains statement.query
if (errorQuery.includes(statement.query)) {
if (statement.query.includes(errorQuery)) {
statement.exception = error;
errors.splice(errors.indexOf(error), 1);
}
Expand Down
27 changes: 24 additions & 3 deletions cratedb_sqlparse_js/tests/exceptions.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ test('Special characters should not avoid exception catching', () => {
]
for (const stmt in stmts) {
let r = sqlparse(stmt)
expect(r[0].exception).toBeDefined();
expect(r[0].exception).not.toBeNull();
}
})

Expand Down Expand Up @@ -146,8 +146,29 @@ test('Special query with several errors should correctly be matched regardless o
]
for (const stmt of stmts) {
const r = sqlparse(stmt)
expect(r[0].exception).toBeDefined()
expect(r[0].exception).not.toBeNull()
expect(r[1].exception).toBeNull()
expect(r[2].exception).toBeDefined()
expect(r[2].exception).not.toBeNull()
}
})

test('Missing EOF should not block error catching', () => {
const stmts = [
`
select 1;
select 2
select 3;
`,
`
select 1;
select 1 I can put anything here
select 3
`
]

for (const stmt of stmts) {
const r = sqlparse(stmt)
expect(r[0].exception).toBeNull()
expect(r[1].exception).not.toBeNull()
}
})
10 changes: 5 additions & 5 deletions cratedb_sqlparse_py/cratedb_sqlparse/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
# The newly generated query will be either the offendingToken + one token to the left
# or offendingToken + two tokens to the left, if the second is possible it takes precedence.
min_token_to_check = max(1, offendingSymbol.tokenIndex - 2)
tokens = recognizer.getTokenStream().tokens[min_token_to_check : offendingSymbol.tokenIndex + 1]
tokens = recognizer.getTokenStream().tokens

tokens = tokens[min_token_to_check : offendingSymbol.tokenIndex]
query = "".join(token.text for token in tokens)

error = ParsingException(
Expand Down Expand Up @@ -184,16 +186,14 @@ def __repr__(self):

def find_suitable_error(statement, errors):
for error in errors[:]:
# We clean the error_query of ';' and spaces because ironically,
# we can get the full query in the error but not in the parsed statement.
error_query = error.query
if error_query.endswith(";"):
error_query = error_query[: len(error_query) - 1]

error_query = error_query.lstrip().rstrip()

# If a good match error_query contains statement.query
if statement.query in error_query:
# If a good match, error_query contains statement.query
if error_query in statement.query:
statement.exception = error
errors.pop(errors.index(error))

Expand Down
27 changes: 27 additions & 0 deletions cratedb_sqlparse_py/tests/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,30 @@ def test_sqlparse_match_exceptions_spaces():
assert r[0]
assert r[1]
assert r[2]


def test_sqlparse_match_exception_missing_eof():
"""
Statements that miss an eof should be detected as one, and catch the appropriate exception

See https://github.com/crate/cratedb-sqlparse/issues/113
"""
from cratedb_sqlparse import sqlparse

stmts = [
"""
select 1;
select 2
select 3;
""",
"""
select 1;
select 1 I can put anything here
select 3
""",
]
for stmt in stmts:
r = sqlparse(stmt)
assert len(r) == 2
assert not r[0].exception
assert r[1].exception
Loading