Skip to content

Commit 05da14d

Browse files
OnlyFlyer, liuxy0551, and dilu
authored
Feat/spark g4 (#168)
* feat: spark g4 test
* fix: fixed build lint

---------

Co-authored-by: liuyi <[email protected]>
Co-authored-by: dilu <[email protected]>
1 parent 0a9a7d1 commit 05da14d

18 files changed

+27912
-26927
lines changed

src/grammar/spark/SparkSqlLexer.g4

Lines changed: 7 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -26,52 +26,14 @@ lexer grammar SparkSqlLexer;
2626
*/
2727
public has_unclosed_bracketed_comment = false;
2828
29-
/**
30-
* Verify whether current token is a valid decimal token (which contains dot).
31-
* Returns true if the character that follows the token is not a digit or letter or underscore.
32-
*
33-
* For example:
34-
* For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
35-
* For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
36-
* For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
37-
* For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed
38-
* by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
39-
* which is not a digit or letter or underscore.
40-
*/
41-
public isValidDecimal() {
42-
const nextChar = _input.LA(1);
43-
if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' ||
44-
nextChar == '_') {
45-
return false;
46-
} else {
47-
return true;
48-
}
49-
}
50-
51-
/**
52-
* This method will be called when we see '/*' and try to match it as a bracketed comment.
53-
* If the next character is '+', it should be parsed as hint later, and we cannot match
54-
* it as a bracketed comment.
55-
*
56-
* Returns true if the next character is '+'.
57-
*/
58-
public isHint() {
59-
const nextChar = _input.LA(1);
60-
if (nextChar == '+') {
61-
return true;
62-
} else {
63-
return false;
64-
}
65-
}
66-
6729
/**
6830
* This method will be called when the character stream ends and try to find out the
6931
* unclosed bracketed comment.
7032
* If the method be called, it means the end of the entire character stream match,
7133
* and we set the flag and fail later.
7234
*/
7335
public markUnclosedComment() {
74-
has_unclosed_bracketed_comment = true;
36+
this.has_unclosed_bracketed_comment = true;
7537
}
7638
}
7739

@@ -488,26 +450,26 @@ INTEGER_VALUE
488450

489451
EXPONENT_VALUE
490452
: DIGIT+ EXPONENT
491-
| DECIMAL_DIGITS EXPONENT {isValidDecimal()}?
453+
| DECIMAL_DIGITS EXPONENT
492454
;
493455

494456
DECIMAL_VALUE
495-
: DECIMAL_DIGITS {isValidDecimal()}?
457+
: DECIMAL_DIGITS
496458
;
497459

498460
FLOAT_LITERAL
499461
: DIGIT+ EXPONENT? 'F'
500-
| DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}?
462+
| DECIMAL_DIGITS EXPONENT? 'F'
501463
;
502464

503465
DOUBLE_LITERAL
504466
: DIGIT+ EXPONENT? 'D'
505-
| DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}?
467+
| DECIMAL_DIGITS EXPONENT? 'D'
506468
;
507469

508470
BIGDECIMAL_LITERAL
509471
: DIGIT+ EXPONENT? 'BD'
510-
| DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}?
472+
| DECIMAL_DIGITS EXPONENT? 'BD'
511473
;
512474

513475
IDENTIFIER
@@ -540,7 +502,7 @@ SIMPLE_COMMENT
540502
;
541503

542504
BRACKETED_COMMENT
543-
: '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN)
505+
: '/*' ( BRACKETED_COMMENT | . )*? ('*/' | {this.markUnclosedComment();} EOF) -> channel(HIDDEN)
544506
;
545507

546508
WS

src/grammar/spark/SparkSqlParser.g4

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,11 +1166,15 @@ windowSpec
11661166
RIGHT_PAREN
11671167
;
11681168

1169+
/**
1170+
* replace start identifier with start_ in grammar.
1171+
* https://github.com/tunnelvisionlabs/antlr4ts/issues/417
1172+
*/
11691173
windowFrame
1170-
: frameType=KW_RANGE start=frameBound
1171-
| frameType=KW_ROWS start=frameBound
1172-
| frameType=KW_RANGE KW_BETWEEN start=frameBound KW_AND end=frameBound
1173-
| frameType=KW_ROWS KW_BETWEEN start=frameBound KW_AND end=frameBound
1174+
: frameType=KW_RANGE start_=frameBound
1175+
| frameType=KW_ROWS start_=frameBound
1176+
| frameType=KW_RANGE KW_BETWEEN start_=frameBound KW_AND end=frameBound
1177+
| frameType=KW_ROWS KW_BETWEEN start_=frameBound KW_AND end=frameBound
11741178
;
11751179

11761180
frameBound

src/index.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ export * from './lib/hive/HiveSqlParserListener';
88
export * from './lib/hive/HiveSqlParserVisitor';
99
export * from './lib/plsql/PlSqlParserListener';
1010
export * from './lib/plsql/PlSqlParserVisitor';
11-
export * from './lib/spark/SparkSqlVisitor';
12-
export * from './lib/spark/SparkSqlListener';
11+
export * from './lib/spark/SparkSqlParserVisitor';
12+
export * from './lib/spark/SparkSqlParserListener';
1313
export * from './lib/pgsql/PostgreSQLParserListener';
1414
export * from './lib/pgsql/PostgreSQLParserVisitor';
1515
export * from './lib/trinosql/TrinoSqlListener';
1616
export * from './lib/trinosql/TrinoSqlVisitor';
17-
export { SyntaxContextType } from './parser/common/basic-parser-types'
17+
export { SyntaxContextType } from './parser/common/basic-parser-types';
1818

1919

2020
export type * from './parser/common/basic-parser-types';

0 commit comments

Comments
 (0)