
Commit 8def5ba

feat: support semantic context of isNewStatement
1 parent c0a2854 commit 8def5ba

32 files changed: +1605 -7 lines

src/parser/common/basicSQL.ts (+29)

@@ -20,6 +20,7 @@ import { ErrorStrategy } from './errorStrategy';
import type { SplitListener } from './splitListener';
import type { EntityCollector } from './entityCollector';
import { EntityContext } from './entityCollector';
+import SemanticContextCollector from './semanticContextCollector';

/**
 * Basic SQL class, every sql needs extends it.
@@ -94,6 +95,15 @@ export abstract class BasicSQL<

    public locale: LOCALE_TYPE = 'en_US';

+    /**
+     * Get a new semanticContextCollector instance.
+     */
+    protected abstract createSemanticContextCollector(
+        input: string,
+        caretPosition: CaretPosition,
+        allTokens: Token[]
+    ): SemanticContextCollector;
+
    /**
     * Create an antlr4 lexer from input.
     * @param input string
@@ -407,4 +417,23 @@ export abstract class BasicSQL<

        return collectListener.getEntities();
    }
+
+    /**
+     * Get semantic context info.
+     * @param input source string
+     * @param caretPosition caret position, such as cursor position
+     * @returns analyzed semantic context
+     */
+    public getSemanticContextAtCaretPosition(input: string, caretPosition: CaretPosition) {
+        const allTokens = this.getAllTokens(input);
+        const parseTree = this.parseWithCache(input);
+        const statementContextListener = this.createSemanticContextCollector(
+            input,
+            caretPosition,
+            allTokens
+        );
+        this.listen(statementContextListener, parseTree);
+
+        return statementContextListener.semanticContext;
+    }
}
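
For illustration, a minimal sketch of how the new public method might be called through a dialect parser (assuming the FlinkSQL class exported by this package; the SQL text and caret coordinates are made up for this example):

import { FlinkSQL } from 'dt-sql-parser'; // assumed package entry point

const flink = new FlinkSQL();
const sql = 'SELECT * FROM tb1;\n';

// Caret at line 2, column 1 -- just after the semicolon-terminated statement.
const context = flink.getSemanticContextAtCaretPosition(sql, {
    lineNumber: 2,
    column: 1,
});

// The previous non-whitespace token is ';', so the collector should report
// a new-statement context here.
console.log(context.isNewStatement); // expected: true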

src/parser/common/semanticContextCollector.ts (+228, new file)

@@ -0,0 +1,228 @@
import { ErrorNode, ParserRuleContext, TerminalNode, Token } from 'antlr4ng';
import { findCaretTokenIndex } from '../common/findCaretTokenIndex';
import { CaretPosition, SemanticContext } from '../common/types';

export const SQL_SPLIT_SYMBOL_TEXT = ';';

abstract class SemanticContextCollector {
    constructor(_input: string, caretPosition: CaretPosition, allTokens: Token[]) {
        // If caretPosition is whitespace, tokenIndex may be undefined.
        const tokenIndex = findCaretTokenIndex(caretPosition, allTokens);

        if (tokenIndex !== undefined) {
            this._tokenIndex = tokenIndex;
        }
        this._allTokens = allTokens;

        if (allTokens?.length) {
            let i = tokenIndex ? tokenIndex - 1 : allTokens.length - 1;
            /**
             * Find the previous non-whitespace token.
             * If tokenIndex can't be found, or the token at caretPosition is whitespace,
             * prevTokenIndex helps us determine whether this is a new statement.
             */
            while (i >= 0) {
                const isWhiteSpaceToken =
                    allTokens[i].type === this.getWhiteSpaceRuleType() ||
                    allTokens[i].text === '\n';
                if (
                    !isWhiteSpaceToken &&
                    (allTokens[i].line < caretPosition?.lineNumber ||
                        (allTokens[i].line === caretPosition.lineNumber &&
                            allTokens[i].column < caretPosition.column))
                ) {
                    this._prevTokenIndex = allTokens[i].tokenIndex;
                    break;
                }
                i--;
            }
            if (tokenIndex === 0 || i === -1) {
                this._isNewStatement = true;
            }
        }
    }

    private _tokenIndex: number;
    private _allTokens: Token[] = [];
    private _isNewStatement: boolean = false;

    /**
     * The previous non-whitespace tokenIndex before the current tokenIndex or caret position.
     */
    private _prevTokenIndex: number;

    public get semanticContext(): SemanticContext {
        return {
            isNewStatement: this._isNewStatement,
        };
    }

    abstract getWhiteSpaceRuleType(): number;

    abstract getStatementRuleType(): number;

    private getPrevStatementRule(node: TerminalNode | ErrorNode | ParserRuleContext) {
        let parent = node.parent as ParserRuleContext;
        if (!parent) return null;
        const currentNodeIndex = parent.children!.findIndex((child) => child === node);
        if (currentNodeIndex <= 0) return null;
        return parent.children![currentNodeIndex - 1];
    }

    /**
     * In most dialects the root rule is program.
     */
    private getIsRootRuleNode(node: TerminalNode | ErrorNode | ParserRuleContext) {
        return node instanceof ParserRuleContext && node?.parent === null;
    }

    /**
     * When the caret position is on whitespace, it will not be visited as a terminal node
     * or an error node. We can find the previous non-whitespace token,
     * and if that token is the last leaf node of the statement,
     * the caret can be considered to be in the context of a new statement.
     */
    protected statementVisitor(ctx: ParserRuleContext) {
        const isWhiteSpaceToken =
            this._tokenIndex === undefined ||
            this._allTokens[this._tokenIndex]?.type === this.getWhiteSpaceRuleType() ||
            // PostgreSQL whitespace does not include the '\n' symbol
            this._allTokens[this._tokenIndex]?.text === '\n';

        const isPrevTokenSplitSymbol =
            this._prevTokenIndex &&
            this._allTokens[this._prevTokenIndex].text === SQL_SPLIT_SYMBOL_TEXT;

        const isPrevTokenEndOfStatement =
            this._prevTokenIndex !== undefined &&
            ctx.stop?.tokenIndex === this._prevTokenIndex &&
            ctx.exception === null;

        if (isWhiteSpaceToken && (isPrevTokenSplitSymbol || isPrevTokenEndOfStatement)) {
            this._isNewStatement = true;
        }
    }

    /**
     * An incomplete keyword will be an error node.
     */
    visitErrorNode(node: ErrorNode): void {
        if (node.symbol.tokenIndex !== this._tokenIndex) return;
        if (
            this._prevTokenIndex &&
            this._allTokens[this._prevTokenIndex].text === SQL_SPLIT_SYMBOL_TEXT
        ) {
            this._isNewStatement = true;
            return;
        }

        let parent: ParserRuleContext | null = node.parent as ParserRuleContext;
        let currentNode: TerminalNode | ParserRuleContext = node;
        /**
         * The error node is a direct child node of the program node.
         */
        if (parent.ruleIndex === this.getStatementRuleType() || this.getIsRootRuleNode(parent)) {
            const prevStatementRule = this.getPrevStatementRule(currentNode);
            if (
                prevStatementRule instanceof TerminalNode ||
                (prevStatementRule &&
                    ((prevStatementRule as ParserRuleContext).exception !== null ||
                        (prevStatementRule as ParserRuleContext).ruleIndex !==
                            this.getStatementRuleType()))
            ) {
                this._isNewStatement = false;
            } else {
                this._isNewStatement = true;
            }
            return;
        }

        /**
         * The error node must be the first leaf node of the statement parse tree.
         */
        while (parent !== null && parent.ruleIndex !== this.getStatementRuleType()) {
            if (parent.children?.[0] !== currentNode) {
                this._isNewStatement = false;
                return;
            }

            currentNode = parent;
            parent = currentNode.parent;
        }

        let isNewStatement = true;

        /**
         * The previous statement must have no exception.
         */
        if (parent?.ruleIndex === this.getStatementRuleType()) {
            const programRule = parent.parent;
            const currentStatementRuleIndex =
                programRule?.children?.findIndex((node) => node === parent) || -1;
            if (currentStatementRuleIndex > 0) {
                const prevStatementRule = programRule!.children![
                    currentStatementRuleIndex - 1
                ] as ParserRuleContext;
                /**
                 * When a typed keyword doesn't match any rule, you will get an EOF error,
                 * for example after typing just 'CREATE' or 'INSERT'.
                 */
                const isStatementEOF = parent.exception?.offendingToken?.text === '<EOF>';
                isNewStatement =
                    prevStatementRule.exception !== null && !isStatementEOF
                        ? false
                        : isNewStatement;
            }
        }

        this._isNewStatement = isNewStatement;
    }

    visitTerminal(node: TerminalNode): void {
        if (node.symbol.tokenIndex !== this._tokenIndex) return;
        if (
            this._prevTokenIndex &&
            this._allTokens[this._prevTokenIndex].text === SQL_SPLIT_SYMBOL_TEXT
        ) {
            this._isNewStatement = true;
            return;
        }

        let currentNode: TerminalNode | ParserRuleContext = node;
        let parent = node.parent as ParserRuleContext | null;

        /**
         * The current terminal node must be the first leaf node of the statement parse tree.
         */
        while (parent !== null && parent.ruleIndex !== this.getStatementRuleType()) {
            if (parent.children?.[0] !== currentNode) {
                this._isNewStatement = false;
                return;
            }

            currentNode = parent;
            parent = currentNode.parent!;
        }

        let isNewStatement = true;

        if (parent?.ruleIndex === this.getStatementRuleType()) {
            const programRule = parent.parent;
            const currentStatementRuleIndex =
                programRule?.children?.findIndex((node) => node === parent) || -1;
            if (currentStatementRuleIndex > 0) {
                const prevStatementRule = programRule!.children![
                    currentStatementRuleIndex - 1
                ] as ParserRuleContext;
                isNewStatement = prevStatementRule.exception !== null ? false : isNewStatement;
            }
        }

        this._isNewStatement = isNewStatement;
    }

    enterEveryRule(_node: ParserRuleContext): void {}
    exitEveryRule(_node: ParserRuleContext): void {}
}

export default SemanticContextCollector;
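
To make the heuristic above concrete, a rough sketch of two contrasting caret placements (using the HiveSQL class from this package; the expected values follow the rules described in the comments and are illustrative, not test fixtures from this commit):

import { HiveSQL } from 'dt-sql-parser'; // assumed package entry point

const hive = new HiveSQL();

// Caret inside the FROM keyword of a single statement: the visited terminal
// node is not the first leaf of the statement, so this is not a new statement.
hive.getSemanticContextAtCaretPosition('SELECT id FROM tb1', { lineNumber: 1, column: 13 });
// expected: { isNewStatement: false }

// Caret after the split symbol ';': the previous non-whitespace token is ';',
// so this should be reported as a new statement.
hive.getSemanticContextAtCaretPosition('SELECT id FROM tb1; ', { lineNumber: 1, column: 21 });
// expected: { isNewStatement: true }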

src/parser/common/types.ts (+4)

@@ -69,3 +69,7 @@ export interface Suggestions<T = WordRange> {
}

export type LOCALE_TYPE = 'zh_CN' | 'en_US';
+
+export interface SemanticContext {
+    isNewStatement: boolean;
+}
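
The SemanticContext shape is deliberately small, so callers can branch on it directly. A minimal sketch of how a completion layer might use the flag (the suggestKeywords helper and keyword list are hypothetical, not part of this commit):

import { FlinkSQL } from 'dt-sql-parser'; // assumed package entry point
import type { CaretPosition } from 'dt-sql-parser'; // assumed type re-export

const STATEMENT_STARTERS = ['SELECT', 'INSERT', 'CREATE', 'DROP', 'ALTER', 'SHOW'];

// Only offer statement-leading keywords when the caret sits at the start of a new statement.
function suggestKeywords(parser: FlinkSQL, sql: string, caret: CaretPosition): string[] {
    const { isNewStatement } = parser.getSemanticContextAtCaretPosition(sql, caret);
    return isNewStatement ? STATEMENT_STARTERS : [];
}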

src/parser/flink/flinkSemanticContextCollector.ts (+20, new file)

@@ -0,0 +1,20 @@
import { FlinkSqlParserListener } from '../../lib';
import { FlinkSqlParser, SingleStatementContext } from '../../lib/flink/FlinkSqlParser';
import SemanticContextCollector from '../common/semanticContextCollector';

class FlinkSemanticContextCollector
    extends SemanticContextCollector
    implements FlinkSqlParserListener
{
    override getWhiteSpaceRuleType(): number {
        return FlinkSqlParser.SPACE;
    }
    override getStatementRuleType(): number {
        return FlinkSqlParser.RULE_singleStatement;
    }
    enterSingleStatement(ctx: SingleStatementContext) {
        this.statementVisitor(ctx);
    }
}

export { FlinkSemanticContextCollector };

src/parser/flink/index.ts (+10 -1)

@@ -2,13 +2,14 @@ import { CharStream, CommonTokenStream, Token } from 'antlr4ng';
import { CandidatesCollection } from 'antlr4-c3';
import { FlinkSqlLexer } from '../../lib/flink/FlinkSqlLexer';
import { FlinkSqlParser, ProgramContext } from '../../lib/flink/FlinkSqlParser';
-import { EntityContextType, Suggestions, SyntaxSuggestion } from '../common/types';
+import { CaretPosition, EntityContextType, Suggestions, SyntaxSuggestion } from '../common/types';
import { BasicSQL } from '../common/basicSQL';
import { StmtContextType } from '../common/entityCollector';
import { FlinkSqlSplitListener } from './flinkSplitListener';
import { FlinkEntityCollector } from './flinkEntityCollector';
import { ErrorListener } from '../common/parseErrorListener';
import { FlinkErrorListener } from './flinkErrorListener';
+import { FlinkSemanticContextCollector } from './flinkSemanticContextCollector';

export { FlinkSqlSplitListener, FlinkEntityCollector };

@@ -47,6 +48,14 @@ export class FlinkSQL extends BasicSQL<FlinkSqlLexer, ProgramContext, FlinkSqlPa
        return new FlinkEntityCollector(input, caretTokenIndex);
    }

+    protected createSemanticContextCollector(
+        input: string,
+        caretPosition: CaretPosition,
+        allTokens: Token[]
+    ) {
+        return new FlinkSemanticContextCollector(input, caretPosition, allTokens);
+    }
+
    protected processCandidates(
        candidates: CandidatesCollection,
        allTokens: Token[],

src/parser/hive/hiveSemanticContextCollector.ts (+20, new file)

@@ -0,0 +1,20 @@
import { HiveSqlParserListener } from '../../lib';
import { HiveSqlParser, StatementContext } from '../../lib/hive/HiveSqlParser';
import SemanticContextCollector from '../common/semanticContextCollector';

class HiveSemanticContextCollector
    extends SemanticContextCollector
    implements HiveSqlParserListener
{
    override getWhiteSpaceRuleType(): number {
        return HiveSqlParser.WHITE_SPACE;
    }
    override getStatementRuleType(): number {
        return HiveSqlParser.RULE_statement;
    }
    enterStatement(ctx: StatementContext) {
        this.statementVisitor(ctx);
    }
}

export { HiveSemanticContextCollector };

src/parser/hive/index.ts (+10 -1)

@@ -4,12 +4,13 @@ import { HiveSqlLexer } from '../../lib/hive/HiveSqlLexer';
import { HiveSqlParser, ProgramContext } from '../../lib/hive/HiveSqlParser';
import { BasicSQL } from '../common/basicSQL';

-import { EntityContextType, Suggestions, SyntaxSuggestion } from '../common/types';
+import { CaretPosition, EntityContextType, Suggestions, SyntaxSuggestion } from '../common/types';
import { StmtContextType } from '../common/entityCollector';
import { HiveSqlSplitListener } from './hiveSplitListener';
import { HiveEntityCollector } from './hiveEntityCollector';
import { ErrorListener } from '../common/parseErrorListener';
import { HiveErrorListener } from './hiveErrorListener';
+import { HiveSemanticContextCollector } from './hiveSemanticContextCollector';

export { HiveEntityCollector, HiveSqlSplitListener };

@@ -48,6 +49,14 @@ export class HiveSQL extends BasicSQL<HiveSqlLexer, ProgramContext, HiveSqlParse
        return new HiveEntityCollector(input, caretTokenIndex);
    }

+    protected createSemanticContextCollector(
+        input: string,
+        caretPosition: CaretPosition,
+        allTokens: Token[]
+    ) {
+        return new HiveSemanticContextCollector(input, caretPosition, allTokens);
+    }
+
    protected processCandidates(
        candidates: CandidatesCollection,
        allTokens: Token[],
