From d1c2920f80e05e2edf423b53c7f4a192614c503e Mon Sep 17 00:00:00 2001 From: Frank Date: Mon, 9 Oct 2023 09:49:22 +0800 Subject: [PATCH] Feat/spark sql auxiliary syntax (#165) * feat: supplement SparkSQL add syntax unit test * chore: recompile spark listener & visitor --------- Co-authored-by: dilu --- src/lib/spark/SparkSqlParserListener.ts | 2 +- src/lib/spark/SparkSqlParserVisitor.ts | 2 +- src/parser/spark.ts | 12 +-- test/parser/spark/listener.test.ts | 2 +- test/parser/spark/syntax/addStatement.test.ts | 16 +++ .../syntax/analyzeTableStatement.test.ts | 16 +++ .../spark/syntax/cacheStatement.test.ts | 16 +++ .../spark/syntax/describeStatement.test.ts | 16 +++ test/parser/spark/syntax/fixtures/add.sql | 34 ++++++ .../spark/syntax/fixtures/analyzeTable.sql | 32 ++++++ test/parser/spark/syntax/fixtures/cache.sql | 17 +++ .../parser/spark/syntax/fixtures/describe.sql | 45 ++++++++ test/parser/spark/syntax/fixtures/list.sql | 12 +++ test/parser/spark/syntax/fixtures/refresh.sql | 17 +++ test/parser/spark/syntax/fixtures/reset.sql | 9 ++ test/parser/spark/syntax/fixtures/set.sql | 15 +++ test/parser/spark/syntax/fixtures/show.sql | 101 ++++++++++++++++++ .../parser/spark/syntax/listStatement.test.ts | 16 +++ .../spark/syntax/refreshStatement.test.ts | 16 +++ .../spark/syntax/resetStatement.test.ts | 16 +++ test/parser/spark/syntax/setStatement.test.ts | 16 +++ .../parser/spark/syntax/showStatement.test.ts | 16 +++ test/parser/spark/visitor.test.ts | 10 +- 23 files changed, 440 insertions(+), 14 deletions(-) create mode 100644 test/parser/spark/syntax/addStatement.test.ts create mode 100644 test/parser/spark/syntax/analyzeTableStatement.test.ts create mode 100644 test/parser/spark/syntax/cacheStatement.test.ts create mode 100644 test/parser/spark/syntax/describeStatement.test.ts create mode 100644 test/parser/spark/syntax/fixtures/add.sql create mode 100644 test/parser/spark/syntax/fixtures/analyzeTable.sql create mode 100644 
test/parser/spark/syntax/fixtures/cache.sql create mode 100644 test/parser/spark/syntax/fixtures/describe.sql create mode 100644 test/parser/spark/syntax/fixtures/list.sql create mode 100644 test/parser/spark/syntax/fixtures/refresh.sql create mode 100644 test/parser/spark/syntax/fixtures/reset.sql create mode 100644 test/parser/spark/syntax/fixtures/set.sql create mode 100644 test/parser/spark/syntax/fixtures/show.sql create mode 100644 test/parser/spark/syntax/listStatement.test.ts create mode 100644 test/parser/spark/syntax/refreshStatement.test.ts create mode 100644 test/parser/spark/syntax/resetStatement.test.ts create mode 100644 test/parser/spark/syntax/setStatement.test.ts create mode 100644 test/parser/spark/syntax/showStatement.test.ts diff --git a/src/lib/spark/SparkSqlParserListener.ts b/src/lib/spark/SparkSqlParserListener.ts index acfe9759..afef01ee 100644 --- a/src/lib/spark/SparkSqlParserListener.ts +++ b/src/lib/spark/SparkSqlParserListener.ts @@ -1,4 +1,4 @@ -// Generated from /Users/liuyi/Desktop/Projects/dtstack/dt-sql-parser/src/grammar/spark/SparkSqlParser.g4 by ANTLR 4.9.0-SNAPSHOT +// Generated from /Users/edy/github/dt-sql-parser/src/grammar/spark/SparkSqlParser.g4 by ANTLR 4.9.0-SNAPSHOT import { ParseTreeListener } from "antlr4ts/tree/ParseTreeListener"; diff --git a/src/lib/spark/SparkSqlParserVisitor.ts b/src/lib/spark/SparkSqlParserVisitor.ts index 236c0f95..2d752ec2 100644 --- a/src/lib/spark/SparkSqlParserVisitor.ts +++ b/src/lib/spark/SparkSqlParserVisitor.ts @@ -1,4 +1,4 @@ -// Generated from /Users/liuyi/Desktop/Projects/dtstack/dt-sql-parser/src/grammar/spark/SparkSqlParser.g4 by ANTLR 4.9.0-SNAPSHOT +// Generated from /Users/edy/github/dt-sql-parser/src/grammar/spark/SparkSqlParser.g4 by ANTLR 4.9.0-SNAPSHOT import { ParseTreeVisitor } from "antlr4ts/tree/ParseTreeVisitor"; diff --git a/src/parser/spark.ts b/src/parser/spark.ts index 8d567e35..b81f3523 100644 --- a/src/parser/spark.ts +++ b/src/parser/spark.ts @@ -17,18 +17,18 
@@ export default class SparkSQL extends BasicParser = new Set(); - protected get splitListener () { + protected get splitListener() { return null as any; } protected processCandidates( - candidates: CandidatesCollection, - allTokens: Token[], - caretTokenIndex: number + candidates: CandidatesCollection, + allTokens: Token[], + caretTokenIndex: number, ): Suggestions { return { syntax: [], - keywords: [] - } + keywords: [], + }; } } diff --git a/test/parser/spark/listener.test.ts b/test/parser/spark/listener.test.ts index 3cc04195..83ffe067 100644 --- a/test/parser/spark/listener.test.ts +++ b/test/parser/spark/listener.test.ts @@ -15,7 +15,7 @@ describe('Spark SQL Listener Tests', () => { result = ctx.text.toLowerCase(); } } - const listenTableName: any = new MyListener(); + const listenTableName = new MyListener(); parser.listen(listenTableName, parserTree); expect(result).toBe(expectTableName); diff --git a/test/parser/spark/syntax/addStatement.test.ts b/test/parser/spark/syntax/addStatement.test.ts new file mode 100644 index 00000000..a7ed31a3 --- /dev/null +++ b/test/parser/spark/syntax/addStatement.test.ts @@ -0,0 +1,16 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + add: readSQL(__dirname, 'add.sql'), +}; + +describe('Spark add Syntax Tests', () => { + features.add.forEach((itemSql) => { + it(itemSql, () => { + expect(parser.validate(itemSql).length).toBe(0); + }); + }); +}); diff --git a/test/parser/spark/syntax/analyzeTableStatement.test.ts b/test/parser/spark/syntax/analyzeTableStatement.test.ts new file mode 100644 index 00000000..88d8ce58 --- /dev/null +++ b/test/parser/spark/syntax/analyzeTableStatement.test.ts @@ -0,0 +1,16 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + analyzeTable: readSQL(__dirname, 'analyzeTable.sql'), +}; + 
+describe('Spark analyzeTable Syntax Tests', () => { + features.analyzeTable.forEach((itemSql) => { + it(itemSql, () => { + expect(parser.validate(itemSql).length).toBe(0); + }); + }); +}); diff --git a/test/parser/spark/syntax/cacheStatement.test.ts b/test/parser/spark/syntax/cacheStatement.test.ts new file mode 100644 index 00000000..989d05d3 --- /dev/null +++ b/test/parser/spark/syntax/cacheStatement.test.ts @@ -0,0 +1,16 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + cache: readSQL(__dirname, 'cache.sql'), +}; + +describe('Spark cache Syntax Tests', () => { + features.cache.forEach((itemSql) => { + it(itemSql, () => { + expect(parser.validate(itemSql).length).toBe(0); + }); + }); +}); diff --git a/test/parser/spark/syntax/describeStatement.test.ts b/test/parser/spark/syntax/describeStatement.test.ts new file mode 100644 index 00000000..db050d4a --- /dev/null +++ b/test/parser/spark/syntax/describeStatement.test.ts @@ -0,0 +1,16 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + describe: readSQL(__dirname, 'describe.sql'), +}; + +describe('Spark describe Syntax Tests', () => { + features.describe.forEach((itemSql) => { + it(itemSql, () => { + expect(parser.validate(itemSql).length).toBe(0); + }); + }); +}); diff --git a/test/parser/spark/syntax/fixtures/add.sql b/test/parser/spark/syntax/fixtures/add.sql new file mode 100644 index 00000000..fbc5d59b --- /dev/null +++ b/test/parser/spark/syntax/fixtures/add.sql @@ -0,0 +1,34 @@ + +-- ADD { FILE | FILES } resource_name [ ... 
] + +ADD FILE /tmp/test; + +ADD FILE "/path/to/file/abc.txt"; + +ADD FILE '/another/test.txt'; + +ADD FILE "/path with space/abc.txt"; + +ADD FILE "/path/to/some/directory"; + +ADD FILES "/path with space/cde.txt" '/path with space/fgh.txt'; + +-- ADD { JAR | JARS } file_name [ ... ] + +ADD JAR /tmp/test.jar; + +ADD JAR "/path/to/some.jar"; + +ADD JAR '/some/other.jar'; + +ADD JAR "/path with space/abc.jar"; + +ADD JARS "/path with space/def.jar" '/path with space/ghi.jar'; + +ADD JAR "ivy://group:module:version"; + +ADD JAR "ivy://group:module:version?transitive=false"; + +ADD JAR "ivy://group:module:version?transitive=true"; + +ADD JAR "ivy://group:module:version?exclude=group:module&transitive=true"; diff --git a/test/parser/spark/syntax/fixtures/analyzeTable.sql b/test/parser/spark/syntax/fixtures/analyzeTable.sql new file mode 100644 index 00000000..90b91ba8 --- /dev/null +++ b/test/parser/spark/syntax/fixtures/analyzeTable.sql @@ -0,0 +1,32 @@ +CREATE DATABASE school_db; +USE school_db; + +CREATE TABLE teachers (name STRING, teacher_id INT); +INSERT INTO teachers VALUES ('Tom', 1), ('Jerry', 2); + +CREATE TABLE students (name STRING, student_id INT) PARTITIONED BY (student_id); +INSERT INTO students VALUES ('Mark', 111111), ('John', 222222); + +ANALYZE TABLE students COMPUTE STATISTICS NOSCAN; + +DESC EXTENDED students; + +ANALYZE TABLE students COMPUTE STATISTICS; + +DESC EXTENDED students; + +ANALYZE TABLE students PARTITION (student_id = 111111) COMPUTE STATISTICS; + +DESC EXTENDED students PARTITION (student_id = 111111); + +ANALYZE TABLE students COMPUTE STATISTICS FOR COLUMNS name; + +DESC EXTENDED students name; + +ANALYZE TABLES IN school_db COMPUTE STATISTICS NOSCAN; + +DESC EXTENDED teachers; + +ANALYZE TABLES COMPUTE STATISTICS; + +DESC EXTENDED teachers; diff --git a/test/parser/spark/syntax/fixtures/cache.sql b/test/parser/spark/syntax/fixtures/cache.sql new file mode 100644 index 00000000..d79b9b2f --- /dev/null +++ 
b/test/parser/spark/syntax/fixtures/cache.sql @@ -0,0 +1,17 @@ +-- CACHE LAZY TABLE testCache1 [ OPTIONS ( 'storageLevel' [ = ] value ) ] [ [ AS ] query ] + +CACHE TABLE testCache OPTIONS ('storageLevel' 'DISK_ONLY') SELECT * FROM testData; + +CACHE LAZY TABLE testCache1 SELECT * FROM testData; + +CACHE LAZY TABLE testCache2 AS SELECT * FROM testData; + + +-- CLEAR CACHE + +CLEAR CACHE; + + +-- UNCACHE TABLE [ IF EXISTS ] table_identifier + +UNCACHE TABLE t1; diff --git a/test/parser/spark/syntax/fixtures/describe.sql b/test/parser/spark/syntax/fixtures/describe.sql new file mode 100644 index 00000000..73c4f0bc --- /dev/null +++ b/test/parser/spark/syntax/fixtures/describe.sql @@ -0,0 +1,45 @@ +-- { DESC | DESCRIBE } DATABASE [ EXTENDED ] db_name + +DESCRIBE DATABASE employees; + +DESCRIBE DATABASE EXTENDED employees; + +DESC DATABASE deployment; + + +-- { DESC | DESCRIBE } FUNCTION [ EXTENDED ] function_name + +DESC FUNCTION abs; + +DESC FUNCTION EXTENDED abs; + +DESC FUNCTION max; + +DESC FUNCTION EXTENDED explode; + + +-- { DESC | DESCRIBE } [ QUERY ] input_statement + +DESCRIBE QUERY SELECT age, sum(age) FROM person GROUP BY age; + +DESCRIBE QUERY WITH all_names_cte + AS (SELECT name from person) SELECT * FROM all_names_cte; + +DESC QUERY VALUES(100, 'John', 10000) AS employee(id, name, salary); + +DESC QUERY TABLE person; + +DESCRIBE FROM person SELECT age; + + +-- { DESC | DESCRIBE } [ TABLE ] [ format ] table_identifier [ partition_spec ] [ col_name ] + +DESCRIBE TABLE customer; + +DESCRIBE TABLE salesdb.customer; + +DESCRIBE TABLE EXTENDED customer; + +DESCRIBE TABLE EXTENDED customer PARTITION (state = 'AR'); + +DESCRIBE customer salesdb.customer.name; diff --git a/test/parser/spark/syntax/fixtures/list.sql b/test/parser/spark/syntax/fixtures/list.sql new file mode 100644 index 00000000..20442108 --- /dev/null +++ b/test/parser/spark/syntax/fixtures/list.sql @@ -0,0 +1,12 @@ +-- LIST { FILE | FILES } file_name [ ... 
] + +LIST FILE; + +LIST FILE /tmp/test /some/random/file /another/random/file; + + +-- LIST { JAR | JARS } file_name [ ... ] + +LIST JAR; + +LIST JAR /tmp/test.jar /some/random.jar /another/random.jar; diff --git a/test/parser/spark/syntax/fixtures/refresh.sql b/test/parser/spark/syntax/fixtures/refresh.sql new file mode 100644 index 00000000..4773b2da --- /dev/null +++ b/test/parser/spark/syntax/fixtures/refresh.sql @@ -0,0 +1,17 @@ +-- REFRESH resource_path + +REFRESH "hdfs://path/to/table"; + + +-- REFRESH FUNCTION function_identifier + +REFRESH FUNCTION func1; + +REFRESH FUNCTION db1.func1; + + +-- REFRESH [TABLE] table_identifier + +REFRESH TABLE tbl1; + +REFRESH TABLE tempDB.view1; diff --git a/test/parser/spark/syntax/fixtures/reset.sql b/test/parser/spark/syntax/fixtures/reset.sql new file mode 100644 index 00000000..9bf264c9 --- /dev/null +++ b/test/parser/spark/syntax/fixtures/reset.sql @@ -0,0 +1,9 @@ +-- RESET; + +-- RESET configuration_key; + +-- Reset any runtime configurations specific to the current session which were set via the SET command to their default values. +RESET; + +-- If you start your application with --conf spark.foo=bar and set spark.foo=foobar in runtime, the example below will restore it to 'bar'. If spark.foo is not specified during starting, the example below will remove this config from the SQLConf. It will ignore nonexistent keys. +RESET spark.abc; diff --git a/test/parser/spark/syntax/fixtures/set.sql b/test/parser/spark/syntax/fixtures/set.sql new file mode 100644 index 00000000..13041f26 --- /dev/null +++ b/test/parser/spark/syntax/fixtures/set.sql @@ -0,0 +1,15 @@ +-- SET +-- SET [ -v ] +-- SET property_key[ = property_value ] + +-- Set a property. +SET spark.sql.variable.substitute=false; + +-- List all SQLConf properties with value and meaning. +SET -v; + +-- List all SQLConf properties with value for current session. +SET; + +-- List the value of specified property key. 
+SET spark.sql.variable.substitute; diff --git a/test/parser/spark/syntax/fixtures/show.sql b/test/parser/spark/syntax/fixtures/show.sql new file mode 100644 index 00000000..e1f5fd50 --- /dev/null +++ b/test/parser/spark/syntax/fixtures/show.sql @@ -0,0 +1,101 @@ +-- SHOW COLUMNS table_identifier [ database ] + +SHOW COLUMNS IN customer; + +SHOW COLUMNS IN salesdb.customer; + +SHOW COLUMNS IN customer IN salesdb; + + +-- SHOW CREATE TABLE table_identifier [ AS SERDE ] + +SHOW CREATE TABLE test; + +SHOW CREATE TABLE test AS SERDE; + + +-- SHOW { DATABASES | SCHEMAS } [ LIKE regex_pattern ] + +SHOW DATABASES; + +SHOW DATABASES LIKE 'pay*'; + +SHOW SCHEMAS; + + +-- SHOW [ function_kind ] FUNCTIONS [ { FROM | IN } database_name ] [ LIKE regex_pattern ] + +SHOW FUNCTIONS trim; + +SHOW SYSTEM FUNCTIONS concat; + +SHOW SYSTEM FUNCTIONS FROM salesdb LIKE 'max'; + +SHOW FUNCTIONS LIKE 't*'; + +SHOW FUNCTIONS LIKE 'yea*|windo*'; + +SHOW FUNCTIONS LIKE 't[a-z][a-z][a-z]'; + + +-- SHOW PARTITIONS table_identifier [ partition_spec ] + +SHOW PARTITIONS customer; + +SHOW PARTITIONS salesdb.customer; + +SHOW PARTITIONS customer PARTITION (state = 'CA', city = 'Fremont'); + +SHOW PARTITIONS customer PARTITION (state = 'CA'); + +SHOW PARTITIONS customer PARTITION (city = 'San Jose'); + + +-- SHOW TABLE EXTENDED [ { IN | FROM } database_name ] LIKE regex_pattern +-- [ partition_spec ] + +SHOW TABLE EXTENDED LIKE 'employee'; + +SHOW TABLE EXTENDED LIKE 'employe*'; + +SHOW TABLE EXTENDED IN default LIKE 'employee' PARTITION (grade=1); + +SHOW TABLE EXTENDED IN default LIKE 'empl*' PARTITION (grade=1); + + +-- SHOW TABLES [ { FROM | IN } database_name ] [ LIKE regex_pattern ] + +SHOW TABLES; + +SHOW TABLES FROM userdb; + +SHOW TABLES IN userdb; + +SHOW TABLES FROM default LIKE 'sam*'; + +SHOW TABLES LIKE 'sam*|suj'; + + +-- SHOW TBLPROPERTIES table_identifier +-- [ ( unquoted_property_key | property_key_as_string_literal ) ] + +SHOW TBLPROPERTIES customer; + +SHOW TBLPROPERTIES 
salesdb.customer; + +SHOW TBLPROPERTIES customer (created.by.user); + +SHOW TBLPROPERTIES customer ('created.date'); + + +-- SHOW VIEWS [ { FROM | IN } database_name ] [ LIKE regex_pattern ] + +SHOW VIEWS; + +SHOW VIEWS FROM userdb; + +SHOW VIEWS IN global_temp; + +SHOW VIEWS FROM default LIKE 'sam*'; + +SHOW VIEWS LIKE 'sam|suj|temp*'; diff --git a/test/parser/spark/syntax/listStatement.test.ts b/test/parser/spark/syntax/listStatement.test.ts new file mode 100644 index 00000000..67f8381c --- /dev/null +++ b/test/parser/spark/syntax/listStatement.test.ts @@ -0,0 +1,16 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + list: readSQL(__dirname, 'list.sql'), +}; + +describe('Spark list Syntax Tests', () => { + features.list.forEach((itemSql) => { + it(itemSql, () => { + expect(parser.validate(itemSql).length).toBe(0); + }); + }); +}); diff --git a/test/parser/spark/syntax/refreshStatement.test.ts b/test/parser/spark/syntax/refreshStatement.test.ts new file mode 100644 index 00000000..4f95cc79 --- /dev/null +++ b/test/parser/spark/syntax/refreshStatement.test.ts @@ -0,0 +1,16 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + refresh: readSQL(__dirname, 'refresh.sql'), +}; + +describe('Spark refresh Syntax Tests', () => { + features.refresh.forEach((itemSql) => { + it(itemSql, () => { + expect(parser.validate(itemSql).length).toBe(0); + }); + }); +}); diff --git a/test/parser/spark/syntax/resetStatement.test.ts b/test/parser/spark/syntax/resetStatement.test.ts new file mode 100644 index 00000000..5630625f --- /dev/null +++ b/test/parser/spark/syntax/resetStatement.test.ts @@ -0,0 +1,16 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + reset: 
readSQL(__dirname, 'reset.sql'), +}; + +describe('Spark reset Syntax Tests', () => { + features.reset.forEach((itemSql) => { + it(itemSql, () => { + expect(parser.validate(itemSql).length).toBe(0); + }); + }); +}); diff --git a/test/parser/spark/syntax/setStatement.test.ts b/test/parser/spark/syntax/setStatement.test.ts new file mode 100644 index 00000000..ce330db8 --- /dev/null +++ b/test/parser/spark/syntax/setStatement.test.ts @@ -0,0 +1,16 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + set: readSQL(__dirname, 'set.sql'), +}; + +describe('Spark set Syntax Tests', () => { + features.set.forEach((itemSql) => { + it(itemSql, () => { + expect(parser.validate(itemSql).length).toBe(0); + }); + }); +}); diff --git a/test/parser/spark/syntax/showStatement.test.ts b/test/parser/spark/syntax/showStatement.test.ts new file mode 100644 index 00000000..2c392e5f --- /dev/null +++ b/test/parser/spark/syntax/showStatement.test.ts @@ -0,0 +1,16 @@ +import SparkSQL from '../../../../src/parser/spark'; +import { readSQL } from '../../../helper'; + +const parser = new SparkSQL(); + +const features = { + show: readSQL(__dirname, 'show.sql'), +}; + +describe('Spark show Syntax Tests', () => { + features.show.forEach((itemSql) => { + it(itemSql, () => { + expect(parser.validate(itemSql).length).toBe(0); + }); + }); +}); diff --git a/test/parser/spark/visitor.test.ts b/test/parser/spark/visitor.test.ts index 3d391abf..23f303f6 100644 --- a/test/parser/spark/visitor.test.ts +++ b/test/parser/spark/visitor.test.ts @@ -12,18 +12,18 @@ describe('Spark SQL Visitor Tests', () => { }); test('Visitor visitRelationPrimary', () => { - let result = ''; class MyVisitor extends AbstractParseTreeVisitor implements SparkSqlParserVisitor { + result: string = ''; protected defaultResult() { - return result; + return this.result; } visitRelationPrimary = (ctx): void => { - result = 
ctx.text.toLowerCase(); + this.result = ctx.text.toLowerCase(); } } - const visitor: any = new MyVisitor(); + const visitor = new MyVisitor(); visitor.visit(parserTree); - expect(result).toBe(expectTableName); + expect(visitor.result).toBe(expectTableName); }); });