Feat/spark sql auxiliary syntax (#165)
* feat: supplement SparkSQL add syntax unit test

* chore: recompile spark listener & visitor

---------

Co-authored-by: dilu <[email protected]>
OnlyFlyer and dilu committed Oct 9, 2023
1 parent 05da14d commit d1c2920
Showing 23 changed files with 440 additions and 14 deletions.
2 changes: 1 addition & 1 deletion src/lib/spark/SparkSqlParserListener.ts
@@ -1,4 +1,4 @@
-// Generated from /Users/liuyi/Desktop/Projects/dtstack/dt-sql-parser/src/grammar/spark/SparkSqlParser.g4 by ANTLR 4.9.0-SNAPSHOT
+// Generated from /Users/edy/github/dt-sql-parser/src/grammar/spark/SparkSqlParser.g4 by ANTLR 4.9.0-SNAPSHOT


import { ParseTreeListener } from "antlr4ts/tree/ParseTreeListener";
2 changes: 1 addition & 1 deletion src/lib/spark/SparkSqlParserVisitor.ts
@@ -1,4 +1,4 @@
-// Generated from /Users/liuyi/Desktop/Projects/dtstack/dt-sql-parser/src/grammar/spark/SparkSqlParser.g4 by ANTLR 4.9.0-SNAPSHOT
+// Generated from /Users/edy/github/dt-sql-parser/src/grammar/spark/SparkSqlParser.g4 by ANTLR 4.9.0-SNAPSHOT


import { ParseTreeVisitor } from "antlr4ts/tree/ParseTreeVisitor";
12 changes: 6 additions & 6 deletions src/parser/spark.ts
@@ -17,18 +17,18 @@ export default class SparkSQL extends BasicParser<SparkSqlLexer, ProgramContext,

protected preferredRules: Set<number> = new Set();

-protected get splitListener () {
+protected get splitListener() {
return null as any;
}

protected processCandidates(
-candidates: CandidatesCollection,
-allTokens: Token[],
-caretTokenIndex: number
+candidates: CandidatesCollection,
+allTokens: Token[],
+caretTokenIndex: number,
): Suggestions<Token> {
return {
syntax: [],
-keywords: []
-}
+keywords: [],
+};
}
}
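
For reference, the `validate` entry point exercised by the new syntax tests below can also be called standalone. A minimal usage sketch (the relative import path is taken from the new test files):

```ts
import SparkSQL from '../../../../src/parser/spark';

// validate returns the list of syntax errors found in the input;
// an empty array means the statement parsed cleanly.
const parser = new SparkSQL();
const errors = parser.validate('ADD FILE /tmp/test;');
console.log(errors.length === 0 ? 'valid' : `found ${errors.length} error(s)`);
```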
2 changes: 1 addition & 1 deletion test/parser/spark/listener.test.ts
Expand Up @@ -15,7 +15,7 @@ describe('Spark SQL Listener Tests', () => {
result = ctx.text.toLowerCase();
}
}
-const listenTableName: any = new MyListener();
+const listenTableName = new MyListener();

parser.listen(listenTableName, parserTree);
expect(result).toBe(expectTableName);
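
The hunk above omits the top of `MyListener`, so only the callback body is visible. A sketch of the likely surrounding shape — the callback name `exitTableName` and its context type are assumptions, since the truncated diff does not show them:

```ts
import SparkSQL from '../../../src/parser/spark';
import { SparkSqlParserListener } from '../../../src/lib/spark/SparkSqlParserListener';

const parser = new SparkSQL();
const parserTree = parser.parse('SELECT * FROM my_table;');

let result = '';

// `exitTableName` is a hypothetical callback name; the generated listener
// interface declares one optional enter/exit pair per grammar rule.
class MyListener implements SparkSqlParserListener {
    exitTableName(ctx: { text: string }) {
        result = ctx.text.toLowerCase();
    }
}

const listenTableName = new MyListener();
parser.listen(listenTableName, parserTree);
```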
16 changes: 16 additions & 0 deletions test/parser/spark/syntax/addStatement.test.ts
@@ -0,0 +1,16 @@
import SparkSQL from '../../../../src/parser/spark';
import { readSQL } from '../../../helper';

const parser = new SparkSQL();

const features = {
add: readSQL(__dirname, 'add.sql'),
};

describe('Spark add Syntax Tests', () => {
features.add.forEach((itemSql) => {
it(itemSql, () => {
expect(parser.validate(itemSql).length).toBe(0);
});
});
});
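
Each of the following suites repeats the pattern above: load a fixture, then assert every statement in it validates without errors. The shared `readSQL` helper is not part of this diff; a plausible sketch, assuming it reads from the sibling `fixtures` directory, drops `--` comment lines, and splits statements on semicolons:

```ts
import * as fs from 'fs';
import * as path from 'path';

// Plausible reconstruction of the readSQL test helper (the real one lives
// under test/ and is not shown in this diff): read a fixture file, strip
// comment lines, and return one trimmed statement per semicolon.
export function readSQL(dirname: string, fileName: string): string[] {
    const content = fs.readFileSync(path.join(dirname, 'fixtures', fileName), 'utf-8');
    return content
        .split(';')
        .map((chunk) =>
            chunk
                .split('\n')
                .filter((line) => !line.trim().startsWith('--'))
                .join('\n')
                .trim()
        )
        .filter((stmt) => stmt.length > 0)
        .map((stmt) => stmt + ';');
}
```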
16 changes: 16 additions & 0 deletions test/parser/spark/syntax/analyzeTableStatement.test.ts
@@ -0,0 +1,16 @@
import SparkSQL from '../../../../src/parser/spark';
import { readSQL } from '../../../helper';

const parser = new SparkSQL();

const features = {
analyzeTable: readSQL(__dirname, 'analyzeTable.sql'),
};

describe('Spark analyzeTable Syntax Tests', () => {
features.analyzeTable.forEach((itemSql) => {
it(itemSql, () => {
expect(parser.validate(itemSql).length).toBe(0);
});
});
});
16 changes: 16 additions & 0 deletions test/parser/spark/syntax/cacheStatement.test.ts
@@ -0,0 +1,16 @@
import SparkSQL from '../../../../src/parser/spark';
import { readSQL } from '../../../helper';

const parser = new SparkSQL();

const features = {
cache: readSQL(__dirname, 'cache.sql'),
};

describe('Spark cache Syntax Tests', () => {
features.cache.forEach((itemSql) => {
it(itemSql, () => {
expect(parser.validate(itemSql).length).toBe(0);
});
});
});
16 changes: 16 additions & 0 deletions test/parser/spark/syntax/describeStatement.test.ts
@@ -0,0 +1,16 @@
import SparkSQL from '../../../../src/parser/spark';
import { readSQL } from '../../../helper';

const parser = new SparkSQL();

const features = {
describe: readSQL(__dirname, 'describe.sql'),
};

describe('Spark describe Syntax Tests', () => {
features.describe.forEach((itemSql) => {
it(itemSql, () => {
expect(parser.validate(itemSql).length).toBe(0);
});
});
});
34 changes: 34 additions & 0 deletions test/parser/spark/syntax/fixtures/add.sql
@@ -0,0 +1,34 @@

-- ADD { FILE | FILES } resource_name [ ... ]

ADD FILE /tmp/test;

ADD FILE "/path/to/file/abc.txt";

ADD FILE '/another/test.txt';

ADD FILE "/path with space/abc.txt";

ADD FILE "/path/to/some/directory";

ADD FILES "/path with space/cde.txt" '/path with space/fgh.txt';

-- ADD { JAR | JARS } file_name [ ... ]

ADD JAR /tmp/test.jar;

ADD JAR "/path/to/some.jar";

ADD JAR '/some/other.jar';

ADD JAR "/path with space/abc.jar";

ADD JARS "/path with space/def.jar" '/path with space/ghi.jar';

ADD JAR "ivy://group:module:version";

ADD JAR "ivy://group:module:version?transitive=false"

ADD JAR "ivy://group:module:version?transitive=true"

ADD JAR "ivy://group:module:version?exclude=group:module&transitive=true"
32 changes: 32 additions & 0 deletions test/parser/spark/syntax/fixtures/analyzeTable.sql
@@ -0,0 +1,32 @@
CREATE DATABASE school_db;
USE school_db;

CREATE TABLE teachers (name STRING, teacher_id INT);
INSERT INTO teachers VALUES ('Tom', 1), ('Jerry', 2);

CREATE TABLE students (name STRING, student_id INT) PARTITIONED BY (student_id);
INSERT INTO students VALUES ('Mark', 111111), ('John', 222222);

ANALYZE TABLE students COMPUTE STATISTICS NOSCAN;

DESC EXTENDED students;

ANALYZE TABLE students COMPUTE STATISTICS;

DESC EXTENDED students;

ANALYZE TABLE students PARTITION (student_id = 111111) COMPUTE STATISTICS;

DESC EXTENDED students PARTITION (student_id = 111111);

ANALYZE TABLE students COMPUTE STATISTICS FOR COLUMNS name;

DESC EXTENDED students name;

ANALYZE TABLES IN school_db COMPUTE STATISTICS NOSCAN;

DESC EXTENDED teachers;

ANALYZE TABLES COMPUTE STATISTICS;

DESC EXTENDED teachers;
17 changes: 17 additions & 0 deletions test/parser/spark/syntax/fixtures/cache.sql
@@ -0,0 +1,17 @@
-- CACHE [ LAZY ] TABLE table_identifier [ OPTIONS ( 'storageLevel' [ = ] value ) ] [ [ AS ] query ]

CACHE TABLE testCache OPTIONS ('storageLevel' 'DISK_ONLY') SELECT * FROM testData;

CACHE LAZY TABLE testCache1 SELECT * FROM testData;

CACHE LAZY TABLE testCache2 AS SELECT * FROM testData;


-- CLEAR CACHE

CLEAR CACHE;


-- UNCACHE TABLE [ IF EXISTS ] table_identifier

UNCACHE TABLE t1;
45 changes: 45 additions & 0 deletions test/parser/spark/syntax/fixtures/describe.sql
@@ -0,0 +1,45 @@
-- { DESC | DESCRIBE } DATABASE [ EXTENDED ] db_name

DESCRIBE DATABASE employees;

DESCRIBE DATABASE EXTENDED employees;

DESC DATABASE deployment;


-- { DESC | DESCRIBE } FUNCTION [ EXTENDED ] function_name

DESC FUNCTION abs;

DESC FUNCTION EXTENDED abs;

DESC FUNCTION max;

DESC FUNCTION EXTENDED explode;


-- { DESC | DESCRIBE } [ QUERY ] input_statement

DESCRIBE QUERY SELECT age, sum(age) FROM person GROUP BY age;

DESCRIBE QUERY WITH all_names_cte
AS (SELECT name from person) SELECT * FROM all_names_cte;

DESC QUERY VALUES(100, 'John', 10000) AS employee(id, name, salary);

DESC QUERY TABLE person;

DESCRIBE FROM person SELECT age;


-- { DESC | DESCRIBE } [ TABLE ] [ format ] table_identifier [ partition_spec ] [ col_name ]

DESCRIBE TABLE customer;

DESCRIBE TABLE salesdb.customer;

DESCRIBE TABLE EXTENDED customer;

DESCRIBE TABLE EXTENDED customer PARTITION (state = 'AR');

DESCRIBE customer salesdb.customer.name;
12 changes: 12 additions & 0 deletions test/parser/spark/syntax/fixtures/list.sql
@@ -0,0 +1,12 @@
-- LIST { FILE | FILES } file_name [ ... ]

LIST FILE;

LIST FILE /tmp/test /some/random/file /another/random/file;


-- LIST { JAR | JARS } file_name [ ... ]

LIST JAR;

LIST JAR /tmp/test.jar /some/random.jar /another/random.jar;
17 changes: 17 additions & 0 deletions test/parser/spark/syntax/fixtures/refresh.sql
@@ -0,0 +1,17 @@
-- REFRESH resource_path

REFRESH "hdfs://path/to/table";


-- REFRESH FUNCTION function_identifier

REFRESH FUNCTION func1;

REFRESH FUNCTION db1.func1;


-- REFRESH [TABLE] table_identifier

REFRESH TABLE tbl1;

REFRESH TABLE tempDB.view1;
9 changes: 9 additions & 0 deletions test/parser/spark/syntax/fixtures/reset.sql
@@ -0,0 +1,9 @@
-- RESET;

-- RESET configuration_key;

-- Reset any runtime configurations specific to the current session which were set via the SET command to their default values.
RESET;

-- If you start your application with --conf spark.abc=bar and then SET spark.abc=foobar at
-- runtime, the statement below restores it to 'bar'. If spark.abc was not set at startup,
-- the statement removes the key from the SQLConf entirely. Nonexistent keys are ignored.
RESET spark.abc;
15 changes: 15 additions & 0 deletions test/parser/spark/syntax/fixtures/set.sql
@@ -0,0 +1,15 @@
-- SET
-- SET [ -v ]
-- SET property_key[ = property_value ]

-- Set a property.
SET spark.sql.variable.substitute=false;

-- List all SQLConf properties with value and meaning.
SET -v;

-- List all SQLConf properties with value for current session.
SET;

-- List the value of specified property key.
SET spark.sql.variable.substitute;
