From df9d500963914d4d704f3734b414fa0eafb62cb9 Mon Sep 17 00:00:00 2001
From: liuyi
Date: Sat, 7 Oct 2023 20:45:38 +0800
Subject: [PATCH] feat: spark DML test

---
 .../spark/syntax/fixtures/insertIntoTable.sql | 30 +++++++++++++++++++
 .../fixtures/insertOverwriteDirectory.sql     | 29 ++++++++++++++++++
 .../syntax/fixtures/insertOverwriteTable.sql  | 24 +++++++++++++++
 .../parser/spark/syntax/fixtures/loadData.sql | 11 +++++++
 .../spark/syntax/insertStatement.test.ts      | 21 +++++++++++++
 .../parser/spark/syntax/loadStatement.test.ts | 19 ++++++++++++
 6 files changed, 134 insertions(+)
 create mode 100644 test/parser/spark/syntax/fixtures/insertIntoTable.sql
 create mode 100644 test/parser/spark/syntax/fixtures/insertOverwriteDirectory.sql
 create mode 100644 test/parser/spark/syntax/fixtures/insertOverwriteTable.sql
 create mode 100644 test/parser/spark/syntax/fixtures/loadData.sql
 create mode 100644 test/parser/spark/syntax/insertStatement.test.ts
 create mode 100644 test/parser/spark/syntax/loadStatement.test.ts

diff --git a/test/parser/spark/syntax/fixtures/insertIntoTable.sql b/test/parser/spark/syntax/fixtures/insertIntoTable.sql
new file mode 100644
index 00000000..62fa3869
--- /dev/null
+++ b/test/parser/spark/syntax/fixtures/insertIntoTable.sql
@@ -0,0 +1,30 @@
+-- Syntax
+-- INSERT INTO [ TABLE ] table_identifier REPLACE WHERE boolean_expression query
+
+
+-- Single Row Insert Using a VALUES Clause
+INSERT INTO students VALUES ('Amy Smith', '123 Park Ave, San Jose', 111, 12.34, '-=--@#!$%%');
+
+-- Multi-Row Insert Using a VALUES Clause
+INSERT INTO students VALUES ('Bob Brown', '456 Taylor St, Cupelation', 222222), ('Cathy Johnson', '789 Race Ave, Pale Alto', 333333);
+
+-- Insert Using a SELECT Statement
+INSERT INTO students PARTITION (student_id = 444444) SELECT name, address FROM persons WHERE name = "Dora Williams";
+
+-- Insert Using a REPLACE WHERE Statement
+INSERT INTO persons REPLACE WHERE ssn = 123456789 SELECT * FROM persons2;
+
+-- Insert Using a TABLE Statement
+INSERT INTO students TABLE visiting_students;
+
+-- Insert Using a FROM Statement
+INSERT INTO students FROM applicants SELECT name, address, student_id WHERE qualified = true;
+
+-- Insert Using a Typed Date Literal for a Partition Column Value
+INSERT INTO students PARTITION (birthday = date'2019-01-02') VALUES ('Amy Smith', '123 Park Ave, San Jose');
+
+-- Insert with a column list
+INSERT INTO students (address, name, student_id) VALUES ('Hangzhou, China', 'Kent Yao', 11215016);
+
+-- Insert with both a partition spec and a column list
+INSERT INTO students PARTITION (student_id = 11215017) (address, name) VALUES ('Hangzhou, China', 'Kent Yao Jr.');
diff --git a/test/parser/spark/syntax/fixtures/insertOverwriteDirectory.sql b/test/parser/spark/syntax/fixtures/insertOverwriteDirectory.sql
new file mode 100644
index 00000000..20c440b6
--- /dev/null
+++ b/test/parser/spark/syntax/fixtures/insertOverwriteDirectory.sql
@@ -0,0 +1,29 @@
+-- Syntax
+-- INSERT OVERWRITE [ LOCAL ] DIRECTORY [ directory_path ]
+-- { spark_format | hive_format }
+-- { VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query }
+-- USING file_format [ OPTIONS ( key = val [ , ... ] ) ]
+-- [ ROW FORMAT row_format ] [ STORED AS hive_serde ]
+
+
+INSERT OVERWRITE DIRECTORY '/path/to/output/directory' SELECT * FROM your_table WHERE condition;
+
+INSERT OVERWRITE DIRECTORY '/tmp/destination'
+    USING parquet
+    OPTIONS (col1 1, col2 2, col3 'test')
+    SELECT * FROM test_table;
+
+INSERT OVERWRITE DIRECTORY
+    USING parquet
+    OPTIONS ('path' '/tmp/destination', col1 1, col2 2, col3 'test')
+    SELECT * FROM test_table;
+
+INSERT OVERWRITE LOCAL DIRECTORY '/tmp/destination'
+    USING parquet
+    OPTIONS (col1 1, col2 2, col3 'test')
+    SELECT * FROM test_table;
+
+INSERT OVERWRITE LOCAL DIRECTORY
+    USING parquet
+    OPTIONS ('path' '/tmp/destination', col1 1, col2 2, col3 'test')
+    SELECT * FROM test_table;
diff --git a/test/parser/spark/syntax/fixtures/insertOverwriteTable.sql b/test/parser/spark/syntax/fixtures/insertOverwriteTable.sql
new file mode 100644
index 00000000..d9ec937f
--- /dev/null
+++ b/test/parser/spark/syntax/fixtures/insertOverwriteTable.sql
@@ -0,0 +1,24 @@
+-- Syntax
+-- INSERT [ INTO | OVERWRITE ] [ TABLE ] table_identifier [ partition_spec ] [ ( column_list ) ] { VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query }
+
+
+-- Insert Using a VALUES Clause
+INSERT OVERWRITE students VALUES ('Ashur Hill', '456 Erica Ct, Cupelation', 111111), ('Brian Reed', '723 Kern Ave, Pale Alto', 222222);
+
+-- Insert Using a SELECT Statement
+INSERT OVERWRITE students PARTITION (student_id = 222222) SELECT name, address FROM persons WHERE name = "Dora Williams";
+
+-- Insert Using a TABLE Statement
+INSERT OVERWRITE students TABLE visiting_students;
+
+-- Insert Using a FROM Statement
+INSERT OVERWRITE students FROM applicants SELECT name, address, student_id WHERE qualified = true;
+
+-- Insert Using a Typed Date Literal for a Partition Column Value
+INSERT OVERWRITE students PARTITION (birthday = date'2019-01-02') VALUES('Jason Wang', '908 Bird St, Saratoga');
+
+-- Insert with a column list
+INSERT OVERWRITE students (address, name, student_id) VALUES ('Hangzhou, China', 'Kent Yao', 11215016);
+
+-- Insert with both a partition spec and a column list
+INSERT OVERWRITE students PARTITION (student_id = 11215016) (address, name) VALUES ('Hangzhou, China', 'Kent Yao Jr.');
diff --git a/test/parser/spark/syntax/fixtures/loadData.sql b/test/parser/spark/syntax/fixtures/loadData.sql
new file mode 100644
index 00000000..274f3feb
--- /dev/null
+++ b/test/parser/spark/syntax/fixtures/loadData.sql
@@ -0,0 +1,11 @@
+-- Syntax
+-- LOAD DATA [ LOCAL ] INPATH path [ OVERWRITE ] INTO TABLE table_identifier [ partition_spec ]
+
+
+-- Assuming the students table is in '/user/hive/warehouse/'
+LOAD DATA LOCAL INPATH '/user/hive/warehouse/students' OVERWRITE INTO TABLE test_load;
+LOAD DATA LOCAL INPATH '/path/to/datafile.csv' INTO TABLE my_table;
+LOAD DATA INPATH '/path/to/datafile.csv' INTO TABLE my_table;
+
+-- Assuming the test_partition table is in '/user/hive/warehouse/'
+LOAD DATA LOCAL INPATH '/user/hive/warehouse/test_partition/c2=2/c3=3' OVERWRITE INTO TABLE test_load_partition PARTITION (c2=2, c3=3);
diff --git a/test/parser/spark/syntax/insertStatement.test.ts b/test/parser/spark/syntax/insertStatement.test.ts
new file mode 100644
index 00000000..1bd23369
--- /dev/null
+++ b/test/parser/spark/syntax/insertStatement.test.ts
@@ -0,0 +1,21 @@
+import SparkSQL from '../../../../src/parser/spark';
+import { readSQL } from '../../../helper';
+
+
+const parser = new SparkSQL();
+
+// NOTE(review): loadData.sql is intentionally NOT listed here — LOAD DATA
+// statements are covered by loadStatement.test.ts; listing it in both files
+// would run every LOAD fixture twice under a misleading "Insert" label.
+const features = {
+    insertIntoTable: readSQL(__dirname, 'insertIntoTable.sql'),
+    insertOverwriteTable: readSQL(__dirname, 'insertOverwriteTable.sql'),
+    insertOverwriteDirectory: readSQL(__dirname, 'insertOverwriteDirectory.sql'),
+};
+
+describe('SparkSQL Insert Syntax Tests', () => {
+    Object.keys(features).forEach((key) => {
+        features[key].forEach((sql) => {
+            it(sql, () => {
+                expect(parser.validate(sql).length).toBe(0);
+            });
+        });
+    });
+});
diff --git a/test/parser/spark/syntax/loadStatement.test.ts b/test/parser/spark/syntax/loadStatement.test.ts
new file mode 100644
index 00000000..62f06949
--- /dev/null
+++ b/test/parser/spark/syntax/loadStatement.test.ts
@@ -0,0 +1,19 @@
+import SparkSQL from '../../../../src/parser/spark';
+import { readSQL } from '../../../helper';
+
+
+const parser = new SparkSQL();
+
+const features = {
+    loadData: readSQL(__dirname, 'loadData.sql'),
+};
+
+describe('SparkSQL Load Syntax Tests', () => {
+    Object.keys(features).forEach((key) => {
+        features[key].forEach((sql) => {
+            it(sql, () => {
+                expect(parser.validate(sql).length).toBe(0);
+            });
+        });
+    });
+});