diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 655442d7e353..c05f43d04d60 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -679,11 +679,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { options, } = statement; - if file_type != "CSV" - && file_type != "JSON" + if (file_type == "PARQUET" || file_type == "AVRO" || file_type == "ARROW") && file_compression_type != CompressionTypeVariant::UNCOMPRESSED { - plan_err!("File compression type can be specified for CSV/JSON files.")?; + plan_err!( + "File compression type cannot be set for PARQUET, AVRO, or ARROW files." + )?; } let schema = self.build_schema(columns)?; diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 07112184bf59..154bd3f9a01a 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -1850,6 +1850,7 @@ fn create_external_table_with_compression_type() { "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE BZIP2 LOCATION 'foo.csv.bz2'", "CREATE EXTERNAL TABLE t(c1 int) STORED AS JSON COMPRESSION TYPE GZIP LOCATION 'foo.json.gz'", "CREATE EXTERNAL TABLE t(c1 int) STORED AS JSON COMPRESSION TYPE BZIP2 LOCATION 'foo.json.bz2'", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS NONSTANDARD COMPRESSION TYPE GZIP LOCATION 'foo.unk'", ]; for sql in sqls { let expected = "CreateExternalTable: Bare { table: \"t\" }"; @@ -1862,11 +1863,13 @@ fn create_external_table_with_compression_type() { "CREATE EXTERNAL TABLE t STORED AS AVRO COMPRESSION TYPE BZIP2 LOCATION 'foo.avro'", "CREATE EXTERNAL TABLE t STORED AS PARQUET COMPRESSION TYPE GZIP LOCATION 'foo.parquet'", "CREATE EXTERNAL TABLE t STORED AS PARQUET COMPRESSION TYPE BZIP2 LOCATION 'foo.parquet'", + "CREATE EXTERNAL TABLE t STORED AS ARROW COMPRESSION TYPE GZIP LOCATION 'foo.arrow'", + "CREATE EXTERNAL TABLE t STORED AS ARROW COMPRESSION TYPE BZIP2 LOCATION 'foo.arrow'", ]; for sql in sqls { let err = logical_plan(sql).expect_err("query should have failed"); assert_eq!( - "Plan(\"File compression type can be specified for CSV/JSON files.\")", + "Plan(\"File compression type cannot be set for PARQUET, AVRO, or ARROW files.\")", format!("{err:?}") ); } diff --git a/docs/source/user-guide/sql/ddl.md b/docs/source/user-guide/sql/ddl.md index f566b8342ec1..751159c305fc 100644 --- a/docs/source/user-guide/sql/ddl.md +++ b/docs/source/user-guide/sql/ddl.md @@ -79,7 +79,7 @@ LOCATION `file_type` is one of `CSV`, `PARQUET`, `AVRO` or `JSON` -`LOCATION ` specfies the location to find the data. It can be +`LOCATION ` specifies the location to find the data. It can be a path to a file or directory of partitioned files locally or on an object store.