Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: change file type logic for create table #7477

Merged
merged 2 commits into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -679,11 +679,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
options,
} = statement;

if file_type != "CSV"
&& file_type != "JSON"
if (file_type == "PARQUET" || file_type == "AVRO" || file_type == "ARROW")
&& file_compression_type != CompressionTypeVariant::UNCOMPRESSED
{
plan_err!("File compression type can be specified for CSV/JSON files.")?;
plan_err!(
"File compression type cannot be set for PARQUET, AVRO, or ARROW files."
)?;
}

let schema = self.build_schema(columns)?;
Expand Down
5 changes: 4 additions & 1 deletion datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1850,6 +1850,7 @@ fn create_external_table_with_compression_type() {
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE BZIP2 LOCATION 'foo.csv.bz2'",
"CREATE EXTERNAL TABLE t(c1 int) STORED AS JSON COMPRESSION TYPE GZIP LOCATION 'foo.json.gz'",
"CREATE EXTERNAL TABLE t(c1 int) STORED AS JSON COMPRESSION TYPE BZIP2 LOCATION 'foo.json.bz2'",
"CREATE EXTERNAL TABLE t(c1 int) STORED AS NONSTANDARD COMPRESSION TYPE GZIP LOCATION 'foo.unk'",
];
for sql in sqls {
let expected = "CreateExternalTable: Bare { table: \"t\" }";
Expand All @@ -1862,11 +1863,13 @@ fn create_external_table_with_compression_type() {
"CREATE EXTERNAL TABLE t STORED AS AVRO COMPRESSION TYPE BZIP2 LOCATION 'foo.avro'",
"CREATE EXTERNAL TABLE t STORED AS PARQUET COMPRESSION TYPE GZIP LOCATION 'foo.parquet'",
"CREATE EXTERNAL TABLE t STORED AS PARQUET COMPRESSION TYPE BZIP2 LOCATION 'foo.parquet'",
"CREATE EXTERNAL TABLE t STORED AS ARROW COMPRESSION TYPE GZIP LOCATION 'foo.arrow'",
"CREATE EXTERNAL TABLE t STORED AS ARROW COMPRESSION TYPE BZIP2 LOCATION 'foo.arrow'",
];
for sql in sqls {
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
"Plan(\"File compression type can be specified for CSV/JSON files.\")",
"Plan(\"File compression type cannot be set for PARQUET, AVRO, or ARROW files.\")",
format!("{err:?}")
);
}
Expand Down
2 changes: 1 addition & 1 deletion docs/source/user-guide/sql/ddl.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ LOCATION <literal>

`file_type` is one of `CSV`, `PARQUET`, `AVRO` or `JSON`
Copy link
Member

@jackwener jackwener Sep 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to add the Arrow file_type, and add an example in the doc

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Proposed change in #7489


`LOCATION <literal>` specfies the location to find the data. It can be
`LOCATION <literal>` specifies the location to find the data. It can be
a path to a file or directory of partitioned files locally or on an
object store.

Expand Down