From 782efffe6314e4d0aea160ee55c8540f43104921 Mon Sep 17 00:00:00 2001 From: Luis Pessoa Date: Sat, 20 Jan 2024 10:20:52 -0300 Subject: [PATCH] sql: support copy encoding utf8 syntax Fixes #114200 Adding support for the ENCODING option of COPY. - Included ENCODING in grammar file. - Processing parsed value and validating it is set to UTF8. - Removed Unimplemented syntax tests. Release note (sql change): Added support for the ENCODING option of COPY, as long as the encoding of 'UTF8' is specified. --- docs/generated/sql/bnf/stmt_block.bnf | 2 ++ pkg/sql/copy_from.go | 12 ++++++++ pkg/sql/parser/parse_test.go | 2 -- pkg/sql/parser/sql.y | 8 ++--- pkg/sql/parser/testdata/copy | 42 +++++++++++++++++++++++---- pkg/sql/sem/tree/copy.go | 13 +++++++++ 6 files changed, 68 insertions(+), 11 deletions(-) diff --git a/docs/generated/sql/bnf/stmt_block.bnf b/docs/generated/sql/bnf/stmt_block.bnf index e4ba9f19511..1820c9688d7 100644 --- a/docs/generated/sql/bnf/stmt_block.bnf +++ b/docs/generated/sql/bnf/stmt_block.bnf @@ -2085,6 +2085,7 @@ copy_options ::= | 'HEADER' | 'QUOTE' 'SCONST' | 'ESCAPE' 'SCONST' + | 'ENCODING' 'SCONST' copy_generic_options ::= 'DESTINATION' string_or_placeholder @@ -2099,6 +2100,7 @@ copy_generic_options ::= | 'HEADER' 'FALSE' | 'QUOTE' 'SCONST' | 'ESCAPE' 'SCONST' + | 'ENCODING' 'SCONST' db_object_name_component ::= name diff --git a/pkg/sql/copy_from.go b/pkg/sql/copy_from.go index 76b94cce5a7..da455a1601c 100644 --- a/pkg/sql/copy_from.go +++ b/pkg/sql/copy_from.go @@ -91,6 +91,7 @@ type copyOptions struct { delimiter byte format tree.CopyFormat null string + encoding string } // TODO(#sql-sessions): copy all pre-condition checks from the PG code @@ -184,6 +185,17 @@ func processCopyOptions( ) } + if opts.Encoding != nil { + e, err := exprEval.String(ctx, opts.Encoding) + if err != nil { + return c, err + } + if strings.ToUpper(e) != "UTF8" { + return c, pgerror.New(pgcode.FeatureNotSupported, "only 'utf8' ENCODING is supported") + } + c.encoding 
= "utf8" + } + return c, nil } diff --git a/pkg/sql/parser/parse_test.go b/pkg/sql/parser/parse_test.go index b665d3c17ec..bb4b826fff9 100644 --- a/pkg/sql/parser/parse_test.go +++ b/pkg/sql/parser/parse_test.go @@ -348,13 +348,11 @@ func TestUnimplementedSyntax(t *testing.T) { {`COPY t FROM STDIN OIDS`, 41608, `oids`, ``}, {`COPY t FROM STDIN FREEZE`, 41608, `freeze`, ``}, - {`COPY t FROM STDIN ENCODING 'utf-8'`, 41608, `encoding`, ``}, {`COPY t FROM STDIN FORCE QUOTE *`, 41608, `quote`, ``}, {`COPY t FROM STDIN FORCE NULL *`, 41608, `force_null`, ``}, {`COPY t FROM STDIN FORCE NOT NULL *`, 41608, `force_not_null`, ``}, {`COPY t FROM STDIN WITH (OIDS)`, 41608, `oids`, ``}, {`COPY t FROM STDIN (FREEZE)`, 41608, `freeze`, ``}, - {`COPY t FROM STDIN WITH (ESCAPE ',', ENCODING 'utf-8')`, 41608, `encoding`, ``}, {`COPY t FROM STDIN WITH (FORCE_QUOTE) *`, 41608, `quote`, ``}, {`COPY t FROM STDIN (FORCE_NULL) *`, 41608, `force_null`, ``}, {`COPY t FROM STDIN (HEADER, FORCE_NOT_NULL) *`, 41608, `force_not_null`, ``}, diff --git a/pkg/sql/parser/sql.y b/pkg/sql/parser/sql.y index cfd3885158d..b1dcd706a1e 100644 --- a/pkg/sql/parser/sql.y +++ b/pkg/sql/parser/sql.y @@ -4216,9 +4216,9 @@ copy_options: { return unimplementedWithIssueDetail(sqllex, 41608, "force_null") } -| ENCODING SCONST error +| ENCODING SCONST { - return unimplementedWithIssueDetail(sqllex, 41608, "encoding") + $$.val = &tree.CopyOptions{Encoding: tree.NewStrVal($2)} } copy_generic_options: @@ -4301,9 +4301,9 @@ copy_generic_options: { return unimplementedWithIssueDetail(sqllex, 41608, "force_null") } -| ENCODING SCONST error +| ENCODING SCONST { - return unimplementedWithIssueDetail(sqllex, 41608, "encoding") + $$.val = &tree.CopyOptions{Encoding: tree.NewStrVal($2)} } // %Help: CANCEL diff --git a/pkg/sql/parser/testdata/copy b/pkg/sql/parser/testdata/copy index 564c4edd526..03d0565d1d2 100644 --- a/pkg/sql/parser/testdata/copy +++ b/pkg/sql/parser/testdata/copy @@ -87,12 +87,12 @@ COPY t (a, b, c) FROM 
STDIN WITH (FORMAT CSV, DELIMITER '_', DESTINATION '_') -- COPY _ (_, _, _) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', DESTINATION 'filename') -- identifiers removed parse -COPY t (a, b, c) FROM STDIN destination = 'filename' CSV DELIMITER ' ' ESCAPE 'x' HEADER +COPY t (a, b, c) FROM STDIN destination = 'filename' CSV DELIMITER ' ' ESCAPE 'x' HEADER ENCODING 'utf8' ---- -COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', DESTINATION 'filename', ESCAPE 'x', HEADER true) -- normalized! -COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER (' '), DESTINATION ('filename'), ESCAPE ('x'), HEADER true) -- fully parenthesized -COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER '_', DESTINATION '_', ESCAPE '_', HEADER true) -- literals removed -COPY _ (_, _, _) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', DESTINATION 'filename', ESCAPE 'x', HEADER true) -- identifiers removed +COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', ENCODING 'utf8', DESTINATION 'filename', ESCAPE 'x', HEADER true) -- normalized! +COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER (' '), ENCODING ('utf8'), DESTINATION ('filename'), ESCAPE ('x'), HEADER true) -- fully parenthesized +COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER '_', ENCODING '_', DESTINATION '_', ESCAPE '_', HEADER true) -- literals removed +COPY _ (_, _, _) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', ENCODING 'utf8', DESTINATION 'filename', ESCAPE 'x', HEADER true) -- identifiers removed parse COPY t TO STDOUT @@ -388,3 +388,35 @@ at or near "explain": syntax error DETAIL: source SQL: COPY (EXPLAIN SELECT * FROM t) TO STDOUT ^ + +parse +COPY "copytab" FROM STDIN (ENCODING 'utf8') +---- +COPY copytab FROM STDIN WITH (ENCODING 'utf8') -- normalized! 
+COPY copytab FROM STDIN WITH (ENCODING ('utf8')) -- fully parenthesized +COPY copytab FROM STDIN WITH (ENCODING '_') -- literals removed +COPY _ FROM STDIN WITH (ENCODING 'utf8') -- identifiers removed + +parse +COPY "copytab" FROM STDIN (HEADER true, ESCAPE '%', ENCODING 'utf8') +---- +COPY copytab FROM STDIN WITH (ENCODING 'utf8', ESCAPE '%', HEADER true) -- normalized! +COPY copytab FROM STDIN WITH (ENCODING ('utf8'), ESCAPE ('%'), HEADER true) -- fully parenthesized +COPY copytab FROM STDIN WITH (ENCODING '_', ESCAPE '_', HEADER true) -- literals removed +COPY _ FROM STDIN WITH (ENCODING 'utf8', ESCAPE '%', HEADER true) -- identifiers removed + +parse +COPY t (a, b, c) FROM STDIN destination = 'filename' CSV DELIMITER ' ' +---- +COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', DESTINATION 'filename') -- normalized! +COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER (' '), DESTINATION ('filename')) -- fully parenthesized +COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER '_', DESTINATION '_') -- literals removed +COPY _ (_, _, _) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', DESTINATION 'filename') -- identifiers removed + +error +COPY "copytab" FROM STDIN (FORMAT csv, ENCODING 'abc', ENCODING 'def') +---- +at or near "def": syntax error: encoding option specified multiple times +DETAIL: source SQL: +COPY "copytab" FROM STDIN (FORMAT csv, ENCODING 'abc', ENCODING 'def') + ^ diff --git a/pkg/sql/sem/tree/copy.go b/pkg/sql/sem/tree/copy.go index a9af5b6667f..b801894e5b8 100644 --- a/pkg/sql/sem/tree/copy.go +++ b/pkg/sql/sem/tree/copy.go @@ -62,6 +62,7 @@ type CopyOptions struct { Escape *StrVal Header bool Quote *StrVal + Encoding *StrVal // Additional flags are needed to keep track of whether explicit default // values were already set. 
@@ -117,6 +118,12 @@ func (o *CopyOptions) Format(ctx *FmtCtx) { ctx.FormatNode(o.Delimiter) addSep = true } + if o.Encoding != nil { + maybeAddSep() + ctx.WriteString("ENCODING ") + ctx.FormatNode(o.Encoding) + addSep = true + } if o.Null != nil { maybeAddSep() ctx.WriteString("NULL ") @@ -181,6 +188,12 @@ func (o *CopyOptions) CombineWith(other *CopyOptions) error { } o.Delimiter = other.Delimiter } + if other.Encoding != nil { + if o.Encoding != nil { + return pgerror.Newf(pgcode.Syntax, "encoding option specified multiple times") + } + o.Encoding = other.Encoding + } if other.Null != nil { if o.Null != nil { return pgerror.Newf(pgcode.Syntax, "null option specified multiple times")