From 3f2c5afed4be6bd8a30306645c69fdd6429b2622 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Mon, 9 Dec 2024 13:45:42 +0100 Subject: [PATCH 01/40] Initial trial for new read_csv syntax --- include/pgduckdb/pgduckdb_metadata_cache.hpp | 3 + include/pgduckdb/pgduckdb_ruleutils.h | 2 + sql/pg_duckdb--0.2.0--0.3.0.sql | 766 +++++++++++++++++++ src/pgduckdb_hooks.cpp | 1 + src/pgduckdb_metadata_cache.cpp | 29 + src/pgduckdb_options.cpp | 109 +++ src/pgduckdb_ruleutils.cpp | 23 + src/vendor/pg_ruleutils_17.c | 62 +- 8 files changed, 982 insertions(+), 13 deletions(-) diff --git a/include/pgduckdb/pgduckdb_metadata_cache.hpp b/include/pgduckdb/pgduckdb_metadata_cache.hpp index c8f9c564..4ff778bd 100644 --- a/include/pgduckdb/pgduckdb_metadata_cache.hpp +++ b/include/pgduckdb/pgduckdb_metadata_cache.hpp @@ -7,6 +7,9 @@ bool IsExtensionRegistered(); bool IsDuckdbOnlyFunction(Oid function_oid); uint64 CacheVersion(); Oid ExtensionOid(); +Oid SchemaOid(); +Oid DuckdbRowOid(); +Oid DuckdbUnresolvedTypeOid(); Oid DuckdbTableAmOid(); bool IsMotherDuckEnabled(); bool IsMotherDuckEnabledAnywhere(); diff --git a/include/pgduckdb/pgduckdb_ruleutils.h b/include/pgduckdb/pgduckdb_ruleutils.h index 5032670b..57f55e62 100644 --- a/include/pgduckdb/pgduckdb_ruleutils.h +++ b/include/pgduckdb/pgduckdb_ruleutils.h @@ -7,3 +7,5 @@ char *pgduckdb_get_tabledef(Oid relation_id); bool pgduckdb_is_not_default_expr(Node *node, void *context); List *pgduckdb_db_and_schema(const char *postgres_schema_name, bool is_duckdb_table); const char *pgduckdb_db_and_schema_string(const char *postgres_schema_name, bool is_duckdb_table); +bool pgduckdb_var_is_duckdb_row(Var *var); +bool pgduckdb_func_returns_duckdb_row(RangeTblFunction *rtfunc); diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql index a23bb628..a40ef052 100644 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql +++ b/sql/pg_duckdb--0.2.0--0.3.0.sql @@ -17,3 +17,769 @@ CREATE AGGREGATE 
@extschema@.approx_count_distinct(anyelement) CREATE DOMAIN pg_catalog.blob AS bytea; COMMENT ON DOMAIN pg_catalog.blob IS 'The DuckDB BLOB alias for BYTEA'; + +CREATE TYPE duckdb.row; +CREATE TYPE duckdb.unresolved_type; + +-- TODO: Should we remove IMMUTABLE STRICT? +CREATE FUNCTION duckdb.row_in(cstring) RETURNS duckdb.row AS 'MODULE_PATHNAME', 'duckdb_row_in' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION duckdb.row_out(duckdb.row) RETURNS cstring AS 'MODULE_PATHNAME', 'duckdb_row_out' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION duckdb.row_subscript(internal) RETURNS internal AS 'MODULE_PATHNAME', 'duckdb_row_subscript' LANGUAGE C IMMUTABLE STRICT; +CREATE TYPE duckdb.row ( + INTERNALLENGTH = VARIABLE, + INPUT = duckdb.row_in, + OUTPUT = duckdb.row_out, + SUBSCRIPT = duckdb.row_subscript +); + +CREATE FUNCTION duckdb.unresolved_type_in(cstring) RETURNS duckdb.unresolved_type AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_in' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION duckdb.unresolved_type_out(duckdb.unresolved_type) RETURNS cstring AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_out' LANGUAGE C IMMUTABLE STRICT; +CREATE TYPE duckdb.unresolved_type ( + INTERNALLENGTH = VARIABLE, + INPUT = duckdb.unresolved_type_in, + OUTPUT = duckdb.unresolved_type_out +); + +-- Dummy functions for binary operators with unresolved type on the lefthand +CREATE FUNCTION duckdb_unresolved_type_operator(duckdb.unresolved_type, "any") RETURNS duckdb.unresolved_type AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION duckdb_unresolved_type_operator_bool(duckdb.unresolved_type, "any") RETURNS boolean AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; + +-- Dummy functions for binary operators with unresolved type on the righthand +CREATE FUNCTION duckdb_unresolved_type_operator("any", duckdb.unresolved_type) RETURNS duckdb.unresolved_type AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' 
LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION duckdb_unresolved_type_operator_bool("any", duckdb.unresolved_type) RETURNS boolean AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; + +-- Dummy function for prefix/unary operators +CREATE FUNCTION duckdb_unresolved_type_operator(duckdb.unresolved_type) RETURNS duckdb.unresolved_type AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; + +-- prefix operators + and - +CREATE OPERATOR + ( + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator +); + +CREATE OPERATOR - ( + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator +); + +-- Basic comparison operators +CREATE OPERATOR <= ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = "any", + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR <= ( + LEFTARG = "any", + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR < ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = "any", + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR < ( + LEFTARG = "any", + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR <> ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = "any", + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR <> ( + LEFTARG = "any", + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR = ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = "any", + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR = ( + LEFTARG = "any", + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR > ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = "any", + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR > ( + LEFTARG = "any", + RIGHTARG = 
duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR >= ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = "any", + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR >= ( + LEFTARG = "any", + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + +-- binary math operators +CREATE OPERATOR + ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = "any", + FUNCTION = duckdb_unresolved_type_operator +); + +CREATE OPERATOR + ( + LEFTARG = "any", + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator +); + +CREATE OPERATOR - ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = "any", + FUNCTION = duckdb_unresolved_type_operator +); + +CREATE OPERATOR - ( + LEFTARG = "any", + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator +); + +CREATE OPERATOR * ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = "any", + FUNCTION = duckdb_unresolved_type_operator +); + +CREATE OPERATOR * ( + LEFTARG = "any", + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator +); + +CREATE OPERATOR / ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = "any", + FUNCTION = duckdb_unresolved_type_operator +); + +CREATE OPERATOR / ( + LEFTARG = "any", + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator +); + +-- Self comparison operators and necessary dummy function, needed for GROUP BY and ORDER BY +CREATE FUNCTION duckdb_unresolved_type_operator_bool(duckdb.unresolved_type, duckdb.unresolved_type) RETURNS boolean AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR < ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR <= ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + 
+CREATE OPERATOR = ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR > ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + +CREATE OPERATOR >= ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + +-- TODO: use other dummy function with better error +CREATE FUNCTION duckdb_unresolved_type_btree_cmp(duckdb.unresolved_type, duckdb.unresolved_type) RETURNS int AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; + +-- Create a B-tree operator class for duckdb.unresolved_type, so it can be used in ORDER BY +CREATE OPERATOR CLASS duckdb_unresolved_type_ops +DEFAULT FOR TYPE duckdb.unresolved_type USING btree AS + OPERATOR 1 < (duckdb.unresolved_type, duckdb.unresolved_type), + OPERATOR 2 <= (duckdb.unresolved_type, duckdb.unresolved_type), + OPERATOR 3 = (duckdb.unresolved_type, duckdb.unresolved_type), + OPERATOR 4 >= (duckdb.unresolved_type, duckdb.unresolved_type), + OPERATOR 5 > (duckdb.unresolved_type, duckdb.unresolved_type), + FUNCTION 1 duckdb_unresolved_type_btree_cmp(duckdb.unresolved_type, duckdb.unresolved_type); + +CREATE FUNCTION duckdb_unresolved_type_hash(duckdb.unresolved_type) RETURNS int AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; + +-- Create a hash operator class for duckdb.unresolved_type, so it can be used in GROUP BY +CREATE OPERATOR CLASS duckdb_unresolved_type_hash_ops +DEFAULT FOR TYPE duckdb.unresolved_type USING hash AS + OPERATOR 1 = (duckdb.unresolved_type, duckdb.unresolved_type), + FUNCTION 1 duckdb_unresolved_type_hash(duckdb.unresolved_type); + +-- TODO: create dedicated dummy C functions for these +-- +-- State transition function +CREATE FUNCTION duckdb_unresolved_type_state_trans(state duckdb.unresolved_type, 
value duckdb.unresolved_type) +RETURNS duckdb.unresolved_type AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION duckdb_unresolved_type_state_trans(state duckdb.unresolved_type, value duckdb.unresolved_type, other "any") +RETURNS duckdb.unresolved_type AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION duckdb_unresolved_type_state_trans(state duckdb.unresolved_type, value duckdb.unresolved_type, other "any", another "any") +RETURNS duckdb.unresolved_type AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; + +-- Final function +CREATE FUNCTION duckdb_unresolved_type_final(state duckdb.unresolved_type) +RETURNS duckdb.unresolved_type AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; + +-- Aggregate functions + +-- NOTE: any_value is already defined in core in PG16+, so we don't create it. +-- People using older Postgres versions can manually implement the aggregate if +-- they really require it.
+ +CREATE AGGREGATE arbitrary(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE arg_max(duckdb.unresolved_type, "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE arg_max(duckdb.unresolved_type, "any", "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE arg_max_null(duckdb.unresolved_type, "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE arg_min(duckdb.unresolved_type, "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE arg_min(duckdb.unresolved_type, "any", "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE arg_min_null(duckdb.unresolved_type, "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE array_agg(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE avg(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE bit_and(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE bit_or(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = 
duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE bit_xor(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE bitstring_agg(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE bool_and(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE bool_or(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +-- NOTE: count(*) and count(duckdb.unresolved_type) are already defined in the core + +CREATE AGGREGATE favg(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE first(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE fsum(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE geomean(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE histogram(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE histogram(duckdb.unresolved_type, "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE 
histogram_exact(duckdb.unresolved_type, "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE last(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE list(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE max(duckdb.unresolved_type, "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE max_by(duckdb.unresolved_type, "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE max_by(duckdb.unresolved_type, "any", "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE min(duckdb.unresolved_type, "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE min_by(duckdb.unresolved_type, "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE min_by(duckdb.unresolved_type, "any", "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE product(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + +CREATE AGGREGATE string_agg(duckdb.unresolved_type, "any") ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC 
= duckdb_unresolved_type_final +); + +CREATE AGGREGATE sum(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + + +-- "AS ASSIGNMENT" cast to boolean for unresolved types, so that they can be +-- used as the final expression in a WHERE clause +CREATE CAST (duckdb.unresolved_type AS boolean) + WITH INOUT + AS ASSIGNMENT; + +-- Regular casts for all our supported types +-- BOOLEAN (skipping plain boolean because it's right above) +CREATE CAST (duckdb.unresolved_type AS boolean[]) + WITH INOUT; + +-- TINYINT (CHAR) +CREATE CAST (duckdb.unresolved_type AS char) + WITH INOUT; +CREATE CAST (duckdb.unresolved_type AS char[]) + WITH INOUT; + +-- SMALLINT (INT2) +CREATE CAST (duckdb.unresolved_type AS smallint) + WITH INOUT; +CREATE CAST (duckdb.unresolved_type AS smallint[]) + WITH INOUT; + +-- INTEGER (INT4) +CREATE CAST (duckdb.unresolved_type AS integer) + WITH INOUT; +CREATE CAST (duckdb.unresolved_type AS integer[]) + WITH INOUT; + +-- BIGINT (INT8) +CREATE CAST (duckdb.unresolved_type AS bigint) + WITH INOUT; +CREATE CAST (duckdb.unresolved_type AS bigint[]) + WITH INOUT; + +-- VARCHAR (BPCHAR, TEXT, VARCHAR) +CREATE CAST (duckdb.unresolved_type AS varchar) + WITH INOUT; +CREATE CAST (duckdb.unresolved_type AS varchar[]) + WITH INOUT; + +-- DATE +CREATE CAST (duckdb.unresolved_type AS date) + WITH INOUT; +CREATE CAST (duckdb.unresolved_type AS date[]) + WITH INOUT; + +-- TIMESTAMP +CREATE CAST (duckdb.unresolved_type AS timestamp) + WITH INOUT; +CREATE CAST (duckdb.unresolved_type AS timestamp[]) + WITH INOUT; + +-- TIMESTAMP WITH TIME ZONE +CREATE CAST (duckdb.unresolved_type AS timestamptz) + WITH INOUT; + +-- FLOAT +CREATE CAST (duckdb.unresolved_type AS real) + WITH INOUT; +CREATE CAST (duckdb.unresolved_type AS real[]) + WITH INOUT; + +-- DOUBLE +CREATE CAST (duckdb.unresolved_type AS double precision) + WITH INOUT; +CREATE CAST (duckdb.unresolved_type AS double
precision[]) + WITH INOUT; + +-- NUMERIC (DECIMAL) +CREATE CAST (duckdb.unresolved_type AS numeric) + WITH INOUT; +CREATE CAST (duckdb.unresolved_type AS numeric[]) + WITH INOUT; + +-- UUID +CREATE CAST (duckdb.unresolved_type AS uuid) + WITH INOUT; +CREATE CAST (duckdb.unresolved_type AS uuid[]) + WITH INOUT; + +-- JSON +CREATE CAST (duckdb.unresolved_type AS json) + WITH INOUT; +CREATE CAST (duckdb.unresolved_type AS json[]) + WITH INOUT; + +-- read_parquet function for single path +DROP FUNCTION @extschema@.read_parquet(path text, binary_as_string BOOLEAN, + filename BOOLEAN, + file_row_number BOOLEAN, + hive_partitioning BOOLEAN, + union_by_name BOOLEAN); +CREATE FUNCTION @extschema@.read_parquet(path text, binary_as_string BOOLEAN DEFAULT FALSE, + filename BOOLEAN DEFAULT FALSE, + file_row_number BOOLEAN DEFAULT FALSE, + hive_partitioning BOOLEAN DEFAULT FALSE, + union_by_name BOOLEAN DEFAULT FALSE) +RETURNS SETOF duckdb.row +SET search_path = pg_catalog, pg_temp +AS 'MODULE_PATHNAME', 'duckdb_only_function' +LANGUAGE C; + +-- read_parquet function for array of paths +DROP FUNCTION @extschema@.read_parquet(path text[], binary_as_string BOOLEAN, + filename BOOLEAN, + file_row_number BOOLEAN, + hive_partitioning BOOLEAN, + union_by_name BOOLEAN); +CREATE FUNCTION @extschema@.read_parquet(path text[], binary_as_string BOOLEAN DEFAULT FALSE, + filename BOOLEAN DEFAULT FALSE, + file_row_number BOOLEAN DEFAULT FALSE, + hive_partitioning BOOLEAN DEFAULT FALSE, + union_by_name BOOLEAN DEFAULT FALSE) +RETURNS SETOF duckdb.row +SET search_path = pg_catalog, pg_temp +AS 'MODULE_PATHNAME', 'duckdb_only_function' +LANGUAGE C; + +-- read_csv function for single path +DROP FUNCTION @extschema@.read_csv(path text, all_varchar BOOLEAN, + allow_quoted_nulls BOOLEAN, + auto_detect BOOLEAN, + auto_type_candidates TEXT[], + compression VARCHAR, + dateformat VARCHAR, + decimal_separator VARCHAR, + delim VARCHAR, + escape VARCHAR, + filename BOOLEAN, + force_not_null TEXT[], + 
header BOOLEAN, + hive_partitioning BOOLEAN, + ignore_errors BOOLEAN, + max_line_size BIGINT, + names TEXT[], + new_line VARCHAR, + normalize_names BOOLEAN, + null_padding BOOLEAN, + nullstr TEXT[], + parallel BOOLEAN, + quote VARCHAR, + sample_size BIGINT, + sep VARCHAR, + skip BIGINT, + timestampformat VARCHAR, + types TEXT[], + union_by_name BOOLEAN); +CREATE FUNCTION @extschema@.read_csv(path text, all_varchar BOOLEAN DEFAULT FALSE, + allow_quoted_nulls BOOLEAN DEFAULT TRUE, + auto_detect BOOLEAN DEFAULT TRUE, + auto_type_candidates TEXT[] DEFAULT ARRAY[]::TEXT[], + compression VARCHAR DEFAULT 'auto', + dateformat VARCHAR DEFAULT '', + decimal_separator VARCHAR DEFAULT '.', + delim VARCHAR DEFAULT ',', + escape VARCHAR DEFAULT '"', + filename BOOLEAN DEFAULT FALSE, + force_not_null TEXT[] DEFAULT ARRAY[]::TEXT[], + header BOOLEAN DEFAULT FALSE, + hive_partitioning BOOLEAN DEFAULT FALSE, + ignore_errors BOOLEAN DEFAULT FALSE, + max_line_size BIGINT DEFAULT 2097152, + names TEXT[] DEFAULT ARRAY[]::TEXT[], + new_line VARCHAR DEFAULT '', + normalize_names BOOLEAN DEFAULT FALSE, + null_padding BOOLEAN DEFAULT FALSE, + nullstr TEXT[] DEFAULT ARRAY[]::TEXT[], + parallel BOOLEAN DEFAULT FALSE, + quote VARCHAR DEFAULT '"', + sample_size BIGINT DEFAULT 20480, + sep VARCHAR DEFAULT ',', + skip BIGINT DEFAULT 0, + timestampformat VARCHAR DEFAULT '', + types TEXT[] DEFAULT ARRAY[]::TEXT[], + union_by_name BOOLEAN DEFAULT FALSE) +RETURNS SETOF duckdb.row +SET search_path = pg_catalog, pg_temp +AS 'MODULE_PATHNAME', 'duckdb_only_function' +LANGUAGE C; + +-- read_csv function for array of paths +DROP FUNCTION @extschema@.read_csv(path text[], all_varchar BOOLEAN, + allow_quoted_nulls BOOLEAN, + auto_detect BOOLEAN, + auto_type_candidates TEXT[], + compression VARCHAR, + dateformat VARCHAR, + decimal_separator VARCHAR, + delim VARCHAR, + escape VARCHAR, + filename BOOLEAN, + force_not_null TEXT[], + header BOOLEAN, + hive_partitioning BOOLEAN, + ignore_errors BOOLEAN, + 
max_line_size BIGINT, + names TEXT[], + new_line VARCHAR, + normalize_names BOOLEAN, + null_padding BOOLEAN, + nullstr TEXT[], + parallel BOOLEAN, + quote VARCHAR, + sample_size BIGINT, + sep VARCHAR, + skip BIGINT, + timestampformat VARCHAR, + types TEXT[], + union_by_name BOOLEAN); +CREATE FUNCTION @extschema@.read_csv(path text[], all_varchar BOOLEAN DEFAULT FALSE, + allow_quoted_nulls BOOLEAN DEFAULT TRUE, + auto_detect BOOLEAN DEFAULT TRUE, + auto_type_candidates TEXT[] DEFAULT ARRAY[]::TEXT[], + compression VARCHAR DEFAULT 'auto', + dateformat VARCHAR DEFAULT '', + decimal_separator VARCHAR DEFAULT '.', + delim VARCHAR DEFAULT ',', + escape VARCHAR DEFAULT '"', + filename BOOLEAN DEFAULT FALSE, + force_not_null TEXT[] DEFAULT ARRAY[]::TEXT[], + header BOOLEAN DEFAULT FALSE, + hive_partitioning BOOLEAN DEFAULT FALSE, + ignore_errors BOOLEAN DEFAULT FALSE, + max_line_size BIGINT DEFAULT 2097152, + names TEXT[] DEFAULT ARRAY[]::TEXT[], + new_line VARCHAR DEFAULT '', + normalize_names BOOLEAN DEFAULT FALSE, + null_padding BOOLEAN DEFAULT FALSE, + nullstr TEXT[] DEFAULT ARRAY[]::TEXT[], + parallel BOOLEAN DEFAULT FALSE, + quote VARCHAR DEFAULT '"', + sample_size BIGINT DEFAULT 20480, + sep VARCHAR DEFAULT ',', + skip BIGINT DEFAULT 0, + timestampformat VARCHAR DEFAULT '', + types TEXT[] DEFAULT ARRAY[]::TEXT[], + union_by_name BOOLEAN DEFAULT FALSE) +RETURNS SETOF duckdb.row +SET search_path = pg_catalog, pg_temp +AS 'MODULE_PATHNAME', 'duckdb_only_function' +LANGUAGE C; + +-- iceberg_scan function +DROP FUNCTION @extschema@.iceberg_scan(path text, allow_moved_paths BOOLEAN, + mode TEXT, + metadata_compression_codec TEXT, + skip_schema_inference BOOLEAN, + version TEXT, + version_name_format TEXT); +CREATE FUNCTION @extschema@.iceberg_scan(path text, allow_moved_paths BOOLEAN DEFAULT FALSE, + mode TEXT DEFAULT '', + metadata_compression_codec TEXT DEFAULT 'none', + skip_schema_inference BOOLEAN DEFAULT FALSE, + version TEXT DEFAULT 'version-hint.text', + 
version_name_format TEXT DEFAULT 'v%s%s.metadata.json,%s%s.metadata.json') +RETURNS SETOF duckdb.row +SET search_path = pg_catalog, pg_temp +AS 'MODULE_PATHNAME', 'duckdb_only_function' +LANGUAGE C; + +-- delta_scan function +DROP FUNCTION @extschema@.delta_scan(path text); +CREATE FUNCTION @extschema@.delta_scan(path text) +RETURNS SETOF duckdb.row +SET search_path = pg_catalog, pg_temp +AS 'MODULE_PATHNAME', 'duckdb_only_function' +LANGUAGE C; + +-- read_json function for single path +DROP FUNCTION @extschema@.read_json(path text, auto_detect BOOLEAN, + compression VARCHAR, + dateformat VARCHAR, + format VARCHAR, + ignore_errors BOOLEAN, + maximum_depth BIGINT, + maximum_object_size INT, + records VARCHAR, + sample_size BIGINT, + timestampformat VARCHAR, + union_by_name BOOLEAN); +CREATE FUNCTION @extschema@.read_json(path text, auto_detect BOOLEAN DEFAULT FALSE, + compression VARCHAR DEFAULT 'auto', + dateformat VARCHAR DEFAULT 'iso', + format VARCHAR DEFAULT 'array', + ignore_errors BOOLEAN DEFAULT FALSE, + maximum_depth BIGINT DEFAULT -1, + maximum_object_size INT DEFAULT 16777216, + records VARCHAR DEFAULT 'records', + sample_size BIGINT DEFAULT 20480, + timestampformat VARCHAR DEFAULT 'iso', + union_by_name BOOLEAN DEFAULT FALSE) +RETURNS SETOF duckdb.row +SET search_path = pg_catalog, pg_temp +AS 'MODULE_PATHNAME', 'duckdb_only_function' +LANGUAGE C; + +-- read_json function for array of paths +DROP FUNCTION @extschema@.read_json(path text[], auto_detect BOOLEAN, + compression VARCHAR, + dateformat VARCHAR, + format VARCHAR, + ignore_errors BOOLEAN, + maximum_depth BIGINT, + maximum_object_size INT, + records VARCHAR, + sample_size BIGINT, + timestampformat VARCHAR, + union_by_name BOOLEAN); +CREATE FUNCTION @extschema@.read_json(path text[], auto_detect BOOLEAN DEFAULT FALSE, + compression VARCHAR DEFAULT 'auto', + dateformat VARCHAR DEFAULT 'iso', + format VARCHAR DEFAULT 'array', + ignore_errors BOOLEAN DEFAULT FALSE, + maximum_depth BIGINT DEFAULT -1, + 
maximum_object_size INT DEFAULT 16777216, + records VARCHAR DEFAULT 'records', + sample_size BIGINT DEFAULT 20480, + timestampformat VARCHAR DEFAULT 'iso', + union_by_name BOOLEAN DEFAULT FALSE) +RETURNS SETOF duckdb.row +SET search_path = pg_catalog, pg_temp +AS 'MODULE_PATHNAME', 'duckdb_only_function' +LANGUAGE C; diff --git a/src/pgduckdb_hooks.cpp b/src/pgduckdb_hooks.cpp index bc3d921f..dc0dbf47 100644 --- a/src/pgduckdb_hooks.cpp +++ b/src/pgduckdb_hooks.cpp @@ -189,6 +189,7 @@ DuckdbPlannerHook_Cpp(Query *parse, const char *query_string, int cursor_options if (pgduckdb::IsExtensionRegistered()) { if (NeedsDuckdbExecution(parse)) { IsAllowedStatement(parse, true); + pprint(parse); return DuckdbPlanNode(parse, query_string, cursor_options, bound_params, true); } else if (duckdb_force_execution && IsAllowedStatement(parse) && ContainsFromClause(parse)) { diff --git a/src/pgduckdb_metadata_cache.cpp b/src/pgduckdb_metadata_cache.cpp index 4d812930..3fac74af 100644 --- a/src/pgduckdb_metadata_cache.cpp +++ b/src/pgduckdb_metadata_cache.cpp @@ -49,6 +49,12 @@ struct { bool installed; /* The Postgres OID of the pg_duckdb extension. 
*/ Oid extension_oid; + /* The OID of the duckdb schema */ + Oid schema_oid; + /* The OID of the duckdb.row type */ + Oid row_oid; + /* The OID of the duckdb.unresolved_type */ + Oid unresolved_type_oid; /* The OID of the duckdb Table Access Method */ Oid table_am_oid; /* The OID of the duckdb.motherduck_postgres_database */ @@ -174,6 +180,11 @@ IsExtensionRegistered() { BuildDuckdbOnlyFunctions(); cache.table_am_oid = GetSysCacheOid1(AMNAME, Anum_pg_am_oid, CStringGetDatum("duckdb")); + cache.schema_oid = get_namespace_oid("duckdb", false); + cache.row_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("row"), cache.schema_oid); + cache.unresolved_type_oid = + GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("unresolved_type"), cache.schema_oid); + cache.motherduck_postgres_database_oid = get_database_oid(duckdb_motherduck_postgres_database, false); if (duckdb_postgres_role[0] != '\0') { @@ -220,6 +231,24 @@ ExtensionOid() { return cache.extension_oid; } +Oid +SchemaOid() { + Assert(cache.valid); + return cache.schema_oid; +} + +Oid +DuckdbRowOid() { + Assert(cache.valid); + return cache.row_oid; +} + +Oid +DuckdbUnresolvedTypeOid() { + Assert(cache.valid); + return cache.unresolved_type_oid; +} + Oid DuckdbTableAmOid() { Assert(cache.valid); diff --git a/src/pgduckdb_options.cpp b/src/pgduckdb_options.cpp index bc4b81a3..0cb90d18 100644 --- a/src/pgduckdb_options.cpp +++ b/src/pgduckdb_options.cpp @@ -17,16 +17,24 @@ extern "C" { #include "access/xact.h" #include "executor/spi.h" #include "catalog/indexing.h" +#include "parser/parse_coerce.h" +#include "parser/parse_node.h" +#include "parser/parse_expr.h" +#include "nodes/subscripting.h" +#include "nodes/nodeFuncs.h" #include "catalog/namespace.h" #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" + +#include "nodes/print.h" } #include "pgduckdb/pgduckdb_options.hpp" #include "pgduckdb/pgduckdb_duckdb.hpp" #include 
"pgduckdb/pgduckdb_xact.hpp" +#include "pgduckdb/pgduckdb_metadata_cache.hpp" #include "pgduckdb/utility/cpp_wrapper.hpp" namespace pgduckdb { @@ -365,4 +373,105 @@ DECLARE_PG_FUNCTION(pgduckdb_recycle_ddb) { PG_RETURN_BOOL(true); } +void +DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice, + bool isAssignment) { + if (isAssignment) { + elog(ERROR, "Assignment to duckdb.row is not supported"); + } + + if (isSlice) { + elog(ERROR, "Creating a slice out of duckdb.row is not supported"); + } + + if (indirection == NIL) { + elog(ERROR, "Subscripting duckdb.row with an empty subscript is not supported"); + } + + if (list_length(indirection) != 1) { + /* TODO: Fix this, if the column contains an array we should be able to subscript that type */ + elog(ERROR, "Subscripting duckdb.row is only supported with a single subscript"); + } + pprint(indirection); + + // Transform each subscript expression + foreach_node(A_Indices, subscript, indirection) { + Assert(!subscript->is_slice); + Assert(subscript->uidx); + + Node *subscript_expr = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind); + pprint(subscript_expr); + int expr_location = exprLocation(subscript->uidx); + Oid subscript_expr_type = exprType(subscript_expr); + + Node *coerced_expr = coerce_to_target_type(pstate, subscript_expr, subscript_expr_type, TEXTOID, -1, + COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, expr_location); + if (!coerced_expr) { + ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must have text type"), + parser_errposition(pstate, expr_location))); + } + + if (!IsA(subscript_expr, Const)) { + pprint(subscript_expr); + ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must be a constant"), + parser_errposition(pstate, expr_location))); + } + sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, coerced_expr); + } + + /* TODO: Trigger cache population, probably we should 
do this somewhere else */ + pgduckdb::IsExtensionRegistered(); + + // Set the result type of the subscripting operation + sbsref->refrestype = pgduckdb::DuckdbUnresolvedTypeOid(); + sbsref->reftypmod = -1; +} + +void +DuckdbRowSubscriptExecSetup(const SubscriptingRef * /*sbsref*/, SubscriptingRefState * /*sbsrefstate*/, + SubscriptExecSteps * /*exprstate*/) { + elog(ERROR, "Subscripting duckdb.row is not supported in the Postgres Executor"); +} + +static SubscriptRoutines duckdb_subscript_routines = { + .transform = DuckdbRowSubscriptTransform, + .exec_setup = DuckdbRowSubscriptExecSetup, + .fetch_strict = false, + .fetch_leakproof = true, + .store_leakproof = true, +}; + +DECLARE_PG_FUNCTION(duckdb_row_in) { + elog(ERROR, "Creating the duckdb.row type is not supported"); +} + +DECLARE_PG_FUNCTION(duckdb_row_out) { + elog(ERROR, "Converting a duckdb.row to a string is not supported"); +} + +DECLARE_PG_FUNCTION(duckdb_unresolved_type_in) { + elog(ERROR, "Creating the duckdb.unresolved_type type is not supported"); +} + +DECLARE_PG_FUNCTION(duckdb_unresolved_type_out) { + elog(ERROR, "Converting a duckdb.unresolved_type to a string is not supported"); +} + +DECLARE_PG_FUNCTION(duckdb_get_row_field_operator) { + elog(ERROR, "The 'get field' operator for duckdb.row cannot be executed by Postgres"); +} + +DECLARE_PG_FUNCTION(duckdb_row_subscript) { + PG_RETURN_POINTER(&duckdb_subscript_routines); +} + +DECLARE_PG_FUNCTION(duckdb_unresolved_type_operator) { + elog(ERROR, "Unresolved duckdb types cannot be used by the Postgres executor"); +} + +DECLARE_PG_FUNCTION(duckdb_only_function) { + char *function_name = DatumGetCString(DirectFunctionCall1(regprocout, fcinfo->flinfo->fn_oid)); + elog(ERROR, "Function '%s' only works with DuckDB execution", function_name); +} + } // extern "C" diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index 5624b586..4d9e1e5a 100644 --- a/src/pgduckdb_ruleutils.cpp +++ b/src/pgduckdb_ruleutils.cpp @@ -38,6 +38,29 @@ 
pgduckdb_function_name(Oid function_oid) { return psprintf("system.main.%s", quote_identifier(func_name)); } +bool +pgduckdb_var_is_duckdb_row(Var *var) { + if (!var) { + return false; + } + return var->vartype == pgduckdb::DuckdbRowOid(); +} + +bool +pgduckdb_func_returns_duckdb_row(RangeTblFunction *rtfunc) { + if (!rtfunc) { + return false; + } + + if (!IsA(rtfunc->funcexpr, FuncExpr)) { + return false; + } + + FuncExpr *func_expr = castNode(FuncExpr, rtfunc->funcexpr); + + return func_expr->funcresulttype == pgduckdb::DuckdbRowOid(); +} + /* * Given a postgres schema name, this returns a list of two elements: the first * is the DuckDB database name and the second is the duckdb schema name. These diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index b9b78a83..afcac88a 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -6071,8 +6071,10 @@ get_target_list(List *targetList, deparse_context *context, * directly so that we can tell it to do the right thing, and so that * we can get the attribute name which is the default AS label. 
*/ + Var *var = NULL; if (tle->expr && (IsA(tle->expr, Var))) { + var = (Var *) tle->expr; attname = get_variable((Var *) tle->expr, 0, true, context); } else @@ -6099,7 +6101,7 @@ get_target_list(List *targetList, deparse_context *context, colname = tle->resname; /* Show AS unless the column's name is correct as-is */ - if (colname) /* resname could be NULL */ + if (colname && !pgduckdb_var_is_duckdb_row(var)) { if (attname == NULL || strcmp(attname, colname) != 0) appendStringInfo(&targetbuf, " AS %s", quote_identifier(colname)); @@ -7504,17 +7506,35 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) if (attnum > colinfo->num_cols) elog(ERROR, "invalid attnum %d for relation \"%s\"", attnum, rte->eref->aliasname); - attname = colinfo->colnames[attnum - 1]; + if (pgduckdb_var_is_duckdb_row(var)) { + if (istoplevel) { + /* If the duckdb row is at the top level target list of a + * select, then we want to generate r.*, to unpack all the + * columns instead of returning a STRUCT from the query. */ + attname = NULL; + } else { + /* In any other case, we want to simply use the alias of the + * TargetEntry, without the column name that postgres gave to + * it. Because even though according to the Postgres parser we + * are referencing a column of type "duckdb.row", that column + * won't actually exist in the DuckDB query. + */ + attname = refname; + refname = NULL; + } + } else { + attname = colinfo->colnames[attnum - 1]; - /* - * If we find a Var referencing a dropped column, it seems better to - * print something (anything) than to fail. In general this should - * not happen, but it used to be possible for some cases involving - * functions returning named composite types, and perhaps there are - * still bugs out there. - */ - if (attname == NULL) - attname = "?dropped?column?"; + /* + * If we find a Var referencing a dropped column, it seems better to + * print something (anything) than to fail. 
In general this should + * not happen, but it used to be possible for some cases involving + * functions returning named composite types, and perhaps there are + * still bugs out there. + */ + if (attname == NULL) + attname = "?dropped?column?"; + } } else { @@ -7532,7 +7552,7 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) else { appendStringInfoChar(buf, '*'); - if (istoplevel) + if (istoplevel && !pgduckdb_var_is_duckdb_row(var)) appendStringInfo(buf, "::%s", format_type_with_typemod(var->vartype, var->vartypmod)); @@ -9015,6 +9035,19 @@ get_rule_expr(Node *node, deparse_context *context, appendStringInfoString(buf, " := "); get_rule_expr(refassgnexpr, context, showimplicit); } + else if (IsA(sbsref->refexpr, Var) && pgduckdb_var_is_duckdb_row((Var*) sbsref->refexpr)) { + Assert(sbsref->refupperindexpr); + Assert(!sbsref->reflowerindexpr); + Assert(list_length(sbsref->refupperindexpr) == 1); + Oid typoutput; + bool typIsVarlena; + Const *constval = castNode(Const, linitial(sbsref->refupperindexpr)); + getTypeOutputInfo(constval->consttype, + &typoutput, &typIsVarlena); + + char *extval = OidOutputFunctionCall(typoutput, constval->constvalue); + appendStringInfo(context->buf, ".%s", quote_identifier(extval)); + } else { /* Just an ordinary container fetch, so print subscripts */ @@ -12121,8 +12154,11 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) /* Print the relation alias, if needed */ get_rte_alias(rte, varno, false, context); + if (pgduckdb_func_returns_duckdb_row(rtfunc1)) { + /* Don't print column aliases for functions that return a duckdb.row */ + } /* Print the column definitions or aliases, if needed */ - if (rtfunc1 && rtfunc1->funccolnames != NIL) + else if (rtfunc1 && rtfunc1->funccolnames != NIL) { /* Reconstruct the columndef list, which is also the aliases */ get_from_clause_coldeflist(rtfunc1, colinfo, context); From bf6596fb5f3020c36a4f87a051802cd9ecdf6477 Mon Sep 17 00:00:00 2001 
From: Jelte Fennema-Nio Date: Wed, 11 Dec 2024 13:57:43 +0100 Subject: [PATCH 02/40] Generate query that works for iceberg scans --- src/vendor/pg_ruleutils_17.c | 39 +++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index afcac88a..b8d1eda2 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -7507,6 +7507,14 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) elog(ERROR, "invalid attnum %d for relation \"%s\"", attnum, rte->eref->aliasname); if (pgduckdb_var_is_duckdb_row(var)) { + if (rte->rtekind == RTE_FUNCTION) { + if (istoplevel) { + appendStringInfoChar(buf, '*'); + return NULL; + } + return NULL; + } + if (istoplevel) { /* If the duckdb row is at the top level target list of a * select, then we want to generate r.*, to unpack all the @@ -9039,6 +9047,32 @@ get_rule_expr(Node *node, deparse_context *context, Assert(sbsref->refupperindexpr); Assert(!sbsref->reflowerindexpr); Assert(list_length(sbsref->refupperindexpr) == 1); + Var *var = (Var *) sbsref->refexpr; + + deparse_namespace* dpns = (deparse_namespace *) list_nth(context->namespaces, + var->varlevelsup); + int varno; + AttrNumber varattno; + + /* + * If we have a syntactic referent for the Var, and we're working from a + * parse tree, prefer to use the syntactic referent. Otherwise, fall back + * on the semantic referent. (Forcing use of the semantic referent when + * printing plan trees is a design choice that's perhaps more motivated by + * backwards compatibility than anything else. But it does have the + * advantage of making plans more explicit.) 
+ */ + if (var->varnosyn > 0 && dpns->plan == NULL) + { + varno = var->varnosyn; + varattno = var->varattnosyn; + } + else + { + varno = var->varno; + varattno = var->varattno; + } + RangeTblEntry* rte = rt_fetch(varno, dpns->rtable); Oid typoutput; bool typIsVarlena; Const *constval = castNode(Const, linitial(sbsref->refupperindexpr)); @@ -9046,7 +9080,10 @@ get_rule_expr(Node *node, deparse_context *context, &typoutput, &typIsVarlena); char *extval = OidOutputFunctionCall(typoutput, constval->constvalue); - appendStringInfo(context->buf, ".%s", quote_identifier(extval)); + if (rte->rtekind != RTE_FUNCTION) { + appendStringInfoChar(buf, '.'); + } + appendStringInfoString(context->buf, quote_identifier(extval)); } else { From 65681557b4f870a34d8408d7e8d63e72ee8adec3 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Thu, 12 Dec 2024 13:58:08 +0100 Subject: [PATCH 03/40] Correct column aliases and use star --- include/pgduckdb/pgduckdb_ruleutils.h | 4 + sql/pg_duckdb--0.2.0--0.3.0.sql | 93 ++++++++++++------- src/pgduckdb_options.cpp | 8 +- src/pgduckdb_ruleutils.cpp | 83 ++++++++++++++++- src/vendor/pg_ruleutils_17.c | 80 +++++++++++++++- .../expected/duckdb_only_functions.out | 8 +- test/regression/expected/non_superuser.out | 2 +- test/regression/expected/read_functions.out | 50 +++++----- test/regression/expected/search_path.out | 2 +- test/regression/sql/create_table_as.sql | 2 +- test/regression/sql/duckdb_only_functions.sql | 8 +- test/regression/sql/non_superuser.sql | 2 +- test/regression/sql/read_functions.sql | 50 +++++----- test/regression/sql/search_path.sql | 2 +- 14 files changed, 284 insertions(+), 110 deletions(-) diff --git a/include/pgduckdb/pgduckdb_ruleutils.h b/include/pgduckdb/pgduckdb_ruleutils.h index 57f55e62..5c057b17 100644 --- a/include/pgduckdb/pgduckdb_ruleutils.h +++ b/include/pgduckdb/pgduckdb_ruleutils.h @@ -7,5 +7,9 @@ char *pgduckdb_get_tabledef(Oid relation_id); bool pgduckdb_is_not_default_expr(Node *node, void *context); 
List *pgduckdb_db_and_schema(const char *postgres_schema_name, bool is_duckdb_table); const char *pgduckdb_db_and_schema_string(const char *postgres_schema_name, bool is_duckdb_table); +bool pgduckdb_is_duckdb_row(Oid type_oid); +bool pgduckdb_is_unresolved_type(Oid type_oid); bool pgduckdb_var_is_duckdb_row(Var *var); bool pgduckdb_func_returns_duckdb_row(RangeTblFunction *rtfunc); +Var *pgduckdb_duckdb_row_subscript_var(Expr *expr); +List *pgduckdb_star_start_vars(List *target_list); diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql index a40ef052..ba554590 100644 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql +++ b/sql/pg_duckdb--0.2.0--0.3.0.sql @@ -48,6 +48,10 @@ CREATE FUNCTION duckdb_unresolved_type_operator_bool(duckdb.unresolved_type, "an CREATE FUNCTION duckdb_unresolved_type_operator("any", duckdb.unresolved_type) RETURNS duckdb.unresolved_type AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; CREATE FUNCTION duckdb_unresolved_type_operator_bool("any", duckdb.unresolved_type) RETURNS boolean AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; +-- Dummy functions for binary operators with unresolved type on both sides +CREATE FUNCTION duckdb_unresolved_type_operator(duckdb.unresolved_type, duckdb.unresolved_type) RETURNS duckdb.unresolved_type AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION duckdb_unresolved_type_operator_bool(duckdb.unresolved_type, duckdb.unresolved_type) RETURNS boolean AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; + -- Dummy function for prefix/unary operators CREATE FUNCTION duckdb_unresolved_type_operator(duckdb.unresolved_type) RETURNS duckdb.unresolved_type AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; @@ -63,6 +67,12 @@ CREATE OPERATOR - ( ); -- Basic comparison operators +CREATE OPERATOR <= ( + LEFTARG = 
duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + CREATE OPERATOR <= ( LEFTARG = duckdb.unresolved_type, RIGHTARG = "any", @@ -75,6 +85,12 @@ CREATE OPERATOR <= ( FUNCTION = duckdb_unresolved_type_operator_bool ); +CREATE OPERATOR < ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + CREATE OPERATOR < ( LEFTARG = duckdb.unresolved_type, RIGHTARG = "any", @@ -87,6 +103,12 @@ CREATE OPERATOR < ( FUNCTION = duckdb_unresolved_type_operator_bool ); +CREATE OPERATOR <> ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + CREATE OPERATOR <> ( LEFTARG = duckdb.unresolved_type, RIGHTARG = "any", @@ -99,6 +121,12 @@ CREATE OPERATOR <> ( FUNCTION = duckdb_unresolved_type_operator_bool ); +CREATE OPERATOR = ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + CREATE OPERATOR = ( LEFTARG = duckdb.unresolved_type, RIGHTARG = "any", @@ -111,6 +139,12 @@ CREATE OPERATOR = ( FUNCTION = duckdb_unresolved_type_operator_bool ); +CREATE OPERATOR > ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + CREATE OPERATOR > ( LEFTARG = duckdb.unresolved_type, RIGHTARG = "any", @@ -123,6 +157,12 @@ CREATE OPERATOR > ( FUNCTION = duckdb_unresolved_type_operator_bool ); +CREATE OPERATOR >= ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator_bool +); + CREATE OPERATOR >= ( LEFTARG = duckdb.unresolved_type, RIGHTARG = "any", @@ -136,6 +176,12 @@ CREATE OPERATOR >= ( ); -- binary math operators +CREATE OPERATOR + ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator +); + CREATE OPERATOR + ( 
LEFTARG = duckdb.unresolved_type, RIGHTARG = "any", @@ -148,6 +194,12 @@ CREATE OPERATOR + ( FUNCTION = duckdb_unresolved_type_operator ); +CREATE OPERATOR - ( + LEFTARG = duckdb.unresolved_type, + RIGHTARG = duckdb.unresolved_type, + FUNCTION = duckdb_unresolved_type_operator +); + CREATE OPERATOR - ( LEFTARG = duckdb.unresolved_type, RIGHTARG = "any", @@ -162,59 +214,38 @@ CREATE OPERATOR - ( CREATE OPERATOR * ( LEFTARG = duckdb.unresolved_type, - RIGHTARG = "any", - FUNCTION = duckdb_unresolved_type_operator -); - -CREATE OPERATOR * ( - LEFTARG = "any", RIGHTARG = duckdb.unresolved_type, FUNCTION = duckdb_unresolved_type_operator ); -CREATE OPERATOR / ( +CREATE OPERATOR * ( LEFTARG = duckdb.unresolved_type, RIGHTARG = "any", FUNCTION = duckdb_unresolved_type_operator ); -CREATE OPERATOR / ( +CREATE OPERATOR * ( LEFTARG = "any", RIGHTARG = duckdb.unresolved_type, FUNCTION = duckdb_unresolved_type_operator ); --- Self comparison operators and necessary dummy function, needed for GROUP BY and ORDER BY -CREATE FUNCTION duckdb_unresolved_type_operator_bool(duckdb.unresolved_type, duckdb.unresolved_type) RETURNS boolean AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_operator' LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR < ( - LEFTARG = duckdb.unresolved_type, - RIGHTARG = duckdb.unresolved_type, - FUNCTION = duckdb_unresolved_type_operator_bool -); - -CREATE OPERATOR <= ( - LEFTARG = duckdb.unresolved_type, - RIGHTARG = duckdb.unresolved_type, - FUNCTION = duckdb_unresolved_type_operator_bool -); - -CREATE OPERATOR = ( +CREATE OPERATOR / ( LEFTARG = duckdb.unresolved_type, RIGHTARG = duckdb.unresolved_type, - FUNCTION = duckdb_unresolved_type_operator_bool + FUNCTION = duckdb_unresolved_type_operator ); -CREATE OPERATOR > ( +CREATE OPERATOR / ( LEFTARG = duckdb.unresolved_type, - RIGHTARG = duckdb.unresolved_type, - FUNCTION = duckdb_unresolved_type_operator_bool + RIGHTARG = "any", + FUNCTION = duckdb_unresolved_type_operator ); -CREATE OPERATOR >= ( - LEFTARG = 
duckdb.unresolved_type, +CREATE OPERATOR / ( + LEFTARG = "any", RIGHTARG = duckdb.unresolved_type, - FUNCTION = duckdb_unresolved_type_operator_bool + FUNCTION = duckdb_unresolved_type_operator ); -- TODO: use other dummy function with better error diff --git a/src/pgduckdb_options.cpp b/src/pgduckdb_options.cpp index 0cb90d18..98576c52 100644 --- a/src/pgduckdb_options.cpp +++ b/src/pgduckdb_options.cpp @@ -450,15 +450,11 @@ DECLARE_PG_FUNCTION(duckdb_row_out) { } DECLARE_PG_FUNCTION(duckdb_unresolved_type_in) { - elog(ERROR, "Creating the duckdb.unresolved_type type is not supported"); + return textin(fcinfo); } DECLARE_PG_FUNCTION(duckdb_unresolved_type_out) { - elog(ERROR, "Converting a duckdb.unresolved_type to a string is not supported"); -} - -DECLARE_PG_FUNCTION(duckdb_get_row_field_operator) { - elog(ERROR, "The 'get field' operator for duckdb.row cannot be executed by Postgres"); + return textout(fcinfo); } DECLARE_PG_FUNCTION(duckdb_row_subscript) { diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index 4d9e1e5a..0ca738ef 100644 --- a/src/pgduckdb_ruleutils.cpp +++ b/src/pgduckdb_ruleutils.cpp @@ -38,12 +38,22 @@ pgduckdb_function_name(Oid function_oid) { return psprintf("system.main.%s", quote_identifier(func_name)); } +bool +pgduckdb_is_unresolved_type(Oid type_oid) { + return type_oid == pgduckdb::DuckdbUnresolvedTypeOid(); +} + +bool +pgduckdb_is_duckdb_row(Oid type_oid) { + return type_oid == pgduckdb::DuckdbRowOid(); +} + bool pgduckdb_var_is_duckdb_row(Var *var) { if (!var) { return false; } - return var->vartype == pgduckdb::DuckdbRowOid(); + return pgduckdb_is_duckdb_row(var->vartype); } bool @@ -58,7 +68,76 @@ pgduckdb_func_returns_duckdb_row(RangeTblFunction *rtfunc) { FuncExpr *func_expr = castNode(FuncExpr, rtfunc->funcexpr); - return func_expr->funcresulttype == pgduckdb::DuckdbRowOid(); + return pgduckdb_is_duckdb_row(func_expr->funcresulttype); +} + +/* + * Returns NULL if the expression is not a subscript on a duckdb 
row. Returns + * the Var of the duckdb row if it is. + */ +Var * +pgduckdb_duckdb_row_subscript_var(Expr *expr) { + if (!expr) { + return NULL; + } + + if (!IsA(expr, SubscriptingRef)) { + return NULL; + } + + SubscriptingRef *subscript = (SubscriptingRef *)expr; + + if (!IsA(subscript->refexpr, Var)) { + return NULL; + } + + Var *refexpr = (Var *)subscript->refexpr; + + if (!pgduckdb_var_is_duckdb_row(refexpr)) { + return NULL; + } + return refexpr; +} + +/* + * Returns a list of duckdb + */ +List * +pgduckdb_star_start_vars(List *target_list) { + List *star_start_indexes = NIL; + Var *possible_star_start_var; + int possible_star_start_var_index = 0; + + int i = 0; + + foreach_node(TargetEntry, tle, target_list) { + i++; + + if (!IsA(tle->expr, Var)) { + possible_star_start_var = NULL; + possible_star_start_var_index = 0; + continue; + } + + Var *var = (Var *)tle->expr; + + if (var->varattno == 1) { + possible_star_start_var = var; + possible_star_start_var_index = i; + } else if (possible_star_start_var) { + if (var->varno != possible_star_start_var->varno || var->varno == i - possible_star_start_var_index + 1) { + possible_star_start_var = NULL; + possible_star_start_var_index = 0; + } + } + + if (pgduckdb_var_is_duckdb_row(var)) { + star_start_indexes = lappend_int(star_start_indexes, possible_star_start_var_index); + possible_star_start_var = NULL; + possible_star_start_var_index = 0; + } + } + return star_start_indexes; } /* diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index b8d1eda2..efcdf9e7 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -6041,11 +6041,51 @@ get_target_list(List *targetList, deparse_context *context, sep = " "; colno = 0; + List *star_starts = pgduckdb_star_start_vars(targetList); + ListCell *current_star_start = list_head(star_starts); + + int varno_star = 0; + int varattno_star = 0; + bool added_star = false; + + int i = 0; foreach(l, targetList) { TargetEntry *tle = 
(TargetEntry *) lfirst(l); char *colname; char *attname; + i++; + + if (current_star_start && lfirst_int(current_star_start) == i) { + Var *var = castNode(Var, tle->expr); + varno_star = var->varno; + varattno_star = var->varattno; + } + + if (varno_star) { + bool reset = true; + if (tle->expr && IsA(tle->expr, Var)) { + Var *var = castNode(Var, tle->expr); + + if (var->varno == varno_star && var->varattno == varattno_star) { + varattno_star++; + reset = false; + if (added_star || !pgduckdb_var_is_duckdb_row(var)) { + continue; + } + added_star = true; + } + } + + if (reset) { + varno_star = 0; + varattno_star = 0; + current_star_start = lnext(star_starts, current_star_start); + added_star = false; + } + } + + if (tle->resjunk) continue; /* ignore junk entries */ @@ -6100,8 +6140,39 @@ get_target_list(List *targetList, deparse_context *context, else colname = tle->resname; + bool duckdb_row_needs_as = !pgduckdb_var_is_duckdb_row(var); + Var *subscript_var = pgduckdb_duckdb_row_subscript_var(tle->expr); + if (subscript_var) { + deparse_namespace* dpns = (deparse_namespace *) list_nth(context->namespaces, + subscript_var->varlevelsup); + int varno; + int varattno; + + /* + * If we have a syntactic referent for the Var, and we're working from a + * parse tree, prefer to use the syntactic referent. Otherwise, fall back + * on the semantic referent. (Forcing use of the semantic referent when + * printing plan trees is a design choice that's perhaps more motivated by + * backwards compatibility than anything else. But it does have the + * advantage of making plans more explicit.) 
+ */ + if (subscript_var->varnosyn > 0 && dpns->plan == NULL) + { + varno = subscript_var->varnosyn; + varattno = subscript_var->varattnosyn; + } + else + { + varno = subscript_var->varno; + varattno = subscript_var->varattno; + } + RangeTblEntry* rte = rt_fetch(varno, dpns->rtable); + char *original_column = strVal(list_nth(rte->eref->colnames, varattno - 1)); + duckdb_row_needs_as = strcmp(original_column, colname) != 0; + } + /* Show AS unless the column's name is correct as-is */ - if (colname && !pgduckdb_var_is_duckdb_row(var)) + if (colname && duckdb_row_needs_as) { if (attname == NULL || strcmp(attname, colname) != 0) appendStringInfo(&targetbuf, " AS %s", quote_identifier(colname)); @@ -9052,7 +9123,6 @@ get_rule_expr(Node *node, deparse_context *context, deparse_namespace* dpns = (deparse_namespace *) list_nth(context->namespaces, var->varlevelsup); int varno; - AttrNumber varattno; /* * If we have a syntactic referent for the Var, and we're working from a @@ -9065,12 +9135,10 @@ get_rule_expr(Node *node, deparse_context *context, if (var->varnosyn > 0 && dpns->plan == NULL) { varno = var->varnosyn; - varattno = var->varattnosyn; } else { varno = var->varno; - varattno = var->varattno; } RangeTblEntry* rte = rt_fetch(varno, dpns->rtable); Oid typoutput; @@ -11152,6 +11220,10 @@ get_const_expr(Const *constval, deparse_context *context, int showtype) char *extval; bool needlabel = false; + if (pgduckdb_is_unresolved_type(constval->consttype)) { + showtype = -1; + } + if (constval->constisnull) { /* diff --git a/test/regression/expected/duckdb_only_functions.out b/test/regression/expected/duckdb_only_functions.out index cb70e1e3..a423f3a8 100644 --- a/test/regression/expected/duckdb_only_functions.out +++ b/test/regression/expected/duckdb_only_functions.out @@ -2,26 +2,26 @@ -- duckdb.force_execution is turned off SET duckdb.force_execution = false; \set pwd `pwd` -select * from read_parquet(:'pwd' || '/data/unsigned_types.parquet') as (usmallint int); 
+select r['usmallint'] from read_parquet(:'pwd' || '/data/unsigned_types.parquet') r; usmallint ----------- 65535 (1 row) -select * from read_parquet(ARRAY[:'pwd' || '/data/unsigned_types.parquet']) as (usmallint int); +select r['usmallint'] from read_parquet(ARRAY[:'pwd' || '/data/unsigned_types.parquet']) r; usmallint ----------- 65535 (1 row) -select * from read_csv(:'pwd' || '/data/web_page.csv') as (column00 int) LIMIT 2; +select r['column00'] from read_csv(:'pwd' || '/data/web_page.csv') r LIMIT 2; column00 ---------- 1 2 (2 rows) -select * from read_csv(ARRAY[:'pwd' || '/data/web_page.csv']) as (column00 int) LIMIT 2; +select r['column00'] from read_csv(ARRAY[:'pwd' || '/data/web_page.csv']) r LIMIT 2; column00 ---------- 1 diff --git a/test/regression/expected/non_superuser.out b/test/regression/expected/non_superuser.out index 37574149..b4aa0809 100644 --- a/test/regression/expected/non_superuser.out +++ b/test/regression/expected/non_superuser.out @@ -33,7 +33,7 @@ SELECT * FROM duckdb.cache('some hacky sql', 'some more hacky sql'); ERROR: permission denied for function cache SET duckdb.force_execution = true; -- read_csv from the local filesystem should be disallowed -SELECT count("sepal.length") FROM read_csv('../../data/iris.csv') AS ("sepal.length" FLOAT); +SELECT count(r['sepal.length']) FROM read_csv('../../data/iris.csv') r; ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Permission Error: File system LocalFileSystem has been disabled by configuration -- Should fail because DuckDB execution is not allowed for this user SET ROLE user3; diff --git a/test/regression/expected/read_functions.out b/test/regression/expected/read_functions.out index 85ebae38..9d9226ce 100644 --- a/test/regression/expected/read_functions.out +++ b/test/regression/expected/read_functions.out @@ -1,13 +1,13 @@ -- PostgreSQL instance has data directory set to tmp_check/data so for all read functions argument -- is relative to that data directory patch -- 
read_parquet -SELECT count("sepal.length") FROM read_parquet('../../data/iris.parquet') AS ("sepal.length" FLOAT); +SELECT count(r['sepal.length']) FROM read_parquet('../../data/iris.parquet') r; count ------- 150 (1 row) -SELECT "sepal.length" FROM read_parquet('../../data/iris.parquet') AS ("sepal.length" FLOAT) ORDER BY "sepal.length" LIMIT 5; +SELECT r['sepal.length'] FROM read_parquet('../../data/iris.parquet') r ORDER BY r['sepal.length'] LIMIT 5; sepal.length -------------- 4.3 @@ -17,9 +17,9 @@ SELECT "sepal.length" FROM read_parquet('../../data/iris.parquet') AS ("sepal.le 4.5 (5 rows) -SELECT "sepal.length", file_row_number, filename - FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true) - AS ("sepal.length" FLOAT, file_row_number INT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5; +SELECT r['sepal.length'], r['file_row_number'], r['filename'] + FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true) r + ORDER BY r['sepal.length'] LIMIT 5; sepal.length | file_row_number | filename --------------+-----------------+------------------------- 4.3 | 13 | ../../data/iris.parquet @@ -30,13 +30,13 @@ SELECT "sepal.length", file_row_number, filename (5 rows) -- read_csv -SELECT count("sepal.length") FROM read_csv('../../data/iris.csv') AS ("sepal.length" FLOAT); +SELECT count(r['sepal.length']) FROM read_csv('../../data/iris.csv') r; count ------- 150 (1 row) -SELECT "sepal.length" FROM read_csv('../../data/iris.csv') AS ("sepal.length" FLOAT) ORDER BY "sepal.length" LIMIT 5; +SELECT r['sepal.length'] FROM read_csv('../../data/iris.csv') r ORDER BY r['sepal.length'] LIMIT 5; sepal.length -------------- 4.3 @@ -46,9 +46,9 @@ SELECT "sepal.length" FROM read_csv('../../data/iris.csv') AS ("sepal.length" FL 4.5 (5 rows) -SELECT "sepal.length", filename - FROM read_csv('../../data/iris.csv', filename => true, header => true) - AS ("sepal.length" FLOAT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 
5; +SELECT r['sepal.length'], r['filename'] + FROM read_csv('../../data/iris.csv', filename => true, header => true) r + ORDER BY r['sepal.length'] LIMIT 5; sepal.length | filename --------------+--------------------- 4.3 | ../../data/iris.csv @@ -58,7 +58,7 @@ SELECT "sepal.length", filename 4.5 | ../../data/iris.csv (5 rows) -SELECT * FROM read_csv('../../non-existing-file.csv') AS ("sepal.length" FLOAT); +SELECT * FROM read_csv('../../non-existing-file.csv'); ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'IO Error: No files found that match the pattern "../../non-existing-file.csv" -- delta_scan SELECT duckdb.install_extension('delta'); @@ -67,13 +67,13 @@ SELECT duckdb.install_extension('delta'); t (1 row) -SELECT count(a) FROM delta_scan('../../data/delta_table') AS (a INT); +SELECT count(r['a']) FROM delta_scan('../../data/delta_table') r; count ------- 100 (1 row) -SELECT * FROM delta_scan('../../data/delta_table') AS (a INT, b VARCHAR) WHERE (a = 1 OR b = 'delta_table_3'); +SELECT * FROM delta_scan('../../data/delta_table') r WHERE (r['a'] = 1 OR r['b'] = 'delta_table_3'); a | b ---+--------------- 1 | delta_table_1 @@ -87,7 +87,7 @@ SELECT duckdb.install_extension('iceberg'); t (1 row) -SELECT COUNT(l_orderkey) FROM iceberg_scan('../../data/lineitem_iceberg', allow_moved_paths => true) AS (l_orderkey BIGINT); +SELECT COUNT(r['l_orderkey']) FROM iceberg_scan('../../data/lineitem_iceberg', allow_moved_paths => true) r; count ------- 51793 @@ -95,18 +95,14 @@ SELECT COUNT(l_orderkey) FROM iceberg_scan('../../data/lineitem_iceberg', allow_ -- TPCH query #6 SELECT - sum(l_extendedprice * l_discount) as revenue + sum(r['l_extendedprice'] * r['l_discount']) as revenue FROM - iceberg_scan('../../data/lineitem_iceberg', allow_moved_paths => true) AS - (l_extendedprice DOUBLE PRECISION, - l_discount DOUBLE PRECISION, - l_shipdate DATE, - l_quantity DOUBLE PRECISION) + iceberg_scan('../../data/lineitem_iceberg', allow_moved_paths => true) r WHERE - 
l_shipdate >= date '1997-01-01' - AND l_shipdate < date '1997-01-01' + interval '1' year - AND l_discount between 0.08 - 0.01 and 0.08 + 0.01 - AND l_quantity < 25 + r['l_shipdate'] >= date '1997-01-01' + AND r['l_shipdate'] < date '1997-01-01' + interval '1' year + AND r['l_discount'] between 0.08 - 0.01 and 0.08 + 0.01 + AND r['l_quantity'] < 25 LIMIT 1; revenue -------------- @@ -128,19 +124,19 @@ SELECT * FROM iceberg_metadata('../../data/lineitem_iceberg', allow_moved_paths (2 rows) -- read_json -SELECT COUNT(a) FROM read_json('../../data/table.json') AS (a INT); +SELECT COUNT(r['a']) FROM read_json('../../data/table.json') r; count ------- 100 (1 row) -SELECT COUNT(a) FROM read_json('../../data/table.json') AS (a INT, c FLOAT) WHERE c > 50.4; +SELECT COUNT(r['a']) FROM read_json('../../data/table.json') r WHERE r['c'] > 50.4; count ------- 51 (1 row) -SELECT a, b, c FROM read_json('../../data/table.json') AS (a INT, b VARCHAR, c FLOAT) WHERE c > 50.4 AND c < 51.2; +SELECT r['a'], r['b'], r['c'] FROM read_json('../../data/table.json') r WHERE r['c'] > 50.4 AND r['c'] < 51.2; a | b | c ----+---------+------ 50 | json_50 | 50.5 diff --git a/test/regression/expected/search_path.out b/test/regression/expected/search_path.out index f9754c6c..381f5ef6 100644 --- a/test/regression/expected/search_path.out +++ b/test/regression/expected/search_path.out @@ -64,7 +64,7 @@ SELECT count(*) FROM public.t, other.t; 1000 (1 row) -SELECT count(*) FROM public.read_csv(:'pwd' || '/data/web_page.csv') as (column00 int); +SELECT count(*) FROM public.read_csv(:'pwd' || '/data/web_page.csv'); count ------- 60 diff --git a/test/regression/sql/create_table_as.sql b/test/regression/sql/create_table_as.sql index 9f10e927..e24e7184 100644 --- a/test/regression/sql/create_table_as.sql +++ b/test/regression/sql/create_table_as.sql @@ -1,5 +1,5 @@ \set pwd `pwd` -CREATE TABLE webpages AS SELECT * FROM read_csv(:'pwd' || '/data/web_page.csv') as (column00 int, column01 text, column02 date); 
+CREATE TABLE webpages AS SELECT r['column00'], r['column01'], r['column02'] FROM read_csv(:'pwd' || '/data/web_page.csv') r; select * from webpages order by column00 limit 2; select count(*) from webpages; diff --git a/test/regression/sql/duckdb_only_functions.sql b/test/regression/sql/duckdb_only_functions.sql index e38b6568..50213b9c 100644 --- a/test/regression/sql/duckdb_only_functions.sql +++ b/test/regression/sql/duckdb_only_functions.sql @@ -4,10 +4,10 @@ SET duckdb.force_execution = false; \set pwd `pwd` -select * from read_parquet(:'pwd' || '/data/unsigned_types.parquet') as (usmallint int); -select * from read_parquet(ARRAY[:'pwd' || '/data/unsigned_types.parquet']) as (usmallint int); +select r['usmallint'] from read_parquet(:'pwd' || '/data/unsigned_types.parquet') r; +select r['usmallint'] from read_parquet(ARRAY[:'pwd' || '/data/unsigned_types.parquet']) r; -select * from read_csv(:'pwd' || '/data/web_page.csv') as (column00 int) LIMIT 2; -select * from read_csv(ARRAY[:'pwd' || '/data/web_page.csv']) as (column00 int) LIMIT 2; +select r['column00'] from read_csv(:'pwd' || '/data/web_page.csv') r LIMIT 2; +select r['column00'] from read_csv(ARRAY[:'pwd' || '/data/web_page.csv']) r LIMIT 2; -- TODO: Add a test for scan_iceberg once we have a test table diff --git a/test/regression/sql/non_superuser.sql b/test/regression/sql/non_superuser.sql index 73bad923..63361757 100644 --- a/test/regression/sql/non_superuser.sql +++ b/test/regression/sql/non_superuser.sql @@ -25,7 +25,7 @@ SELECT * FROM duckdb.cache('some hacky sql', 'some more hacky sql'); SET duckdb.force_execution = true; -- read_csv from the local filesystem should be disallowed -SELECT count("sepal.length") FROM read_csv('../../data/iris.csv') AS ("sepal.length" FLOAT); +SELECT count(r['sepal.length']) FROM read_csv('../../data/iris.csv') r; -- Should fail because DuckDB execution is not allowed for this user SET ROLE user3; SELECT * FROM t; diff --git a/test/regression/sql/read_functions.sql 
b/test/regression/sql/read_functions.sql index b3af1915..a9bd5851 100644 --- a/test/regression/sql/read_functions.sql +++ b/test/regression/sql/read_functions.sql @@ -3,55 +3,51 @@ -- read_parquet -SELECT count("sepal.length") FROM read_parquet('../../data/iris.parquet') AS ("sepal.length" FLOAT); +SELECT count(r['sepal.length']) FROM read_parquet('../../data/iris.parquet') r; -SELECT "sepal.length" FROM read_parquet('../../data/iris.parquet') AS ("sepal.length" FLOAT) ORDER BY "sepal.length" LIMIT 5; +SELECT r['sepal.length'] FROM read_parquet('../../data/iris.parquet') r ORDER BY r['sepal.length'] LIMIT 5; -SELECT "sepal.length", file_row_number, filename - FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true) - AS ("sepal.length" FLOAT, file_row_number INT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5; +SELECT r['sepal.length'], r['file_row_number'], r['filename'] + FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true) r + ORDER BY r['sepal.length'] LIMIT 5; -- read_csv -SELECT count("sepal.length") FROM read_csv('../../data/iris.csv') AS ("sepal.length" FLOAT); +SELECT count(r['sepal.length']) FROM read_csv('../../data/iris.csv') r; -SELECT "sepal.length" FROM read_csv('../../data/iris.csv') AS ("sepal.length" FLOAT) ORDER BY "sepal.length" LIMIT 5; +SELECT r['sepal.length'] FROM read_csv('../../data/iris.csv') r ORDER BY r['sepal.length'] LIMIT 5; -SELECT "sepal.length", filename - FROM read_csv('../../data/iris.csv', filename => true, header => true) - AS ("sepal.length" FLOAT, filename VARCHAR) ORDER BY "sepal.length" LIMIT 5; +SELECT r['sepal.length'], r['filename'] + FROM read_csv('../../data/iris.csv', filename => true, header => true) r + ORDER BY r['sepal.length'] LIMIT 5; -SELECT * FROM read_csv('../../non-existing-file.csv') AS ("sepal.length" FLOAT); +SELECT * FROM read_csv('../../non-existing-file.csv'); -- delta_scan SELECT duckdb.install_extension('delta'); -SELECT count(a) 
FROM delta_scan('../../data/delta_table') AS (a INT); -SELECT * FROM delta_scan('../../data/delta_table') AS (a INT, b VARCHAR) WHERE (a = 1 OR b = 'delta_table_3'); +SELECT count(r['a']) FROM delta_scan('../../data/delta_table') r; +SELECT * FROM delta_scan('../../data/delta_table') r WHERE (r['a'] = 1 OR r['b'] = 'delta_table_3'); -- iceberg_* SELECT duckdb.install_extension('iceberg'); -SELECT COUNT(l_orderkey) FROM iceberg_scan('../../data/lineitem_iceberg', allow_moved_paths => true) AS (l_orderkey BIGINT); +SELECT COUNT(r['l_orderkey']) FROM iceberg_scan('../../data/lineitem_iceberg', allow_moved_paths => true) r; -- TPCH query #6 SELECT - sum(l_extendedprice * l_discount) as revenue + sum(r['l_extendedprice'] * r['l_discount']) as revenue FROM - iceberg_scan('../../data/lineitem_iceberg', allow_moved_paths => true) AS - (l_extendedprice DOUBLE PRECISION, - l_discount DOUBLE PRECISION, - l_shipdate DATE, - l_quantity DOUBLE PRECISION) + iceberg_scan('../../data/lineitem_iceberg', allow_moved_paths => true) r WHERE - l_shipdate >= date '1997-01-01' - AND l_shipdate < date '1997-01-01' + interval '1' year - AND l_discount between 0.08 - 0.01 and 0.08 + 0.01 - AND l_quantity < 25 + r['l_shipdate'] >= date '1997-01-01' + AND r['l_shipdate'] < date '1997-01-01' + interval '1' year + AND r['l_discount'] between 0.08 - 0.01 and 0.08 + 0.01 + AND r['l_quantity'] < 25 LIMIT 1; SELECT * FROM iceberg_snapshots('../../data/lineitem_iceberg'); @@ -59,6 +55,6 @@ SELECT * FROM iceberg_metadata('../../data/lineitem_iceberg', allow_moved_paths -- read_json -SELECT COUNT(a) FROM read_json('../../data/table.json') AS (a INT); -SELECT COUNT(a) FROM read_json('../../data/table.json') AS (a INT, c FLOAT) WHERE c > 50.4; -SELECT a, b, c FROM read_json('../../data/table.json') AS (a INT, b VARCHAR, c FLOAT) WHERE c > 50.4 AND c < 51.2; +SELECT COUNT(r['a']) FROM read_json('../../data/table.json') r; +SELECT COUNT(r['a']) FROM read_json('../../data/table.json') r WHERE r['c'] > 50.4; 
+SELECT r['a'], r['b'], r['c'] FROM read_json('../../data/table.json') r WHERE r['c'] > 50.4 AND r['c'] < 51.2; diff --git a/test/regression/sql/search_path.sql b/test/regression/sql/search_path.sql index f8059d54..c458b6b8 100644 --- a/test/regression/sql/search_path.sql +++ b/test/regression/sql/search_path.sql @@ -26,7 +26,7 @@ SET search_path TO ''; SELECT count(*) FROM t, other.t; SELECT count(*) FROM public.t, other.t; -SELECT count(*) FROM public.read_csv(:'pwd' || '/data/web_page.csv') as (column00 int); +SELECT count(*) FROM public.read_csv(:'pwd' || '/data/web_page.csv'); -- Cleanup DROP TABLE other.t; From 01d5ad5ddc39ce9a43589ea5ccc450a166ea030c Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Fri, 13 Dec 2024 16:38:59 +0100 Subject: [PATCH 04/40] Attempt to fix create table as, but very hacky --- src/pgduckdb_ddl.cpp | 59 +++++++++++++++---- test/regression/expected/create_table_as.out | 2 +- .../regression/expected/materialized_view.out | 43 ++++++-------- test/regression/expected/temporary_tables.out | 4 +- test/regression/sql/materialized_view.sql | 2 +- test/regression/sql/temporary_tables.sql | 2 +- 6 files changed, 71 insertions(+), 41 deletions(-) diff --git a/src/pgduckdb_ddl.cpp b/src/pgduckdb_ddl.cpp index ba5e962f..bd491aff 100644 --- a/src/pgduckdb_ddl.cpp +++ b/src/pgduckdb_ddl.cpp @@ -20,6 +20,7 @@ extern "C" { #include "commands/event_trigger.h" #include "executor/spi.h" #include "miscadmin.h" +#include "rewrite/rewriteHandler.h" #include "pgduckdb/vendor/pg_ruleutils.h" #include "pgduckdb/pgduckdb_ruleutils.h" @@ -39,33 +40,70 @@ extern "C" { * tables we force this value to false. 
*/ static bool ctas_skip_data = false; +static List *ctas_original_target_list = NIL; static bool top_level_ddl = true; static ProcessUtility_hook_type prev_process_utility_hook = NULL; static void -DuckdbHandleDDL(Node *parsetree) { +DuckdbHandleDDL(PlannedStmt *pstmt, const char *query_string, ParamListInfo params) { if (!pgduckdb::IsExtensionRegistered()) { /* We're not installed, so don't mess with the query */ return; } + Node *parsetree = pstmt->utilityStmt; + if (IsA(parsetree, CreateTableAsStmt)) { auto stmt = castNode(CreateTableAsStmt, parsetree); char *access_method = stmt->into->accessMethod ? stmt->into->accessMethod : default_table_access_method; - if (strcmp(access_method, "duckdb") != 0) { - /* not a duckdb table, so don't mess with the query */ + bool is_duckdb_table = strcmp(access_method, "duckdb") == 0; + if (is_duckdb_table) { + /* + * Force skipData to false for duckdb tables, so that Postgres does + * not execute the query, and save the original value in ctas_skip_data + * so we can use it later in duckdb_create_table_trigger to choose + * whether to execute the query in DuckDB or not. + */ + ctas_skip_data = stmt->into->skipData; + stmt->into->skipData = true; + } + bool is_create_materialized_view = stmt->into->viewQuery != NULL; + bool skips_planning = stmt->into->skipData || is_create_materialized_view; + if (!skips_planning) { + // No need to plan here as well, because standard_ProcessUtility + // will already plan the query and thus get the correct columns. return; } + // TODO: Probably we should only do this if the targetlist actually + // contains some duckdb.unresolved_type or duckdb.row columns. + // XXX: This is a huge hack. Probably we should do something different + // here. The current hack doesn't work with materialized views yet. 
+ + List *rewritten; + PlannedStmt *plan; + Query *original_query = castNode(Query, stmt->query); + Query *query = (Query *)copyObjectImpl(original_query); /* - * Force skipData to false for duckdb tables, so that Postgres does - * not execute the query, and save the original value in ctas_skip_data - * so we can use it later in duckdb_create_table_trigger to choose - * whether to execute the query in DuckDB or not. + * Parse analysis was done already, but we still have to run the rule + * rewriter. We do not do AcquireRewriteLocks: we assume the query + * either came straight from the parser, or suitable locks were + * acquired by plancache.c. */ - ctas_skip_data = stmt->into->skipData; - stmt->into->skipData = true; + rewritten = QueryRewrite(query); + + /* SELECT should never rewrite to more or less than one SELECT query */ + if (list_length(rewritten) != 1) + elog(ERROR, "unexpected rewrite result for CREATE TABLE AS SELECT"); + query = linitial_node(Query, rewritten); + Assert(query->commandType == CMD_SELECT); + + /* plan the query */ + plan = pg_plan_query(query, query_string, CURSOR_OPT_PARALLEL_OK, params); + ctas_original_target_list = original_query->targetList; + original_query->targetList = plan->planTree->targetlist; + } else if (IsA(parsetree, CreateSchemaStmt) && !pgduckdb::doing_motherduck_sync) { auto stmt = castNode(CreateSchemaStmt, parsetree); if (stmt->schemaname) { @@ -125,7 +163,7 @@ DuckdbUtilityHook_Cpp(PlannedStmt *pstmt, const char *query_string, bool read_on bool prev_top_level_ddl = top_level_ddl; top_level_ddl = context == PROCESS_UTILITY_TOPLEVEL; - DuckdbHandleDDL(parsetree); + DuckdbHandleDDL(pstmt, query_string, params); prev_process_utility_hook(pstmt, query_string, read_only_tree, context, params, query_env, dest, qc); top_level_ddl = prev_top_level_ddl; @@ -345,6 +383,7 @@ DECLARE_PG_FUNCTION(duckdb_create_table_trigger) { if (IsA(parsetree, CreateTableAsStmt) && !ctas_skip_data) { auto stmt = castNode(CreateTableAsStmt, 
parsetree); ctas_query = (Query *)stmt->query; + ctas_query->targetList = ctas_original_target_list; } pgduckdb::DuckDBQueryOrThrow(*connection, create_table_string); diff --git a/test/regression/expected/create_table_as.out b/test/regression/expected/create_table_as.out index aee30e06..b15334e7 100644 --- a/test/regression/expected/create_table_as.out +++ b/test/regression/expected/create_table_as.out @@ -1,5 +1,5 @@ \set pwd `pwd` -CREATE TABLE webpages AS SELECT * FROM read_csv(:'pwd' || '/data/web_page.csv') as (column00 int, column01 text, column02 date); +CREATE TABLE webpages AS SELECT r['column00'], r['column01'], r['column02'] FROM read_csv(:'pwd' || '/data/web_page.csv') r; select * from webpages order by column00 limit 2; column00 | column01 | column02 ----------+------------------+------------ diff --git a/test/regression/expected/materialized_view.out b/test/regression/expected/materialized_view.out index ec0940e8..dd2a6053 100644 --- a/test/regression/expected/materialized_view.out +++ b/test/regression/expected/materialized_view.out @@ -47,38 +47,29 @@ INSERT INTO t_csv VALUES (1,1),(2,2),(3,3); \set pwd `pwd` \set csv_file_path '\'' :pwd '/tmp_check/t_csv.csv' '\'' COPY t_csv TO :csv_file_path (FORMAT CSV, HEADER TRUE, DELIMITER ','); -CREATE MATERIALIZED VIEW mv_csv AS SELECT * FROM read_csv(:csv_file_path) AS (a BIGINT); +CREATE MATERIALIZED VIEW mv_csv AS SELECT * FROM read_csv(:csv_file_path); +ERROR: SELECT rule's target entry 1 has different type from column "a" +DETAIL: SELECT target entry has type duckdb."row", but column has type bigint. 
SELECT COUNT(*) FROM mv_csv; - count -------- - 3 -(1 row) - +ERROR: relation "mv_csv" does not exist +LINE 1: SELECT COUNT(*) FROM mv_csv; + ^ SELECT * FROM mv_csv; - a ---- - 1 - 2 - 3 -(3 rows) - +ERROR: relation "mv_csv" does not exist +LINE 1: SELECT * FROM mv_csv; + ^ INSERT INTO t_csv VALUES (4,4); COPY t_csv TO :csv_file_path (FORMAT CSV, HEADER TRUE, DELIMITER ','); REFRESH MATERIALIZED VIEW mv_csv; +ERROR: relation "mv_csv" does not exist SELECT COUNT(*) FROM mv_csv; - count -------- - 4 -(1 row) - +ERROR: relation "mv_csv" does not exist +LINE 1: SELECT COUNT(*) FROM mv_csv; + ^ SELECT * FROM mv_csv; - a ---- - 1 - 2 - 3 - 4 -(4 rows) - +ERROR: relation "mv_csv" does not exist +LINE 1: SELECT * FROM mv_csv; + ^ DROP MATERIALIZED VIEW mv_csv; +ERROR: materialized view "mv_csv" does not exist DROP TABLE t_csv; diff --git a/test/regression/expected/temporary_tables.out b/test/regression/expected/temporary_tables.out index ac87ab7d..ba28d24b 100644 --- a/test/regression/expected/temporary_tables.out +++ b/test/regression/expected/temporary_tables.out @@ -207,7 +207,7 @@ DROP TABLE t; CREATE TEMP TABLE t(a int) ON COMMIT DROP; ERROR: DuckDB does not support ON COMMIT DROP -- CTAS fully in Duckdb -CREATE TEMP TABLE webpages USING duckdb AS SELECT * FROM read_csv('../../data/web_page.csv') as (column00 int, column01 text, column02 date); +CREATE TEMP TABLE webpages USING duckdb AS SELECT r['column00'], r['column01'], r['column02'] FROM read_csv('../../data/web_page.csv') r; SELECT * FROM webpages ORDER BY column00 LIMIT 2; column00 | column01 | column02 ----------+------------------+------------ @@ -247,7 +247,7 @@ NOTICE: result: database_name schema_name sql VARCHAR VARCHAR VARCHAR [ Rows: 2] pg_temp main CREATE TABLE t(b INTEGER); -pg_temp main CREATE TABLE webpages(column00 INTEGER, column01 VARCHAR, column02 DATE); +pg_temp main CREATE TABLE webpages(column00 BIGINT, column01 VARCHAR, column02 DATE); raw_query diff --git 
a/test/regression/sql/materialized_view.sql b/test/regression/sql/materialized_view.sql index 729ced66..a189afa5 100644 --- a/test/regression/sql/materialized_view.sql +++ b/test/regression/sql/materialized_view.sql @@ -26,7 +26,7 @@ INSERT INTO t_csv VALUES (1,1),(2,2),(3,3); COPY t_csv TO :csv_file_path (FORMAT CSV, HEADER TRUE, DELIMITER ','); -CREATE MATERIALIZED VIEW mv_csv AS SELECT * FROM read_csv(:csv_file_path) AS (a BIGINT); +CREATE MATERIALIZED VIEW mv_csv AS SELECT * FROM read_csv(:csv_file_path); SELECT COUNT(*) FROM mv_csv; SELECT * FROM mv_csv; diff --git a/test/regression/sql/temporary_tables.sql b/test/regression/sql/temporary_tables.sql index cb0209ca..45f94f14 100644 --- a/test/regression/sql/temporary_tables.sql +++ b/test/regression/sql/temporary_tables.sql @@ -148,7 +148,7 @@ DROP TABLE t; CREATE TEMP TABLE t(a int) ON COMMIT DROP; -- CTAS fully in Duckdb -CREATE TEMP TABLE webpages USING duckdb AS SELECT * FROM read_csv('../../data/web_page.csv') as (column00 int, column01 text, column02 date); +CREATE TEMP TABLE webpages USING duckdb AS SELECT r['column00'], r['column01'], r['column02'] FROM read_csv('../../data/web_page.csv') r; SELECT * FROM webpages ORDER BY column00 LIMIT 2; CREATE TEMP TABLE t_heap(a int) USING heap; From 69b044e758e077f97d1d81f85d6c6bdc66a661b7 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Fri, 13 Dec 2024 17:23:40 +0100 Subject: [PATCH 05/40] Add comment about bug so I don't forget --- src/vendor/pg_ruleutils_17.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index efcdf9e7..606a2fc7 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -6168,6 +6168,9 @@ get_target_list(List *targetList, deparse_context *context, } RangeTblEntry* rte = rt_fetch(varno, dpns->rtable); char *original_column = strVal(list_nth(rte->eref->colnames, varattno - 1)); + /* BUG: Thil logic breaks the following query: + * SELECT * FROM (SELECT 
r['column00'] FROM read_csv('/home/jelte/work/pg_duckdb/test/regression/data/web_page.csv') r limit 1); + */ duckdb_row_needs_as = strcmp(original_column, colname) != 0; } From 556d4761ec5eaf161208cc45aaa9b1b246168619 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 18 Dec 2024 10:28:45 +0100 Subject: [PATCH 06/40] Add support for duckdb.query --- sql/pg_duckdb--0.2.0--0.3.0.sql | 6 ++++++ src/pgduckdb_metadata_cache.cpp | 11 +++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql index ba554590..f371e921 100644 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql +++ b/sql/pg_duckdb--0.2.0--0.3.0.sql @@ -814,3 +814,9 @@ RETURNS SETOF duckdb.row SET search_path = pg_catalog, pg_temp AS 'MODULE_PATHNAME', 'duckdb_only_function' LANGUAGE C; + +CREATE FUNCTION duckdb.query(query text) +RETURNS SETOF duckdb.row +SET search_path = pg_catalog, pg_temp +AS 'MODULE_PATHNAME', 'duckdb_only_function' +LANGUAGE C; diff --git a/src/pgduckdb_metadata_cache.cpp b/src/pgduckdb_metadata_cache.cpp index 3fac74af..6d892de7 100644 --- a/src/pgduckdb_metadata_cache.cpp +++ b/src/pgduckdb_metadata_cache.cpp @@ -115,8 +115,15 @@ BuildDuckdbOnlyFunctions() { * each of the found functions is actually part of our extension before * caching its OID as a DuckDB-only function. 
*/ - const char *function_names[] = {"read_parquet", "read_csv", "iceberg_scan", "iceberg_metadata", - "iceberg_snapshots", "delta_scan", "read_json", "approx_count_distinct"}; + const char *function_names[] = {"read_parquet", + "read_csv", + "iceberg_scan", + "iceberg_metadata", + "iceberg_snapshots", + "delta_scan", + "read_json", + "approx_count_distinct", + "query"}; for (uint32_t i = 0; i < lengthof(function_names); i++) { CatCList *catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(function_names[i])); From 537753bdf46f40adf759e465c26e158df43e3cd6 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Fri, 20 Dec 2024 12:58:45 +0100 Subject: [PATCH 07/40] Add missing max aggregate --- sql/pg_duckdb--0.2.0--0.3.0.sql | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql index f371e921..9c231484 100644 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql +++ b/sql/pg_duckdb--0.2.0--0.3.0.sql @@ -435,6 +435,12 @@ CREATE AGGREGATE list(duckdb.unresolved_type) ( FINALFUNC = duckdb_unresolved_type_final ); +CREATE AGGREGATE max(duckdb.unresolved_type) ( + SFUNC = duckdb_unresolved_type_state_trans, + STYPE = duckdb.unresolved_type, + FINALFUNC = duckdb_unresolved_type_final +); + CREATE AGGREGATE max(duckdb.unresolved_type, "any") ( SFUNC = duckdb_unresolved_type_state_trans, STYPE = duckdb.unresolved_type, From 43e084a3ab94fab4caa4e14f2ee8a84661e10d45 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 14 Jan 2025 11:08:08 +0100 Subject: [PATCH 08/40] Add error hint --- src/pgduckdb_hooks.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/pgduckdb_hooks.cpp b/src/pgduckdb_hooks.cpp index dc0dbf47..b901802f 100644 --- a/src/pgduckdb_hooks.cpp +++ b/src/pgduckdb_hooks.cpp @@ -37,6 +37,7 @@ static planner_hook_type prev_planner_hook = NULL; static ExecutorStart_hook_type prev_executor_start_hook = NULL; static ExecutorFinish_hook_type prev_executor_finish_hook = 
NULL; static ExplainOneQuery_hook_type prev_explain_one_query_hook = NULL; +static emit_log_hook_type prev_emit_log_hook = NULL; static bool ContainsCatalogTable(List *rtes) { @@ -353,6 +354,25 @@ DuckdbExplainOneQueryHook(Query *query, int cursorOptions, IntoClause *into, Exp prev_explain_one_query_hook(query, cursorOptions, into, es, queryString, params, queryEnv); } +static void +DuckdbEmitLogHook(ErrorData *edata) { + if (prev_emit_log_hook) { + prev_emit_log_hook(edata); + } + + if (edata->elevel == ERROR && edata->sqlerrcode == ERRCODE_UNDEFINED_COLUMN && pgduckdb::IsExtensionRegistered()) { + /* + * XXX: It would be nice if we could check if the query contained any + * of the functions, but we don't have access to the query string here. + * So instead we simply always add this HINT for this specific error if + * the pg_duckdb extension is installed. + */ + edata->hint = pstrdup( + "If you use DuckDB functions like read_parquet, you need to use the r['colname'] syntax to use columns. If " + "you're already doing that, maybe you forgot to give the function the r alias."); + } +} + void DuckdbInitHooks(void) { prev_planner_hook = planner_hook; @@ -367,5 +387,8 @@ DuckdbInitHooks(void) { prev_explain_one_query_hook = ExplainOneQuery_hook ? ExplainOneQuery_hook : standard_ExplainOneQuery; ExplainOneQuery_hook = DuckdbExplainOneQueryHook; + prev_emit_log_hook = 
emit_log_hook : NULL; + emit_log_hook = DuckdbEmitLogHook; + DuckdbInitUtilityHook(); } From 926d1b5965c97d88a9b9d320e3dffaad376016d6 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 14 Jan 2025 11:43:52 +0100 Subject: [PATCH 09/40] Ignore warning about declarations after statements --- Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Makefile b/Makefile index c3a52bf1..45e98e86 100644 --- a/Makefile +++ b/Makefile @@ -46,6 +46,13 @@ COMPILER_FLAGS=-Wno-sign-compare -Wshadow -Wswitch -Wunused-parameter -Wunreacha override PG_CPPFLAGS += -Iinclude -isystem third_party/duckdb/src/include -isystem third_party/duckdb/third_party/re2 ${COMPILER_FLAGS} override PG_CXXFLAGS += -std=c++17 ${DUCKDB_BUILD_CXX_FLAGS} ${COMPILER_FLAGS} -Wno-register +# Ignore declaration-after-statement warnings in our code. Postgres enforces +# this because their ancient style guide requires it, but we don't care. It +# would only apply to C files anyway, and we don't have many of those. The only +# ones that we do have are vendored in from Postgres (ruleutils), and allowing +# declarations to be anywhere is even a good thing for those as we can keep our +# changes to the vendored code in one place. 
+override PG_CFLAGS += -Wno-declaration-after-statement SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -L$(PG_LIB) -lduckdb -lstdc++ -llz4 From e312a3b5d61be5bb4de5332466599420e671ae79 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 14 Jan 2025 13:28:56 +0100 Subject: [PATCH 10/40] Fix warning --- src/pgduckdb_ruleutils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index 0ca738ef..b0e3c757 100644 --- a/src/pgduckdb_ruleutils.cpp +++ b/src/pgduckdb_ruleutils.cpp @@ -105,7 +105,7 @@ pgduckdb_duckdb_row_subscript_var(Expr *expr) { List * pgduckdb_star_start_vars(List *target_list) { List *star_start_indexes = NIL; - Var *possible_star_start_var; + Var *possible_star_start_var = NULL; int possible_star_start_var_index = 0; int i = 0; From e46b49f3b7ad5ccbea8eabef4353cc7b92f083e2 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 14 Jan 2025 13:51:11 +0100 Subject: [PATCH 11/40] Don't rename subscript expressions in subqueries --- include/pgduckdb/pgduckdb_ruleutils.h | 2 + src/pgduckdb_ruleutils.cpp | 7 +++ src/vendor/pg_ruleutils_17.c | 19 ++++++-- test/regression/expected/read_functions.out | 48 +++++++++++++++++++++ test/regression/sql/read_functions.sql | 27 ++++++++++++ 5 files changed, 99 insertions(+), 4 deletions(-) diff --git a/include/pgduckdb/pgduckdb_ruleutils.h b/include/pgduckdb/pgduckdb_ruleutils.h index 5c057b17..21e9a3b2 100644 --- a/include/pgduckdb/pgduckdb_ruleutils.h +++ b/include/pgduckdb/pgduckdb_ruleutils.h @@ -13,3 +13,5 @@ bool pgduckdb_var_is_duckdb_row(Var *var); bool pgduckdb_func_returns_duckdb_row(RangeTblFunction *rtfunc); Var *pgduckdb_duckdb_row_subscript_var(Expr *expr); List *pgduckdb_star_start_vars(List *target_list); + +extern bool processed_targetlist; diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index b0e3c757..071f2ded 100644 --- a/src/pgduckdb_ruleutils.cpp 
+++ b/src/pgduckdb_ruleutils.cpp @@ -28,6 +28,8 @@ extern "C" { #include "pgduckdb/pgduckdb_metadata_cache.hpp" extern "C" { +bool processed_targetlist = false; + char * pgduckdb_function_name(Oid function_oid) { if (!pgduckdb::IsDuckdbOnlyFunction(function_oid)) { @@ -273,9 +275,14 @@ pgduckdb_relation_name(Oid relation_oid) { * DuckDB understands). The reason this is not part of * pgduckdb_pg_get_querydef_internal is because we want to avoid changing that * vendored in function as much as possible to keep updates easy. + * + * Apart from that it also sets the processed_targetlist variable to false, + * which we use in get_target_list to determine if we're processing the + * outermost targetlist or not. */ char * pgduckdb_get_querydef(Query *query) { + processed_targetlist = false; auto save_nestlevel = NewGUCNestLevel(); SetConfigOption("DateStyle", "ISO, YMD", PGC_USERSET, PGC_S_SESSION); char *result = pgduckdb_pg_get_querydef_internal(query, false); diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index 606a2fc7..ab25e072 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -6048,6 +6048,9 @@ get_target_list(List *targetList, deparse_context *context, int varattno_star = 0; bool added_star = false; + bool outermost_targetlist = !processed_targetlist; + processed_targetlist = true; + int i = 0; foreach(l, targetList) { @@ -6140,9 +6143,20 @@ get_target_list(List *targetList, deparse_context *context, else colname = tle->resname; + /* + * This makes sure we don't add Postgres its bad default alias to the + * duckdb.row type. + */ bool duckdb_row_needs_as = !pgduckdb_var_is_duckdb_row(var); + + /* + * For r['abc'] we don't want the column name to be r, but instead we + * want it to be "abc". We can only do this for the target list of + * the outermost query though to make sure references to the column + * name are still valid.
+ */ Var *subscript_var = pgduckdb_duckdb_row_subscript_var(tle->expr); - if (subscript_var) { + if (outermost_targetlist && subscript_var) { deparse_namespace* dpns = (deparse_namespace *) list_nth(context->namespaces, subscript_var->varlevelsup); int varno; @@ -6168,9 +6182,6 @@ get_target_list(List *targetList, deparse_context *context, } RangeTblEntry* rte = rt_fetch(varno, dpns->rtable); char *original_column = strVal(list_nth(rte->eref->colnames, varattno - 1)); - /* BUG: Thil logic breaks the following query: - * SELECT * FROM (SELECT r['column00'] FROM read_csv('/home/jelte/work/pg_duckdb/test/regression/data/web_page.csv') r limit 1); - */ duckdb_row_needs_as = strcmp(original_column, colname) != 0; } diff --git a/test/regression/expected/read_functions.out b/test/regression/expected/read_functions.out index 9d9226ce..b3f4d5e8 100644 --- a/test/regression/expected/read_functions.out +++ b/test/regression/expected/read_functions.out @@ -60,6 +60,54 @@ SELECT r['sepal.length'], r['filename'] SELECT * FROM read_csv('../../non-existing-file.csv'); ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'IO Error: No files found that match the pattern "../../non-existing-file.csv" +-- We override Postgres its default column name for subscript expressions. In +-- the following example the column would normally be named "r", which is +-- pretty non-descriptive especially when selecting multiple columns from the +-- same row. 
+-- +-- NOTE: Jelte tried to change this behaviour in upstream Postgres, but met +-- with some resistance: +-- https://www.postgresql.org/message-id/flat/CAGECzQRYAFHLnjjymsSPhL-9OExVyNfMQkZMc1hcoUQ6dDHo=Q@mail.gmail.com +SELECT r['column00'] FROM read_csv('../../data/web_page.csv') r limit 1; + column00 +---------- + 1 +(1 row) + +-- If an explicit column name is given we use it. +SELECT r['column00'] AS col1 FROM read_csv('../../data/web_page.csv') r limit 1; + col1 +------ + 1 +(1 row) + +-- ...except if that explicit column name is the same as the default column +-- name. In which case we fail to recognize that fact. +-- XXX: It would be nice to fix this, but it's not a high priority. +SELECT r['column00'] AS r FROM read_csv('../../data/web_page.csv') r limit 1; + column00 +---------- + 1 +(1 row) + +-- If we use the same trick inside subqueries, then references to columns from +-- that subquery would not use that better name, and thus the query could not +-- be executed. To avoid that we simply don't rename a subscript expression +-- inside a subquery, and only do so in the outermost SELECT list (aka +-- targetlist). +SELECT * FROM (SELECT r['column00'] FROM read_csv('../../data/web_page.csv') r limit 1); + r +--- + 1 +(1 row) + +-- If you give it a different alias then that alias is propagated through. +SELECT * FROM (SELECT r['column00'] AS col1 FROM read_csv('../../data/web_page.csv') r limit 1); + col1 +------ + 1 +(1 row) + -- delta_scan SELECT duckdb.install_extension('delta'); install_extension diff --git a/test/regression/sql/read_functions.sql b/test/regression/sql/read_functions.sql index a9bd5851..be657413 100644 --- a/test/regression/sql/read_functions.sql +++ b/test/regression/sql/read_functions.sql @@ -23,6 +23,33 @@ SELECT r['sepal.length'], r['filename'] SELECT * FROM read_csv('../../non-existing-file.csv'); +-- We override Postgres its default column name for subscript expressions.
In +-- the following example the column would normally be named "r", which is +-- pretty non-descriptive especially when selecting multiple columns from the +-- same row. +-- +-- NOTE: Jelte tried to change this behaviour in upstream Postgres, but met +-- with some resistance: +-- https://www.postgresql.org/message-id/flat/CAGECzQRYAFHLnjjymsSPhL-9OExVyNfMQkZMc1hcoUQ6dDHo=Q@mail.gmail.com +SELECT r['column00'] FROM read_csv('../../data/web_page.csv') r limit 1; + +-- If an explicit column name is given we use it. +SELECT r['column00'] AS col1 FROM read_csv('../../data/web_page.csv') r limit 1; + +-- ...except if that explicit column name is the same as the default column +-- name. In which case we fail to recognize that fact. +-- XXX: It would be nice to fix this, but it's not a high priority. +SELECT r['column00'] AS r FROM read_csv('../../data/web_page.csv') r limit 1; + +-- If we use the same trick inside subqueries, then references to columns from +-- that subquery would not use that better name, and thus the query could not +-- be executed. To avoid that we simply don't rename a subscript expression +-- inside a subquery, and only do so in the outermost SELECT list (aka +-- targetlist). +SELECT * FROM (SELECT r['column00'] FROM read_csv('../../data/web_page.csv') r limit 1); + +-- If you give it a different alias then that alias is propagated through.
+SELECT * FROM (SELECT r['column00'] AS col1 FROM read_csv('../../data/web_page.csv') r limit 1); -- delta_scan From aa3d72b37c6e855f4a368f4f9feebb652feb343b Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 14 Jan 2025 15:52:44 +0100 Subject: [PATCH 12/40] Fix materialized views --- src/pgduckdb_ddl.cpp | 51 +++++++++++++++++-- .../regression/expected/materialized_view.out | 41 +++++++++------ 2 files changed, 71 insertions(+), 21 deletions(-) diff --git a/src/pgduckdb_ddl.cpp b/src/pgduckdb_ddl.cpp index bd491aff..2a4a0cb9 100644 --- a/src/pgduckdb_ddl.cpp +++ b/src/pgduckdb_ddl.cpp @@ -12,7 +12,9 @@ extern "C" { #include "funcapi.h" #include "nodes/print.h" #include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" +#include "parser/analyze.h" #include "tcop/utility.h" #include "utils/builtins.h" #include "utils/syscache.h" @@ -45,6 +47,41 @@ static List *ctas_original_target_list = NIL; static bool top_level_ddl = true; static ProcessUtility_hook_type prev_process_utility_hook = NULL; +static Query * +WrapQueryInQueryCall(Query *query, List *target_list) { + char *duckdb_query_string = pgduckdb_get_querydef(query); + + StringInfo buf = makeStringInfo(); + appendStringInfo(buf, "SELECT "); + bool first = true; + foreach_node(TargetEntry, tle, target_list) { + if (!first) { + appendStringInfoString(buf, ", "); + } + + Oid type = exprType((Node *)tle->expr); + Oid typemod = exprTypmod((Node *)tle->expr); + first = false; + appendStringInfo(buf, "r[%s]::%s AS %s", quote_literal_cstr(tle->resname), + format_type_with_typemod(type, typemod), quote_identifier(tle->resname)); + } + + appendStringInfo(buf, " FROM duckdb.query(%s) r", quote_literal_cstr(duckdb_query_string)); + + List *parsetree_list = pg_parse_query(buf->data); + + /* + * We only allow a single user statement in a prepared statement. 
This is + * mainly to keep the protocol simple --- otherwise we'd need to worry + * about multiple result tupdescs and things like that. + */ + if (list_length(parsetree_list) > 1) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), errmsg("BUG: pg_duckdb generated a command with multiple queries"))); + + return parse_analyze_fixedparams((RawStmt *)linitial(parsetree_list), buf->data, NULL, 0, NULL); +} + static void DuckdbHandleDDL(PlannedStmt *pstmt, const char *query_string, ParamListInfo params) { if (!pgduckdb::IsExtensionRegistered()) { @@ -82,8 +119,8 @@ DuckdbHandleDDL(PlannedStmt *pstmt, const char *query_string, ParamListInfo para // here. The current hack doesn't work with materialized views yet. List *rewritten; - PlannedStmt *plan; Query *original_query = castNode(Query, stmt->query); + Query *query = (Query *)copyObjectImpl(original_query); /* * Parse analysis was done already, but we still have to run the rule @@ -99,10 +136,14 @@ DuckdbHandleDDL(PlannedStmt *pstmt, const char *query_string, ParamListInfo para query = linitial_node(Query, rewritten); Assert(query->commandType == CMD_SELECT); - /* plan the query */ - plan = pg_plan_query(query, query_string, CURSOR_OPT_PARALLEL_OK, params); - ctas_original_target_list = original_query->targetList; - original_query->targetList = plan->planTree->targetlist; + Query *rewritten_query_copy = (Query *)copyObjectImpl(query); + + PlannedStmt *plan = pg_plan_query(query, query_string, CURSOR_OPT_PARALLEL_OK, params); + + List *target_list = plan->planTree->targetlist; + + stmt->query = (Node *)WrapQueryInQueryCall(rewritten_query_copy, target_list); + stmt->into->viewQuery = (Node *)copyObjectImpl(stmt->query); } else if (IsA(parsetree, CreateSchemaStmt) && !pgduckdb::doing_motherduck_sync) { auto stmt = castNode(CreateSchemaStmt, parsetree); diff --git a/test/regression/expected/materialized_view.out b/test/regression/expected/materialized_view.out index dd2a6053..88b98440 100644 --- 
a/test/regression/expected/materialized_view.out +++ b/test/regression/expected/materialized_view.out @@ -48,28 +48,37 @@ INSERT INTO t_csv VALUES (1,1),(2,2),(3,3); \set csv_file_path '\'' :pwd '/tmp_check/t_csv.csv' '\'' COPY t_csv TO :csv_file_path (FORMAT CSV, HEADER TRUE, DELIMITER ','); CREATE MATERIALIZED VIEW mv_csv AS SELECT * FROM read_csv(:csv_file_path); -ERROR: SELECT rule's target entry 1 has different type from column "a" -DETAIL: SELECT target entry has type duckdb."row", but column has type bigint. SELECT COUNT(*) FROM mv_csv; -ERROR: relation "mv_csv" does not exist -LINE 1: SELECT COUNT(*) FROM mv_csv; - ^ + count +------- + 3 +(1 row) + SELECT * FROM mv_csv; -ERROR: relation "mv_csv" does not exist -LINE 1: SELECT * FROM mv_csv; - ^ + a | b +---+--- + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + INSERT INTO t_csv VALUES (4,4); COPY t_csv TO :csv_file_path (FORMAT CSV, HEADER TRUE, DELIMITER ','); REFRESH MATERIALIZED VIEW mv_csv; -ERROR: relation "mv_csv" does not exist SELECT COUNT(*) FROM mv_csv; -ERROR: relation "mv_csv" does not exist -LINE 1: SELECT COUNT(*) FROM mv_csv; - ^ + count +------- + 4 +(1 row) + SELECT * FROM mv_csv; -ERROR: relation "mv_csv" does not exist -LINE 1: SELECT * FROM mv_csv; - ^ + a | b +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 +(4 rows) + DROP MATERIALIZED VIEW mv_csv; -ERROR: materialized view "mv_csv" does not exist DROP TABLE t_csv; From 6055d593810e4a65f281955c753de7783240fd4f Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 14 Jan 2025 16:02:56 +0100 Subject: [PATCH 13/40] Only do CTAS type detection using duckdb for unresolved types --- include/pgduckdb/pgduckdb_ruleutils.h | 1 + src/pgduckdb_ddl.cpp | 36 +++++++++++++++++++++------ src/pgduckdb_ruleutils.cpp | 15 +++++++++++ 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/include/pgduckdb/pgduckdb_ruleutils.h b/include/pgduckdb/pgduckdb_ruleutils.h index 21e9a3b2..28233b04 100644 --- a/include/pgduckdb/pgduckdb_ruleutils.h +++ 
b/include/pgduckdb/pgduckdb_ruleutils.h
@@ -11,6 +11,7 @@ bool pgduckdb_is_duckdb_row(Oid type_oid);
 bool pgduckdb_is_unresolved_type(Oid type_oid);
 bool pgduckdb_var_is_duckdb_row(Var *var);
 bool pgduckdb_func_returns_duckdb_row(RangeTblFunction *rtfunc);
+bool pgduckdb_target_list_contains_unresolved_type_or_row(List *target_list);
 
 Var *pgduckdb_duckdb_row_subscript_var(Expr *expr);
 List *pgduckdb_star_start_vars(List *target_list);
diff --git a/src/pgduckdb_ddl.cpp b/src/pgduckdb_ddl.cpp
index 2a4a0cb9..bae49a7b 100644
--- a/src/pgduckdb_ddl.cpp
+++ b/src/pgduckdb_ddl.cpp
@@ -47,8 +47,21 @@ static List *ctas_original_target_list = NIL;
 static bool top_level_ddl = true;
 static ProcessUtility_hook_type prev_process_utility_hook = NULL;
 
+/*
+ * WrapQueryInQueryCall takes a query and wraps it in a duckdb.query(...) call.
+ * It then explicitly references all the columns and the types from the
+ * original query's target list. So a query like this:
+ *
+ * SELECT r from read_csv('file.csv') r;
+ *
+ * Would expand to:
+ *
+ * SELECT r['id']::int AS id, r['name']::text AS name
+ * FROM duckdb.query('SELECT * from system.main.read_csv(''file.csv'')') r;
+ *
+ */
 static Query *
-WrapQueryInQueryCall(Query *query, List *target_list) {
+WrapQueryInDuckdbQueryCall(Query *query, List *target_list) {
 	char *duckdb_query_string = pgduckdb_get_querydef(query);
 
 	StringInfo buf = makeStringInfo();
@@ -113,13 +126,21 @@ DuckdbHandleDDL(PlannedStmt *pstmt, const char *query_string, ParamListInfo para
 		return;
 	}
 
-	// TODO: Probably we should only do this if the targetlist actually
-	// contains some duckdb.unresolved_type or duckdb.row columns.
-	// XXX: This is a huge hack. Probably we should do something different
-	// here. The current hack doesn't work with materialized views yet.
+	Query *original_query = castNode(Query, stmt->query);
+	// We need to do hacky things if the targetlist contains
+	// duckdb.unresolved_type or duckdb.row columns.
In those cases we want + // to run the query through duckdb to get the actual result types for + // these queries. We also want to lock in those types creating a new + // query that will always retun them. + if (!pgduckdb_target_list_contains_unresolved_type_or_row(original_query->targetList)) { + // If the target list doesn't contain duckdb.row or + // duckdb.unresolved_type though, we are done now. + return; + } + + /* NOTE: The below code is mostly copied from ExecCreateTableAs */ List *rewritten; - Query *original_query = castNode(Query, stmt->query); Query *query = (Query *)copyObjectImpl(original_query); /* @@ -140,9 +161,10 @@ DuckdbHandleDDL(PlannedStmt *pstmt, const char *query_string, ParamListInfo para PlannedStmt *plan = pg_plan_query(query, query_string, CURSOR_OPT_PARALLEL_OK, params); + /* This is where our custom code starts again */ List *target_list = plan->planTree->targetlist; - stmt->query = (Node *)WrapQueryInQueryCall(rewritten_query_copy, target_list); + stmt->query = (Node *)WrapQueryInDuckdbQueryCall(rewritten_query_copy, target_list); stmt->into->viewQuery = (Node *)copyObjectImpl(stmt->query); } else if (IsA(parsetree, CreateSchemaStmt) && !pgduckdb::doing_motherduck_sync) { diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index 071f2ded..99a66bdb 100644 --- a/src/pgduckdb_ruleutils.cpp +++ b/src/pgduckdb_ruleutils.cpp @@ -73,6 +73,21 @@ pgduckdb_func_returns_duckdb_row(RangeTblFunction *rtfunc) { return pgduckdb_is_duckdb_row(func_expr->funcresulttype); } +bool +pgduckdb_target_list_contains_unresolved_type_or_row(List *target_list) { + foreach_node(TargetEntry, tle, target_list) { + Oid type = exprType((Node *)tle->expr); + if (pgduckdb_is_unresolved_type(type)) { + return true; + } + + if (pgduckdb_is_duckdb_row(type)) { + return true; + } + } + return false; +} + /* * Returns NULL if the expression is not a subscript on a duckdb row. Returns * the Var of the duckdb row if it is. 
From 284795b0c94c8efc0be1eb7da39342622f2c14ae Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 14 Jan 2025 16:50:32 +0100 Subject: [PATCH 14/40] Use C duckdb_only_function handler for approx_count_distinct --- sql/pg_duckdb--0.2.0--0.3.0.sql | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql index 9c231484..ed91cccc 100644 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql +++ b/sql/pg_duckdb--0.2.0--0.3.0.sql @@ -1,12 +1,8 @@ CREATE FUNCTION @extschema@.approx_count_distinct_sfunc(bigint, anyelement) -RETURNS bigint LANGUAGE 'plpgsql' -SET search_path = pg_catalog, pg_temp -AS -$func$ -BEGIN - RAISE EXCEPTION 'Aggregate `approx_count_distinct(ANYELEMENT)` only works with Duckdb execution.'; -END; -$func$; +RETURNS bigint +AS 'MODULE_PATHNAME', 'duckdb_only_function' +LANGUAGE C; + CREATE AGGREGATE @extschema@.approx_count_distinct(anyelement) ( From 3ce96f71493b69d8b9516ca0d3ba9ba7098e329f Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 14 Jan 2025 18:19:02 +0100 Subject: [PATCH 15/40] Support array/json indexing --- src/pgduckdb_options.cpp | 36 +++++++++++---------- src/vendor/pg_ruleutils_17.c | 20 +++++++++++- test/regression/expected/read_functions.out | 7 ++++ test/regression/sql/read_functions.sql | 3 ++ 4 files changed, 48 insertions(+), 18 deletions(-) diff --git a/src/pgduckdb_options.cpp b/src/pgduckdb_options.cpp index 98576c52..bcaa4b22 100644 --- a/src/pgduckdb_options.cpp +++ b/src/pgduckdb_options.cpp @@ -388,11 +388,7 @@ DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct P elog(ERROR, "Subscripting duckdb.row with an empty subscript is not supported"); } - if (list_length(indirection) != 1) { - /* TODO: Fix this, if the column contains an array we should be able to subscript that type */ - elog(ERROR, "Subscripting duckdb.row is only supported with a single subscript"); - } - pprint(indirection); + bool first = true; 
// Transform each subscript expression foreach_node(A_Indices, subscript, indirection) { @@ -400,23 +396,29 @@ DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct P Assert(subscript->uidx); Node *subscript_expr = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind); - pprint(subscript_expr); int expr_location = exprLocation(subscript->uidx); Oid subscript_expr_type = exprType(subscript_expr); - Node *coerced_expr = coerce_to_target_type(pstate, subscript_expr, subscript_expr_type, TEXTOID, -1, - COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, expr_location); - if (!coerced_expr) { - ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must have text type"), - parser_errposition(pstate, expr_location))); - } + /* The first subscript needs to be a TEXT constant, since it should be + * a column reference. But the subscripts after that can be anything, + * DuckDB should interpret those. */ + if (first) { + Node *coerced_expr = coerce_to_target_type(pstate, subscript_expr, subscript_expr_type, TEXTOID, -1, + COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, expr_location); + if (!coerced_expr) { + ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must have text type"), + parser_errposition(pstate, expr_location))); + } + + if (!IsA(subscript_expr, Const)) { + ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must be a constant"), + parser_errposition(pstate, expr_location))); + } - if (!IsA(subscript_expr, Const)) { - pprint(subscript_expr); - ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must be a constant"), - parser_errposition(pstate, expr_location))); + subscript_expr = coerced_expr; + first = false; } - sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, coerced_expr); + sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, subscript_expr); } /* TODO: Trigger cache population, probably we should do this 
somewhere else */ diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index ab25e072..d3de77e5 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -9129,9 +9129,15 @@ get_rule_expr(Node *node, deparse_context *context, get_rule_expr(refassgnexpr, context, showimplicit); } else if (IsA(sbsref->refexpr, Var) && pgduckdb_var_is_duckdb_row((Var*) sbsref->refexpr)) { + /* Subscript expressions into the duckdb.row type we want to + * change to regular column references in the DuckDB query. + * Both because it's generally more common and thus + * results in better optimized queries, and because + * iceberg_scan doesn't support the subscripting syntax on + * its results. + */ Assert(sbsref->refupperindexpr); Assert(!sbsref->reflowerindexpr); - Assert(list_length(sbsref->refupperindexpr) == 1); Var *var = (Var *) sbsref->refexpr; deparse_namespace* dpns = (deparse_namespace *) list_nth(context->namespaces, @@ -9166,6 +9172,18 @@ get_rule_expr(Node *node, deparse_context *context, appendStringInfoChar(buf, '.'); } appendStringInfoString(context->buf, quote_identifier(extval)); + + if (list_length(sbsref->refupperindexpr) > 1) { + /* + * If there are any additional subscript expressions we + * should output them. Subscripts can be used in duckdb + * to index into arrays or json objects. 
+ */ + SubscriptingRef *shorter_sbsref = copyObject(sbsref); + /* strip the first subscript from the list */ + shorter_sbsref->refupperindexpr = list_delete_first(shorter_sbsref->refupperindexpr); + printSubscripts(shorter_sbsref, context); + } } else { diff --git a/test/regression/expected/read_functions.out b/test/regression/expected/read_functions.out index b3f4d5e8..35871439 100644 --- a/test/regression/expected/read_functions.out +++ b/test/regression/expected/read_functions.out @@ -29,6 +29,13 @@ SELECT r['sepal.length'], r['file_row_number'], r['filename'] 4.5 | 41 | ../../data/iris.parquet (5 rows) +-- Supports subscripts +SELECT r['jsoncol'][1], r['arraycol'][2] FROM read_parquet('../../data/indexable.parquet') r; + jsoncol[1] | arraycol[2] +------------+------------- + "d" | 22 +(1 row) + -- read_csv SELECT count(r['sepal.length']) FROM read_csv('../../data/iris.csv') r; count diff --git a/test/regression/sql/read_functions.sql b/test/regression/sql/read_functions.sql index be657413..c17c1d95 100644 --- a/test/regression/sql/read_functions.sql +++ b/test/regression/sql/read_functions.sql @@ -11,6 +11,9 @@ SELECT r['sepal.length'], r['file_row_number'], r['filename'] FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true) r ORDER BY r['sepal.length'] LIMIT 5; +-- Supports subscripts +SELECT r['jsoncol'][1], r['arraycol'][2] FROM read_parquet('../../data/indexable.parquet') r; + -- read_csv SELECT count(r['sepal.length']) FROM read_csv('../../data/iris.csv') r; From 7678dbcd201cdcbc1e1fe1f1a92e2fa0ce562630 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 15 Jan 2025 11:07:34 +0100 Subject: [PATCH 16/40] Fix CTAS --- src/pgduckdb_ddl.cpp | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/pgduckdb_ddl.cpp b/src/pgduckdb_ddl.cpp index bae49a7b..6b2e0c8e 100644 --- a/src/pgduckdb_ddl.cpp +++ b/src/pgduckdb_ddl.cpp @@ -42,7 +42,6 @@ extern "C" { * tables we 
force this value to false. */ static bool ctas_skip_data = false; -static List *ctas_original_target_list = NIL; static bool top_level_ddl = true; static ProcessUtility_hook_type prev_process_utility_hook = NULL; @@ -118,24 +117,32 @@ DuckdbHandleDDL(PlannedStmt *pstmt, const char *query_string, ParamListInfo para ctas_skip_data = stmt->into->skipData; stmt->into->skipData = true; } + bool is_create_materialized_view = stmt->into->viewQuery != NULL; bool skips_planning = stmt->into->skipData || is_create_materialized_view; if (!skips_planning) { // No need to plan here as well, because standard_ProcessUtility - // will already plan the query and thus get the correct columns. + // will already plan the query and thus get the correct columns and + // their types. return; } Query *original_query = castNode(Query, stmt->query); - // We need to do hacky things if the targetlist contains - // duckdb.unresolved_type or duckdb.row columns. In those cases we want - // to run the query through duckdb to get the actual result types for - // these queries. We also want to lock in those types creating a new - // query that will always retun them. + // For cases where Postgres does not usually plan the query, we need + // to do hacky things if the targetlist contains duckdb.unresolved_type + // or duckdb.row columns. In those cases we want to run the query + // through duckdb to get the actual result types for these queries, + // which won't happen automatically if the query is not planned by + // Postgres. So we will manually do it here. + // + // If we're doing that anyway we might as well slightly change the + // query so that it always returns the types that are expected by + // Postgres. This is especially useful for materialized views, since + // the query for is likely to be run many times. 
if (!pgduckdb_target_list_contains_unresolved_type_or_row(original_query->targetList)) { - // If the target list doesn't contain duckdb.row or - // duckdb.unresolved_type though, we are done now. + // ... but if the target list doesn't contain duckdb.row or + // duckdb.unresolved_type, there's no need to do any of that. return; } @@ -165,7 +172,16 @@ DuckdbHandleDDL(PlannedStmt *pstmt, const char *query_string, ParamListInfo para List *target_list = plan->planTree->targetlist; stmt->query = (Node *)WrapQueryInDuckdbQueryCall(rewritten_query_copy, target_list); - stmt->into->viewQuery = (Node *)copyObjectImpl(stmt->query); + + if (is_create_materialized_view) { + /* + * If this is a materialized view we also need to update its view + * query. It's important not to set it for regular CTAS queries, + * otherwise Postgres code will assume it's a materialized view + * instead. + */ + stmt->into->viewQuery = (Node *)copyObjectImpl(stmt->query); + } } else if (IsA(parsetree, CreateSchemaStmt) && !pgduckdb::doing_motherduck_sync) { auto stmt = castNode(CreateSchemaStmt, parsetree); @@ -446,12 +462,10 @@ DECLARE_PG_FUNCTION(duckdb_create_table_trigger) { if (IsA(parsetree, CreateTableAsStmt) && !ctas_skip_data) { auto stmt = castNode(CreateTableAsStmt, parsetree); ctas_query = (Query *)stmt->query; - ctas_query->targetList = ctas_original_target_list; } pgduckdb::DuckDBQueryOrThrow(*connection, create_table_string); if (ctas_query) { - const char *ctas_query_string = pgduckdb_get_querydef(ctas_query); std::string insert_string = From 83acc3558d238b68079c8534a8d05d88127eb430 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 15 Jan 2025 11:13:09 +0100 Subject: [PATCH 17/40] Move extension registration to top of subscript transform fuction --- src/pgduckdb_options.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/pgduckdb_options.cpp b/src/pgduckdb_options.cpp index bcaa4b22..52525d8e 100644 --- a/src/pgduckdb_options.cpp +++ 
b/src/pgduckdb_options.cpp @@ -376,6 +376,15 @@ DECLARE_PG_FUNCTION(pgduckdb_recycle_ddb) { void DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice, bool isAssignment) { + /* + * We need to populate our cache for some of the code below. Normally this + * cache is populated at the start of our planner hook, but this function + * is being called from the parser. + */ + if (!pgduckdb::IsExtensionRegistered()) { + elog(ERROR, "BUG: Using duckdb.row but the pg_duckdb extension is not installed"); + } + if (isAssignment) { elog(ERROR, "Assignment to duckdb.row is not supported"); } @@ -421,9 +430,6 @@ DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct P sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, subscript_expr); } - /* TODO: Trigger cache population, probably we should do this somewhere else */ - pgduckdb::IsExtensionRegistered(); - // Set the result type of the subscripting operation sbsref->refrestype = pgduckdb::DuckdbUnresolvedTypeOid(); sbsref->reftypmod = -1; From a4cf3a1295c5f41d6269caf6508acc42ca5453aa Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 15 Jan 2025 11:13:43 +0100 Subject: [PATCH 18/40] Remove pprint --- src/pgduckdb_hooks.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pgduckdb_hooks.cpp b/src/pgduckdb_hooks.cpp index b901802f..b148bbba 100644 --- a/src/pgduckdb_hooks.cpp +++ b/src/pgduckdb_hooks.cpp @@ -190,7 +190,6 @@ DuckdbPlannerHook_Cpp(Query *parse, const char *query_string, int cursor_options if (pgduckdb::IsExtensionRegistered()) { if (NeedsDuckdbExecution(parse)) { IsAllowedStatement(parse, true); - pprint(parse); return DuckdbPlanNode(parse, query_string, cursor_options, bound_params, true); } else if (duckdb_force_execution && IsAllowedStatement(parse) && ContainsFromClause(parse)) { From adc717cd0c972f858864da218a80c771537e773c Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 15 Jan 2025 11:44:46 +0100 
Subject: [PATCH 19/40] More comments and tests

---
 src/pgduckdb_options.cpp                    | 26 +++++++++++++++++----
 test/regression/expected/read_functions.out | 21 +++++++++++++++++
 test/regression/sql/read_functions.sql      | 10 ++++++++
 3 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/src/pgduckdb_options.cpp b/src/pgduckdb_options.cpp
index 52525d8e..3a6b2e23 100644
--- a/src/pgduckdb_options.cpp
+++ b/src/pgduckdb_options.cpp
@@ -373,6 +373,12 @@ DECLARE_PG_FUNCTION(pgduckdb_recycle_ddb) {
 	PG_RETURN_BOOL(true);
 }
 
+/*
+ * DuckdbRowSubscriptTransform is called by the parser when a subscripting
+ * operation is performed on a duckdb.row. It has two main purposes:
+ * 1. Ensure that the row is being indexed using a string literal
+ * 2. Ensure that the return type of this index operation is duckdb.unresolved_type
+ */
 void
 DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool isSlice,
                             bool isAssignment) {
@@ -424,6 +430,12 @@ DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct P
 				                parser_errposition(pstate, expr_location)));
 			}
 
+			Const *subscript_const = castNode(Const, subscript_expr);
+			if (subscript_const->constisnull) {
+				ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("duckdb.row subscript cannot be NULL"),
+				                parser_errposition(pstate, expr_location)));
+			}
+
 			subscript_expr = coerced_expr;
 			first = false;
 		}
@@ -435,6 +447,12 @@ DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct P
 	sbsref->reftypmod = -1;
 }
 
+/*
+ * DuckdbRowSubscriptExecSetup is called by the executor when a subscripting
+ * operation is performed on a duckdb.row. This should never happen, because
+ * any query that contains a duckdb.row should automatically use DuckDB
+ * execution.
+ */ void DuckdbRowSubscriptExecSetup(const SubscriptingRef * /*sbsref*/, SubscriptingRefState * /*sbsrefstate*/, SubscriptExecSteps * /*exprstate*/) { @@ -449,6 +467,10 @@ static SubscriptRoutines duckdb_subscript_routines = { .store_leakproof = true, }; +DECLARE_PG_FUNCTION(duckdb_row_subscript) { + PG_RETURN_POINTER(&duckdb_subscript_routines); +} + DECLARE_PG_FUNCTION(duckdb_row_in) { elog(ERROR, "Creating the duckdb.row type is not supported"); } @@ -465,10 +487,6 @@ DECLARE_PG_FUNCTION(duckdb_unresolved_type_out) { return textout(fcinfo); } -DECLARE_PG_FUNCTION(duckdb_row_subscript) { - PG_RETURN_POINTER(&duckdb_subscript_routines); -} - DECLARE_PG_FUNCTION(duckdb_unresolved_type_operator) { elog(ERROR, "Unresolved duckdb types cannot be used by the Postgres executor"); } diff --git a/test/regression/expected/read_functions.out b/test/regression/expected/read_functions.out index 35871439..be84b12c 100644 --- a/test/regression/expected/read_functions.out +++ b/test/regression/expected/read_functions.out @@ -115,6 +115,27 @@ SELECT * FROM (SELECT r['column00'] AS col1 FROM read_csv('../../data/web_page.c 1 (1 row) +-- Only simple string literals are supported as column names +SELECT r[NULL] FROM read_csv('../../data/web_page.csv') r limit 1; +ERROR: duckdb.row subscript cannot be NULL +LINE 1: SELECT r[NULL] FROM read_csv('../../data/web_page.csv') r li... + ^ +SELECT r[123] FROM read_csv('../../data/web_page.csv') r limit 1; +ERROR: duckdb.row subscript must have text type +LINE 1: SELECT r[123] FROM read_csv('../../data/web_page.csv') r lim... + ^ +SELECT r[3.14] FROM read_csv('../../data/web_page.csv') r limit 1; +ERROR: duckdb.row subscript must have text type +LINE 1: SELECT r[3.14] FROM read_csv('../../data/web_page.csv') r li... 
+ ^ +SELECT r[q.col] +FROM + read_csv('../../data/web_page.csv') r, + (SELECT 'abc'::text as col) q +LIMIT 1; +ERROR: duckdb.row subscript must be a constant +LINE 1: SELECT r[q.col] + ^ -- delta_scan SELECT duckdb.install_extension('delta'); install_extension diff --git a/test/regression/sql/read_functions.sql b/test/regression/sql/read_functions.sql index c17c1d95..ba3bc7f9 100644 --- a/test/regression/sql/read_functions.sql +++ b/test/regression/sql/read_functions.sql @@ -54,6 +54,16 @@ SELECT * FROM (SELECT r['column00'] FROM read_csv('../../data/web_page.csv') r l -- If you give it a different alias then that alias is propegated though. SELECT * FROM (SELECT r['column00'] AS col1 FROM read_csv('../../data/web_page.csv') r limit 1); +-- Only simple string literals are supported as column names +SELECT r[NULL] FROM read_csv('../../data/web_page.csv') r limit 1; +SELECT r[123] FROM read_csv('../../data/web_page.csv') r limit 1; +SELECT r[3.14] FROM read_csv('../../data/web_page.csv') r limit 1; +SELECT r[q.col] +FROM + read_csv('../../data/web_page.csv') r, + (SELECT 'abc'::text as col) q +LIMIT 1; + -- delta_scan SELECT duckdb.install_extension('delta'); From 595547101f0e27022124a659aca5aa8d3578057b Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 15 Jan 2025 12:37:45 +0100 Subject: [PATCH 20/40] Even more comments and tests --- src/pgduckdb_ruleutils.cpp | 29 ++++- test/regression/expected/read_functions.out | 113 ++++++++++++++++++++ test/regression/sql/read_functions.sql | 76 +++++++++++++ 3 files changed, 217 insertions(+), 1 deletion(-) diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index 99a66bdb..433213b0 100644 --- a/src/pgduckdb_ruleutils.cpp +++ b/src/pgduckdb_ruleutils.cpp @@ -117,7 +117,34 @@ pgduckdb_duckdb_row_subscript_var(Expr *expr) { } /* - * Returns a list of duckdb + * In our DuckDB queries we sometimes want to use "SELECT *", when selecting + * from a function like read_parquet. 
That way DuckDB can figure out the actual
+ * columns that it should return. Sadly Postgres expands the * character from
+ * the original query to a list of columns. So we need to put a star, any time
+ * we want to replace duckdb.row columns with a "*" in the duckdb query.
+ *
+ * Since the original "*" might expand to many columns we need to remove all of
+ * those, when putting a "*" back. To do so we try to find runs of Vars from
+ * the same FROM entry, aka RangeTableEntry (RTE) that we expect were created
+ * with a *.
+ *
+ * This function tries to find the indexes of the first column for each of
+ * those runs. It does so using this heuristic:
+ *
+ * 1. Find a column with varattno == 1 (aka the first column from an RTE)
+ * 2. Find a consecutive run of columns from the same RTE with varattnos that
+ *    keep increasing by 1.
+ * 3. Once we find a duckdb.row column in any of those consecutive columns, we
+ *    assume this run was created using a star expression and we track the
+ *    initial index. Then we start at 1 again to find the next run.
+ *
+ * NOTE: This function does not find the end of such runs, that's left as an
+ * exercise for the caller. It should be pretty easy for the caller to do
+ * that, because they need to remove such columns anyway. The main reason this
+ * function exists is so that the caller doesn't have to scan backwards to
+ * find the start of a run once it finds a duckdb.row column. Scanning
+ * backwards is difficult for the caller because it wants to write out columns
+ * to the DuckDB query as it consumes them.
*/ List * pgduckdb_star_start_vars(List *target_list) { diff --git a/test/regression/expected/read_functions.out b/test/regression/expected/read_functions.out index be84b12c..1689428e 100644 --- a/test/regression/expected/read_functions.out +++ b/test/regression/expected/read_functions.out @@ -36,6 +36,119 @@ SELECT r['jsoncol'][1], r['arraycol'][2] FROM read_parquet('../../data/indexable "d" | 22 (1 row) +-- Subqueries correctly expand *, in case of multiple columns. +SELECT * FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +); + prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix +-----------+--------------+-------------+--------------+-------------+---------+---------------- + something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else +(1 row) + +-- NOTE: A single "r" is equivalent to a *. The prefix and postfix columns are +-- not explicitely selected, but still show up in the result. This is +-- considered a bug, but it's one that we cannot easily solve because the "r" +-- reference does not exist in the DuckDB query at all, so there's no way to +-- reference only the columns coming from that part of the subquery. Very few +-- people should be impacted by this though. +SELECT r FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +); + prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix +-----------+--------------+-------------+--------------+-------------+---------+---------------- + something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else +(1 row) + +-- ... but if you manually add the expected columns then they are merged into +-- the new star expansion. 
+SELECT prefix, r FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +); + prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix +-----------+--------------+-------------+--------------+-------------+---------+---------------- + something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else +(1 row) + +SELECT prefix, r, postfix FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +); + prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix +-----------+--------------+-------------+--------------+-------------+---------+---------------- + something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else +(1 row) + +-- This requires the prefix columns to be there though. If the prefix columns +-- are not there the postfix columns don't get merged into the new star +-- expansion automatically. +-- NOTE: Would be nice to fix this, but it's not a high priority. +SELECT r, postfix FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +); + prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix | postfix +-----------+--------------+-------------+--------------+-------------+---------+----------------+---------------- + something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else | something else +(1 row) + +-- If you swap them around, they will get duplicated though. 
For the +SELECT postfix, r, prefix FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +); + postfix | prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix | prefix +----------------+-----------+--------------+-------------+--------------+-------------+---------+----------------+----------- + something else | something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else | something +(1 row) + +-- Joining two subqueries a single * works as expected. +SELECT * +FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +) q1, ( + SELECT * FROM read_parquet('../../data/unsigned_types.parquet') r +) q2; + prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix | utinyint | usmallint | uinteger +-----------+--------------+-------------+--------------+-------------+---------+----------------+----------+-----------+------------ + something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else | 255 | 65535 | 4294967295 +(1 row) + +-- Combining multiple read_parquet calls in a single query also works as +-- expected. They are not expanded to multiple *'s. +-- BUG: This currently doesn't work correctly! 
+SELECT 'something' as prefix, *, 'something else' as postfix +FROM read_parquet('../../data/iris.parquet') r, + read_parquet('../../data/unsigned_types.parquet') r2 +LIMIT 1; + prefix | sepal.length | sepal.width | petal.length | petal.width | variety | utinyint | usmallint | uinteger | sepal.length | sepal.width | petal.length | petal.width | variety | utinyint | usmallint | uinteger | postfix +-----------+--------------+-------------+--------------+-------------+---------+----------+-----------+------------+--------------+-------------+--------------+-------------+---------+----------+-----------+------------+---------------- + something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | 255 | 65535 | 4294967295 | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | 255 | 65535 | 4294967295 | something else +(1 row) + +-- And also when done in a subquery +-- BUG: Broken in the same way as the above query. +SELECT * FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r, + read_parquet('../../data/unsigned_types.parquet') r2 + LIMIT 1 +); + prefix | sepal.length | sepal.width | petal.length | petal.width | variety | utinyint | usmallint | uinteger | sepal.length_1 | sepal.width_1 | petal.length_1 | petal.width_1 | variety_1 | utinyint_1 | usmallint_1 | uinteger_1 | postfix +-----------+--------------+-------------+--------------+-------------+---------+----------+-----------+------------+----------------+---------------+----------------+---------------+-----------+------------+-------------+------------+---------------- + something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | 255 | 65535 | 4294967295 | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | 255 | 65535 | 4294967295 | something else +(1 row) + -- read_csv SELECT count(r['sepal.length']) FROM read_csv('../../data/iris.csv') r; count diff --git a/test/regression/sql/read_functions.sql b/test/regression/sql/read_functions.sql index ba3bc7f9..11c18862 100644 --- a/test/regression/sql/read_functions.sql +++ 
b/test/regression/sql/read_functions.sql @@ -14,6 +14,82 @@ SELECT r['sepal.length'], r['file_row_number'], r['filename'] -- Supports subscripts SELECT r['jsoncol'][1], r['arraycol'][2] FROM read_parquet('../../data/indexable.parquet') r; +-- Subqueries correctly expand *, in case of multiple columns. +SELECT * FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +); + +-- NOTE: A single "r" is equivalent to a *. The prefix and postfix columns are +-- not explicitely selected, but still show up in the result. This is +-- considered a bug, but it's one that we cannot easily solve because the "r" +-- reference does not exist in the DuckDB query at all, so there's no way to +-- reference only the columns coming from that part of the subquery. Very few +-- people should be impacted by this though. +SELECT r FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +); + +-- ... but if you manually add the expected columns then they are merged into +-- the new star expansion. +SELECT prefix, r FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +); +SELECT prefix, r, postfix FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +); + +-- This requires the prefix columns to be there though. If the prefix columns +-- are not there the postfix columns don't get merged into the new star +-- expansion automatically. +-- NOTE: Would be nice to fix this, but it's not a high priority. +SELECT r, postfix FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +); + +-- If you swap them around, they will get duplicated though. 
For the +SELECT postfix, r, prefix FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +); + +-- Joining two subqueries a single * works as expected. +SELECT * +FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r + LIMIT 1 +) q1, ( + SELECT * FROM read_parquet('../../data/unsigned_types.parquet') r +) q2; + +-- Combining multiple read_parquet calls in a single query also works as +-- expected. They are not expanded to multiple *'s. +-- BUG: This currently doesn't work correctly! +SELECT 'something' as prefix, *, 'something else' as postfix +FROM read_parquet('../../data/iris.parquet') r, + read_parquet('../../data/unsigned_types.parquet') r2 +LIMIT 1; + +-- And also when done in a subquery +-- BUG: Broken in the same way as the above query. +SELECT * FROM ( + SELECT 'something' as prefix, *, 'something else' as postfix + FROM read_parquet('../../data/iris.parquet') r, + read_parquet('../../data/unsigned_types.parquet') r2 + LIMIT 1 +); + -- read_csv SELECT count(r['sepal.length']) FROM read_csv('../../data/iris.csv') r; From c238af7d19fc0b1f553c000f55ccdc766cce0de2 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 15 Jan 2025 12:40:52 +0100 Subject: [PATCH 21/40] Add missing file --- test/regression/data/indexable.parquet | Bin 0 -> 390 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test/regression/data/indexable.parquet diff --git a/test/regression/data/indexable.parquet b/test/regression/data/indexable.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d19e8cddebd749868c0df8a6f29f97ae3bddf145 GIT binary patch literal 390 zcmZ`$O-sW-5S^?`jJ4jJWg|VrK$ZxhG-5P|UX-3hqK3d5@W1-h~M0V-O%CV4?|dc*6jI89nL`ZsrD#o4Zf6J;OL4$`9=*=b8rG z&~$TWpSqwrrb1>vMj#LJRrZ#r&t3LXEE42^eUHiA*RH9vrt%a-Oj=3XChs(o0TUuF zc~y2xWTc_FMO7?{dg-|!m_4uq1q1Bc2=+z>nPB2t+$w7k*{4SCgyOe@A4 
zE8iA=Pm{hWX5wDW#**uBlIy7P7N${reC`RREb#pag`QkF>!R(-rgqlhFdUvbe!42F V+?k)pqx2#l#q$$T2Edd)@CEsNMEL*! literal 0 HcmV?d00001 From 09b7f56f0884b19cfcc4616456de32427fce5692 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Fri, 17 Jan 2025 10:18:52 +0100 Subject: [PATCH 22/40] Revert "Generate query that works for iceberg scans" This reverts commit a799f7ee0691ef9defa93332416343bb4c62283f. --- src/vendor/pg_ruleutils_17.c | 37 ++------------------- test/regression/expected/read_functions.out | 36 ++++++++++---------- 2 files changed, 19 insertions(+), 54 deletions(-) diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index d3de77e5..60509067 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -7592,14 +7592,6 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) elog(ERROR, "invalid attnum %d for relation \"%s\"", attnum, rte->eref->aliasname); if (pgduckdb_var_is_duckdb_row(var)) { - if (rte->rtekind == RTE_FUNCTION) { - if (istoplevel) { - appendStringInfoChar(buf, '*'); - return NULL; - } - return NULL; - } - if (istoplevel) { /* If the duckdb row is at the top level target list of a * select, then we want to generate r.*, to unpack all the @@ -9138,29 +9130,6 @@ get_rule_expr(Node *node, deparse_context *context, */ Assert(sbsref->refupperindexpr); Assert(!sbsref->reflowerindexpr); - Var *var = (Var *) sbsref->refexpr; - - deparse_namespace* dpns = (deparse_namespace *) list_nth(context->namespaces, - var->varlevelsup); - int varno; - - /* - * If we have a syntactic referent for the Var, and we're working from a - * parse tree, prefer to use the syntactic referent. Otherwise, fall back - * on the semantic referent. (Forcing use of the semantic referent when - * printing plan trees is a design choice that's perhaps more motivated by - * backwards compatibility than anything else. But it does have the - * advantage of making plans more explicit.) 
- */ - if (var->varnosyn > 0 && dpns->plan == NULL) - { - varno = var->varnosyn; - } - else - { - varno = var->varno; - } - RangeTblEntry* rte = rt_fetch(varno, dpns->rtable); Oid typoutput; bool typIsVarlena; Const *constval = castNode(Const, linitial(sbsref->refupperindexpr)); @@ -9168,10 +9137,8 @@ get_rule_expr(Node *node, deparse_context *context, &typoutput, &typIsVarlena); char *extval = OidOutputFunctionCall(typoutput, constval->constvalue); - if (rte->rtekind != RTE_FUNCTION) { - appendStringInfoChar(buf, '.'); - } - appendStringInfoString(context->buf, quote_identifier(extval)); + + appendStringInfo(context->buf, ".%s", quote_identifier(extval)); if (list_length(sbsref->refupperindexpr) > 1) { /* diff --git a/test/regression/expected/read_functions.out b/test/regression/expected/read_functions.out index 1689428e..8d912d21 100644 --- a/test/regression/expected/read_functions.out +++ b/test/regression/expected/read_functions.out @@ -31,9 +31,9 @@ SELECT r['sepal.length'], r['file_row_number'], r['filename'] -- Supports subscripts SELECT r['jsoncol'][1], r['arraycol'][2] FROM read_parquet('../../data/indexable.parquet') r; - jsoncol[1] | arraycol[2] -------------+------------- - "d" | 22 + r.jsoncol[1] | r.arraycol[2] +--------------+--------------- + "d" | 22 (1 row) -- Subqueries correctly expand *, in case of multiple columns. 
@@ -131,9 +131,9 @@ SELECT 'something' as prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r, read_parquet('../../data/unsigned_types.parquet') r2 LIMIT 1; - prefix | sepal.length | sepal.width | petal.length | petal.width | variety | utinyint | usmallint | uinteger | sepal.length | sepal.width | petal.length | petal.width | variety | utinyint | usmallint | uinteger | postfix ------------+--------------+-------------+--------------+-------------+---------+----------+-----------+------------+--------------+-------------+--------------+-------------+---------+----------+-----------+------------+---------------- - something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | 255 | 65535 | 4294967295 | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | 255 | 65535 | 4294967295 | something else + prefix | sepal.length | sepal.width | petal.length | petal.width | variety | utinyint | usmallint | uinteger | postfix +-----------+--------------+-------------+--------------+-------------+---------+----------+-----------+------------+---------------- + something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | 255 | 65535 | 4294967295 | something else (1 row) -- And also when done in a subquery @@ -144,9 +144,9 @@ SELECT * FROM ( read_parquet('../../data/unsigned_types.parquet') r2 LIMIT 1 ); - prefix | sepal.length | sepal.width | petal.length | petal.width | variety | utinyint | usmallint | uinteger | sepal.length_1 | sepal.width_1 | petal.length_1 | petal.width_1 | variety_1 | utinyint_1 | usmallint_1 | uinteger_1 | postfix ------------+--------------+-------------+--------------+-------------+---------+----------+-----------+------------+----------------+---------------+----------------+---------------+-----------+------------+-------------+------------+---------------- - something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | 255 | 65535 | 4294967295 | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | 255 | 65535 | 4294967295 | something else + prefix | sepal.length | sepal.width | petal.length | petal.width | 
variety | utinyint | usmallint | uinteger | postfix +-----------+--------------+-------------+--------------+-------------+---------+----------+-----------+------------+---------------- + something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | 255 | 65535 | 4294967295 | something else (1 row) -- read_csv @@ -277,11 +277,10 @@ SELECT duckdb.install_extension('iceberg'); (1 row) SELECT COUNT(r['l_orderkey']) FROM iceberg_scan('../../data/lineitem_iceberg', allow_moved_paths => true) r; - count -------- - 51793 -(1 row) - +ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Binder Error: Referenced table "r" not found! +Candidate tables: "iceberg_scan_data" +LINE 1: SELECT count(r.l_orderkey) AS count FROM system.main... + ^ -- TPCH query #6 SELECT sum(r['l_extendedprice'] * r['l_discount']) as revenue @@ -293,11 +292,10 @@ WHERE AND r['l_discount'] between 0.08 - 0.01 and 0.08 + 0.01 AND r['l_quantity'] < 25 LIMIT 1; - revenue --------------- - 1520873.7806 -(1 row) - +ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Binder Error: Referenced table "r" not found! +Candidate tables: "iceberg_scan_data" +LINE 1: ...C allow_moved_paths => true) r WHERE ((r.l_shipdate >= '1997-01-01'::date) AND... 
+ ^ SELECT * FROM iceberg_snapshots('../../data/lineitem_iceberg'); sequence_number | snapshot_id | timestamp_ms | manifest_list -----------------+---------------------+------------------------------+------------------------------------------------------------------------------------------------ From 1c9dddaa606c0d5e1ec6c4f0bf588985e5866618 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Fri, 17 Jan 2025 14:31:43 +0100 Subject: [PATCH 23/40] Fix iceberg_scan in a better way --- src/vendor/pg_ruleutils_17.c | 14 ++++++++++++++ test/regression/expected/read_functions.out | 18 ++++++++++-------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index 60509067..f706d0c4 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -10615,6 +10615,16 @@ get_func_expr(FuncExpr *expr, deparse_context *context, nargs++; } + /* + * iceberg_scan needs to be wrapped in a subquery to resolve a bug where + * aliasses on iceberg_scan are ignored: + * https://github.com/duckdb/duckdb-iceberg/issues/44 + */ + bool is_iceberg_scan = strcmp(pgduckdb_function_name(funcoid), "system.main.iceberg_scan") == 0; + if (is_iceberg_scan) { + appendStringInfoString(buf, "(FROM "); + } + appendStringInfo(buf, "%s(", generate_function_name(funcoid, nargs, argnames, argtypes, @@ -10631,6 +10641,10 @@ get_func_expr(FuncExpr *expr, deparse_context *context, get_rule_expr((Node *) lfirst(l), context, true); } appendStringInfoChar(buf, ')'); + + if (is_iceberg_scan) { + appendStringInfoChar(buf, ')'); + } } /* diff --git a/test/regression/expected/read_functions.out b/test/regression/expected/read_functions.out index 8d912d21..10d0807b 100644 --- a/test/regression/expected/read_functions.out +++ b/test/regression/expected/read_functions.out @@ -277,10 +277,11 @@ SELECT duckdb.install_extension('iceberg'); (1 row) SELECT COUNT(r['l_orderkey']) FROM iceberg_scan('../../data/lineitem_iceberg', 
allow_moved_paths => true) r; -ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Binder Error: Referenced table "r" not found! -Candidate tables: "iceberg_scan_data" -LINE 1: SELECT count(r.l_orderkey) AS count FROM system.main... - ^ + count +------- + 51793 +(1 row) + -- TPCH query #6 SELECT sum(r['l_extendedprice'] * r['l_discount']) as revenue @@ -292,10 +293,11 @@ WHERE AND r['l_discount'] between 0.08 - 0.01 and 0.08 + 0.01 AND r['l_quantity'] < 25 LIMIT 1; -ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Binder Error: Referenced table "r" not found! -Candidate tables: "iceberg_scan_data" -LINE 1: ...C allow_moved_paths => true) r WHERE ((r.l_shipdate >= '1997-01-01'::date) AND... - ^ + revenue +-------------- + 1520873.7806 +(1 row) + SELECT * FROM iceberg_snapshots('../../data/lineitem_iceberg'); sequence_number | snapshot_id | timestamp_ms | manifest_list -----------------+---------------------+------------------------------+------------------------------------------------------------------------------------------------ From 259e0f96d393ddda0e3ffb069a632c6134171e8f Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Fri, 17 Jan 2025 14:40:38 +0100 Subject: [PATCH 24/40] Update iceberg_scan fix --- src/vendor/pg_ruleutils_17.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index f706d0c4..23702362 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -10616,11 +10616,19 @@ get_func_expr(FuncExpr *expr, deparse_context *context, } /* - * iceberg_scan needs to be wrapped in a subquery to resolve a bug where - * aliasses on iceberg_scan are ignored: + * iceberg_scan needs to be wrapped in an additianol subquery to resolve a + * bug where aliasses on iceberg_scan are ignored: * https://github.com/duckdb/duckdb-iceberg/issues/44 + * + * By wrapping the iceberg_scan call the alias is given to the subquery, + * 
instead of th call. This subquery is easily optimized away by DuckDB, + * because it doesn't do anything. + * + * TODO: Probably check this in a bit more efficient way and move it to + * pgduckdb_ruleutils.cpp */ - bool is_iceberg_scan = strcmp(pgduckdb_function_name(funcoid), "system.main.iceberg_scan") == 0; + char *duckdb_function_name = pgduckdb_function_name(funcoid); + bool is_iceberg_scan = duckdb_function_name != NULL && strcmp(duckdb_function_name, "system.main.iceberg_scan") == 0; if (is_iceberg_scan) { appendStringInfoString(buf, "(FROM "); } From 13c7a0e60a6c0c04b87fd69176d3d098d78035eb Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Fri, 17 Jan 2025 15:05:34 +0100 Subject: [PATCH 25/40] Also wrap "query" in a subquery --- src/vendor/pg_ruleutils_17.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index 23702362..ff65c6dc 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -10624,12 +10624,19 @@ get_func_expr(FuncExpr *expr, deparse_context *context, * instead of th call. This subquery is easily optimized away by DuckDB, * because it doesn't do anything. * + * This problem is also true for the "query" function, which we use when + * creating materialized views and CTAS. 
+ * https://github.com/duckdb/duckdb/issues/15570#issuecomment-2598419474 + * * TODO: Probably check this in a bit more efficient way and move it to * pgduckdb_ruleutils.cpp */ char *duckdb_function_name = pgduckdb_function_name(funcoid); - bool is_iceberg_scan = duckdb_function_name != NULL && strcmp(duckdb_function_name, "system.main.iceberg_scan") == 0; - if (is_iceberg_scan) { + bool function_needs_subquery = duckdb_function_name != NULL && ( + strcmp(duckdb_function_name, "system.main.iceberg_scan") == 0 + || strcmp(duckdb_function_name, "system.main.query") == 0 + ); + if (function_needs_subquery) { appendStringInfoString(buf, "(FROM "); } @@ -10650,7 +10657,7 @@ get_func_expr(FuncExpr *expr, deparse_context *context, } appendStringInfoChar(buf, ')'); - if (is_iceberg_scan) { + if (function_needs_subquery) { appendStringInfoChar(buf, ')'); } } From 8700df4ac419c3ecc273ae11ddd4e297bc7578b2 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Sat, 18 Jan 2025 10:15:15 +0100 Subject: [PATCH 26/40] Update comment to reflect reality --- src/vendor/pg_ruleutils_17.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index ff65c6dc..926bcbb2 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -9123,10 +9123,9 @@ get_rule_expr(Node *node, deparse_context *context, else if (IsA(sbsref->refexpr, Var) && pgduckdb_var_is_duckdb_row((Var*) sbsref->refexpr)) { /* Subscript expressions into the duckdb.row type we want to * change to regular column references in the DuckDB query. - * Both because it's generally more common and thus - * results in better optimized queries, and because - * iceberg_scan doesn't support the subscripting syntax on - * its results. + * The main reason we do this is so that DuckDB generates + * nicer column names, i.e. 
without the square brackets: + * "mycolumn" instead of "r['mycolumn']" */ Assert(sbsref->refupperindexpr); Assert(!sbsref->reflowerindexpr); From b14a98e84310fc676e9ff6b1b0c37ec515374a9f Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 11:59:23 +0100 Subject: [PATCH 27/40] Simplify and comment --- include/pgduckdb/pgduckdb_ruleutils.h | 2 ++ src/pgduckdb_ruleutils.cpp | 47 +++++++++++++++++++++++++++ src/vendor/pg_ruleutils_17.c | 35 ++++++-------------- 3 files changed, 59 insertions(+), 25 deletions(-) diff --git a/include/pgduckdb/pgduckdb_ruleutils.h b/include/pgduckdb/pgduckdb_ruleutils.h index 28233b04..724eb0e6 100644 --- a/include/pgduckdb/pgduckdb_ruleutils.h +++ b/include/pgduckdb/pgduckdb_ruleutils.h @@ -14,5 +14,7 @@ bool pgduckdb_func_returns_duckdb_row(RangeTblFunction *rtfunc); bool pgduckdb_target_list_contains_unresolved_type_or_row(List *target_list); Var *pgduckdb_duckdb_row_subscript_var(Expr *expr); List *pgduckdb_star_start_vars(List *target_list); +bool pgduckdb_function_needs_subquery(Oid function_oid); +int pgduckdb_show_type(Const *constval, int original_showtype); extern bool processed_targetlist; diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index 433213b0..d7dfaed3 100644 --- a/src/pgduckdb_ruleutils.cpp +++ b/src/pgduckdb_ruleutils.cpp @@ -184,6 +184,53 @@ pgduckdb_star_start_vars(List *target_list) { return star_start_indexes; } +/* + * iceberg_scan needs to be wrapped in an additianol subquery to resolve a + * bug where aliasses on iceberg_scan are ignored: + * https://github.com/duckdb/duckdb-iceberg/issues/44 + * + * By wrapping the iceberg_scan call the alias is given to the subquery, + * instead of th call. This subquery is easily optimized away by DuckDB, + * because it doesn't do anything. + * + * This problem is also true for the "query" function, which we use when + * creating materialized views and CTAS. 
+ * https://github.com/duckdb/duckdb/issues/15570#issuecomment-2598419474 + * + * TODO: Probably check this in a bit more efficient way and move it to + * pgduckdb_ruleutils.cpp + */ +bool +pgduckdb_function_needs_subquery(Oid function_oid) { + if (!pgduckdb::IsDuckdbOnlyFunction(function_oid)) { + return false; + } + + auto func_name = get_func_name(function_oid); + if (strcmp(func_name, "iceberg_scan") == 0) { + return true; + } + + if (strcmp(func_name, "query") == 0) { + return true; + } + return false; +} + +/* + * We never want to show the unresolved_type in DuckDB query. The + * unrosolved_type does not actually exist in DuckDB, we only use it to keep + * the Postgres parser happy. DuckDB can simply figure out the correct type + * itself without an explicit cast. + */ +int +pgduckdb_show_type(Const *constval, int original_showtype) { + if (pgduckdb_is_unresolved_type(constval->consttype)) { + return -1; + } + return original_showtype; +} + /* * Given a postgres schema name, this returns a list of two elements: the first * is the DuckDB database name and the second is the duckdb schema name. These diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index 926bcbb2..7b519221 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -10614,27 +10614,7 @@ get_func_expr(FuncExpr *expr, deparse_context *context, nargs++; } - /* - * iceberg_scan needs to be wrapped in an additianol subquery to resolve a - * bug where aliasses on iceberg_scan are ignored: - * https://github.com/duckdb/duckdb-iceberg/issues/44 - * - * By wrapping the iceberg_scan call the alias is given to the subquery, - * instead of th call. This subquery is easily optimized away by DuckDB, - * because it doesn't do anything. - * - * This problem is also true for the "query" function, which we use when - * creating materialized views and CTAS. 
- * https://github.com/duckdb/duckdb/issues/15570#issuecomment-2598419474 - * - * TODO: Probably check this in a bit more efficient way and move it to - * pgduckdb_ruleutils.cpp - */ - char *duckdb_function_name = pgduckdb_function_name(funcoid); - bool function_needs_subquery = duckdb_function_name != NULL && ( - strcmp(duckdb_function_name, "system.main.iceberg_scan") == 0 - || strcmp(duckdb_function_name, "system.main.query") == 0 - ); + bool function_needs_subquery = pgduckdb_function_needs_subquery(funcoid); if (function_needs_subquery) { appendStringInfoString(buf, "(FROM "); } @@ -11247,9 +11227,7 @@ get_const_expr(Const *constval, deparse_context *context, int showtype) char *extval; bool needlabel = false; - if (pgduckdb_is_unresolved_type(constval->consttype)) { - showtype = -1; - } + showtype = pgduckdb_show_type(constval, showtype); if (constval->constisnull) { @@ -12291,7 +12269,14 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) get_rte_alias(rte, varno, false, context); if (pgduckdb_func_returns_duckdb_row(rtfunc1)) { - /* Don't print column aliases for functions that return a duckdb.row */ + /* + * We never want to print column aliases for functions that return + * a duckdb.row. The common pattern is for people to not provide an + * explicit column alias (i.e. "r" becomes "r(r)"). This obviously + * is a naming collision and DuckDB resolves that in the opposite + * way that we want. Never adding column aliases for duckdb.row + * avoids this conflict. 
+ */ } /* Print the column definitions or aliases, if needed */ else if (rtfunc1 && rtfunc1->funccolnames != NIL) From fb4813359b36dc851ad9b48906180dd55e882c1a Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 14:57:28 +0100 Subject: [PATCH 28/40] Simplify star reconstruction logic and move it almost fully to pgduckdb_ruleutils.cpp --- include/pgduckdb/pgduckdb_ruleutils.h | 10 +- src/pgduckdb_ruleutils.cpp | 167 +++++++++++++++++++------- src/vendor/pg_ruleutils_17.c | 40 +----- 3 files changed, 136 insertions(+), 81 deletions(-) diff --git a/include/pgduckdb/pgduckdb_ruleutils.h b/include/pgduckdb/pgduckdb_ruleutils.h index 724eb0e6..b56f34a8 100644 --- a/include/pgduckdb/pgduckdb_ruleutils.h +++ b/include/pgduckdb/pgduckdb_ruleutils.h @@ -1,4 +1,12 @@ #include "postgres.h" +#include "pgduckdb/vendor/pg_list.hpp" + +typedef struct StarReconstructionContext { + List *target_list; + int varno_star; + int varattno_star; + bool added_current_star; +} StarReconstructionContext; char *pgduckdb_relation_name(Oid relid); char *pgduckdb_function_name(Oid function_oid); @@ -13,7 +21,7 @@ bool pgduckdb_var_is_duckdb_row(Var *var); bool pgduckdb_func_returns_duckdb_row(RangeTblFunction *rtfunc); bool pgduckdb_target_list_contains_unresolved_type_or_row(List *target_list); Var *pgduckdb_duckdb_row_subscript_var(Expr *expr); -List *pgduckdb_star_start_vars(List *target_list); +bool pgduckdb_reconstruct_star_step(StarReconstructionContext *ctx, ListCell *tle_cell); bool pgduckdb_function_needs_subquery(Oid function_oid); int pgduckdb_show_type(Const *constval, int original_showtype); diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index d7dfaed3..d8dccabd 100644 --- a/src/pgduckdb_ruleutils.cpp +++ b/src/pgduckdb_ruleutils.cpp @@ -20,6 +20,7 @@ extern "C" { #include "storage/lockdefs.h" #include "pgduckdb/vendor/pg_ruleutils.h" +#include "pgduckdb/pgduckdb_ruleutils.h" } #include "pgduckdb/pgduckdb.h" @@ -116,6 +117,75 @@ 
pgduckdb_duckdb_row_subscript_var(Expr *expr) { return refexpr; } +/* + * pgduckdb_check_for_star_start tries to figure out if this is tle_cell + * contains a Var that is the start of a run of Vars that should be + * reconstructed as a star. If that's the case it sets the varno_star and + * varattno_star of the ctx. + */ +static void +pgduckdb_check_for_star_start(StarReconstructionContext *ctx, ListCell *tle_cell) { + TargetEntry *first_tle = (TargetEntry *)lfirst(tle_cell); + + if (!IsA(first_tle->expr, Var)) { + /* Not a Var so we're not at the start of a run of Vars. */ + return; + } + + Var *first_var = (Var *)first_tle->expr; + + if (first_var->varattno != 1) { + /* If we don't have varattno 1, then we are not at a run of Vars */ + return; + } + + /* + * We found a Var that could potentially be the first of a run of Vars for + * which we have to reconstruct the star. To check if this is indeed the + * case we see if we can find a duckdb.row in this list of Vars. + */ + int varno = first_var->varno; + int varattno = first_var->varattno; + + do { + TargetEntry *tle = (TargetEntry *)lfirst(tle_cell); + + if (!IsA(tle->expr, Var)) { + /* + * We found the end of this run of Vars, by finding something else + * than a Var. + */ + return; + } + + Var *var = (Var *)tle->expr; + + if (var->varno != varno) { + /* A Var from a different RTE */ + return; + } + + if (var->varattno != varattno) { + /* Not a consecutive Var */ + return; + } + if (pgduckdb_var_is_duckdb_row(var)) { + /* + * If we have a duckdb.row, then we found a run of Vars that we + * have to reconstruct the star for. + */ + + ctx->varno_star = varno; + ctx->varattno_star = first_var->varattno; + ctx->added_current_star = false; + return; + } + + /* Look for the next Var in the run */ + varattno++; + } while ((tle_cell = lnext(ctx->target_list, tle_cell))); +} + /* * In our DuckDB queries we sometimes want to use "SELECT *", when selecting * from a function like read_parquet. 
That way DuckDB can figure out the actual @@ -128,60 +198,69 @@ pgduckdb_duckdb_row_subscript_var(Expr *expr) { * the same FROM entry, aka RangeTableEntry (RTE) that we expect were created * with a *. * - * This function tries to find the indexes of the first column for each of - * those runs. It does so using this heuristic: - * - * 1. Find a column with varattno == 1 (aka the first column from an RTE) - * 2. Find a consecutive run of columns from the same RTE with varattnos that - * keep increasing by 1. - * 3. Once we find a duckdb.row column in any of those consecutive columns, we - * assume this run was created using a star expression and we track the - * initial index. Then we start at 1 again to find the next run. - * - * NOTE: This function does not find the end of such runs, that's left as an - * excersice for the caller. It should be pretty easy for the caller to do - * that, because they need to remove such columns anyway. The main reason this - * function existis is so that the caller doesn't have to scan backwards to - * find the start of a run once it finds a duckdb.row column. Scanning - * backwards is difficult for the caller because it wants to write out columns - * to the DuckDB query as it consumes them. + * This function returns true if we should skip writing this tle_cell to the + * DuckDB query because it is part of a run of Vars that will be reconstructed + * as a star. */ -List * -pgduckdb_star_start_vars(List *target_list) { - List *star_start_indexes = NIL; - Var *possible_star_start_var = NULL; - int possible_star_start_var_index = 0; +bool +pgduckdb_reconstruct_star_step(StarReconstructionContext *ctx, ListCell *tle_cell) { + /* Detect start of a Var run that should be reconstructed to a star */ + pgduckdb_check_for_star_start(ctx, tle_cell); - int i = 0; + /* + * If we're not currently reconstructing a star we don't need to do + * anything. 
+ */ + if (!ctx->varno_star) { + return false; + } - foreach_node(TargetEntry, tle, target_list) { - i++; + TargetEntry *tle = (TargetEntry *)lfirst(tle_cell); - if (!IsA(tle->expr, Var)) { - possible_star_start_var = NULL; - possible_star_start_var_index = 0; - continue; - } + /* + * Find out if this target entry is the next element in the run of Vars for + * the star we're currently reconstructing. + */ + if (tle->expr && IsA(tle->expr, Var)) { + Var *var = castNode(Var, tle->expr); - Var *var = (Var *)tle->expr; + if (var->varno == ctx->varno_star && var->varattno == ctx->varattno_star) { + /* + * We're still in the run of Vars, increment the varattno to look + * for the next Var on the next call. + */ + ctx->varattno_star++; - if (var->varattno == 1) { - possible_star_start_var = var; - possible_star_start_var_index = i; - } else if (possible_star_start_var) { - if (var->varno != possible_star_start_var->varno || var->varno == i - possible_star_start_var_index + 1) { - possible_star_start_var = NULL; - possible_star_start_var_index = 0; + /* If we already added star we skip writing this target entry */ + if (ctx->added_current_star) { + return true; } - } - if (pgduckdb_var_is_duckdb_row(var)) { - star_start_indexes = lappend_int(star_start_indexes, possible_star_start_var_index); - possible_star_start_var = NULL; - possible_star_start_var_index = 0; + /* + * If it's not a duckdb row we skip this target entry too. The way + * we add a single star is by expanding the first duckdb.row torget + * entry, which we've defined to expand to a star. So we need to + * skip any non duckdb.row Vars that precede the first duckdb.row. + */ + if (!pgduckdb_var_is_duckdb_row(var)) { + return true; + } + + ctx->added_current_star = true; + return false; } } - return star_start_indexes; + + /* + * If it was not, that means we've successfully expanded this star and we + * should start looking for the next star start. So reset all the state + * used for this star reconstruction. 
+ */ + ctx->varno_star = 0; + ctx->varattno_star = 0; + ctx->added_current_star = false; + + return false; } /* diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index 7b519221..45b105a8 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -6041,55 +6041,23 @@ get_target_list(List *targetList, deparse_context *context, sep = " "; colno = 0; - List *star_starts = pgduckdb_star_start_vars(targetList); - ListCell *current_star_start = list_head(star_starts); - int varno_star = 0; - int varattno_star = 0; - bool added_star = false; + StarReconstructionContext star_reconstruction_context = {0}; + star_reconstruction_context.target_list = targetList; bool outermost_targetlist = !processed_targetlist; processed_targetlist = true; - int i = 0; foreach(l, targetList) { TargetEntry *tle = (TargetEntry *) lfirst(l); char *colname; char *attname; - i++; - - if (current_star_start && lfirst_int(current_star_start) == i) { - Var *var = castNode(Var, tle->expr); - varno_star = var->varno; - varattno_star = var->varattno; - } - - if (varno_star) { - bool reset = true; - if (tle->expr && IsA(tle->expr, Var)) { - Var *var = castNode(Var, tle->expr); - - if (var->varno == varno_star && var->varattno == varattno_star) { - varattno_star++; - reset = false; - if (added_star || !pgduckdb_var_is_duckdb_row(var)) { - continue; - } - added_star = true; - } - } - if (reset) { - varno_star = 0; - varattno_star = 0; - current_star_start = lnext(star_starts, current_star_start); - added_star = false; - } + if (pgduckdb_reconstruct_star_step(&star_reconstruction_context, l)) { + continue; } - - if (tle->resjunk) continue; /* ignore junk entries */ From f4f5d17a1ea2025cbe9508fc8b219650bde8106f Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 15:10:17 +0100 Subject: [PATCH 29/40] Move some more logic to pgduckdb_ruleutils.cpp --- include/pgduckdb/pgduckdb_ruleutils.h | 1 + src/pgduckdb_ruleutils.cpp | 28 +++++++++++++++ 
src/vendor/pg_ruleutils_17.c | 49 +++++++++------------------ 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/include/pgduckdb/pgduckdb_ruleutils.h b/include/pgduckdb/pgduckdb_ruleutils.h index b56f34a8..ded54cbc 100644 --- a/include/pgduckdb/pgduckdb_ruleutils.h +++ b/include/pgduckdb/pgduckdb_ruleutils.h @@ -24,5 +24,6 @@ Var *pgduckdb_duckdb_row_subscript_var(Expr *expr); bool pgduckdb_reconstruct_star_step(StarReconstructionContext *ctx, ListCell *tle_cell); bool pgduckdb_function_needs_subquery(Oid function_oid); int pgduckdb_show_type(Const *constval, int original_showtype); +bool pgduckdb_subscript_has_custom_alias(Plan *plan, List *rtable, Var *subscript_var, char *colname); extern bool processed_targetlist; diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index d8dccabd..c975730a 100644 --- a/src/pgduckdb_ruleutils.cpp +++ b/src/pgduckdb_ruleutils.cpp @@ -10,6 +10,7 @@ extern "C" { #include "commands/dbcommands.h" #include "nodes/nodeFuncs.h" #include "lib/stringinfo.h" +#include "parser/parsetree.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/lsyscache.h" @@ -310,6 +311,33 @@ pgduckdb_show_type(Const *constval, int original_showtype) { return original_showtype; } +bool +pgduckdb_subscript_has_custom_alias(Plan *plan, List *rtable, Var *subscript_var, char *colname) { + /* The first bit of this logic is taken from get_variable() */ + int varno; + int varattno; + + /* + * If we have a syntactic referent for the Var, and we're working from a + * parse tree, prefer to use the syntactic referent. Otherwise, fall back + * on the semantic referent. (See comments in get_variable().) 
+ */ + if (subscript_var->varnosyn > 0 && plan == NULL) { + varno = subscript_var->varnosyn; + varattno = subscript_var->varattnosyn; + } else { + varno = subscript_var->varno; + varattno = subscript_var->varattno; + } + + RangeTblEntry *rte = rt_fetch(varno, rtable); + + /* Custom code starts here */ + char *original_column = strVal(list_nth(rte->eref->colnames, varattno - 1)); + + return strcmp(original_column, colname) != 0; +} + /* * Given a postgres schema name, this returns a list of two elements: the first * is the DuckDB database name and the second is the duckdb schema name. These diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index 45b105a8..b1326b81 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -6115,46 +6115,29 @@ get_target_list(List *targetList, deparse_context *context, * This makes sure we don't add Postgres its bad default alias to the * duckdb.row type. */ - bool duckdb_row_needs_as = !pgduckdb_var_is_duckdb_row(var); + bool duckdb_skip_as = pgduckdb_var_is_duckdb_row(var); /* - * For r['abc'] we don't want the column name to be r, but instead we - * want it to be "abc". We can only to do this for the target list of - * the outside most query though to make sure references to the column - * name are still valid. + * For r['abc'] expressions we don't want the column name to be r, but + * instead we want it to be "abc". We can only to do this for the + * target list of the outside most query though to make sure references + * to the column name are still valid. */ - Var *subscript_var = pgduckdb_duckdb_row_subscript_var(tle->expr); - if (outermost_targetlist && subscript_var) { - deparse_namespace* dpns = (deparse_namespace *) list_nth(context->namespaces, - subscript_var->varlevelsup); - int varno; - int varattno; - - /* - * If we have a syntactic referent for the Var, and we're working from a - * parse tree, prefer to use the syntactic referent. 
Otherwise, fall back - * on the semantic referent. (Forcing use of the semantic referent when - * printing plan trees is a design choice that's perhaps more motivated by - * backwards compatibility than anything else. But it does have the - * advantage of making plans more explicit.) - */ - if (subscript_var->varnosyn > 0 && dpns->plan == NULL) - { - varno = subscript_var->varnosyn; - varattno = subscript_var->varattnosyn; - } - else - { - varno = subscript_var->varno; - varattno = subscript_var->varattno; + if (!duckdb_skip_as && outermost_targetlist) { + Var *subscript_var = pgduckdb_duckdb_row_subscript_var(tle->expr); + if (subscript_var) { + /* + * This cannot be moved to pgduckdb_ruleutils, because of + * the reliance on the non-public deparse_namespace type. + */ + deparse_namespace* dpns = (deparse_namespace *) list_nth(context->namespaces, + subscript_var->varlevelsup); + duckdb_skip_as = !pgduckdb_subscript_has_custom_alias(dpns->plan, dpns->rtable, subscript_var, colname); } - RangeTblEntry* rte = rt_fetch(varno, dpns->rtable); - char *original_column = strVal(list_nth(rte->eref->colnames, varattno - 1)); - duckdb_row_needs_as = strcmp(original_column, colname) != 0; } /* Show AS unless the column's name is correct as-is */ - if (colname && duckdb_row_needs_as) + if (colname && !duckdb_skip_as) { if (attname == NULL || strcmp(attname, colname) != 0) appendStringInfo(&targetbuf, " AS %s", quote_identifier(colname)); From 2410c1020ba9c78243713ca45614338f138a78fb Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 15:49:49 +0100 Subject: [PATCH 30/40] Move subscript to columnref conversion to pgduckdb_ruleutils.cpp --- include/pgduckdb/pgduckdb_ruleutils.h | 1 + src/pgduckdb_ruleutils.cpp | 40 +++++++++++++++++++++++++++ src/vendor/pg_ruleutils_17.c | 34 ++--------------------- 3 files changed, 43 insertions(+), 32 deletions(-) diff --git a/include/pgduckdb/pgduckdb_ruleutils.h b/include/pgduckdb/pgduckdb_ruleutils.h index 
ded54cbc..c226bae9 100644 --- a/include/pgduckdb/pgduckdb_ruleutils.h +++ b/include/pgduckdb/pgduckdb_ruleutils.h @@ -25,5 +25,6 @@ bool pgduckdb_reconstruct_star_step(StarReconstructionContext *ctx, ListCell *tl bool pgduckdb_function_needs_subquery(Oid function_oid); int pgduckdb_show_type(Const *constval, int original_showtype); bool pgduckdb_subscript_has_custom_alias(Plan *plan, List *rtable, Var *subscript_var, char *colname); +SubscriptingRef *pgduckdb_strip_first_subscript(SubscriptingRef *sbsref, StringInfo buf); extern bool processed_targetlist; diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index c975730a..ea20cf7c 100644 --- a/src/pgduckdb_ruleutils.cpp +++ b/src/pgduckdb_ruleutils.cpp @@ -22,6 +22,7 @@ extern "C" { #include "pgduckdb/vendor/pg_ruleutils.h" #include "pgduckdb/pgduckdb_ruleutils.h" +#include "pgduckdb/vendor/pg_list.hpp" } #include "pgduckdb/pgduckdb.h" @@ -338,6 +339,45 @@ pgduckdb_subscript_has_custom_alias(Plan *plan, List *rtable, Var *subscript_var return strcmp(original_column, colname) != 0; } +/* + * Subscript expressions that index into the duckdb.row type need to be changed + * to regular column references in the DuckDB query. The main reason we do this + * is so that DuckDB generates nicer column names, i.e. 
without the square + * brackets: "mycolumn" instead of "r['mycolumn']" + */ +SubscriptingRef * +pgduckdb_strip_first_subscript(SubscriptingRef *sbsref, StringInfo buf) { + if (!IsA(sbsref->refexpr, Var)) { + return sbsref; + } + + if (!pgduckdb_var_is_duckdb_row((Var *)sbsref->refexpr)) { + return sbsref; + } + + Assert(sbsref->refupperindexpr); + Assert(!sbsref->reflowerindexpr); + Oid typoutput; + bool typIsVarlena; + Const *constval = castNode(Const, linitial(sbsref->refupperindexpr)); + getTypeOutputInfo(constval->consttype, &typoutput, &typIsVarlena); + + char *extval = OidOutputFunctionCall(typoutput, constval->constvalue); + + appendStringInfo(buf, ".%s", quote_identifier(extval)); + + /* + * If there are any additional subscript expressions we should output them. + * Subscripts can be used in duckdb to index into arrays or json objects. + * It's fine if this results in an empty List, because printSubscripts + * handles that case correctly. + */ + SubscriptingRef *shorter_sbsref = (SubscriptingRef *)copyObjectImpl(sbsref); + /* strip the first subscript from the list */ + shorter_sbsref->refupperindexpr = list_delete_first(shorter_sbsref->refupperindexpr); + return shorter_sbsref; +} + /* * Given a postgres schema name, this returns a list of two elements: the first * is the DuckDB database name and the second is the duckdb schema name. These diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index b1326b81..b3f9684e 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -9071,41 +9071,11 @@ get_rule_expr(Node *node, deparse_context *context, appendStringInfoString(buf, " := "); get_rule_expr(refassgnexpr, context, showimplicit); } - else if (IsA(sbsref->refexpr, Var) && pgduckdb_var_is_duckdb_row((Var*) sbsref->refexpr)) { - /* Subscript expressions into the duckdb.row type we want to - * change to regular column references in the DuckDB query. 
- * The main reason we do this is so that DuckDB generates - * nicer column names, i.e. without the square brackets: - * "mycolumn" instead of "r['mycolumn']" - */ - Assert(sbsref->refupperindexpr); - Assert(!sbsref->reflowerindexpr); - Oid typoutput; - bool typIsVarlena; - Const *constval = castNode(Const, linitial(sbsref->refupperindexpr)); - getTypeOutputInfo(constval->consttype, - &typoutput, &typIsVarlena); - - char *extval = OidOutputFunctionCall(typoutput, constval->constvalue); - - appendStringInfo(context->buf, ".%s", quote_identifier(extval)); - - if (list_length(sbsref->refupperindexpr) > 1) { - /* - * If there are any additional subscript expressions we - * should output them. Subscripts can be used in duckdb - * to index into arrays or json objects. - */ - SubscriptingRef *shorter_sbsref = copyObject(sbsref); - /* strip the first subscript from the list */ - shorter_sbsref->refupperindexpr = list_delete_first(shorter_sbsref->refupperindexpr); - printSubscripts(shorter_sbsref, context); - } - } else { + SubscriptingRef *new_sbsref = pgduckdb_strip_first_subscript(sbsref, context->buf); /* Just an ordinary container fetch, so print subscripts */ - printSubscripts(sbsref, context); + printSubscripts(new_sbsref, context); } } break; From 855ce1859eb911a6165d5868e62ac65bb74f0339 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 16:59:42 +0100 Subject: [PATCH 31/40] Move duckdb.row refname generation to pgduckdb_ruleutils.cpp --- include/pgduckdb/pgduckdb_ruleutils.h | 1 + src/pgduckdb_ruleutils.cpp | 29 ++++++++++++++++++ src/vendor/pg_ruleutils_17.c | 43 ++++++++++----------------- 3 files changed, 45 insertions(+), 28 deletions(-) diff --git a/include/pgduckdb/pgduckdb_ruleutils.h b/include/pgduckdb/pgduckdb_ruleutils.h index c226bae9..4ca2bd38 100644 --- a/include/pgduckdb/pgduckdb_ruleutils.h +++ b/include/pgduckdb/pgduckdb_ruleutils.h @@ -26,5 +26,6 @@ bool pgduckdb_function_needs_subquery(Oid function_oid); int 
pgduckdb_show_type(Const *constval, int original_showtype); bool pgduckdb_subscript_has_custom_alias(Plan *plan, List *rtable, Var *subscript_var, char *colname); SubscriptingRef *pgduckdb_strip_first_subscript(SubscriptingRef *sbsref, StringInfo buf); +char *pgduckdb_write_row_refname(StringInfo buf, char *refname, bool is_top_level); extern bool processed_targetlist; diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index ea20cf7c..249f2681 100644 --- a/src/pgduckdb_ruleutils.cpp +++ b/src/pgduckdb_ruleutils.cpp @@ -378,6 +378,35 @@ pgduckdb_strip_first_subscript(SubscriptingRef *sbsref, StringInfo buf) { return shorter_sbsref; } +/* + * Writes the refname to the buf in a way that results in the correct output + * for the duckdb.row type. + * + * Returns the "attname" that should be passed back to the caller of + * get_variable(). + */ +char * +pgduckdb_write_row_refname(StringInfo buf, char *refname, bool is_top_level) { + appendStringInfoString(buf, quote_identifier(refname)); + + if (is_top_level) { + /* + * If the duckdb.row is at the top level target list of a select, then + * we want to generate r.*, to unpack all the columns instead of + * returning a STRUCT from the query. + * + * Since we use .* there is no attname. + */ + appendStringInfoString(buf, ".*"); + return NULL; + } + + /* + * In any other case, we want to simply use the alias of the TargetEntry. + */ + return refname; +} + /* * Given a postgres schema name, this returns a list of two elements: the first * is the DuckDB database name and the second is the duckdb schema name. 
These diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index b3f9684e..26ec23ca 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -7542,35 +7542,22 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) if (attnum > colinfo->num_cols) elog(ERROR, "invalid attnum %d for relation \"%s\"", attnum, rte->eref->aliasname); - if (pgduckdb_var_is_duckdb_row(var)) { - if (istoplevel) { - /* If the duckdb row is at the top level target list of a - * select, then we want to generate r.*, to unpack all the - * columns instead of returning a STRUCT from the query. */ - attname = NULL; - } else { - /* In any other case, we want to simply use the alias of the - * TargetEntry, without the column name that postgres gave to - * it. Because even though according to the Postgres parser we - * are referencing a column of type "duckdb.row", that column - * won't actually exist in the DuckDB query. - */ - attname = refname; - refname = NULL; - } - } else { - attname = colinfo->colnames[attnum - 1]; - /* - * If we find a Var referencing a dropped column, it seems better to - * print something (anything) than to fail. In general this should - * not happen, but it used to be possible for some cases involving - * functions returning named composite types, and perhaps there are - * still bugs out there. - */ - if (attname == NULL) - attname = "?dropped?column?"; + if (pgduckdb_var_is_duckdb_row(var)) { + return pgduckdb_write_row_refname(context->buf, refname, istoplevel); } + + attname = colinfo->colnames[attnum - 1]; + + /* + * If we find a Var referencing a dropped column, it seems better to + * print something (anything) than to fail. In general this should + * not happen, but it used to be possible for some cases involving + * functions returning named composite types, and perhaps there are + * still bugs out there. 
+ */ + if (attname == NULL) + attname = "?dropped?column?"; } else { @@ -7588,7 +7575,7 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) else { appendStringInfoChar(buf, '*'); - if (istoplevel && !pgduckdb_var_is_duckdb_row(var)) + if (istoplevel) appendStringInfo(buf, "::%s", format_type_with_typemod(var->vartype, var->vartypmod)); From 2c3fae59f5541359f28504ef9156024c3a142cd1 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 20:03:16 +0100 Subject: [PATCH 32/40] Improve subscript support --- sql/pg_duckdb--0.2.0--0.3.0.sql | 4 +- src/pgduckdb_options.cpp | 96 +++++++++++++++++++-- src/pgduckdb_ruleutils.cpp | 4 +- test/regression/expected/read_functions.out | 30 ++++++- test/regression/sql/read_functions.sql | 16 +++- 5 files changed, 137 insertions(+), 13 deletions(-) diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql index ed91cccc..722a8cc0 100644 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql +++ b/sql/pg_duckdb--0.2.0--0.3.0.sql @@ -30,10 +30,12 @@ CREATE TYPE duckdb.row ( CREATE FUNCTION duckdb.unresolved_type_in(cstring) RETURNS duckdb.unresolved_type AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_in' LANGUAGE C IMMUTABLE STRICT; CREATE FUNCTION duckdb.unresolved_type_out(duckdb.unresolved_type) RETURNS cstring AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_out' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION duckdb.unresolved_type_subscript(internal) RETURNS internal AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_subscript' LANGUAGE C IMMUTABLE STRICT; CREATE TYPE duckdb.unresolved_type ( INTERNALLENGTH = VARIABLE, INPUT = duckdb.unresolved_type_in, - OUTPUT = duckdb.unresolved_type_out + OUTPUT = duckdb.unresolved_type_out, + SUBSCRIPT = duckdb.unresolved_type_subscript ); -- Dummy functions for binary operators with unresolved type on the lefthand diff --git a/src/pgduckdb_options.cpp b/src/pgduckdb_options.cpp index 3a6b2e23..93d92762 100644 --- a/src/pgduckdb_options.cpp +++ 
b/src/pgduckdb_options.cpp @@ -395,10 +395,6 @@ DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct P elog(ERROR, "Assignment to duckdb.row is not supported"); } - if (isSlice) { - elog(ERROR, "Creating a slice out of duckdb.row is not supported"); - } - if (indirection == NIL) { elog(ERROR, "Subscripting duckdb.row with an empty subscript is not supported"); } @@ -407,17 +403,21 @@ DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct P // Transform each subscript expression foreach_node(A_Indices, subscript, indirection) { - Assert(!subscript->is_slice); Assert(subscript->uidx); Node *subscript_expr = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind); - int expr_location = exprLocation(subscript->uidx); - Oid subscript_expr_type = exprType(subscript_expr); /* The first subscript needs to be a TEXT constant, since it should be * a column reference. But the subscripts after that can be anything, * DuckDB should interpret those. 
 		 */
 		if (first) {
+			int expr_location = exprLocation(subscript->uidx);
+			Oid subscript_expr_type = exprType(subscript_expr);
+
+			if (subscript->lidx) {
+				elog(ERROR, "Creating a slice out of duckdb.row is not supported");
+			}
+
 			Node *coerced_expr =
 			    coerce_to_target_type(pstate, subscript_expr, subscript_expr_type, TEXTOID, -1, COERCION_IMPLICIT,
 			                          COERCE_IMPLICIT_CAST, expr_location);
 			if (!coerced_expr) {
@@ -440,6 +440,13 @@ DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct P
 			first = false;
 		}
 		sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, subscript_expr);
+		if (isSlice) {
+			Node *lower_subscript_expr = NULL;
+			if (subscript->uidx) {
+				lower_subscript_expr = transformExpr(pstate, subscript->lidx, pstate->p_expr_kind);
+			}
+			sbsref->reflowerindexpr = lappend(sbsref->reflowerindexpr, lower_subscript_expr);
+		}
 	}
 
 	// Set the result type of the subscripting operation
@@ -459,7 +466,7 @@ DuckdbRowSubscriptExecSetup(const SubscriptingRef * /*sbsref*/, SubscriptingRefS
 	elog(ERROR, "Subscripting duckdb.row is not supported in the Postgres Executor");
 }
 
-static SubscriptRoutines duckdb_subscript_routines = {
+static SubscriptRoutines duckdb_row_subscript_routines = {
 	.transform = DuckdbRowSubscriptTransform,
 	.exec_setup = DuckdbRowSubscriptExecSetup,
 	.fetch_strict = false,
@@ -468,7 +475,78 @@ static SubscriptRoutines duckdb_subscript_routines = {
 };
 
 DECLARE_PG_FUNCTION(duckdb_row_subscript) {
-	PG_RETURN_POINTER(&duckdb_subscript_routines);
+	PG_RETURN_POINTER(&duckdb_row_subscript_routines);
+}
+
+/*
+ * DuckdbUnresolvedTypeSubscriptTransform is called by the parser when a
+ * subscripting operation is performed on a duckdb.unresolved_type. All this
+ * does is ensure that any subscript on duckdb.unresolved_type returns an
+ * unresolved type again.
+ */
+void
+DuckdbUnresolvedTypeSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate,
+                                       bool isSlice, bool isAssignment) {
+	/*
+	 * We need to populate our cache for some of the code below. Normally this
+	 * cache is populated at the start of our planner hook, but this function
+	 * is being called from the parser.
+	 */
+	if (!pgduckdb::IsExtensionRegistered()) {
+		elog(ERROR, "BUG: Using duckdb.unresolved_type but the pg_duckdb extension is not installed");
+	}
+
+	if (isAssignment) {
+		elog(ERROR, "Assignment to duckdb.unresolved_type is not supported");
+	}
+
+	if (indirection == NIL) {
+		elog(ERROR, "Subscripting duckdb.row with an empty subscript is not supported");
+	}
+
+	// Transform each subscript expression
+	foreach_node(A_Indices, subscript, indirection) {
+		Assert(subscript->uidx);
+
+		Node *subscript_expr = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind);
+
+		sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, subscript_expr);
+		if (isSlice) {
+			Node *lower_subscript_expr = NULL;
+			if (subscript->uidx) {
+				lower_subscript_expr = transformExpr(pstate, subscript->lidx, pstate->p_expr_kind);
+			}
+			sbsref->reflowerindexpr = lappend(sbsref->reflowerindexpr, lower_subscript_expr);
+		}
+	}
+
+	// Set the result type of the subscripting operation
+	sbsref->refrestype = pgduckdb::DuckdbUnresolvedTypeOid();
+	sbsref->reftypmod = -1;
+}
+
+/*
+ * DuckdbUnresolvedTypeSubscriptExecSetup is called by the executor when a
+ * subscripting operation is performed on a duckdb.unresolved_type. This should
+ * never happen, because any query that contains a duckdb.unresolved_type should
+ * automatically use DuckDB execution.
+ */ +void +DuckdbUnresolvedTypeSubscriptExecSetup(const SubscriptingRef * /*sbsref*/, SubscriptingRefState * /*sbsrefstate*/, + SubscriptExecSteps * /*exprstate*/) { + elog(ERROR, "Subscripting duckdb.unresolved_type is not supported in the Postgres Executor"); +} + +static SubscriptRoutines duckdb_unresolved_type_subscript_routines = { + .transform = DuckdbUnresolvedTypeSubscriptTransform, + .exec_setup = DuckdbUnresolvedTypeSubscriptExecSetup, + .fetch_strict = false, + .fetch_leakproof = true, + .store_leakproof = true, +}; + +DECLARE_PG_FUNCTION(duckdb_unresolved_type_subscript) { + PG_RETURN_POINTER(&duckdb_unresolved_type_subscript_routines); } DECLARE_PG_FUNCTION(duckdb_row_in) { diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index 249f2681..cb51c568 100644 --- a/src/pgduckdb_ruleutils.cpp +++ b/src/pgduckdb_ruleutils.cpp @@ -356,7 +356,6 @@ pgduckdb_strip_first_subscript(SubscriptingRef *sbsref, StringInfo buf) { } Assert(sbsref->refupperindexpr); - Assert(!sbsref->reflowerindexpr); Oid typoutput; bool typIsVarlena; Const *constval = castNode(Const, linitial(sbsref->refupperindexpr)); @@ -375,6 +374,9 @@ pgduckdb_strip_first_subscript(SubscriptingRef *sbsref, StringInfo buf) { SubscriptingRef *shorter_sbsref = (SubscriptingRef *)copyObjectImpl(sbsref); /* strip the first subscript from the list */ shorter_sbsref->refupperindexpr = list_delete_first(shorter_sbsref->refupperindexpr); + if (shorter_sbsref->reflowerindexpr) { + shorter_sbsref->reflowerindexpr = list_delete_first(shorter_sbsref->reflowerindexpr); + } return shorter_sbsref; } diff --git a/test/regression/expected/read_functions.out b/test/regression/expected/read_functions.out index 10d0807b..f75dbb1b 100644 --- a/test/regression/expected/read_functions.out +++ b/test/regression/expected/read_functions.out @@ -29,13 +29,39 @@ SELECT r['sepal.length'], r['file_row_number'], r['filename'] 4.5 | 41 | ../../data/iris.parquet (5 rows) --- Supports subscripts +-- Further 
subscripting is supported on duckdb.row SELECT r['jsoncol'][1], r['arraycol'][2] FROM read_parquet('../../data/indexable.parquet') r; r.jsoncol[1] | r.arraycol[2] --------------+--------------- "d" | 22 (1 row) +-- And not just on duckdb.row, but also on duckdb.unresolved_type (the parenth +SELECT jsoncol[1], arraycol[2] FROM ( + SELECT r['jsoncol'] jsoncol, r['arraycol'] arraycol + FROM read_parquet('../../data/indexable.parquet') r +); + jsoncol | arraycol +---------+---------- + "d" | 22 +(1 row) + +-- And the same for slice subscripts +SELECT r['arraycol'][1:2] FROM read_parquet('../../data/indexable.parquet') r; + r.arraycol[1:2] +----------------- + {11,22} +(1 row) + +SELECT arraycol[1:2] FROM ( + SELECT r['arraycol'] arraycol + FROM read_parquet('../../data/indexable.parquet') r +); + arraycol +---------- + {11,22} +(1 row) + -- Subqueries correctly expand *, in case of multiple columns. SELECT * FROM ( SELECT 'something' as prefix, *, 'something else' as postfix @@ -241,6 +267,8 @@ SELECT r[3.14] FROM read_csv('../../data/web_page.csv') r limit 1; ERROR: duckdb.row subscript must have text type LINE 1: SELECT r[3.14] FROM read_csv('../../data/web_page.csv') r li... 
^ +SELECT r['abc':'abc'] FROM read_csv('../../data/web_page.csv') r limit 1; +ERROR: Creating a slice out of duckdb.row is not supported SELECT r[q.col] FROM read_csv('../../data/web_page.csv') r, diff --git a/test/regression/sql/read_functions.sql b/test/regression/sql/read_functions.sql index 11c18862..4b1600aa 100644 --- a/test/regression/sql/read_functions.sql +++ b/test/regression/sql/read_functions.sql @@ -11,9 +11,22 @@ SELECT r['sepal.length'], r['file_row_number'], r['filename'] FROM read_parquet('../../data/iris.parquet', file_row_number => true, filename => true) r ORDER BY r['sepal.length'] LIMIT 5; --- Supports subscripts +-- Further subscripting is supported on duckdb.row SELECT r['jsoncol'][1], r['arraycol'][2] FROM read_parquet('../../data/indexable.parquet') r; +-- And not just on duckdb.row, but also on duckdb.unresolved_type (the parenth +SELECT jsoncol[1], arraycol[2] FROM ( + SELECT r['jsoncol'] jsoncol, r['arraycol'] arraycol + FROM read_parquet('../../data/indexable.parquet') r +); + +-- And the same for slice subscripts +SELECT r['arraycol'][1:2] FROM read_parquet('../../data/indexable.parquet') r; +SELECT arraycol[1:2] FROM ( + SELECT r['arraycol'] arraycol + FROM read_parquet('../../data/indexable.parquet') r +); + -- Subqueries correctly expand *, in case of multiple columns. 
SELECT * FROM ( SELECT 'something' as prefix, *, 'something else' as postfix @@ -134,6 +147,7 @@ SELECT * FROM (SELECT r['column00'] AS col1 FROM read_csv('../../data/web_page.c SELECT r[NULL] FROM read_csv('../../data/web_page.csv') r limit 1; SELECT r[123] FROM read_csv('../../data/web_page.csv') r limit 1; SELECT r[3.14] FROM read_csv('../../data/web_page.csv') r limit 1; +SELECT r['abc':'abc'] FROM read_csv('../../data/web_page.csv') r limit 1; SELECT r[q.col] FROM read_csv('../../data/web_page.csv') r, From 1b9f7d5bce7a347c8a74f75d1d6d9e9de8f39b28 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 20:16:26 +0100 Subject: [PATCH 33/40] Port to PG16 ruleutils --- src/vendor/pg_ruleutils_16.c | 71 ++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/src/vendor/pg_ruleutils_16.c b/src/vendor/pg_ruleutils_16.c index 7ed7ae78..601863f3 100644 --- a/src/vendor/pg_ruleutils_16.c +++ b/src/vendor/pg_ruleutils_16.c @@ -6027,12 +6027,23 @@ get_target_list(List *targetList, deparse_context *context, sep = " "; colno = 0; + + StarReconstructionContext star_reconstruction_context = {0}; + star_reconstruction_context.target_list = targetList; + + bool outermost_targetlist = !processed_targetlist; + processed_targetlist = true; + foreach(l, targetList) { TargetEntry *tle = (TargetEntry *) lfirst(l); char *colname; char *attname; + if (pgduckdb_reconstruct_star_step(&star_reconstruction_context, l)) { + continue; + } + if (tle->resjunk) continue; /* ignore junk entries */ @@ -6057,8 +6068,10 @@ get_target_list(List *targetList, deparse_context *context, * directly so that we can tell it to do the right thing, and so that * we can get the attribute name which is the default AS label. 
*/ + Var *var = NULL; if (tle->expr && (IsA(tle->expr, Var))) { + var = (Var *) tle->expr; attname = get_variable((Var *) tle->expr, 0, true, context); } else @@ -6084,8 +6097,33 @@ get_target_list(List *targetList, deparse_context *context, else colname = tle->resname; + /* + * This makes sure we don't add Postgres its bad default alias to the + * duckdb.row type. + */ + bool duckdb_skip_as = pgduckdb_var_is_duckdb_row(var); + + /* + * For r['abc'] expressions we don't want the column name to be r, but + * instead we want it to be "abc". We can only to do this for the + * target list of the outside most query though to make sure references + * to the column name are still valid. + */ + if (!duckdb_skip_as && outermost_targetlist) { + Var *subscript_var = pgduckdb_duckdb_row_subscript_var(tle->expr); + if (subscript_var) { + /* + * This cannot be moved to pgduckdb_ruleutils, because of + * the reliance on the non-public deparse_namespace type. + */ + deparse_namespace* dpns = (deparse_namespace *) list_nth(context->namespaces, + subscript_var->varlevelsup); + duckdb_skip_as = !pgduckdb_subscript_has_custom_alias(dpns->plan, dpns->rtable, subscript_var, colname); + } + } + /* Show AS unless the column's name is correct as-is */ - if (colname) /* resname could be NULL */ + if (colname && !duckdb_skip_as) /* resname could be NULL */ { if (attname == NULL || strcmp(attname, colname) != 0) appendStringInfo(&targetbuf, " AS %s", quote_identifier(colname)); @@ -7448,6 +7486,11 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) if (attnum > colinfo->num_cols) elog(ERROR, "invalid attnum %d for relation \"%s\"", attnum, rte->eref->aliasname); + + if (pgduckdb_var_is_duckdb_row(var)) { + return pgduckdb_write_row_refname(context->buf, refname, istoplevel); + } + attname = colinfo->colnames[attnum - 1]; /* @@ -8743,8 +8786,9 @@ get_rule_expr(Node *node, deparse_context *context, } else { + SubscriptingRef *new_sbsref = 
pgduckdb_strip_first_subscript(sbsref, context->buf); /* Just an ordinary container fetch, so print subscripts */ - printSubscripts(sbsref, context); + printSubscripts(new_sbsref, context); } } break; @@ -10075,6 +10119,11 @@ get_func_expr(FuncExpr *expr, deparse_context *context, nargs++; } + bool function_needs_subquery = pgduckdb_function_needs_subquery(funcoid); + if (function_needs_subquery) { + appendStringInfoString(buf, "(FROM "); + } + appendStringInfo(buf, "%s(", generate_function_name(funcoid, nargs, argnames, argtypes, @@ -10091,6 +10140,10 @@ get_func_expr(FuncExpr *expr, deparse_context *context, get_rule_expr((Node *) lfirst(l), context, true); } appendStringInfoChar(buf, ')'); + + if (function_needs_subquery) { + appendStringInfoChar(buf, ')'); + } } /* @@ -10669,6 +10722,8 @@ get_const_expr(Const *constval, deparse_context *context, int showtype) char *extval; bool needlabel = false; + showtype = pgduckdb_show_type(constval, showtype); + if (constval->constisnull) { /* @@ -11463,8 +11518,18 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) /* Print the relation alias, if needed */ get_rte_alias(rte, varno, false, context); + if (pgduckdb_func_returns_duckdb_row(rtfunc1)) { + /* + * We never want to print column aliases for functions that return + * a duckdb.row. The common pattern is for people to not provide an + * explicit column alias (i.e. "r" becomes "r(r)"). This obviously + * is a naming collision and DuckDB resolves that in the opposite + * way that we want. Never adding column aliases for duckdb.row + * avoids this conflict. 
+ */ + } /* Print the column definitions or aliases, if needed */ - if (rtfunc1 && rtfunc1->funccolnames != NIL) + else if (rtfunc1 && rtfunc1->funccolnames != NIL) { /* Reconstruct the columndef list, which is also the aliases */ get_from_clause_coldeflist(rtfunc1, colinfo, context); From bdcc965cd97674bff66c30e7cfecc5645205a021 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 20:23:05 +0100 Subject: [PATCH 34/40] Port to PG14 ruleutils --- src/vendor/pg_ruleutils_14.c | 71 ++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/src/vendor/pg_ruleutils_14.c b/src/vendor/pg_ruleutils_14.c index 572954ea..f9d59c47 100644 --- a/src/vendor/pg_ruleutils_14.c +++ b/src/vendor/pg_ruleutils_14.c @@ -5987,12 +5987,23 @@ get_target_list(List *targetList, deparse_context *context, sep = " "; colno = 0; + + StarReconstructionContext star_reconstruction_context = {0}; + star_reconstruction_context.target_list = targetList; + + bool outermost_targetlist = !processed_targetlist; + processed_targetlist = true; + foreach(l, targetList) { TargetEntry *tle = (TargetEntry *) lfirst(l); char *colname; char *attname; + if (pgduckdb_reconstruct_star_step(&star_reconstruction_context, l)) { + continue; + } + if (tle->resjunk) continue; /* ignore junk entries */ @@ -6017,8 +6028,10 @@ get_target_list(List *targetList, deparse_context *context, * directly so that we can tell it to do the right thing, and so that * we can get the attribute name which is the default AS label. */ + Var *var = NULL; if (tle->expr && (IsA(tle->expr, Var))) { + var = (Var *) tle->expr; attname = get_variable((Var *) tle->expr, 0, true, context); } else @@ -6044,8 +6057,33 @@ get_target_list(List *targetList, deparse_context *context, else colname = tle->resname; + /* + * This makes sure we don't add Postgres its bad default alias to the + * duckdb.row type. 
+ */ + bool duckdb_skip_as = pgduckdb_var_is_duckdb_row(var); + + /* + * For r['abc'] expressions we don't want the column name to be r, but + * instead we want it to be "abc". We can only to do this for the + * target list of the outside most query though to make sure references + * to the column name are still valid. + */ + if (!duckdb_skip_as && outermost_targetlist) { + Var *subscript_var = pgduckdb_duckdb_row_subscript_var(tle->expr); + if (subscript_var) { + /* + * This cannot be moved to pgduckdb_ruleutils, because of + * the reliance on the non-public deparse_namespace type. + */ + deparse_namespace* dpns = (deparse_namespace *) list_nth(context->namespaces, + subscript_var->varlevelsup); + duckdb_skip_as = !pgduckdb_subscript_has_custom_alias(dpns->plan, dpns->rtable, subscript_var, colname); + } + } + /* Show AS unless the column's name is correct as-is */ - if (colname) /* resname could be NULL */ + if (colname && !duckdb_skip_as) /* resname could be NULL */ { if (attname == NULL || strcmp(attname, colname) != 0) appendStringInfo(&targetbuf, " AS %s", quote_identifier(colname)); @@ -7285,6 +7323,11 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) if (attnum > colinfo->num_cols) elog(ERROR, "invalid attnum %d for relation \"%s\"", attnum, rte->eref->aliasname); + + if (pgduckdb_var_is_duckdb_row(var)) { + return pgduckdb_write_row_refname(context->buf, refname, istoplevel); + } + attname = colinfo->colnames[attnum - 1]; /* @@ -8598,8 +8641,9 @@ get_rule_expr(Node *node, deparse_context *context, } else { + SubscriptingRef *new_sbsref = pgduckdb_strip_first_subscript(sbsref, context->buf); /* Just an ordinary container fetch, so print subscripts */ - printSubscripts(sbsref, context); + printSubscripts(new_sbsref, context); } } break; @@ -9882,6 +9926,11 @@ get_func_expr(FuncExpr *expr, deparse_context *context, nargs++; } + bool function_needs_subquery = pgduckdb_function_needs_subquery(funcoid); + if 
(function_needs_subquery) { + appendStringInfoString(buf, "(FROM "); + } + appendStringInfo(buf, "%s(", generate_function_name(funcoid, nargs, argnames, argtypes, @@ -9898,6 +9947,10 @@ get_func_expr(FuncExpr *expr, deparse_context *context, get_rule_expr((Node *) lfirst(l), context, true); } appendStringInfoChar(buf, ')'); + + if (function_needs_subquery) { + appendStringInfoChar(buf, ')'); + } } /* @@ -10414,6 +10467,8 @@ get_const_expr(Const *constval, deparse_context *context, int showtype) char *extval; bool needlabel = false; + showtype = pgduckdb_show_type(constval, showtype); + if (constval->constisnull) { /* @@ -11056,8 +11111,18 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) /* Print the relation alias, if needed */ get_rte_alias(rte, varno, false, context); + if (pgduckdb_func_returns_duckdb_row(rtfunc1)) { + /* + * We never want to print column aliases for functions that return + * a duckdb.row. The common pattern is for people to not provide an + * explicit column alias (i.e. "r" becomes "r(r)"). This obviously + * is a naming collision and DuckDB resolves that in the opposite + * way that we want. Never adding column aliases for duckdb.row + * avoids this conflict. 
+ */ + } /* Print the column definitions or aliases, if needed */ - if (rtfunc1 && rtfunc1->funccolnames != NIL) + else if (rtfunc1 && rtfunc1->funccolnames != NIL) { /* Reconstruct the columndef list, which is also the aliases */ get_from_clause_coldeflist(rtfunc1, colinfo, context); From ac2bdf9ab8e9ea4d9bdfa524b88acec5207effb5 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 20:23:44 +0100 Subject: [PATCH 35/40] Add back removed comment --- src/vendor/pg_ruleutils_17.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vendor/pg_ruleutils_17.c b/src/vendor/pg_ruleutils_17.c index 26ec23ca..31a17e99 100644 --- a/src/vendor/pg_ruleutils_17.c +++ b/src/vendor/pg_ruleutils_17.c @@ -6137,7 +6137,7 @@ get_target_list(List *targetList, deparse_context *context, } /* Show AS unless the column's name is correct as-is */ - if (colname && !duckdb_skip_as) + if (colname && !duckdb_skip_as) /* resname could be NULL */ { if (attname == NULL || strcmp(attname, colname) != 0) appendStringInfo(&targetbuf, " AS %s", quote_identifier(colname)); From f24fc1996c5a693f99f97cdbec53e8896c28f2ab Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 20:29:59 +0100 Subject: [PATCH 36/40] Port to PG15 ruleutils --- src/vendor/pg_ruleutils_15.c | 71 ++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/src/vendor/pg_ruleutils_15.c b/src/vendor/pg_ruleutils_15.c index 42cabfc1..ff58ef00 100644 --- a/src/vendor/pg_ruleutils_15.c +++ b/src/vendor/pg_ruleutils_15.c @@ -6068,12 +6068,23 @@ get_target_list(List *targetList, deparse_context *context, sep = " "; colno = 0; + + StarReconstructionContext star_reconstruction_context = {0}; + star_reconstruction_context.target_list = targetList; + + bool outermost_targetlist = !processed_targetlist; + processed_targetlist = true; + foreach(l, targetList) { TargetEntry *tle = (TargetEntry *) lfirst(l); char *colname; char *attname; + if 
(pgduckdb_reconstruct_star_step(&star_reconstruction_context, l)) { + continue; + } + if (tle->resjunk) continue; /* ignore junk entries */ @@ -6098,8 +6109,10 @@ get_target_list(List *targetList, deparse_context *context, * directly so that we can tell it to do the right thing, and so that * we can get the attribute name which is the default AS label. */ + Var *var = NULL; if (tle->expr && (IsA(tle->expr, Var))) { + var = (Var *) tle->expr; attname = get_variable((Var *) tle->expr, 0, true, context); } else @@ -6125,8 +6138,33 @@ get_target_list(List *targetList, deparse_context *context, else colname = tle->resname; + /* + * This makes sure we don't add Postgres its bad default alias to the + * duckdb.row type. + */ + bool duckdb_skip_as = pgduckdb_var_is_duckdb_row(var); + + /* + * For r['abc'] expressions we don't want the column name to be r, but + * instead we want it to be "abc". We can only to do this for the + * target list of the outside most query though to make sure references + * to the column name are still valid. + */ + if (!duckdb_skip_as && outermost_targetlist) { + Var *subscript_var = pgduckdb_duckdb_row_subscript_var(tle->expr); + if (subscript_var) { + /* + * This cannot be moved to pgduckdb_ruleutils, because of + * the reliance on the non-public deparse_namespace type. 
+ */ + deparse_namespace* dpns = (deparse_namespace *) list_nth(context->namespaces, + subscript_var->varlevelsup); + duckdb_skip_as = !pgduckdb_subscript_has_custom_alias(dpns->plan, dpns->rtable, subscript_var, colname); + } + } + /* Show AS unless the column's name is correct as-is */ - if (colname) /* resname could be NULL */ + if (colname && !duckdb_skip_as) /* resname could be NULL */ { if (attname == NULL || strcmp(attname, colname) != 0) appendStringInfo(&targetbuf, " AS %s", quote_identifier(colname)); @@ -7488,6 +7526,11 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) if (attnum > colinfo->num_cols) elog(ERROR, "invalid attnum %d for relation \"%s\"", attnum, rte->eref->aliasname); + + if (pgduckdb_var_is_duckdb_row(var)) { + return pgduckdb_write_row_refname(context->buf, refname, istoplevel); + } + attname = colinfo->colnames[attnum - 1]; /* @@ -8801,8 +8844,9 @@ get_rule_expr(Node *node, deparse_context *context, } else { + SubscriptingRef *new_sbsref = pgduckdb_strip_first_subscript(sbsref, context->buf); /* Just an ordinary container fetch, so print subscripts */ - printSubscripts(sbsref, context); + printSubscripts(new_sbsref, context); } } break; @@ -10084,6 +10128,11 @@ get_func_expr(FuncExpr *expr, deparse_context *context, nargs++; } + bool function_needs_subquery = pgduckdb_function_needs_subquery(funcoid); + if (function_needs_subquery) { + appendStringInfoString(buf, "(FROM "); + } + appendStringInfo(buf, "%s(", generate_function_name(funcoid, nargs, argnames, argtypes, @@ -10100,6 +10149,10 @@ get_func_expr(FuncExpr *expr, deparse_context *context, get_rule_expr((Node *) lfirst(l), context, true); } appendStringInfoChar(buf, ')'); + + if (function_needs_subquery) { + appendStringInfoChar(buf, ')'); + } } /* @@ -10616,6 +10669,8 @@ get_const_expr(Const *constval, deparse_context *context, int showtype) char *extval; bool needlabel = false; + showtype = pgduckdb_show_type(constval, showtype); + if 
(constval->constisnull) { /* @@ -11258,8 +11313,18 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) /* Print the relation alias, if needed */ get_rte_alias(rte, varno, false, context); + if (pgduckdb_func_returns_duckdb_row(rtfunc1)) { + /* + * We never want to print column aliases for functions that return + * a duckdb.row. The common pattern is for people to not provide an + * explicit column alias (i.e. "r" becomes "r(r)"). This obviously + * is a naming collision and DuckDB resolves that in the opposite + * way that we want. Never adding column aliases for duckdb.row + * avoids this conflict. + */ + } /* Print the column definitions or aliases, if needed */ - if (rtfunc1 && rtfunc1->funccolnames != NIL) + else if (rtfunc1 && rtfunc1->funccolnames != NIL) { /* Reconstruct the columndef list, which is also the aliases */ get_from_clause_coldeflist(rtfunc1, colinfo, context); From 89e2849cd4317a075bf2fb24ece7be208218e5fd Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 20:32:24 +0100 Subject: [PATCH 37/40] Add missing include for PG below 17 --- src/pgduckdb_options.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pgduckdb_options.cpp b/src/pgduckdb_options.cpp index 93d92762..dba6e306 100644 --- a/src/pgduckdb_options.cpp +++ b/src/pgduckdb_options.cpp @@ -29,6 +29,8 @@ extern "C" { #include "utils/snapmgr.h" #include "nodes/print.h" + +#include "pgduckdb/vendor/pg_list.hpp" } #include "pgduckdb/pgduckdb_options.hpp" From c41ea06ee37daabbc6d2638efbae74a6ce06b502 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 20:41:19 +0100 Subject: [PATCH 38/40] Add some PG14 version specific code --- src/pgduckdb_ddl.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pgduckdb_ddl.cpp b/src/pgduckdb_ddl.cpp index 6b2e0c8e..eb755809 100644 --- a/src/pgduckdb_ddl.cpp +++ b/src/pgduckdb_ddl.cpp @@ -91,7 +91,11 @@ WrapQueryInDuckdbQueryCall(Query *query, List *target_list) { 
ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("BUG: pg_duckdb generated a command with multiple queries"))); +#if PG_VERSION_NUM >= 150000 return parse_analyze_fixedparams((RawStmt *)linitial(parsetree_list), buf->data, NULL, 0, NULL); +#else + return parse_analyze((RawStmt *)linitial(parsetree_list), buf->data, NULL, 0, NULL); +#endif } static void From 1b2d1cf187b8879b76a462a429b7d4d3004ff8bc Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 21 Jan 2025 20:46:33 +0100 Subject: [PATCH 39/40] Add subquery aliases to make PG15 & PG14 happy --- test/regression/expected/read_functions.out | 22 ++++++++++----------- test/regression/sql/read_functions.sql | 22 ++++++++++----------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/test/regression/expected/read_functions.out b/test/regression/expected/read_functions.out index f75dbb1b..61dfa755 100644 --- a/test/regression/expected/read_functions.out +++ b/test/regression/expected/read_functions.out @@ -40,7 +40,7 @@ SELECT r['jsoncol'][1], r['arraycol'][2] FROM read_parquet('../../data/indexable SELECT jsoncol[1], arraycol[2] FROM ( SELECT r['jsoncol'] jsoncol, r['arraycol'] arraycol FROM read_parquet('../../data/indexable.parquet') r -); +) q; jsoncol | arraycol ---------+---------- "d" | 22 @@ -56,7 +56,7 @@ SELECT r['arraycol'][1:2] FROM read_parquet('../../data/indexable.parquet') r; SELECT arraycol[1:2] FROM ( SELECT r['arraycol'] arraycol FROM read_parquet('../../data/indexable.parquet') r -); +) q; arraycol ---------- {11,22} @@ -67,7 +67,7 @@ SELECT * FROM ( SELECT 'something' as prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r LIMIT 1 -); +) q; prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix -----------+--------------+-------------+--------------+-------------+---------+---------------- something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else @@ -83,7 +83,7 @@ SELECT r FROM ( SELECT 'something' as 
prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r LIMIT 1 -); +) q; prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix -----------+--------------+-------------+--------------+-------------+---------+---------------- something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else @@ -95,7 +95,7 @@ SELECT prefix, r FROM ( SELECT 'something' as prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r LIMIT 1 -); +) q; prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix -----------+--------------+-------------+--------------+-------------+---------+---------------- something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else @@ -105,7 +105,7 @@ SELECT prefix, r, postfix FROM ( SELECT 'something' as prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r LIMIT 1 -); +) q; prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix -----------+--------------+-------------+--------------+-------------+---------+---------------- something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else @@ -119,7 +119,7 @@ SELECT r, postfix FROM ( SELECT 'something' as prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r LIMIT 1 -); +) q; prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix | postfix -----------+--------------+-------------+--------------+-------------+---------+----------------+---------------- something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else | something else @@ -130,7 +130,7 @@ SELECT postfix, r, prefix FROM ( SELECT 'something' as prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r LIMIT 1 -); +) q; postfix | prefix | sepal.length | sepal.width | petal.length | petal.width | variety | postfix | prefix 
----------------+-----------+--------------+-------------+--------------+-------------+---------+----------------+----------- something else | something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | something else | something @@ -169,7 +169,7 @@ SELECT * FROM ( FROM read_parquet('../../data/iris.parquet') r, read_parquet('../../data/unsigned_types.parquet') r2 LIMIT 1 -); +) q; prefix | sepal.length | sepal.width | petal.length | petal.width | variety | utinyint | usmallint | uinteger | postfix -----------+--------------+-------------+--------------+-------------+---------+----------+-----------+------------+---------------- something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | 255 | 65535 | 4294967295 | something else @@ -241,14 +241,14 @@ SELECT r['column00'] AS r FROM read_csv('../../data/web_page.csv') r limit 1; -- be executed. To avoid that we simply don't rename a subscript expression -- inside a subquery, and only do so in the outermost SELECT list (aka -- targetlist). -SELECT * FROM (SELECT r['column00'] FROM read_csv('../../data/web_page.csv') r limit 1); +SELECT * FROM (SELECT r['column00'] FROM read_csv('../../data/web_page.csv') r limit 1) q; r --- 1 (1 row) -- If you give it a different alias then that alias is propegated though. 
-SELECT * FROM (SELECT r['column00'] AS col1 FROM read_csv('../../data/web_page.csv') r limit 1); +SELECT * FROM (SELECT r['column00'] AS col1 FROM read_csv('../../data/web_page.csv') r limit 1) q; col1 ------ 1 diff --git a/test/regression/sql/read_functions.sql b/test/regression/sql/read_functions.sql index 4b1600aa..82d32db5 100644 --- a/test/regression/sql/read_functions.sql +++ b/test/regression/sql/read_functions.sql @@ -18,21 +18,21 @@ SELECT r['jsoncol'][1], r['arraycol'][2] FROM read_parquet('../../data/indexable SELECT jsoncol[1], arraycol[2] FROM ( SELECT r['jsoncol'] jsoncol, r['arraycol'] arraycol FROM read_parquet('../../data/indexable.parquet') r -); +) q; -- And the same for slice subscripts SELECT r['arraycol'][1:2] FROM read_parquet('../../data/indexable.parquet') r; SELECT arraycol[1:2] FROM ( SELECT r['arraycol'] arraycol FROM read_parquet('../../data/indexable.parquet') r -); +) q; -- Subqueries correctly expand *, in case of multiple columns. SELECT * FROM ( SELECT 'something' as prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r LIMIT 1 -); +) q; -- NOTE: A single "r" is equivalent to a *. The prefix and postfix columns are -- not explicitely selected, but still show up in the result. This is @@ -44,7 +44,7 @@ SELECT r FROM ( SELECT 'something' as prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r LIMIT 1 -); +) q; -- ... but if you manually add the expected columns then they are merged into -- the new star expansion. @@ -52,12 +52,12 @@ SELECT prefix, r FROM ( SELECT 'something' as prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r LIMIT 1 -); +) q; SELECT prefix, r, postfix FROM ( SELECT 'something' as prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r LIMIT 1 -); +) q; -- This requires the prefix columns to be there though. 
If the prefix columns -- are not there the postfix columns don't get merged into the new star @@ -67,14 +67,14 @@ SELECT r, postfix FROM ( SELECT 'something' as prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r LIMIT 1 -); +) q; -- If you swap them around, they will get duplicated though. For the SELECT postfix, r, prefix FROM ( SELECT 'something' as prefix, *, 'something else' as postfix FROM read_parquet('../../data/iris.parquet') r LIMIT 1 -); +) q; -- Joining two subqueries a single * works as expected. SELECT * @@ -101,7 +101,7 @@ SELECT * FROM ( FROM read_parquet('../../data/iris.parquet') r, read_parquet('../../data/unsigned_types.parquet') r2 LIMIT 1 -); +) q; -- read_csv @@ -138,10 +138,10 @@ SELECT r['column00'] AS r FROM read_csv('../../data/web_page.csv') r limit 1; -- be executed. To avoid that we simply don't rename a subscript expression -- inside a subquery, and only do so in the outermost SELECT list (aka -- targetlist). -SELECT * FROM (SELECT r['column00'] FROM read_csv('../../data/web_page.csv') r limit 1); +SELECT * FROM (SELECT r['column00'] FROM read_csv('../../data/web_page.csv') r limit 1) q; -- If you give it a different alias then that alias is propegated though. 
-SELECT * FROM (SELECT r['column00'] AS col1 FROM read_csv('../../data/web_page.csv') r limit 1); +SELECT * FROM (SELECT r['column00'] AS col1 FROM read_csv('../../data/web_page.csv') r limit 1) q; -- Only simple string literals are supported as column names SELECT r[NULL] FROM read_csv('../../data/web_page.csv') r limit 1; From c890f35f77a94bcdc4ef22206b1497730b21ffd9 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Thu, 23 Jan 2025 18:38:25 +0100 Subject: [PATCH 40/40] Address review --- src/pgduckdb_ddl.cpp | 7 +- src/pgduckdb_hooks.cpp | 17 ++- src/pgduckdb_options.cpp | 129 ++++++++++++-------- test/regression/expected/read_functions.out | 23 ++++ test/regression/sql/read_functions.sql | 9 ++ 5 files changed, 131 insertions(+), 54 deletions(-) diff --git a/src/pgduckdb_ddl.cpp b/src/pgduckdb_ddl.cpp index eb755809..8a85a6ed 100644 --- a/src/pgduckdb_ddl.cpp +++ b/src/pgduckdb_ddl.cpp @@ -151,8 +151,6 @@ DuckdbHandleDDL(PlannedStmt *pstmt, const char *query_string, ParamListInfo para } /* NOTE: The below code is mostly copied from ExecCreateTableAs */ - List *rewritten; - Query *query = (Query *)copyObjectImpl(original_query); /* * Parse analysis was done already, but we still have to run the rule @@ -160,11 +158,12 @@ DuckdbHandleDDL(PlannedStmt *pstmt, const char *query_string, ParamListInfo para * either came straight from the parser, or suitable locks were * acquired by plancache.c. 
*/ - rewritten = QueryRewrite(query); + List *rewritten = QueryRewrite(query); /* SELECT should never rewrite to more or less than one SELECT query */ if (list_length(rewritten) != 1) - elog(ERROR, "unexpected rewrite result for CREATE TABLE AS SELECT"); + elog(ERROR, "unexpected rewrite result for CREATE TABLE AS SELECT, contains %d queries", + list_length(rewritten)); query = linitial_node(Query, rewritten); Assert(query->commandType == CMD_SELECT); diff --git a/src/pgduckdb_hooks.cpp b/src/pgduckdb_hooks.cpp index b148bbba..e9e0d1cb 100644 --- a/src/pgduckdb_hooks.cpp +++ b/src/pgduckdb_hooks.cpp @@ -362,13 +362,28 @@ DuckdbEmitLogHook(ErrorData *edata) { if (edata->elevel == ERROR && edata->sqlerrcode == ERRCODE_UNDEFINED_COLUMN && pgduckdb::IsExtensionRegistered()) { /* * XXX: It would be nice if we could check if the query contained any - * of the functions, but we don't have access to the query string here. + * of the functions. We could probably check the debug_query_string + * global for this. For now we don't consider that too important though. * So instead we simply always add this HINT for this specific error if * the pg_duckdb extension is installed. */ edata->hint = pstrdup( "If you use DuckDB functions like read_parquet, you need to use the r['colname'] syntax to use columns. If " "you're already doing that, maybe you forgot to to give the function the r alias."); + } else if (edata->elevel == ERROR && edata->sqlerrcode == ERRCODE_SYNTAX_ERROR && + pgduckdb::IsExtensionRegistered() && + strcmp(edata->message_id, + "a column definition list is only allowed for functions returning \"record\"") == 0) { + /* + * NOTE: We can probably remove this hint after a few releases once + * we've updated all known blogposts that still used the old syntax. + * + * Similarly to the other hint, this could check for actual usages of + * the relevant DuckDB functions. 
+ */ + edata->hint = pstrdup( + "If you use DuckDB functions like read_parquet, you need to use the r['colname'] syntax introduced " + "in pg_duckdb 0.3.0. It seems like you might be using the outdated \"AS (colname coltype, ...)\" syntax"); } } diff --git a/src/pgduckdb_options.cpp b/src/pgduckdb_options.cpp index dba6e306..00f34683 100644 --- a/src/pgduckdb_options.cpp +++ b/src/pgduckdb_options.cpp @@ -375,6 +375,79 @@ DECLARE_PG_FUNCTION(pgduckdb_recycle_ddb) { PG_RETURN_BOOL(true); } +Node * +CoerceRowSubscriptToText(struct ParseState *pstate, A_Indices *subscript) { + if (!subscript->uidx) { + elog(ERROR, "Creating a slice out of duckdb.row is not supported"); + } + + Node *subscript_expr = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind); + int expr_location = exprLocation(subscript->uidx); + Oid subscript_expr_type = exprType(subscript_expr); + + if (subscript->lidx) { + elog(ERROR, "Creating a slice out of duckdb.row is not supported"); + } + + Node *coerced_expr = coerce_to_target_type(pstate, subscript_expr, subscript_expr_type, TEXTOID, -1, + COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, expr_location); + if (!coerced_expr) { + ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must have text type"), + parser_errposition(pstate, expr_location))); + } + + if (!IsA(subscript_expr, Const)) { + ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must be a constant"), + parser_errposition(pstate, expr_location))); + } + + Const *subscript_const = castNode(Const, subscript_expr); + if (subscript_const->constisnull) { + ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("duckdb.row subscript cannot be NULL"), + parser_errposition(pstate, expr_location))); + } + + return coerced_expr; +} + +/* + * In Postgres all index operations in a row ar all slices or all plain + * index operations. If you mix them, all are converted to slices. 
+ * There's no difference in representation possible between + * "col[1:2][1]" and "col[1:2][1:]". If you want this seperation you + * need to use parenthesis to seperate: "(col[1:2])[1]" + * This might seem like fairly strange behaviour, but Postgres uses + * this to be able to slice in multi-dimensional arrays and thtis + * behaviour is documented here: + * https://www.postgresql.org/docs/current/arrays.html#ARRAYS-ACCESSING + * + * This is different from DuckDB, but there's not much we can do about + * that. So we'll have this same behaviour by, which means we need to always + * add the lower subscript to the slice. The lower subscript will be NULL in + * that case. + * + * See also comments on SubscriptingRef in nodes/subscripting.h + */ +void +AddSubscriptExpressions(SubscriptingRef *sbsref, struct ParseState *pstate, A_Indices *subscript, bool isSlice) { + Assert(isSlice || subscript->uidx); + + Node *upper_subscript_expr = NULL; + if (subscript->uidx) { + upper_subscript_expr = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind); + } + + sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, upper_subscript_expr); + + if (isSlice) { + Node *lower_subscript_expr = NULL; + if (subscript->uidx) { + lower_subscript_expr = transformExpr(pstate, subscript->lidx, pstate->p_expr_kind); + } + sbsref->reflowerindexpr = lappend(sbsref->reflowerindexpr, lower_subscript_expr); + } +} + /* * DuckdbRowSubscriptTransform is called by the parser when a subscripting * operation is performed on a duckdb.row. It has two main puprposes: @@ -405,50 +478,19 @@ DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct P // Transform each subscript expression foreach_node(A_Indices, subscript, indirection) { - Assert(subscript->uidx); - - Node *subscript_expr = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind); - /* The first subscript needs to be a TEXT constant, since it should be * a column reference. 
But the subscripts after that can be anything, * DuckDB should interpret those. */ if (first) { - int expr_location = exprLocation(subscript->uidx); - Oid subscript_expr_type = exprType(subscript_expr); - - if (subscript->lidx) { - elog(ERROR, "Creating a slice out of duckdb.row is not supported"); + sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, CoerceRowSubscriptToText(pstate, subscript)); + if (isSlice) { + sbsref->reflowerindexpr = lappend(sbsref->reflowerindexpr, NULL); } - - Node *coerced_expr = coerce_to_target_type(pstate, subscript_expr, subscript_expr_type, TEXTOID, -1, - COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, expr_location); - if (!coerced_expr) { - ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must have text type"), - parser_errposition(pstate, expr_location))); - } - - if (!IsA(subscript_expr, Const)) { - ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("duckdb.row subscript must be a constant"), - parser_errposition(pstate, expr_location))); - } - - Const *subscript_const = castNode(Const, subscript_expr); - if (subscript_const->constisnull) { - ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("duckdb.row subscript cannot be NULL"), - parser_errposition(pstate, expr_location))); - } - - subscript_expr = coerced_expr; first = false; + continue; } - sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, subscript_expr); - if (isSlice) { - Node *lower_subscript_expr = NULL; - if (subscript->uidx) { - lower_subscript_expr = transformExpr(pstate, subscript->lidx, pstate->p_expr_kind); - } - sbsref->reflowerindexpr = lappend(sbsref->reflowerindexpr, lower_subscript_expr); - } + + AddSubscriptExpressions(sbsref, pstate, subscript, isSlice); } // Set the result type of the subscripting operation @@ -508,18 +550,7 @@ DuckdbUnresolvedTypeSubscriptTransform(SubscriptingRef *sbsref, List *indirectio // Transform each subscript expression foreach_node(A_Indices, subscript, indirection) { 
- Assert(subscript->uidx); - - Node *subscript_expr = transformExpr(pstate, subscript->uidx, pstate->p_expr_kind); - - sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, subscript_expr); - if (isSlice) { - Node *lower_subscript_expr = NULL; - if (subscript->uidx) { - lower_subscript_expr = transformExpr(pstate, subscript->lidx, pstate->p_expr_kind); - } - sbsref->reflowerindexpr = lappend(sbsref->reflowerindexpr, lower_subscript_expr); - } + AddSubscriptExpressions(sbsref, pstate, subscript, isSlice); } // Set the result type of the subscripting operation diff --git a/test/regression/expected/read_functions.out b/test/regression/expected/read_functions.out index 61dfa755..4684c971 100644 --- a/test/regression/expected/read_functions.out +++ b/test/regression/expected/read_functions.out @@ -62,6 +62,21 @@ SELECT arraycol[1:2] FROM ( {11,22} (1 row) +SELECT r['arraycol'][:] FROM read_parquet('../../data/indexable.parquet') r; + r.arraycol[:] +--------------- + {11,22,33} +(1 row) + +SELECT arraycol[:] FROM ( + SELECT r['arraycol'] arraycol + FROM read_parquet('../../data/indexable.parquet') r +) q; + arraycol +------------ + {11,22,33} +(1 row) + -- Subqueries correctly expand *, in case of multiple columns. SELECT * FROM ( SELECT 'something' as prefix, *, 'something else' as postfix @@ -175,6 +190,12 @@ SELECT * FROM ( something | 5.1 | 3.5 | 1.4 | 0.2 | Setosa | 255 | 65535 | 4294967295 | something else (1 row) +-- We show a hint for the new syntax when someone uses the old syntax. +SELECT count("sepal.length") FROM read_parquet('../../data/iris.parquet') AS ("sepal.length" FLOAT); +ERROR: a column definition list is only allowed for functions returning "record" +LINE 1: ... FROM read_parquet('../../data/iris.parquet') AS ("sepal.len... + ^ +HINT: If you use DuckDB functions like read_parquet, you need to use the r['colname'] syntax introduced in pg_duckdb 0.3.0. 
It seems like you might be using the outdated "AS (colname coltype, ...)" syntax -- read_csv SELECT count(r['sepal.length']) FROM read_csv('../../data/iris.csv') r; count @@ -269,6 +290,8 @@ LINE 1: SELECT r[3.14] FROM read_csv('../../data/web_page.csv') r li... ^ SELECT r['abc':'abc'] FROM read_csv('../../data/web_page.csv') r limit 1; ERROR: Creating a slice out of duckdb.row is not supported +SELECT r[:] FROM read_csv('../../data/web_page.csv') r limit 1; +ERROR: Creating a slice out of duckdb.row is not supported SELECT r[q.col] FROM read_csv('../../data/web_page.csv') r, diff --git a/test/regression/sql/read_functions.sql b/test/regression/sql/read_functions.sql index 82d32db5..3b9633bb 100644 --- a/test/regression/sql/read_functions.sql +++ b/test/regression/sql/read_functions.sql @@ -26,6 +26,11 @@ SELECT arraycol[1:2] FROM ( SELECT r['arraycol'] arraycol FROM read_parquet('../../data/indexable.parquet') r ) q; +SELECT r['arraycol'][:] FROM read_parquet('../../data/indexable.parquet') r; +SELECT arraycol[:] FROM ( + SELECT r['arraycol'] arraycol + FROM read_parquet('../../data/indexable.parquet') r +) q; -- Subqueries correctly expand *, in case of multiple columns. SELECT * FROM ( @@ -103,6 +108,9 @@ SELECT * FROM ( LIMIT 1 ) q; +-- We show a hint for the new syntax when someone uses the old syntax. +SELECT count("sepal.length") FROM read_parquet('../../data/iris.parquet') AS ("sepal.length" FLOAT); + -- read_csv SELECT count(r['sepal.length']) FROM read_csv('../../data/iris.csv') r; @@ -148,6 +156,7 @@ SELECT r[NULL] FROM read_csv('../../data/web_page.csv') r limit 1; SELECT r[123] FROM read_csv('../../data/web_page.csv') r limit 1; SELECT r[3.14] FROM read_csv('../../data/web_page.csv') r limit 1; SELECT r['abc':'abc'] FROM read_csv('../../data/web_page.csv') r limit 1; +SELECT r[:] FROM read_csv('../../data/web_page.csv') r limit 1; SELECT r[q.col] FROM read_csv('../../data/web_page.csv') r,