From ca54b67b7b9195437af14eeb5d5b8ca9fad11f16 Mon Sep 17 00:00:00 2001 From: "Y." Date: Thu, 8 Aug 2024 10:11:49 -0400 Subject: [PATCH 01/13] Make DuckDB a singleton per connection process --- include/pgduckdb/pgduckdb_planner.hpp | 6 + include/pgduckdb/pgduckdb_types.hpp | 1 + src/pgduckdb_node.cpp | 55 ++++++- src/pgduckdb_planner.cpp | 52 +++++-- src/pgduckdb_types.cpp | 147 ++++++++++++++++++ test/regression/expected/basic.out | 22 +++ .../regression/expected/extended_protocol.out | 43 +++++ test/regression/sql/basic.sql | 3 + test/regression/sql/extended_protocol.sql | 36 +++++ 9 files changed, 343 insertions(+), 22 deletions(-) create mode 100644 test/regression/expected/extended_protocol.out create mode 100644 test/regression/sql/extended_protocol.sql diff --git a/include/pgduckdb/pgduckdb_planner.hpp b/include/pgduckdb/pgduckdb_planner.hpp index b95fb65b..998bc8cc 100644 --- a/include/pgduckdb/pgduckdb_planner.hpp +++ b/include/pgduckdb/pgduckdb_planner.hpp @@ -1,10 +1,16 @@ #pragma once +#include "duckdb.hpp" + extern "C" { #include "postgres.h" #include "optimizer/planner.h" } +#include "pgduckdb/pgduckdb_duckdb.hpp" + extern bool duckdb_explain_analyze; PlannedStmt *DuckdbPlanNode(Query *parse, int cursor_options, ParamListInfo bound_params); +std::tuple, duckdb::unique_ptr> +DuckdbPrepare(Query *query, ParamListInfo bound_params); diff --git a/include/pgduckdb/pgduckdb_types.hpp b/include/pgduckdb/pgduckdb_types.hpp index 2f0d00a3..0d5e0a00 100644 --- a/include/pgduckdb/pgduckdb_types.hpp +++ b/include/pgduckdb/pgduckdb_types.hpp @@ -19,6 +19,7 @@ constexpr int64_t PGDUCKDB_DUCK_TIMESTAMP_OFFSET = INT64CONST(10957) * USECS_PER duckdb::LogicalType ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute); Oid GetPostgresDuckDBType(duckdb::LogicalType type); +duckdb::Value ConvertPostgresToDuckValue(Datum value, Oid postgres_type); void ConvertPostgresToDuckValue(Datum value, duckdb::Vector &result, idx_t offset); bool ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col); void InsertTupleIntoChunk(duckdb::DataChunk &output, duckdb::shared_ptr scan_global_state, diff --git a/src/pgduckdb_node.cpp b/src/pgduckdb_node.cpp index daf267ed..6129f383 100644 --- a/src/pgduckdb_node.cpp +++ b/src/pgduckdb_node.cpp @@ -4,10 +4,15 @@ extern "C" { #include "postgres.h" #include "miscadmin.h" +#include "tcop/pquery.h" +#include "nodes/params.h" +#include "utils/ruleutils.h" } #include "pgduckdb/pgduckdb_node.hpp" #include "pgduckdb/pgduckdb_types.hpp" +#include "pgduckdb/pgduckdb_duckdb.hpp" +#include "pgduckdb/pgduckdb_planner.hpp" /* global variables */ CustomScanMethods duckdb_scan_scan_methods; @@ -17,6 +22,8 @@ static CustomExecMethods duckdb_scan_exec_methods; typedef struct DuckdbScanState { CustomScanState css; /* must be first field */ + Query *query; + ParamListInfo params; duckdb::Connection *duckdb_connection; duckdb::PreparedStatement *prepared_statement; bool is_executed; @@ -46,10 +53,9 @@ static Node * Duckdb_CreateCustomScanState(CustomScan *cscan) { DuckdbScanState *duckdb_scan_state = (DuckdbScanState *)newNode(sizeof(DuckdbScanState), T_CustomScanState); CustomScanState *custom_scan_state = &duckdb_scan_state->css; - duckdb_scan_state->duckdb_connection = (duckdb::Connection *)linitial(cscan->custom_private); - duckdb_scan_state->prepared_statement = (duckdb::PreparedStatement *)lsecond(cscan->custom_private); - duckdb_scan_state->is_executed = false; - duckdb_scan_state->fetch_next = true; + + duckdb_scan_state->query = (Query 
*)linitial(cscan->custom_private); + /* FIXME: We should pass a sensible bound_params, this breaks prepared statements */ custom_scan_state->methods = &duckdb_scan_exec_methods; return (Node *)custom_scan_state; } @@ -57,6 +63,19 @@ Duckdb_CreateCustomScanState(CustomScan *cscan) { void Duckdb_BeginCustomScan(CustomScanState *cscanstate, EState *estate, int eflags) { DuckdbScanState *duckdb_scan_state = (DuckdbScanState *)cscanstate; + auto prepare_result = DuckdbPrepare(duckdb_scan_state->query, estate->es_param_list_info); + auto prepared_query = std::move(std::get<0>(prepare_result)); + auto duckdb_connection = std::move(std::get<1>(prepare_result)); + + if (prepared_query->HasError()) { + elog(ERROR, "DuckDB re-planning failed %s", prepared_query->GetError().c_str()); + } + + duckdb_scan_state->duckdb_connection = duckdb_connection.release(); + duckdb_scan_state->prepared_statement = prepared_query.release(); + duckdb_scan_state->params = estate->es_param_list_info; + duckdb_scan_state->is_executed = false; + duckdb_scan_state->fetch_next = true; duckdb_scan_state->css.ss.ps.ps_ResultTupleDesc = duckdb_scan_state->css.ss.ss_ScanTupleSlot->tts_tupleDescriptor; HOLD_CANCEL_INTERRUPTS(); } @@ -66,8 +85,34 @@ ExecuteQuery(DuckdbScanState *state) { auto &prepared = *state->prepared_statement; auto &query_results = state->query_results; auto &connection = state->duckdb_connection; + auto pg_params = state->params; + duckdb::vector duckdb_params; + if (pg_params) { + for (int i = 0; i < pg_params->numParams; i++) { + ParamExternData *pg_param; + ParamExternData tmp_workspace; + + /* give hook a chance in case parameter is dynamic */ + if (pg_params->paramFetch != NULL) + pg_param = pg_params->paramFetch(pg_params, i + 1, false, &tmp_workspace); + else + pg_param = &pg_params->params[i]; + + if (pg_param->isnull) { + duckdb_params.push_back(duckdb::Value()); + } else { + if (!OidIsValid(pg_param->ptype)) { + elog(ERROR, "parameter with invalid type during execution"); + } + duckdb_params.push_back(pgduckdb::ConvertPostgresToDuckValue(pg_param->value, pg_param->ptype)); + } + } + } - auto pending = prepared.PendingQuery(); + auto pending = prepared.PendingQuery(duckdb_params, true); + if (pending->HasError()) { + elog(ERROR, "Duckdb execute returned an error: %s", pending->GetError().c_str()); + } duckdb::PendingExecutionResult execution_result; do { execution_result = pending->ExecuteTask(); diff --git a/src/pgduckdb_planner.cpp b/src/pgduckdb_planner.cpp index 68cbfdd5..9c11743d 100644 --- a/src/pgduckdb_planner.cpp +++ b/src/pgduckdb_planner.cpp @@ -4,6 +4,8 @@ extern "C" { #include "postgres.h" #include "catalog/pg_type.h" #include "nodes/makefuncs.h" +#include "nodes/nodes.h" +#include "nodes/params.h" #include "optimizer/optimizer.h" #include "tcop/pquery.h" #include "utils/syscache.h" @@ -51,21 +53,46 @@ PlanQuery(Query *parse, ParamListInfo bound_params) { ); } -static Plan * -CreatePlan(Query *query, const char *query_string, ParamListInfo bound_params) { +std::tuple, duckdb::unique_ptr> +DuckdbPrepare(Query *query, ParamListInfo bound_params) { + const char *query_string = pgduckdb_pg_get_querydef(query, false); - List *rtables = query->rtable; + /* TODO: Move this state into custom_private probably */ + if (ActivePortal && ActivePortal->commandTag == CMDTAG_EXPLAIN) { + if (duckdb_explain_analyze) { + query_string = psprintf("EXPLAIN ANALYZE %s", query_string); + } else { + query_string = psprintf("EXPLAIN %s", query_string); + } + } + List *rtables = query->rtable; /* Extract 
required vars for table */
 	int flags = PVC_RECURSE_AGGREGATES | PVC_RECURSE_WINDOWFUNCS | PVC_RECURSE_PLACEHOLDERS;
 	List *vars = list_concat(pull_var_clause((Node *)query->targetList, flags),
 	                         pull_var_clause((Node *)query->jointree->quals, flags));
-
 	PlannerInfo *query_planner_info = PlanQuery(query, bound_params);
 	auto duckdb_connection = pgduckdb::DuckdbCreateConnection(rtables, query_planner_info, vars, query_string);
 	auto context = duckdb_connection->context;
-
 	auto prepared_query = context->Prepare(query_string);
+	return {std::move(prepared_query), std::move(duckdb_connection)};
+}
+
+static Plan *
+CreatePlan(Query *query, ParamListInfo bound_params) {
+	/*
+	 * Copy the arguments so we can attach unmodified versions to the
+	 * custom_private field at the end of the function. DuckdbPrepare will
+	 * slightly modify the query tree, because it calls subquery_planner, and
+	 * that slightly modifies the query tree.
+	 */
+	auto query_copy = (Query *)copyObjectImpl(query);
+
+	/*
+	 * Prepare the query, so we can get the returned types and column names.
+	 */
+	auto prepare_result = DuckdbPrepare(query, bound_params);
+	auto prepared_query = std::move(std::get<0>(prepare_result));
 
 	if (prepared_query->HasError()) {
 		elog(WARNING, "(PGDuckDB/CreatePlan) Prepared query returned an error: '%s",
@@ -101,12 +128,12 @@ CreatePlan(Query *query, const char *query_string, ParamListInfo bound_params) {
 
 		duckdb_node->custom_scan_tlist =
 		    lappend(duckdb_node->custom_scan_tlist,
-		            makeTargetEntry((Expr *)var, i + 1, (char *)prepared_query->GetNames()[i].c_str(), false));
+		            makeTargetEntry((Expr *)var, i + 1, (char *)pstrdup(prepared_query->GetNames()[i].c_str()), false));
 
 		ReleaseSysCache(tp);
 	}
 
-	duckdb_node->custom_private = list_make2(duckdb_connection.release(), prepared_query.release());
+	duckdb_node->custom_private = list_make1(query_copy);
 	duckdb_node->methods = &duckdb_scan_scan_methods;
 
 	return (Plan *)duckdb_node;
@@ -114,17 +141,8 @@ CreatePlan(Query *query, const char *query_string, ParamListInfo bound_params) {
 
 PlannedStmt *
 DuckdbPlanNode(Query *parse, int cursor_options, ParamListInfo bound_params) {
-	const char *query_string = pgduckdb_pg_get_querydef(parse, false);
-
-	if (ActivePortal && ActivePortal->commandTag == CMDTAG_EXPLAIN) {
-		if (duckdb_explain_analyze) {
-			query_string = psprintf("EXPLAIN ANALYZE %s", query_string);
-		} else {
-			query_string = psprintf("EXPLAIN %s", query_string);
-		}
-	}
 	/* We need to check can we DuckDB create plan */
-	Plan *duckdb_plan = (Plan *)castNode(CustomScan, CreatePlan(parse, query_string, bound_params));
+	Plan *duckdb_plan = (Plan *)castNode(CustomScan, CreatePlan(parse, bound_params));
 
 	if (!duckdb_plan) {
 		return nullptr;
diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp
index b0f1e6cd..a755e0c6 100644
--- a/src/pgduckdb_types.cpp
+++ b/src/pgduckdb_types.cpp
@@ -707,6 +707,153 @@ ConvertDecimal(const NumericVar &numeric) {
 	return (NumericIsNegative(numeric) ? -base_res : base_res);
 }
 
+duckdb::Value
+ConvertPostgresToDuckValue(Datum value, Oid postgres_type) {
+	switch (postgres_type) {
+	case BOOLOID:
+		return duckdb::Value::BOOLEAN(DatumGetBool(value));
+	case INT2OID:
+		return duckdb::Value::SMALLINT(DatumGetInt16(value));
+	case INT4OID:
+		return duckdb::Value::INTEGER(DatumGetInt32(value));
+	case INT8OID:
+		return duckdb::Value::BIGINT(DatumGetInt64(value));
+	case BPCHAROID:
+	case TEXTOID:
+	// TODO: Is this the correct place for JSONOID? I'm unable to use it in a
+	// test so I don't know.
+ // case JSONOID: + case VARCHAROID: { + return duckdb::Value((const char *)value); + } + case DATEOID: + return duckdb::Value::DATE(duckdb::date_t(static_cast(value + PGDUCKDB_DUCK_DATE_OFFSET))); + case TIMESTAMPOID: + return duckdb::Value::TIMESTAMP( + duckdb::timestamp_t(static_cast(value + PGDUCKDB_DUCK_TIMESTAMP_OFFSET))); + case FLOAT4OID: { + return duckdb::Value::FLOAT(DatumGetFloat4(value)); + } + case FLOAT8OID: { + return duckdb::Value::DOUBLE(DatumGetFloat8(value)); + } + // case duckdb::LogicalTypeId::DECIMAL: { + // auto physical_type = type.InternalType(); + // auto numeric = DatumGetNumeric(value); + // auto numeric_var = FromNumeric(numeric); + // switch (physical_type) { + // case duckdb::PhysicalType::INT16: { + // Append(result, ConvertDecimal(numeric_var), offset); + // break; + // } + // case duckdb::PhysicalType::INT32: { + // Append(result, ConvertDecimal(numeric_var), offset); + // break; + // } + // case duckdb::PhysicalType::INT64: { + // Append(result, ConvertDecimal(numeric_var), offset); + // break; + // } + // case duckdb::PhysicalType::INT128: { + // Append(result, ConvertDecimal(numeric_var), offset); + // break; + // } + // default: { + // throw duckdb::InternalException("Unrecognized physical type (%s) for DECIMAL value", + // duckdb::EnumUtil::ToString(physical_type)); + // break; + // } + // } + // break; + // } + // case duckdb::LogicalTypeId::UUID: { + // auto uuid = DatumGetPointer(value); + // hugeint_t duckdb_uuid; + // D_ASSERT(UUID_LEN == sizeof(hugeint_t)); + // for (idx_t i = 0; i < UUID_LEN; i++) { + // ((uint8_t *)&duckdb_uuid)[UUID_LEN - 1 - i] = ((uint8_t *)uuid)[i]; + // } + // duckdb_uuid.upper ^= (uint64_t(1) << 63); + // Append(result, duckdb_uuid, offset); + // break; + // } + // case duckdb::LogicalTypeId::LIST: { + // // Convert Datum to ArrayType + // auto array = DatumGetArrayTypeP(value); + // + // auto ndims = ARR_NDIM(array); + // int *dims = ARR_DIMS(array); + // + // int16 typlen; + // bool typbyval; + // char typalign; + // get_typlenbyvalalign(ARR_ELEMTYPE(array), &typlen, &typbyval, &typalign); + // + // int nelems; + // Datum *elems; + // bool *nulls; + // // Deconstruct the array into Datum elements + // deconstruct_array(array, ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elems, &nulls, &nelems); + // + // if (ndims == -1) { + // throw duckdb::InternalException("Array type has an ndims of -1, so it's actually not an array??"); + // } + // // Set the list_entry_t metadata + // duckdb::Vector *vec = &result; + // int write_offset = offset; + // for (int dim = 0; dim < ndims; dim++) { + // auto previous_dimension = dim ? 
dims[dim - 1] : 1; + // auto dimension = dims[dim]; + // if (vec->GetType().id() != duckdb::LogicalTypeId::LIST) { + // throw duckdb::InvalidInputException( + // "Dimensionality of the schema and the data does not match, data contains more dimensions than + // the " "amount of dimensions specified by the schema"); + // } + // auto child_offset = duckdb::ListVector::GetListSize(*vec); + // auto list_data = duckdb::FlatVector::GetData(*vec); + // for (int entry = 0; entry < previous_dimension; entry++) { + // list_data[write_offset + entry] = duckdb::list_entry_t( + // // All lists in a postgres row are enforced to have the same dimension + // // [[1,2],[2,3,4]] is not allowed, second list has 3 elements instead of 2 + // child_offset + (dimension * entry), dimension); + // } + // auto new_child_size = child_offset + (dimension * previous_dimension); + // duckdb::ListVector::Reserve(*vec, new_child_size); + // duckdb::ListVector::SetListSize(*vec, new_child_size); + // write_offset = child_offset; + // auto &child = duckdb::ListVector::GetEntry(*vec); + // vec = &child; + // } + // if (ndims == 0) { + // D_ASSERT(nelems == 0); + // auto child_offset = duckdb::ListVector::GetListSize(*vec); + // auto list_data = duckdb::FlatVector::GetData(*vec); + // list_data[write_offset] = duckdb::list_entry_t(child_offset, 0); + // vec = &duckdb::ListVector::GetEntry(*vec); + // } + // + // if (vec->GetType().id() == duckdb::LogicalTypeId::LIST) { + // throw duckdb::InvalidInputException( + // "Dimensionality of the schema and the data does not match, data contains fewer dimensions than the " + // "amount of dimensions specified by the schema"); + // } + // + // for (int i = 0; i < nelems; i++) { + // idx_t dest_idx = write_offset + i; + // if (nulls[i]) { + // auto &array_mask = duckdb::FlatVector::Validity(*vec); + // array_mask.SetInvalid(dest_idx); + // continue; + // } + // ConvertPostgresToDuckValue(elems[i], *vec, dest_idx); + // } + // break; + // } + default: + elog(ERROR, "Could not convert Postgres type: %d to DuckDB type", postgres_type); + } +} + void ConvertPostgresToDuckValue(Datum value, duckdb::Vector &result, idx_t offset) { auto &type = result.GetType(); diff --git a/test/regression/expected/basic.out b/test/regression/expected/basic.out index fc407602..ed181bea 100644 --- a/test/regression/expected/basic.out +++ b/test/regression/expected/basic.out @@ -16,6 +16,28 @@ SELECT a, COUNT(*) FROM t WHERE a > 5 GROUP BY a ORDER BY a; 9 | 100 (4 rows) +select COUNT(*) from t \bind \g + count +------- + 1000 +(1 row) + +select a, COUNT(*) from t WHERE a > $1 GROUP BY a ORDER BY a \bind 5 \g + a | count +---+------- + 6 | 100 + 7 | 100 + 8 | 100 + 9 | 100 +(4 rows) + +\bind 7 \g + a | count +---+------- + 8 | 100 + 9 | 100 +(2 rows) + SET duckdb.max_threads_per_query to 4; SELECT COUNT(*) FROM t; count diff --git a/test/regression/expected/extended_protocol.out b/test/regression/expected/extended_protocol.out new file mode 100644 index 00000000..90e91384 --- /dev/null +++ b/test/regression/expected/extended_protocol.out @@ -0,0 +1,43 @@ +CREATE TABLE t( + bool BOOLEAN, + i2 SMALLINT, + i4 INT, + i8 BIGINT, + fl4 REAL, + fl8 DOUBLE PRECISION, + t1 TEXT, + t2 VARCHAR, + d DATE, + ts TIMESTAMP); +INSERT INTO t VALUES (true, 2, 4, 8, 4.0, 8.0, 't1', 't2', '2024-05-04', '2020-01-01 01:02:03'); +select fl4 from t; + fl4 +----- + 4 +(1 row) + +SELECT bool, i2, i4, i8, fl4, fl8, t1, t2, d, ts FROM t WHERE + bool = $1 + and i2 = $2 + and i4 = $3 + and i8 = $4 + -- FIXME: The following "larger than" 
comparisons all have a bugs + -- somewhere, the comparison reports that they are larger, but they clearly + -- are not. The floats are actually smaler (to rule out any floating point + -- funkyness) and the strings are equal. + and fl4 > $5 + and fl8 > $6 + and t1 > $7 + and t2 > $8 + and d = $9 + and ts = $10 +\bind true 2 4 8 5.0 9.0 t1 t2 '2024-05-04' '2020-01-01 01:02:03' \g + bool | i2 | i4 | i8 | fl4 | fl8 | t1 | t2 | d | ts +------+----+----+----+-----+-----+----+----+------------+-------------------------- + t | 2 | 4 | 8 | 4 | 8 | t1 | t2 | 05-04-2024 | Wed Jan 01 01:02:03 2020 +(1 row) + +-- TODO: Fix this by supporting the UNKNOWN type somehow +SELECT $1 FROM t \bind something \g +ERROR: Could not convert DuckDB type: UNKNOWN to Postgres type +drop table t; diff --git a/test/regression/sql/basic.sql b/test/regression/sql/basic.sql index d32d3f0a..b9b3f5e9 100644 --- a/test/regression/sql/basic.sql +++ b/test/regression/sql/basic.sql @@ -6,6 +6,9 @@ SET client_min_messages to 'DEBUG1'; SELECT COUNT(*) FROM t; SELECT a, COUNT(*) FROM t WHERE a > 5 GROUP BY a ORDER BY a; +select COUNT(*) from t \bind \g +select a, COUNT(*) from t WHERE a > $1 GROUP BY a ORDER BY a \bind 5 \g +\bind 7 \g SET duckdb.max_threads_per_query to 4; diff --git a/test/regression/sql/extended_protocol.sql b/test/regression/sql/extended_protocol.sql new file mode 100644 index 00000000..a16eb64a --- /dev/null +++ b/test/regression/sql/extended_protocol.sql @@ -0,0 +1,36 @@ +CREATE TABLE t( + bool BOOLEAN, + i2 SMALLINT, + i4 INT, + i8 BIGINT, + fl4 REAL, + fl8 DOUBLE PRECISION, + t1 TEXT, + t2 VARCHAR, + d DATE, + ts TIMESTAMP); +INSERT INTO t VALUES (true, 2, 4, 8, 4.0, 8.0, 't1', 't2', '2024-05-04', '2020-01-01 01:02:03'); + +select fl4 from t; + +SELECT bool, i2, i4, i8, fl4, fl8, t1, t2, d, ts FROM t WHERE + bool = $1 + and i2 = $2 + and i4 = $3 + and i8 = $4 + -- FIXME: The following "larger than" comparisons all have a bugs + -- somewhere, the comparison reports that they are larger, but they clearly + -- are not. The floats are actually smaler (to rule out any floating point + -- funkyness) and the strings are equal. 
+ and fl4 > $5 + and fl8 > $6 + and t1 > $7 + and t2 > $8 + and d = $9 + and ts = $10 +\bind true 2 4 8 5.0 9.0 t1 t2 '2024-05-04' '2020-01-01 01:02:03' \g + +-- TODO: Fix this by supporting the UNKNOWN type somehow +SELECT $1 FROM t \bind something \g + +drop table t; From 917a74bd259e8e5e5b1689e14f5a48c04f7c2347 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Mon, 16 Sep 2024 16:02:13 +0200 Subject: [PATCH 02/13] Apply review feedback --- include/pgduckdb/pgduckdb_types.hpp | 2 +- src/pgduckdb_node.cpp | 37 ++++++++++++++--------------- src/pgduckdb_types.cpp | 8 ++++++- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/include/pgduckdb/pgduckdb_types.hpp b/include/pgduckdb/pgduckdb_types.hpp index 0d5e0a00..d444bef3 100644 --- a/include/pgduckdb/pgduckdb_types.hpp +++ b/include/pgduckdb/pgduckdb_types.hpp @@ -19,7 +19,7 @@ constexpr int64_t PGDUCKDB_DUCK_TIMESTAMP_OFFSET = INT64CONST(10957) * USECS_PER duckdb::LogicalType ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute); Oid GetPostgresDuckDBType(duckdb::LogicalType type); -duckdb::Value ConvertPostgresToDuckValue(Datum value, Oid postgres_type); +duckdb::Value ConvertPostgresParameterToDuckValue(Datum value, Oid postgres_type); void ConvertPostgresToDuckValue(Datum value, duckdb::Vector &result, idx_t offset); bool ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col); void InsertTupleIntoChunk(duckdb::DataChunk &output, duckdb::shared_ptr scan_global_state, diff --git a/src/pgduckdb_node.cpp b/src/pgduckdb_node.cpp index 6129f383..a8b92950 100644 --- a/src/pgduckdb_node.cpp +++ b/src/pgduckdb_node.cpp @@ -86,32 +86,31 @@ ExecuteQuery(DuckdbScanState *state) { auto &query_results = state->query_results; auto &connection = state->duckdb_connection; auto pg_params = state->params; + const auto num_params = pg_params ? 
pg_params->numParams : 0;
 	duckdb::vector duckdb_params;
-	if (pg_params) {
-		for (int i = 0; i < pg_params->numParams; i++) {
-			ParamExternData *pg_param;
-			ParamExternData tmp_workspace;
-
-			/* give hook a chance in case parameter is dynamic */
-			if (pg_params->paramFetch != NULL)
-				pg_param = pg_params->paramFetch(pg_params, i + 1, false, &tmp_workspace);
-			else
-				pg_param = &pg_params->params[i];
-
-			if (pg_param->isnull) {
-				duckdb_params.push_back(duckdb::Value());
-			} else {
-				if (!OidIsValid(pg_param->ptype)) {
-					elog(ERROR, "parameter with invalid type during execution");
-				}
-				duckdb_params.push_back(pgduckdb::ConvertPostgresToDuckValue(pg_param->value, pg_param->ptype));
+	for (int i = 0; i < num_params; i++) {
+		ParamExternData *pg_param;
+		ParamExternData tmp_workspace;
+
+		/* give hook a chance in case parameter is dynamic */
+		if (pg_params->paramFetch != NULL)
+			pg_param = pg_params->paramFetch(pg_params, i + 1, false, &tmp_workspace);
+		else
+			pg_param = &pg_params->params[i];
+
+		if (pg_param->isnull) {
+			duckdb_params.push_back(duckdb::Value());
+		} else {
+			if (!OidIsValid(pg_param->ptype)) {
+				elog(ERROR, "parameter with invalid type during execution");
 			}
+			duckdb_params.push_back(pgduckdb::ConvertPostgresParameterToDuckValue(pg_param->value, pg_param->ptype));
 		}
 	}
 
 	auto pending = prepared.PendingQuery(duckdb_params, true);
 	if (pending->HasError()) {
-		elog(ERROR, "Duckdb execute returned an error: %s", pending->GetError().c_str());
+		elog(ERROR, "DuckDB execute returned an error: %s", pending->GetError().c_str());
 	}
 	duckdb::PendingExecutionResult execution_result;
 	do {
diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp
index a755e0c6..d0d803ff 100644
--- a/src/pgduckdb_types.cpp
+++ b/src/pgduckdb_types.cpp
@@ -707,8 +707,14 @@ ConvertDecimal(const NumericVar &numeric) {
 	return (NumericIsNegative(numeric) ? -base_res : base_res);
 }
 
+/*
+ * Convert a Postgres Datum to a DuckDB Value. This is meant to be used to
+ * convert query parameters in a prepared statement to their DuckDB equivalents.
+ * Passing it a Datum that is stored on disk results in undefined behavior,
+ * because this function makes no effort to detoast the Datum.
+ */
 duckdb::Value
-ConvertPostgresToDuckValue(Datum value, Oid postgres_type) {
+ConvertPostgresParameterToDuckValue(Datum value, Oid postgres_type) {
 	switch (postgres_type) {
 	case BOOLOID:
 		return duckdb::Value::BOOLEAN(DatumGetBool(value));

From 7eab0fecf451720cab00b72ab3d8552d79f03294 Mon Sep 17 00:00:00 2001
From: Jelte Fennema-Nio
Date: Tue, 17 Sep 2024 12:35:47 +0200
Subject: [PATCH 03/13] Don't crash on explain

---
 src/pgduckdb_node.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/pgduckdb_node.cpp b/src/pgduckdb_node.cpp
index a8b92950..a7f80905 100644
--- a/src/pgduckdb_node.cpp
+++ b/src/pgduckdb_node.cpp
@@ -203,15 +203,16 @@ Duckdb_ReScanCustomScan(CustomScanState *node) {
 
 void
 Duckdb_ExplainCustomScan(CustomScanState *node, List *ancestors, ExplainState *es) {
+	elog(NOTICE, "DuckDB explain custom scan");
 	DuckdbScanState *duckdb_scan_state = (DuckdbScanState *)node;
-	auto res = duckdb_scan_state->prepared_statement->Execute();
-	std::string explain_output = "\n\n";
-	auto chunk = res->Fetch();
+	ExecuteQuery(duckdb_scan_state);
+	auto chunk = duckdb_scan_state->query_results->Fetch();
 	if (!chunk || chunk->size() == 0) {
 		return;
 	}
 	/* Is it safe to hardcode this as result of DuckDB explain?
*/ auto value = chunk->GetValue(1, 0); + std::string explain_output = "\n\n"; explain_output += value.GetValue(); explain_output += "\n"; ExplainPropertyText("DuckDB Execution Plan", explain_output.c_str(), es); From e667ec73b42c39b91dfcf0282d8153d86ef3b0c1 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 17 Sep 2024 13:25:09 +0200 Subject: [PATCH 04/13] Test extended explain queries --- test/pycheck/explain_test.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/test/pycheck/explain_test.py b/test/pycheck/explain_test.py index a1411610..3569ba11 100644 --- a/test/pycheck/explain_test.py +++ b/test/pycheck/explain_test.py @@ -2,7 +2,7 @@ def test_explain(cur: Cursor): - cur.sql("CREATE TABLE test_table (id int primary key, name text)") + cur.sql("CREATE TABLE test_table (id int, name text)") result = cur.sql("EXPLAIN SELECT count(*) FROM test_table") plan = "\n".join(result) assert "UNGROUPED_AGGREGATE" in plan @@ -13,3 +13,17 @@ def test_explain(cur: Cursor): assert "Query Profiling Information" in plan assert "UNGROUPED_AGGREGATE" in plan assert "Total Time:" in plan + + result = cur.sql("EXPLAIN SELECT count(*) FROM test_table where id = %s", (1,)) + plan = "\n".join(result) + assert "UNGROUPED_AGGREGATE" in plan + assert "id=1 AND id IS NOT NULL" in plan + assert "Total Time:" not in plan + + result = cur.sql( + "EXPLAIN ANALYZE SELECT count(*) FROM test_table where id = %s", (1,) + ) + plan = "\n".join(result) + assert "UNGROUPED_AGGREGATE" in plan + assert "id=1 AND id IS NOT NULL" in plan + assert "Total Time:" in plan From e4d0fae8b6bdcfc7ce5d2f7f9e315540af47264b Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 17 Sep 2024 15:24:09 +0200 Subject: [PATCH 05/13] Support prepared statements --- include/pgduckdb/pgduckdb_planner.hpp | 2 +- src/pgduckdb_node.cpp | 4 ++-- src/pgduckdb_planner.cpp | 27 ++++++++++++--------------- test/pycheck/prepared_test.py | 14 ++++++++++++++ test/pycheck/utils.py | 12 ++++++------ 5 files changed, 35 insertions(+), 24 deletions(-) create mode 100644 test/pycheck/prepared_test.py diff --git a/include/pgduckdb/pgduckdb_planner.hpp b/include/pgduckdb/pgduckdb_planner.hpp index 998bc8cc..1784a46f 100644 --- a/include/pgduckdb/pgduckdb_planner.hpp +++ b/include/pgduckdb/pgduckdb_planner.hpp @@ -13,4 +13,4 @@ extern bool duckdb_explain_analyze; PlannedStmt *DuckdbPlanNode(Query *parse, int cursor_options, ParamListInfo bound_params); std::tuple, duckdb::unique_ptr> -DuckdbPrepare(Query *query, ParamListInfo bound_params); +DuckdbPrepare(const Query *query, ParamListInfo bound_params); diff --git a/src/pgduckdb_node.cpp b/src/pgduckdb_node.cpp index a7f80905..8a6c53fd 100644 --- a/src/pgduckdb_node.cpp +++ b/src/pgduckdb_node.cpp @@ -22,7 +22,7 @@ static CustomExecMethods duckdb_scan_exec_methods; typedef struct DuckdbScanState { CustomScanState css; /* must be first field */ - Query *query; + const Query *query; ParamListInfo params; duckdb::Connection *duckdb_connection; duckdb::PreparedStatement *prepared_statement; @@ -54,7 +54,7 @@ Duckdb_CreateCustomScanState(CustomScan *cscan) { DuckdbScanState *duckdb_scan_state = (DuckdbScanState *)newNode(sizeof(DuckdbScanState), T_CustomScanState); CustomScanState *custom_scan_state = &duckdb_scan_state->css; - duckdb_scan_state->query = (Query *)linitial(cscan->custom_private); + duckdb_scan_state->query = (const Query *)linitial(cscan->custom_private); /* FIXME: We should pass a sensible bound_params, this breaks prepared statements */ 
custom_scan_state->methods = &duckdb_scan_exec_methods;
 	return (Node *)custom_scan_state;
 }
diff --git a/src/pgduckdb_planner.cpp b/src/pgduckdb_planner.cpp
index 9c11743d..f5f286a8 100644
--- a/src/pgduckdb_planner.cpp
+++ b/src/pgduckdb_planner.cpp
@@ -54,8 +54,13 @@ PlanQuery(Query *parse, ParamListInfo bound_params) {
 }
 
 std::tuple, duckdb::unique_ptr>
-DuckdbPrepare(Query *query, ParamListInfo bound_params) {
-	const char *query_string = pgduckdb_pg_get_querydef(query, false);
+DuckdbPrepare(const Query *query, ParamListInfo bound_params) {
+	/*
+	 * Copy the query, so the original one is not modified by the
+	 * subquery_planner call that PlanQuery does.
+	 */
+	Query *copied_query = (Query *)copyObjectImpl(query);
+	const char *query_string = pgduckdb_pg_get_querydef(copied_query, false);
 
 	/* TODO: Move this state into custom_private probably */
 	if (ActivePortal && ActivePortal->commandTag == CMDTAG_EXPLAIN) {
@@ -66,12 +71,12 @@ DuckdbPrepare(Query *query, ParamListInfo bound_params) {
 		}
 	}
 
-	List *rtables = query->rtable;
+	List *rtables = copied_query->rtable;
 	/* Extract required vars for table */
 	int flags = PVC_RECURSE_AGGREGATES | PVC_RECURSE_WINDOWFUNCS | PVC_RECURSE_PLACEHOLDERS;
-	List *vars = list_concat(pull_var_clause((Node *)query->targetList, flags),
-	                         pull_var_clause((Node *)query->jointree->quals, flags));
-	PlannerInfo *query_planner_info = PlanQuery(query, bound_params);
+	List *vars = list_concat(pull_var_clause((Node *)copied_query->targetList, flags),
+	                         pull_var_clause((Node *)copied_query->jointree->quals, flags));
+	PlannerInfo *query_planner_info = PlanQuery(copied_query, bound_params);
 	auto duckdb_connection = pgduckdb::DuckdbCreateConnection(rtables, query_planner_info, vars, query_string);
 	auto context = duckdb_connection->context;
 	auto prepared_query = context->Prepare(query_string);
@@ -80,14 +85,6 @@ DuckdbPrepare(const Query *query, ParamListInfo bound_params) {
 
 static Plan *
 CreatePlan(Query *query, ParamListInfo bound_params) {
-	/*
-	 * Copy the arguments so we can attach unmodified versions to the
-	 * custom_private field at the end of the function. DuckdbPrepare will
-	 * slightly modify the query tree, because it calls subquery_planner, and
-	 * that slightly modifies the query tree.
-	 */
-	auto query_copy = (Query *)copyObjectImpl(query);
-
 	/*
 	 * Prepare the query, so we can get the returned types and column names.
*/ @@ -133,7 +130,7 @@ CreatePlan(Query *query, ParamListInfo bound_params) { ReleaseSysCache(tp); } - duckdb_node->custom_private = list_make1(query_copy); + duckdb_node->custom_private = list_make1(query); duckdb_node->methods = &duckdb_scan_scan_methods; return (Plan *)duckdb_node; diff --git a/test/pycheck/prepared_test.py b/test/pycheck/prepared_test.py new file mode 100644 index 00000000..dd49c12f --- /dev/null +++ b/test/pycheck/prepared_test.py @@ -0,0 +1,14 @@ +from .utils import Cursor + + +def test_prepared(cur: Cursor): + cur.sql("CREATE TABLE test_table (id int)") + + q1 = "SELECT count(*) FROM test_table" + # Run it 6 times because after the 5th time some special logic kicks in + assert cur.sql(q1, prepare=True) == 0 + assert cur.sql(q1) == 0 + assert cur.sql(q1) == 0 + assert cur.sql(q1) == 0 + assert cur.sql(q1) == 0 + assert cur.sql(q1) == 0 diff --git a/test/pycheck/utils.py b/test/pycheck/utils.py index 89b864a1..33fbb7b0 100644 --- a/test/pycheck/utils.py +++ b/test/pycheck/utils.py @@ -237,8 +237,8 @@ def __init__(self, cursor: psycopg.Cursor): def __getattr__(self, name): return getattr(self.cursor, name) - def sql(self, query, params=None) -> Any: - self.execute(query, params) + def sql(self, query, params=None, **kwargs) -> Any: + self.execute(query, params, **kwargs) try: return simplify_query_results(self.fetchall()) except psycopg.ProgrammingError as e: @@ -256,11 +256,11 @@ def __init__(self, cursor: psycopg.AsyncCursor): def __getattr__(self, name): return getattr(self.cursor, name) - def sql(self, query, params=None): - return asyncio.ensure_future(self.sql_coroutine(query, params)) + def sql(self, query, params=None, **kwargs): + return asyncio.ensure_future(self.sql_coroutine(query, params, **kwargs)) - async def sql_coroutine(self, query, params=None) -> Any: - await self.execute(query, params) + async def sql_coroutine(self, query, params=None, **kwargs) -> Any: + await self.execute(query, params, **kwargs) try: return simplify_query_results(await self.fetchall()) except psycopg.ProgrammingError as e: From 97871ba69f627668d2199a85c0f86ef5542e219a Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 17 Sep 2024 15:31:28 +0200 Subject: [PATCH 06/13] Some more prepared statement tests --- test/pycheck/prepared_test.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/test/pycheck/prepared_test.py b/test/pycheck/prepared_test.py index dd49c12f..fa302510 100644 --- a/test/pycheck/prepared_test.py +++ b/test/pycheck/prepared_test.py @@ -5,10 +5,26 @@ def test_prepared(cur: Cursor): cur.sql("CREATE TABLE test_table (id int)") q1 = "SELECT count(*) FROM test_table" - # Run it 6 times because after the 5th time some special logic kicks in + # Run it 6 times because after the 5th time some special logic kicks in. 
+    # See this link for more details:
+    # https://github.com/postgres/postgres/blob/89f908a6d0ac1337c868625008c9598487d184e7/src/backend/utils/cache/plancache.c#L1073-L1075
     assert cur.sql(q1, prepare=True) == 0
     assert cur.sql(q1) == 0
     assert cur.sql(q1) == 0
     assert cur.sql(q1) == 0
     assert cur.sql(q1) == 0
     assert cur.sql(q1) == 0
+
+    cur.sql("INSERT INTO test_table VALUES (1), (2), (3)")
+    # Ensure that the result is different now
+    assert cur.sql(q1) == 3
+
+    q2 = "SELECT count(*) FROM test_table where id = %s"
+    assert cur.sql(q2, (1,), prepare=True) == 1
+    assert cur.sql(q2, (1,)) == 1
+    assert cur.sql(q2, (1,)) == 1
+    assert cur.sql(q2, (1,)) == 1
+    assert cur.sql(q2, (1,)) == 1
+    assert cur.sql(q2, (1,)) == 1
+
+    assert cur.sql(q2, (4,)) == 0

From feb8b194f01a1f871324f93c8858bc552120f744 Mon Sep 17 00:00:00 2001
From: Jelte Fennema-Nio
Date: Tue, 17 Sep 2024 15:38:34 +0200
Subject: [PATCH 07/13] Remove leftover comment and debugging elog

---
 src/pgduckdb_node.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/pgduckdb_node.cpp b/src/pgduckdb_node.cpp
index 8a6c53fd..998e9a13 100644
--- a/src/pgduckdb_node.cpp
+++ b/src/pgduckdb_node.cpp
@@ -55,7 +55,6 @@ Duckdb_CreateCustomScanState(CustomScan *cscan) {
 	CustomScanState *custom_scan_state = &duckdb_scan_state->css;
 
 	duckdb_scan_state->query = (const Query *)linitial(cscan->custom_private);
-	/* FIXME: We should pass a sensible bound_params, this breaks prepared statements */
 	custom_scan_state->methods = &duckdb_scan_exec_methods;
 	return (Node *)custom_scan_state;
 }
@@ -203,7 +202,6 @@ Duckdb_ReScanCustomScan(CustomScanState *node) {
 
 void
 Duckdb_ExplainCustomScan(CustomScanState *node, List *ancestors, ExplainState *es) {
-	elog(NOTICE, "DuckDB explain custom scan");
 	DuckdbScanState *duckdb_scan_state = (DuckdbScanState *)node;
 	ExecuteQuery(duckdb_scan_state);
 	auto chunk = duckdb_scan_state->query_results->Fetch();

From 988e2c1645bd508b63006cbfd9e50c7166abbb34 Mon Sep 17 00:00:00 2001
From: Jelte Fennema-Nio
Date: Wed, 18 Sep 2024 10:56:17 +0200
Subject: [PATCH 08/13] Remove unnecessary TODO

---
 src/pgduckdb_hooks.cpp   | 11 +++++++++++
 src/pgduckdb_planner.cpp |  1 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/pgduckdb_hooks.cpp b/src/pgduckdb_hooks.cpp
index 4d74e8ed..aa92a764 100644
--- a/src/pgduckdb_hooks.cpp
+++ b/src/pgduckdb_hooks.cpp
@@ -158,6 +158,17 @@ extern "C" {
 void
 DuckdbExplainOneQueryHook(Query *query, int cursorOptions, IntoClause *into, ExplainState *es,
                           const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv) {
+	/*
+	 * It might seem sensible to store this data in the custom_private
+	 * field of the CustomScan node, but that's not a trivial change to make.
+	 * Storing this in a global variable works fine, as long as we only use
+	 * this variable during planning when we're actually executing an EXPLAIN
+	 * query (this can be checked via the commandTag of the
+	 * ActivePortal). This even works when plans would normally be cached,
+	 * because EXPLAIN queries always execute this hook whenever they are executed.
+	 * EXPLAIN queries are also always re-planned (see
+	 * standard_ExplainOneQuery).
+ */ duckdb_explain_analyze = es->analyze; prev_explain_one_query_hook(query, cursorOptions, into, es, queryString, params, queryEnv); } diff --git a/src/pgduckdb_planner.cpp b/src/pgduckdb_planner.cpp index f5f286a8..c8ee0aa0 100644 --- a/src/pgduckdb_planner.cpp +++ b/src/pgduckdb_planner.cpp @@ -62,7 +62,6 @@ DuckdbPrepare(const Query *query, ParamListInfo bound_params) { Query *copied_query = (Query *)copyObjectImpl(query); const char *query_string = pgduckdb_pg_get_querydef(copied_query, false); - /* TODO: Move this state into custom_private probably */ if (ActivePortal && ActivePortal->commandTag == CMDTAG_EXPLAIN) { if (duckdb_explain_analyze) { query_string = psprintf("EXPLAIN ANALYZE %s", query_string); From c96d0af10db1f252976bcaa20fa40f688dbbb40d Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 18 Sep 2024 16:10:05 +0200 Subject: [PATCH 09/13] Update test and add logging --- src/pgduckdb_planner.cpp | 2 ++ test/pycheck/prepared_test.py | 26 ++++++++++++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/pgduckdb_planner.cpp b/src/pgduckdb_planner.cpp index c8ee0aa0..d6c7fe87 100644 --- a/src/pgduckdb_planner.cpp +++ b/src/pgduckdb_planner.cpp @@ -70,6 +70,8 @@ DuckdbPrepare(const Query *query, ParamListInfo bound_params) { } } + elog(DEBUG2, "(PGDuckDB/DuckdbPrepare) Preparing: %s", query_string); + List *rtables = copied_query->rtable; /* Extract required vars for table */ int flags = PVC_RECURSE_AGGREGATES | PVC_RECURSE_WINDOWFUNCS | PVC_RECURSE_PLACEHOLDERS; diff --git a/test/pycheck/prepared_test.py b/test/pycheck/prepared_test.py index fa302510..22357efe 100644 --- a/test/pycheck/prepared_test.py +++ b/test/pycheck/prepared_test.py @@ -4,27 +4,33 @@ def test_prepared(cur: Cursor): cur.sql("CREATE TABLE test_table (id int)") + # Try prepared query without parameters q1 = "SELECT count(*) FROM test_table" - # Run it 6 times because after the 5th time some special logic kicks in. - # See this link for more details: - # https://github.com/postgres/postgres/blob/89f908a6d0ac1337c868625008c9598487d184e7/src/backend/utils/cache/plancache.c#L1073-L1075 assert cur.sql(q1, prepare=True) == 0 assert cur.sql(q1) == 0 assert cur.sql(q1) == 0 - assert cur.sql(q1) == 0 - assert cur.sql(q1) == 0 - assert cur.sql(q1) == 0 cur.sql("INSERT INTO test_table VALUES (1), (2), (3)") - # Ensure that the result is different now assert cur.sql(q1) == 3 + # The following tests a prepared query that has parameters. + # There are two ways in which prepared queries that have parameters can be + # executed: + # 1. With a custom plan, where the query is prepared with the exact values + # 2. With a generic plan, where the query is planned without the values and + # the values get only substituted at execution time + # + # The below tests both of these cases, by setting the plan_cache_mode. 
q2 = "SELECT count(*) FROM test_table where id = %s" + cur.sql("SET plan_cache_mode = 'force_custom_plan'") assert cur.sql(q2, (1,), prepare=True) == 1 assert cur.sql(q2, (1,)) == 1 assert cur.sql(q2, (1,)) == 1 - assert cur.sql(q2, (1,)) == 1 - assert cur.sql(q2, (1,)) == 1 - assert cur.sql(q2, (1,)) == 1 + assert cur.sql(q2, (3,)) == 1 + assert cur.sql(q2, (4,)) == 0 + cur.sql("SET plan_cache_mode = 'force_generic_plan'") + assert cur.sql(q2, (1,)) == 1 # creates generic plan + assert cur.sql(q2, (1,)) == 1 + assert cur.sql(q2, (3,)) == 1 assert cur.sql(q2, (4,)) == 0 From ec5acdf64325b0d26fa87eacc5328802cf68fe29 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Thu, 19 Sep 2024 17:18:51 +0200 Subject: [PATCH 10/13] Add tests for all types --- src/pgduckdb_types.cpp | 6 +++- test/pycheck/prepared_test.py | 53 +++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index d0d803ff..794e92d8 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -9,6 +9,7 @@ extern "C" { #include "miscadmin.h" #include "catalog/pg_type.h" #include "executor/tuptable.h" +#include "utils/builtins.h" #include "utils/numeric.h" #include "utils/uuid.h" #include "utils/array.h" @@ -730,7 +731,10 @@ ConvertPostgresParameterToDuckValue(Datum value, Oid postgres_type) { // test so I don't know. // case JSONOID: case VARCHAROID: { - return duckdb::Value((const char *)value); + // FIXME: TextDatumGetCstring allocates so it needs a + // guard, but it's a macro not a function, so our current gaurd + // template does not handle it. + return duckdb::Value(TextDatumGetCString(value)); } case DATEOID: return duckdb::Value::DATE(duckdb::date_t(static_cast(value + PGDUCKDB_DUCK_DATE_OFFSET))); diff --git a/test/pycheck/prepared_test.py b/test/pycheck/prepared_test.py index 22357efe..a68e063c 100644 --- a/test/pycheck/prepared_test.py +++ b/test/pycheck/prepared_test.py @@ -1,5 +1,7 @@ from .utils import Cursor +import datetime + def test_prepared(cur: Cursor): cur.sql("CREATE TABLE test_table (id int)") @@ -34,3 +36,54 @@ def test_prepared(cur: Cursor): assert cur.sql(q2, (1,)) == 1 assert cur.sql(q2, (3,)) == 1 assert cur.sql(q2, (4,)) == 0 + + +def test_extended(cur: Cursor): + cur.sql(""" + CREATE TABLE t( + bool BOOLEAN, + i2 SMALLINT, + i4 INT, + i8 BIGINT, + fl4 REAL, + fl8 DOUBLE PRECISION, + t1 TEXT, + t2 VARCHAR, + t3 BPCHAR, + d DATE, + ts TIMESTAMP); + """) + + row = ( + True, + 2, + 4, + 8, + 4.0, + 8.0, + "t1", + "t2", + "t3", + datetime.date(2024, 5, 4), + datetime.datetime(2020, 1, 1, 1, 2, 3), + ) + cur.sql("INSERT INTO t VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", row) + + assert (True,) * len(row) == cur.sql( + """ + SELECT + bool = %s, + i2 = %s, + i4 = %s, + i8 = %s, + fl4 = %s, + fl8 = %s, + t1 = %s, + t2 = %s, + t3 = %s, + d = %s, + ts = %s + FROM t; + """, + row, + ) From ba34b3b8c88d89569c6453bad73af021782ca817 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Thu, 19 Sep 2024 17:54:53 +0200 Subject: [PATCH 11/13] Remove commented out code and support JSON parameter --- src/pgduckdb_types.cpp | 118 +--------------------------------- test/pycheck/prepared_test.py | 12 +++- 2 files changed, 11 insertions(+), 119 deletions(-) diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index 794e92d8..245d6ae4 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -727,9 +727,7 @@ ConvertPostgresParameterToDuckValue(Datum value, Oid postgres_type) { return 
duckdb::Value::BIGINT(DatumGetInt64(value)); case BPCHAROID: case TEXTOID: - // TODO: Is this the correct place for JSONOID? I'm unable to use it in a - // test so I don't know. - // case JSONOID: + case JSONOID: case VARCHAROID: { // FIXME: TextDatumGetCstring allocates so it needs a // guard, but it's a macro not a function, so our current gaurd @@ -747,120 +745,8 @@ ConvertPostgresParameterToDuckValue(Datum value, Oid postgres_type) { case FLOAT8OID: { return duckdb::Value::DOUBLE(DatumGetFloat8(value)); } - // case duckdb::LogicalTypeId::DECIMAL: { - // auto physical_type = type.InternalType(); - // auto numeric = DatumGetNumeric(value); - // auto numeric_var = FromNumeric(numeric); - // switch (physical_type) { - // case duckdb::PhysicalType::INT16: { - // Append(result, ConvertDecimal(numeric_var), offset); - // break; - // } - // case duckdb::PhysicalType::INT32: { - // Append(result, ConvertDecimal(numeric_var), offset); - // break; - // } - // case duckdb::PhysicalType::INT64: { - // Append(result, ConvertDecimal(numeric_var), offset); - // break; - // } - // case duckdb::PhysicalType::INT128: { - // Append(result, ConvertDecimal(numeric_var), offset); - // break; - // } - // default: { - // throw duckdb::InternalException("Unrecognized physical type (%s) for DECIMAL value", - // duckdb::EnumUtil::ToString(physical_type)); - // break; - // } - // } - // break; - // } - // case duckdb::LogicalTypeId::UUID: { - // auto uuid = DatumGetPointer(value); - // hugeint_t duckdb_uuid; - // D_ASSERT(UUID_LEN == sizeof(hugeint_t)); - // for (idx_t i = 0; i < UUID_LEN; i++) { - // ((uint8_t *)&duckdb_uuid)[UUID_LEN - 1 - i] = ((uint8_t *)uuid)[i]; - // } - // duckdb_uuid.upper ^= (uint64_t(1) << 63); - // Append(result, duckdb_uuid, offset); - // break; - // } - // case duckdb::LogicalTypeId::LIST: { - // // Convert Datum to ArrayType - // auto array = DatumGetArrayTypeP(value); - // - // auto ndims = ARR_NDIM(array); - // int *dims = ARR_DIMS(array); - // - // int16 typlen; - // bool typbyval; - // char typalign; - // get_typlenbyvalalign(ARR_ELEMTYPE(array), &typlen, &typbyval, &typalign); - // - // int nelems; - // Datum *elems; - // bool *nulls; - // // Deconstruct the array into Datum elements - // deconstruct_array(array, ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elems, &nulls, &nelems); - // - // if (ndims == -1) { - // throw duckdb::InternalException("Array type has an ndims of -1, so it's actually not an array??"); - // } - // // Set the list_entry_t metadata - // duckdb::Vector *vec = &result; - // int write_offset = offset; - // for (int dim = 0; dim < ndims; dim++) { - // auto previous_dimension = dim ? 
dims[dim - 1] : 1; - // auto dimension = dims[dim]; - // if (vec->GetType().id() != duckdb::LogicalTypeId::LIST) { - // throw duckdb::InvalidInputException( - // "Dimensionality of the schema and the data does not match, data contains more dimensions than - // the " "amount of dimensions specified by the schema"); - // } - // auto child_offset = duckdb::ListVector::GetListSize(*vec); - // auto list_data = duckdb::FlatVector::GetData(*vec); - // for (int entry = 0; entry < previous_dimension; entry++) { - // list_data[write_offset + entry] = duckdb::list_entry_t( - // // All lists in a postgres row are enforced to have the same dimension - // // [[1,2],[2,3,4]] is not allowed, second list has 3 elements instead of 2 - // child_offset + (dimension * entry), dimension); - // } - // auto new_child_size = child_offset + (dimension * previous_dimension); - // duckdb::ListVector::Reserve(*vec, new_child_size); - // duckdb::ListVector::SetListSize(*vec, new_child_size); - // write_offset = child_offset; - // auto &child = duckdb::ListVector::GetEntry(*vec); - // vec = &child; - // } - // if (ndims == 0) { - // D_ASSERT(nelems == 0); - // auto child_offset = duckdb::ListVector::GetListSize(*vec); - // auto list_data = duckdb::FlatVector::GetData(*vec); - // list_data[write_offset] = duckdb::list_entry_t(child_offset, 0); - // vec = &duckdb::ListVector::GetEntry(*vec); - // } - // - // if (vec->GetType().id() == duckdb::LogicalTypeId::LIST) { - // throw duckdb::InvalidInputException( - // "Dimensionality of the schema and the data does not match, data contains fewer dimensions than the " - // "amount of dimensions specified by the schema"); - // } - // - // for (int i = 0; i < nelems; i++) { - // idx_t dest_idx = write_offset + i; - // if (nulls[i]) { - // auto &array_mask = duckdb::FlatVector::Validity(*vec); - // array_mask.SetInvalid(dest_idx); - // continue; - // } - // ConvertPostgresToDuckValue(elems[i], *vec, dest_idx); - // } - // break; - // } default: - elog(ERROR, "Could not convert Postgres type: %d to DuckDB type", postgres_type); + elog(ERROR, "Could not convert Postgres parameter of type: %d to DuckDB type", postgres_type); } } diff --git a/test/pycheck/prepared_test.py b/test/pycheck/prepared_test.py index a68e063c..ecf31582 100644 --- a/test/pycheck/prepared_test.py +++ b/test/pycheck/prepared_test.py @@ -1,6 +1,7 @@ from .utils import Cursor import datetime +import psycopg.types.json def test_prepared(cur: Cursor): @@ -51,7 +52,8 @@ def test_extended(cur: Cursor): t2 VARCHAR, t3 BPCHAR, d DATE, - ts TIMESTAMP); + ts TIMESTAMP, + json_obj JSON); """) row = ( @@ -66,8 +68,11 @@ def test_extended(cur: Cursor): "t3", datetime.date(2024, 5, 4), datetime.datetime(2020, 1, 1, 1, 2, 3), + psycopg.types.json.Json({"a": 1}), + ) + cur.sql( + "INSERT INTO t VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", row ) - cur.sql("INSERT INTO t VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", row) assert (True,) * len(row) == cur.sql( """ @@ -82,7 +87,8 @@ def test_extended(cur: Cursor): t2 = %s, t3 = %s, d = %s, - ts = %s + ts = %s, + json_obj::text = %s::text FROM t; """, row, From f0a69c32a947199bfae7e02811d355d5c5257495 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Thu, 19 Sep 2024 17:57:34 +0200 Subject: [PATCH 12/13] Remove test that was moved to the python suite --- .../regression/expected/extended_protocol.out | 43 ------------------- test/regression/sql/extended_protocol.sql | 36 ---------------- 2 files changed, 79 deletions(-) delete mode 100644 
test/regression/expected/extended_protocol.out delete mode 100644 test/regression/sql/extended_protocol.sql diff --git a/test/regression/expected/extended_protocol.out b/test/regression/expected/extended_protocol.out deleted file mode 100644 index 90e91384..00000000 --- a/test/regression/expected/extended_protocol.out +++ /dev/null @@ -1,43 +0,0 @@ -CREATE TABLE t( - bool BOOLEAN, - i2 SMALLINT, - i4 INT, - i8 BIGINT, - fl4 REAL, - fl8 DOUBLE PRECISION, - t1 TEXT, - t2 VARCHAR, - d DATE, - ts TIMESTAMP); -INSERT INTO t VALUES (true, 2, 4, 8, 4.0, 8.0, 't1', 't2', '2024-05-04', '2020-01-01 01:02:03'); -select fl4 from t; - fl4 ------ - 4 -(1 row) - -SELECT bool, i2, i4, i8, fl4, fl8, t1, t2, d, ts FROM t WHERE - bool = $1 - and i2 = $2 - and i4 = $3 - and i8 = $4 - -- FIXME: The following "larger than" comparisons all have a bugs - -- somewhere, the comparison reports that they are larger, but they clearly - -- are not. The floats are actually smaler (to rule out any floating point - -- funkyness) and the strings are equal. - and fl4 > $5 - and fl8 > $6 - and t1 > $7 - and t2 > $8 - and d = $9 - and ts = $10 -\bind true 2 4 8 5.0 9.0 t1 t2 '2024-05-04' '2020-01-01 01:02:03' \g - bool | i2 | i4 | i8 | fl4 | fl8 | t1 | t2 | d | ts -------+----+----+----+-----+-----+----+----+------------+-------------------------- - t | 2 | 4 | 8 | 4 | 8 | t1 | t2 | 05-04-2024 | Wed Jan 01 01:02:03 2020 -(1 row) - --- TODO: Fix this by supporting the UNKNOWN type somehow -SELECT $1 FROM t \bind something \g -ERROR: Could not convert DuckDB type: UNKNOWN to Postgres type -drop table t; diff --git a/test/regression/sql/extended_protocol.sql b/test/regression/sql/extended_protocol.sql deleted file mode 100644 index a16eb64a..00000000 --- a/test/regression/sql/extended_protocol.sql +++ /dev/null @@ -1,36 +0,0 @@ -CREATE TABLE t( - bool BOOLEAN, - i2 SMALLINT, - i4 INT, - i8 BIGINT, - fl4 REAL, - fl8 DOUBLE PRECISION, - t1 TEXT, - t2 VARCHAR, - d DATE, - ts TIMESTAMP); -INSERT INTO t VALUES (true, 2, 4, 8, 4.0, 8.0, 't1', 't2', '2024-05-04', '2020-01-01 01:02:03'); - -select fl4 from t; - -SELECT bool, i2, i4, i8, fl4, fl8, t1, t2, d, ts FROM t WHERE - bool = $1 - and i2 = $2 - and i4 = $3 - and i8 = $4 - -- FIXME: The following "larger than" comparisons all have a bugs - -- somewhere, the comparison reports that they are larger, but they clearly - -- are not. The floats are actually smaler (to rule out any floating point - -- funkyness) and the strings are equal. 
- and fl4 > $5 - and fl8 > $6 - and t1 > $7 - and t2 > $8 - and d = $9 - and ts = $10 -\bind true 2 4 8 5.0 9.0 t1 t2 '2024-05-04' '2020-01-01 01:02:03' \g - --- TODO: Fix this by supporting the UNKNOWN type somehow -SELECT $1 FROM t \bind something \g - -drop table t; From 6f028f2c2e949007b01f713d2b21498e38e1c8c6 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Fri, 20 Sep 2024 13:03:42 +0200 Subject: [PATCH 13/13] Use Datum -> Date/Timestamp conversion functions --- src/pgduckdb_types.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index 245d6ae4..edd433ea 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -16,6 +16,8 @@ extern "C" { #include "fmgr.h" #include "utils/lsyscache.h" #include "utils/syscache.h" +#include "utils/date.h" +#include "utils/timestamp.h" } #include "pgduckdb/pgduckdb.h" @@ -735,10 +737,9 @@ ConvertPostgresParameterToDuckValue(Datum value, Oid postgres_type) { return duckdb::Value(TextDatumGetCString(value)); } case DATEOID: - return duckdb::Value::DATE(duckdb::date_t(static_cast(value + PGDUCKDB_DUCK_DATE_OFFSET))); + return duckdb::Value::DATE(duckdb::date_t(DatumGetDateADT(value) + PGDUCKDB_DUCK_DATE_OFFSET)); case TIMESTAMPOID: - return duckdb::Value::TIMESTAMP( - duckdb::timestamp_t(static_cast(value + PGDUCKDB_DUCK_TIMESTAMP_OFFSET))); + return duckdb::Value::TIMESTAMP(duckdb::timestamp_t(DatumGetTimestamp(value) + PGDUCKDB_DUCK_TIMESTAMP_OFFSET)); case FLOAT4OID: { return duckdb::Value::FLOAT(DatumGetFloat4(value)); }