From 1e77b6627886f2eea6d84423d9057c3f67ec6756 Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 18 Sep 2024 15:55:47 +0200 Subject: [PATCH 01/16] add postgres -> duckdb conversion for ENUM types, still need the reverse --- src/pgduckdb_types.cpp | 59 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index a677e13f..0246bef7 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -8,8 +8,10 @@ extern "C" { #include "fmgr.h" #include "miscadmin.h" #include "catalog/pg_type.h" +#include "catalog/pg_enum.h" #include "executor/tuptable.h" #include "utils/numeric.h" +#include "utils/catcache.h" #include "utils/uuid.h" #include "utils/array.h" #include "fmgr.h" @@ -493,6 +495,60 @@ ChildTypeFromArray(Oid array_type) { } } +static bool IsEnumType(Oid type_oid) { + bool result = false; + auto type_tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type_oid)); + + if (HeapTupleIsValid(type_tuple)) { + auto type_form = (Form_pg_type) GETSTRUCT(type_tuple); + + // Check if the type is an enum + if (type_form->typtype == 'e') { + result = true; + } + ReleaseSysCache(type_tuple); + } + return result; +} + +duckdb::LogicalType ConvertPostgresEnumToDuckEnum(Oid enum_oid) { + /* Get the list of existing members of the enum */ + auto list = SearchSysCacheList1(ENUMTYPOIDNAME, ObjectIdGetDatum(enum_oid)); + auto nelems = list->n_members; + + /* Sort the existing members by enumsortorder */ + std::vector enum_members(nelems); + for (int i = 0; i < nelems; ++i) { + enum_members[i] = &list->members[i]->tuple; + } + + auto sort_order_cmp = [](const HeapTuple& v1, const HeapTuple& v2) { + Form_pg_enum en1 = (Form_pg_enum) GETSTRUCT(v1); + Form_pg_enum en2 = (Form_pg_enum) GETSTRUCT(v2); + + if (en1->enumsortorder < en2->enumsortorder) { + return true; // v1 < v2 + } else if (en1->enumsortorder > en2->enumsortorder) { + return false; // v1 > v2 + } else { + return false; // v1 == v2 + } + }; + + std::sort(enum_members.begin(), enum_members.end(), sort_order_cmp); + + auto duck_enum_vec = duckdb::Vector(duckdb::LogicalType::VARCHAR, enum_members.size()); + auto enum_vec_data = duckdb::FlatVector::GetData(duck_enum_vec); + for (idx_t i = 0; i < enum_members.size(); i++) { + auto &member = enum_members[i]; + auto enum_data = (Form_pg_enum) GETSTRUCT(member); + enum_vec_data[i] = duckdb::StringVector::AddString(duck_enum_vec, enum_data->enumlabel.data); + } + + ReleaseCatCacheList(list); + return duckdb::LogicalType::ENUM(duck_enum_vec, enum_members.size()); +} + duckdb::LogicalType ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute) { auto &type = attribute->atttypid; @@ -546,6 +602,9 @@ ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute) { case REGCLASSOID: return duckdb::LogicalTypeId::UINTEGER; default: { + if (IsEnumType(type)) { + return ConvertPostgresEnumToDuckEnum(type); + } std::string name = "UnsupportedPostgresType (Oid=" + std::to_string(type) + ")"; return duckdb::LogicalType::USER(name); } From 22aae7b2eaea1ceee2c95dbad4aa95552c429424 Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 18 Sep 2024 16:55:19 +0200 Subject: [PATCH 02/16] vendor EnumTypeInfoTemplated (lives in a cpp file..), subclass it as PGDuckDBEnumTypeInfo to inject the postgres enum OID --- include/pgduckdb/duckdb_vendor/.clang-format | 2 + .../enum_type_info_templated.hpp | 54 +++++++++++++++++ include/pgduckdb/types/pgduckdb_enum.hpp | 59 +++++++++++++++++++ src/pgduckdb_types.cpp | 19 +++--- 4 files changed, 126 insertions(+), 8 deletions(-) create mode 100644 include/pgduckdb/duckdb_vendor/.clang-format create mode 100644 include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp create mode 100644 include/pgduckdb/types/pgduckdb_enum.hpp diff --git a/include/pgduckdb/duckdb_vendor/.clang-format b/include/pgduckdb/duckdb_vendor/.clang-format new file mode 100644 index 00000000..9d159247 --- /dev/null +++ b/include/pgduckdb/duckdb_vendor/.clang-format @@ -0,0 +1,2 @@ +DisableFormat: true +SortIncludes: false diff --git a/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp b/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp new file mode 100644 index 00000000..cecc3fee --- /dev/null +++ b/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp @@ -0,0 +1,54 @@ +#pragma once + +#include "duckdb/common/extra_type_info.hpp" +#include "duckdb/common/vector.hpp" +#include "duckdb/common/types/vector.hpp" +#include "duckdb/common/string_map_set.hpp" +#include "duckdb/common/serializer/deserializer.hpp" + +namespace duckdb { +template +struct EnumTypeInfoTemplated : public EnumTypeInfo { + explicit EnumTypeInfoTemplated(Vector &values_insert_order_p, idx_t size_p) + : EnumTypeInfo(values_insert_order_p, size_p) { + D_ASSERT(values_insert_order_p.GetType().InternalType() == PhysicalType::VARCHAR); + + UnifiedVectorFormat vdata; + values_insert_order.ToUnifiedFormat(size_p, vdata); + + auto data = UnifiedVectorFormat::GetData(vdata); + for (idx_t i = 0; i < size_p; i++) { + auto idx = vdata.sel->get_index(i); + if (!vdata.validity.RowIsValid(idx)) { + throw InternalException("Attempted to create ENUM type with NULL value"); + } + if (values.count(data[idx]) > 0) { + throw InvalidInputException("Attempted to create ENUM type with duplicate value %s", + data[idx].GetString()); + } + values[data[idx]] = UnsafeNumericCast(i); + } + } + + static shared_ptr Deserialize(Deserializer &deserializer, uint32_t size) { + Vector values_insert_order(LogicalType::VARCHAR, size); + auto strings = FlatVector::GetData(values_insert_order); + + deserializer.ReadList(201, "values", [&](Deserializer::List &list, idx_t i) { + strings[i] = StringVector::AddStringOrBlob(values_insert_order, list.ReadElement()); + }); + return make_shared_ptr(values_insert_order, size); + } + + const string_map_t &GetValues() const { + return values; + } + + EnumTypeInfoTemplated(const EnumTypeInfoTemplated &) = delete; + EnumTypeInfoTemplated &operator=(const EnumTypeInfoTemplated &) = delete; + +private: + string_map_t values; +}; + +} // namespace duckdb diff --git a/include/pgduckdb/types/pgduckdb_enum.hpp b/include/pgduckdb/types/pgduckdb_enum.hpp new file mode 100644 index 00000000..80348f99 --- /dev/null +++ b/include/pgduckdb/types/pgduckdb_enum.hpp @@ -0,0 +1,59 @@ +#pragma once + +#include "pgduckdb/duckdb_vendor/enum_type_info_templated.hpp" + +namespace pgduckdb { + +using duckdb::EnumTypeInfo; +using duckdb::EnumTypeInfoTemplated; +using duckdb::ExtraTypeInfo; +using duckdb::InternalException; +using duckdb::LogicalType; +using duckdb::LogicalTypeId; +using duckdb::make_shared_ptr; +using duckdb::PhysicalType; +using duckdb::shared_ptr; +using duckdb::Vector; + +template +class PGDuckDBEnumTypeInfo : public EnumTypeInfoTemplated { +public: + PGDuckDBEnumTypeInfo(Vector &values_insert_order_p, idx_t dict_size_p, Oid type_oid) + : EnumTypeInfoTemplated(values_insert_order_p, dict_size_p), type_oid(type_oid) { + } + +public: + Oid + GetTypeOid() const { + return type_oid; + } + +private: + Oid type_oid = InvalidOid; +}; + +struct PGDuckDBEnum { + static LogicalType + CreateEnumType(Vector &ordered_data, idx_t size, Oid type_oid) { + // Generate EnumTypeInfo + shared_ptr info; + auto enum_internal_type = EnumTypeInfo::DictType(size); + switch (enum_internal_type) { + case PhysicalType::UINT8: + info = make_shared_ptr>(ordered_data, size, type_oid); + break; + case PhysicalType::UINT16: + info = make_shared_ptr>(ordered_data, size, type_oid); + break; + case PhysicalType::UINT32: + info = make_shared_ptr>(ordered_data, size, type_oid); + break; + default: + throw InternalException("Invalid Physical Type for ENUMs"); + } + // Generate Actual Enum Type + return LogicalType(LogicalTypeId::ENUM, info); + } +}; + +} // namespace pgduckdb diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index 0246bef7..dd191ca1 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -22,6 +22,7 @@ extern "C" { #include "pgduckdb/pgduckdb.h" #include "pgduckdb/scan/postgres_scan.hpp" #include "pgduckdb/types/decimal.hpp" +#include "pgduckdb/types/pgduckdb_enum.hpp" #include "pgduckdb/pgduckdb_filter.hpp" #include "pgduckdb/pgduckdb_detoast.hpp" #include "pgduckdb/pgduckdb_types.hpp" @@ -495,12 +496,13 @@ ChildTypeFromArray(Oid array_type) { } } -static bool IsEnumType(Oid type_oid) { +static bool +IsEnumType(Oid type_oid) { bool result = false; auto type_tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type_oid)); if (HeapTupleIsValid(type_tuple)) { - auto type_form = (Form_pg_type) GETSTRUCT(type_tuple); + auto type_form = (Form_pg_type)GETSTRUCT(type_tuple); // Check if the type is an enum if (type_form->typtype == 'e') { @@ -511,7 +513,8 @@ static bool IsEnumType(Oid type_oid) { return result; } -duckdb::LogicalType ConvertPostgresEnumToDuckEnum(Oid enum_oid) { +duckdb::LogicalType +ConvertPostgresEnumToDuckEnum(Oid enum_oid) { /* Get the list of existing members of the enum */ auto list = SearchSysCacheList1(ENUMTYPOIDNAME, ObjectIdGetDatum(enum_oid)); auto nelems = list->n_members; @@ -522,9 +525,9 @@ duckdb::LogicalType ConvertPostgresEnumToDuckEnum(Oid enum_oid) { enum_members[i] = &list->members[i]->tuple; } - auto sort_order_cmp = [](const HeapTuple& v1, const HeapTuple& v2) { - Form_pg_enum en1 = (Form_pg_enum) GETSTRUCT(v1); - Form_pg_enum en2 = (Form_pg_enum) GETSTRUCT(v2); + auto sort_order_cmp = [](const HeapTuple &v1, const HeapTuple &v2) { + Form_pg_enum en1 = (Form_pg_enum)GETSTRUCT(v1); + Form_pg_enum en2 = (Form_pg_enum)GETSTRUCT(v2); if (en1->enumsortorder < en2->enumsortorder) { return true; // v1 < v2 @@ -541,12 +544,12 @@ duckdb::LogicalType ConvertPostgresEnumToDuckEnum(Oid enum_oid) { auto enum_vec_data = duckdb::FlatVector::GetData(duck_enum_vec); for (idx_t i = 0; i < enum_members.size(); i++) { auto &member = enum_members[i]; - auto enum_data = (Form_pg_enum) GETSTRUCT(member); + auto enum_data = (Form_pg_enum)GETSTRUCT(member); enum_vec_data[i] = duckdb::StringVector::AddString(duck_enum_vec, enum_data->enumlabel.data); } ReleaseCatCacheList(list); - return duckdb::LogicalType::ENUM(duck_enum_vec, enum_members.size()); + return PGDuckDBEnum::CreateEnumType(duck_enum_vec, enum_members.size(), enum_oid); } duckdb::LogicalType From bda3e7f150088c08b62dde1474bfd48e8884c58b Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 18 Sep 2024 17:14:39 +0200 Subject: [PATCH 03/16] duckdb -> postgres binding, return the type_oid we saved earlier --- src/pgduckdb_types.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index dd191ca1..77acd63d 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -614,6 +614,22 @@ ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute) { } } +Oid GetEnumTypeOid(Oid enum_oid) { + /* Get the pg_type tuple for the enum type */ + auto tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(enum_oid)); + Oid result = InvalidOid; + if (!HeapTupleIsValid(tuple)) { + elog(ERROR, "cache lookup failed for type %u", enum_oid); + } + + auto typeForm = (Form_pg_type) GETSTRUCT(tuple); + result = typeForm->oid; + + /* Release the cache tuple */ + ReleaseSysCache(tuple); + return result; +} + Oid GetPostgresDuckDBType(duckdb::LogicalType type) { auto id = type.id(); @@ -674,6 +690,20 @@ GetPostgresDuckDBType(duckdb::LogicalType type) { type.ToString().c_str()); } } + case duckdb::LogicalTypeId::ENUM: { + auto type_info = type.AuxInfo(); + auto &enum_type_info = type_info->Cast(); + switch (enum_type_info.DictType(enum_type_info.GetDictSize())) { + case PhysicalType::UINT8: + return GetEnumTypeOid(enum_type_info.Cast>().GetTypeOid()); + case PhysicalType::UINT16: + return GetEnumTypeOid(enum_type_info.Cast>().GetTypeOid()); + case PhysicalType::UINT32: + return GetEnumTypeOid(enum_type_info.Cast>().GetTypeOid()); + default: + throw InternalException("Invalid Physical Type for ENUMs"); + } + } default: { elog(ERROR, "Could not convert DuckDB type: %s to Postgres type", type.ToString().c_str()); } From 3cdef4e36b0b1eeefbd7c5e093a1a1377bd761d8 Mon Sep 17 00:00:00 2001 From: Tishj Date: Wed, 18 Sep 2024 18:07:01 +0200 Subject: [PATCH 04/16] nearly there, the TupleTableSlot needs to be populated with the OID of the enum member (not the enum type oid) --- include/pgduckdb/types/pgduckdb_enum.hpp | 19 +++--- src/pgduckdb_types.cpp | 79 +++++++++++++++++++++--- 2 files changed, 80 insertions(+), 18 deletions(-) diff --git a/include/pgduckdb/types/pgduckdb_enum.hpp b/include/pgduckdb/types/pgduckdb_enum.hpp index 80348f99..dc117a28 100644 --- a/include/pgduckdb/types/pgduckdb_enum.hpp +++ b/include/pgduckdb/types/pgduckdb_enum.hpp @@ -18,35 +18,34 @@ using duckdb::Vector; template class PGDuckDBEnumTypeInfo : public EnumTypeInfoTemplated { public: - PGDuckDBEnumTypeInfo(Vector &values_insert_order_p, idx_t dict_size_p, Oid type_oid) - : EnumTypeInfoTemplated(values_insert_order_p, dict_size_p), type_oid(type_oid) { + PGDuckDBEnumTypeInfo(Vector &values_insert_order_p, idx_t dict_size_p, Vector &oid_vec) + : EnumTypeInfoTemplated(values_insert_order_p, dict_size_p), oid_vec(oid_vec) { } public: - Oid - GetTypeOid() const { - return type_oid; + Vector &GetMemberOids() const { + return oid_vec; } private: - Oid type_oid = InvalidOid; + Vector oid_vec; }; struct PGDuckDBEnum { static LogicalType - CreateEnumType(Vector &ordered_data, idx_t size, Oid type_oid) { + CreateEnumType(Vector &ordered_data, idx_t size, Vector &oid_vec) { // Generate EnumTypeInfo shared_ptr info; auto enum_internal_type = EnumTypeInfo::DictType(size); switch (enum_internal_type) { case PhysicalType::UINT8: - info = make_shared_ptr>(ordered_data, size, type_oid); + info = make_shared_ptr>(ordered_data, size, oid_vec); break; case PhysicalType::UINT16: - info = make_shared_ptr>(ordered_data, size, type_oid); + info = make_shared_ptr>(ordered_data, size, oid_vec); break; case PhysicalType::UINT32: - info = make_shared_ptr>(ordered_data, size, type_oid); + info = make_shared_ptr>(ordered_data, size, oid_vec); break; default: throw InternalException("Invalid Physical Type for ENUMs"); diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index 77acd63d..4df789c6 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -324,6 +324,16 @@ ConvertDuckToPostgresArray(TupleTableSlot *slot, duckdb::Value &value, idx_t col slot->tts_values[col] = PointerGetDatum(arr); } +void ConvertDuckToPostgresEnumValue(TupleTableSlot *slot, duckdb::Value &val, idx_t col) { + auto &enum_type_info = val.type().AuxInfo()->Cast(); + + // TODO: make EnumTypeInfoTemplated inherit from PGDuckDBEnum - not the other way around + + // Find the OID of the enum value (not the enum type oid) + // TODO: we should store this inside the PGDuckDBEnumTypeInfo + slot->tts_values[col] = ObjectIdGetDatum(enum_value_oid); +} + void ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col) { Oid oid = slot->tts_tupleDescriptor->attrs[col].atttypid; @@ -465,9 +475,16 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col ConvertDuckToPostgresArray>>(slot, value, col); break; } - default: + default: { + auto type_id = value.type().id(); + switch (type_id) { + case LogicalTypeId::ENUM: { + auto &aux_info + } + } throw duckdb::NotImplementedException("(DuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", oid); } + } } static inline int @@ -541,15 +558,18 @@ ConvertPostgresEnumToDuckEnum(Oid enum_oid) { std::sort(enum_members.begin(), enum_members.end(), sort_order_cmp); auto duck_enum_vec = duckdb::Vector(duckdb::LogicalType::VARCHAR, enum_members.size()); + auto enum_oid_vec = duckdb::Vector(duckdb::LogicalType::UINT32, enum_members.size()); auto enum_vec_data = duckdb::FlatVector::GetData(duck_enum_vec); + auto enum_oid_data = duckdb::FlatVector::GetData(enum_oid_vec); for (idx_t i = 0; i < enum_members.size(); i++) { auto &member = enum_members[i]; auto enum_data = (Form_pg_enum)GETSTRUCT(member); enum_vec_data[i] = duckdb::StringVector::AddString(duck_enum_vec, enum_data->enumlabel.data); + enum_oid_data[i] = enum_data->oid; } ReleaseCatCacheList(list); - return PGDuckDBEnum::CreateEnumType(duck_enum_vec, enum_members.size(), enum_oid); + return PGDuckDBEnum::CreateEnumType(duck_enum_vec, enum_members.size(), enum_oid_data); } duckdb::LogicalType @@ -614,12 +634,13 @@ ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute) { } } -Oid GetEnumTypeOid(Oid enum_oid) { +Oid GetEnumTypeOid(Vector &oids) { /* Get the pg_type tuple for the enum type */ - auto tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(enum_oid)); + member_oid = FlatVector::GetData(oids)[0]; + auto tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(member_oid)); Oid result = InvalidOid; if (!HeapTupleIsValid(tuple)) { - elog(ERROR, "cache lookup failed for type %u", enum_oid); + elog(ERROR, "cache lookup failed for enum member with oid %u", member_oid); } auto typeForm = (Form_pg_type) GETSTRUCT(tuple); @@ -695,11 +716,11 @@ GetPostgresDuckDBType(duckdb::LogicalType type) { auto &enum_type_info = type_info->Cast(); switch (enum_type_info.DictType(enum_type_info.GetDictSize())) { case PhysicalType::UINT8: - return GetEnumTypeOid(enum_type_info.Cast>().GetTypeOid()); + return GetEnumTypeOid(enum_type_info.Cast>().GetMemberOids()); case PhysicalType::UINT16: - return GetEnumTypeOid(enum_type_info.Cast>().GetTypeOid()); + return GetEnumTypeOid(enum_type_info.Cast>().GetMemberOids()); case PhysicalType::UINT32: - return GetEnumTypeOid(enum_type_info.Cast>().GetTypeOid()); + return GetEnumTypeOid(enum_type_info.Cast>().GetMemberOids()); default: throw InternalException("Invalid Physical Type for ENUMs"); } @@ -794,6 +815,30 @@ ConvertDecimal(const NumericVar &numeric) { return (NumericIsNegative(numeric) ? -base_res : base_res); } +int GetEnumPosition(Datum enum_oid) { + HeapTuple enum_tuple; + Form_pg_enum enum_entry; + int enum_position; + + // Use SearchSysCache1 to fetch the enum entry from pg_enum using its OID + enum_tuple = SearchSysCache1(ENUMOID, enum_oid); + + if (!HeapTupleIsValid(enum_tuple)) { + elog(ERROR, "could not find enum with OID %u", DatumGetObjectId(enum_oid)); + } + + // Get the pg_enum structure from the tuple + enum_entry = (Form_pg_enum) GETSTRUCT(enum_tuple); + + // The enumsortorder field gives us the position of the enum value + enum_position = (int) enum_entry->enumsortorder; + + // Release the cache to free up memory + ReleaseSysCache(enum_tuple); + + return enum_position; +} + void ConvertPostgresToDuckValue(Datum value, duckdb::Vector &result, idx_t offset) { auto &type = result.GetType(); @@ -970,6 +1015,24 @@ ConvertPostgresToDuckValue(Datum value, duckdb::Vector &result, idx_t offset) { } break; } + case LogicalTypeId::ENUM: { + auto enum_position = GetEnumPosition(value); + auto physical_type = result.GetType().InternalType(); + switch (physical_type) { + case PhysicalType::UINT8: + Append(result, static_cast(enum_position-1), offset); + break; + case PhysicalType::UINT16: + Append(result, static_cast(enum_position-1), offset); + break; + case PhysicalType::UINT32: + Append(result, static_cast(enum_position-1), offset); + break; + default: + throw InternalException("Invalid Physical Type for ENUMs"); + } + break; + } default: throw duckdb::NotImplementedException("(DuckDB/ConvertPostgresToDuckValue) Unsupported pgduckdb type: %s", result.GetType().ToString().c_str()); From 5672b48f43662d7a05db1c4b5c36d773f20a98fc Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 19 Sep 2024 09:37:16 +0200 Subject: [PATCH 05/16] fixed the last problem, we can now consume enums from postgres and roundtrip these back from duckdb -> postgres in the final result --- include/pgduckdb/types/pgduckdb_enum.hpp | 2 +- src/pgduckdb_types.cpp | 58 ++++++++++++++++++------ 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/include/pgduckdb/types/pgduckdb_enum.hpp b/include/pgduckdb/types/pgduckdb_enum.hpp index dc117a28..f2daf40e 100644 --- a/include/pgduckdb/types/pgduckdb_enum.hpp +++ b/include/pgduckdb/types/pgduckdb_enum.hpp @@ -23,7 +23,7 @@ class PGDuckDBEnumTypeInfo : public EnumTypeInfoTemplated { } public: - Vector &GetMemberOids() const { + const Vector &GetMemberOids() const { return oid_vec; } diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index 4df789c6..046fa028 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -324,13 +324,41 @@ ConvertDuckToPostgresArray(TupleTableSlot *slot, duckdb::Value &value, idx_t col slot->tts_values[col] = PointerGetDatum(arr); } -void ConvertDuckToPostgresEnumValue(TupleTableSlot *slot, duckdb::Value &val, idx_t col) { - auto &enum_type_info = val.type().AuxInfo()->Cast(); +idx_t GetEnumPosition(duckdb::Value &val) { + D_ASSERT(val.type().id() == LogicalTypeId::ENUM); + auto physical_type = val.type().InternalType(); + switch (physical_type) { + case PhysicalType::UINT8: + return val.GetValue(); + case PhysicalType::UINT16: + return val.GetValue(); + case PhysicalType::UINT32: + return val.GetValue(); + default: + throw InternalException("Invalid Physical Type for ENUMs"); + } +} - // TODO: make EnumTypeInfoTemplated inherit from PGDuckDBEnum - not the other way around +const Vector &GetEnumMemberOids(duckdb::Value &val) { + D_ASSERT(val.type().id() == LogicalTypeId::ENUM); + auto physical_type = val.type().InternalType(); + auto &enum_type_info = val.type().AuxInfo()->Cast(); + switch (physical_type) { + case PhysicalType::UINT8: + return enum_type_info.Cast>().GetMemberOids(); + case PhysicalType::UINT16: + return enum_type_info.Cast>().GetMemberOids(); + case PhysicalType::UINT32: + return enum_type_info.Cast>().GetMemberOids(); + default: + throw InternalException("Invalid Physical Type for ENUMs"); + } +} - // Find the OID of the enum value (not the enum type oid) - // TODO: we should store this inside the PGDuckDBEnumTypeInfo +void ConvertDuckToPostgresEnumValue(TupleTableSlot *slot, duckdb::Value &val, idx_t col) { + auto position = GetEnumPosition(val); + auto &enum_oids = GetEnumMemberOids(val); + auto enum_value_oid = duckdb::FlatVector::GetData(enum_oids)[position]; slot->tts_values[col] = ObjectIdGetDatum(enum_value_oid); } @@ -479,10 +507,12 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col auto type_id = value.type().id(); switch (type_id) { case LogicalTypeId::ENUM: { - auto &aux_info + ConvertDuckToPostgresEnumValue(slot, value, col); + break; + default: + throw duckdb::NotImplementedException("(DuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", oid); } } - throw duckdb::NotImplementedException("(DuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", oid); } } } @@ -558,7 +588,7 @@ ConvertPostgresEnumToDuckEnum(Oid enum_oid) { std::sort(enum_members.begin(), enum_members.end(), sort_order_cmp); auto duck_enum_vec = duckdb::Vector(duckdb::LogicalType::VARCHAR, enum_members.size()); - auto enum_oid_vec = duckdb::Vector(duckdb::LogicalType::UINT32, enum_members.size()); + auto enum_oid_vec = duckdb::Vector(duckdb::LogicalType::UINTEGER, enum_members.size()); auto enum_vec_data = duckdb::FlatVector::GetData(duck_enum_vec); auto enum_oid_data = duckdb::FlatVector::GetData(enum_oid_vec); for (idx_t i = 0; i < enum_members.size(); i++) { @@ -569,7 +599,7 @@ ConvertPostgresEnumToDuckEnum(Oid enum_oid) { } ReleaseCatCacheList(list); - return PGDuckDBEnum::CreateEnumType(duck_enum_vec, enum_members.size(), enum_oid_data); + return PGDuckDBEnum::CreateEnumType(duck_enum_vec, enum_members.size(), enum_oid_vec); } duckdb::LogicalType @@ -634,17 +664,17 @@ ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute) { } } -Oid GetEnumTypeOid(Vector &oids) { +Oid GetEnumTypeOid(const Vector &oids) { /* Get the pg_type tuple for the enum type */ - member_oid = FlatVector::GetData(oids)[0]; - auto tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(member_oid)); + auto member_oid = duckdb::FlatVector::GetData(oids)[0]; + auto tuple = SearchSysCache1(ENUMOID, ObjectIdGetDatum(member_oid)); Oid result = InvalidOid; if (!HeapTupleIsValid(tuple)) { elog(ERROR, "cache lookup failed for enum member with oid %u", member_oid); } - auto typeForm = (Form_pg_type) GETSTRUCT(tuple); - result = typeForm->oid; + auto enum_form = (Form_pg_enum) GETSTRUCT(tuple); + result = enum_form->enumtypid; /* Release the cache tuple */ ReleaseSysCache(tuple); From 95ad2430f5ef7509de34a01c86076a60e253997d Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 19 Sep 2024 09:42:15 +0200 Subject: [PATCH 06/16] add test + make format --- include/pgduckdb/types/pgduckdb_enum.hpp | 3 +- src/pgduckdb_types.cpp | 80 +++++++++++++----------- test/regression/expected/enum.out | 15 +++++ test/regression/schedule | 1 + test/regression/sql/enum.sql | 11 ++++ 5 files changed, 72 insertions(+), 38 deletions(-) create mode 100644 test/regression/expected/enum.out create mode 100644 test/regression/sql/enum.sql diff --git a/include/pgduckdb/types/pgduckdb_enum.hpp b/include/pgduckdb/types/pgduckdb_enum.hpp index f2daf40e..fe42d7e9 100644 --- a/include/pgduckdb/types/pgduckdb_enum.hpp +++ b/include/pgduckdb/types/pgduckdb_enum.hpp @@ -23,7 +23,8 @@ class PGDuckDBEnumTypeInfo : public EnumTypeInfoTemplated { } public: - const Vector &GetMemberOids() const { + const Vector & + GetMemberOids() const { return oid_vec; } diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index 046fa028..ee8bbc28 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -324,7 +324,8 @@ ConvertDuckToPostgresArray(TupleTableSlot *slot, duckdb::Value &value, idx_t col slot->tts_values[col] = PointerGetDatum(arr); } -idx_t GetEnumPosition(duckdb::Value &val) { +idx_t +GetEnumPosition(duckdb::Value &val) { D_ASSERT(val.type().id() == LogicalTypeId::ENUM); auto physical_type = val.type().InternalType(); switch (physical_type) { @@ -339,7 +340,8 @@ idx_t GetEnumPosition(duckdb::Value &val) { } } -const Vector &GetEnumMemberOids(duckdb::Value &val) { +const Vector & +GetEnumMemberOids(duckdb::Value &val) { D_ASSERT(val.type().id() == LogicalTypeId::ENUM); auto physical_type = val.type().InternalType(); auto &enum_type_info = val.type().AuxInfo()->Cast(); @@ -355,7 +357,8 @@ const Vector &GetEnumMemberOids(duckdb::Value &val) { } } -void ConvertDuckToPostgresEnumValue(TupleTableSlot *slot, duckdb::Value &val, idx_t col) { +void +ConvertDuckToPostgresEnumValue(TupleTableSlot *slot, duckdb::Value &val, idx_t col) { auto position = GetEnumPosition(val); auto &enum_oids = GetEnumMemberOids(val); auto enum_value_oid = duckdb::FlatVector::GetData(enum_oids)[position]; @@ -506,12 +509,13 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col default: { auto type_id = value.type().id(); switch (type_id) { - case LogicalTypeId::ENUM: { - ConvertDuckToPostgresEnumValue(slot, value, col); - break; - default: - throw duckdb::NotImplementedException("(DuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", oid); - } + case LogicalTypeId::ENUM: { + ConvertDuckToPostgresEnumValue(slot, value, col); + break; + default: + throw duckdb::NotImplementedException("(DuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", + oid); + } } } } @@ -664,20 +668,21 @@ ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute) { } } -Oid GetEnumTypeOid(const Vector &oids) { - /* Get the pg_type tuple for the enum type */ +Oid +GetEnumTypeOid(const Vector &oids) { + /* Get the pg_type tuple for the enum type */ auto member_oid = duckdb::FlatVector::GetData(oids)[0]; - auto tuple = SearchSysCache1(ENUMOID, ObjectIdGetDatum(member_oid)); + auto tuple = SearchSysCache1(ENUMOID, ObjectIdGetDatum(member_oid)); Oid result = InvalidOid; - if (!HeapTupleIsValid(tuple)) { - elog(ERROR, "cache lookup failed for enum member with oid %u", member_oid); - } + if (!HeapTupleIsValid(tuple)) { + elog(ERROR, "cache lookup failed for enum member with oid %u", member_oid); + } - auto enum_form = (Form_pg_enum) GETSTRUCT(tuple); + auto enum_form = (Form_pg_enum)GETSTRUCT(tuple); result = enum_form->enumtypid; - /* Release the cache tuple */ - ReleaseSysCache(tuple); + /* Release the cache tuple */ + ReleaseSysCache(tuple); return result; } @@ -845,28 +850,29 @@ ConvertDecimal(const NumericVar &numeric) { return (NumericIsNegative(numeric) ? -base_res : base_res); } -int GetEnumPosition(Datum enum_oid) { - HeapTuple enum_tuple; - Form_pg_enum enum_entry; - int enum_position; +int +GetEnumPosition(Datum enum_oid) { + HeapTuple enum_tuple; + Form_pg_enum enum_entry; + int enum_position; - // Use SearchSysCache1 to fetch the enum entry from pg_enum using its OID - enum_tuple = SearchSysCache1(ENUMOID, enum_oid); + // Use SearchSysCache1 to fetch the enum entry from pg_enum using its OID + enum_tuple = SearchSysCache1(ENUMOID, enum_oid); - if (!HeapTupleIsValid(enum_tuple)) { - elog(ERROR, "could not find enum with OID %u", DatumGetObjectId(enum_oid)); - } + if (!HeapTupleIsValid(enum_tuple)) { + elog(ERROR, "could not find enum with OID %u", DatumGetObjectId(enum_oid)); + } - // Get the pg_enum structure from the tuple - enum_entry = (Form_pg_enum) GETSTRUCT(enum_tuple); + // Get the pg_enum structure from the tuple + enum_entry = (Form_pg_enum)GETSTRUCT(enum_tuple); - // The enumsortorder field gives us the position of the enum value - enum_position = (int) enum_entry->enumsortorder; + // The enumsortorder field gives us the position of the enum value + enum_position = (int)enum_entry->enumsortorder; - // Release the cache to free up memory - ReleaseSysCache(enum_tuple); + // Release the cache to free up memory + ReleaseSysCache(enum_tuple); - return enum_position; + return enum_position; } void @@ -1050,13 +1056,13 @@ ConvertPostgresToDuckValue(Datum value, duckdb::Vector &result, idx_t offset) { auto physical_type = result.GetType().InternalType(); switch (physical_type) { case PhysicalType::UINT8: - Append(result, static_cast(enum_position-1), offset); + Append(result, static_cast(enum_position - 1), offset); break; case PhysicalType::UINT16: - Append(result, static_cast(enum_position-1), offset); + Append(result, static_cast(enum_position - 1), offset); break; case PhysicalType::UINT32: - Append(result, static_cast(enum_position-1), offset); + Append(result, static_cast(enum_position - 1), offset); break; default: throw InternalException("Invalid Physical Type for ENUMs"); diff --git a/test/regression/expected/enum.out b/test/regression/expected/enum.out new file mode 100644 index 00000000..7f50330e --- /dev/null +++ b/test/regression/expected/enum.out @@ -0,0 +1,15 @@ +CREATE TYPE mood AS ENUM ('happy', 'sad', 'neutral'); +create table tbl (a mood); +insert into tbl select 'happy'; +insert into tbl select 'neutral'; +insert into tbl select 'sad'; +select * from tbl; + a +--------- + happy + neutral + sad +(3 rows) + +drop table tbl; +drop type mood; diff --git a/test/regression/schedule b/test/regression/schedule index 8489a20f..904471d9 100644 --- a/test/regression/schedule +++ b/test/regression/schedule @@ -12,3 +12,4 @@ test: duckdb_only_functions test: cte test: create_table_as test: standard_conforming_strings +test: enum diff --git a/test/regression/sql/enum.sql b/test/regression/sql/enum.sql new file mode 100644 index 00000000..ccb92ae0 --- /dev/null +++ b/test/regression/sql/enum.sql @@ -0,0 +1,11 @@ +CREATE TYPE mood AS ENUM ('happy', 'sad', 'neutral'); +create table tbl (a mood); + +insert into tbl select 'happy'; +insert into tbl select 'neutral'; +insert into tbl select 'sad'; + +select * from tbl; + +drop table tbl; +drop type mood; From 161e52a95ab62284f6828883afd0d633d93a1c93 Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 19 Sep 2024 21:06:30 +0200 Subject: [PATCH 07/16] move into pgduckdb_enum --- Makefile | 1 + include/pgduckdb/types/pgduckdb_enum.hpp | 43 ++++---- src/pgduckdb_types.cpp | 122 ++--------------------- src/types/pgduckdb_enum.cpp | 113 +++++++++++++++++++++ test/regression/sql/enum.sql | 13 ++- 5 files changed, 153 insertions(+), 139 deletions(-) create mode 100644 src/types/pgduckdb_enum.cpp diff --git a/Makefile b/Makefile index 812c17ec..edc283c7 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,7 @@ SRCS = src/scan/heap_reader.cpp \ src/scan/postgres_seq_scan.cpp \ src/utility/copy.cpp \ src/vendor/pg_explain.cpp \ + src/types/pgduckdb_enum.cpp \ src/pgduckdb_metadata_cache.cpp \ src/pgduckdb_detoast.cpp \ src/pgduckdb_duckdb.cpp \ diff --git a/include/pgduckdb/types/pgduckdb_enum.hpp b/include/pgduckdb/types/pgduckdb_enum.hpp index fe42d7e9..44b0d4ff 100644 --- a/include/pgduckdb/types/pgduckdb_enum.hpp +++ b/include/pgduckdb/types/pgduckdb_enum.hpp @@ -2,6 +2,14 @@ #include "pgduckdb/duckdb_vendor/enum_type_info_templated.hpp" +extern "C" { + #include "postgres.h" + #include "catalog/pg_enum.h" + #include "catalog/pg_type.h" + #include "utils/syscache.h" + #include "access/htup_details.h" +} + namespace pgduckdb { using duckdb::EnumTypeInfo; @@ -18,42 +26,27 @@ using duckdb::Vector; template class PGDuckDBEnumTypeInfo : public EnumTypeInfoTemplated { public: - PGDuckDBEnumTypeInfo(Vector &values_insert_order_p, idx_t dict_size_p, Vector &oid_vec) - : EnumTypeInfoTemplated(values_insert_order_p, dict_size_p), oid_vec(oid_vec) { + PGDuckDBEnumTypeInfo(Vector &values_insert_order_p, idx_t dict_size_p, Vector &enum_member_oids) + : EnumTypeInfoTemplated(values_insert_order_p, dict_size_p), enum_member_oids(enum_member_oids) { } public: const Vector & GetMemberOids() const { - return oid_vec; + return enum_member_oids; } private: - Vector oid_vec; + Vector enum_member_oids; }; struct PGDuckDBEnum { - static LogicalType - CreateEnumType(Vector &ordered_data, idx_t size, Vector &oid_vec) { - // Generate EnumTypeInfo - shared_ptr info; - auto enum_internal_type = EnumTypeInfo::DictType(size); - switch (enum_internal_type) { - case PhysicalType::UINT8: - info = make_shared_ptr>(ordered_data, size, oid_vec); - break; - case PhysicalType::UINT16: - info = make_shared_ptr>(ordered_data, size, oid_vec); - break; - case PhysicalType::UINT32: - info = make_shared_ptr>(ordered_data, size, oid_vec); - break; - default: - throw InternalException("Invalid Physical Type for ENUMs"); - } - // Generate Actual Enum Type - return LogicalType(LogicalTypeId::ENUM, info); - } + static LogicalType CreateEnumType(Vector &ordered_data, idx_t size, Vector &enum_member_oids); + static idx_t GetDuckDBEnumPosition(duckdb::Value &val); + static int GetEnumPosition(Datum enum_member_oid); + static bool IsEnumType(Oid type_oid); + static Oid GetEnumTypeOid(const Vector &oids); + static const Vector &GetMemberOids(const duckdb::LogicalType &type); }; } // namespace pgduckdb diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index ee8bbc28..1b2b875e 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -8,7 +8,6 @@ extern "C" { #include "fmgr.h" #include "miscadmin.h" #include "catalog/pg_type.h" -#include "catalog/pg_enum.h" #include "executor/tuptable.h" #include "utils/numeric.h" #include "utils/catcache.h" @@ -22,10 +21,10 @@ extern "C" { #include "pgduckdb/pgduckdb.h" #include "pgduckdb/scan/postgres_scan.hpp" #include "pgduckdb/types/decimal.hpp" -#include "pgduckdb/types/pgduckdb_enum.hpp" #include "pgduckdb/pgduckdb_filter.hpp" #include "pgduckdb/pgduckdb_detoast.hpp" #include "pgduckdb/pgduckdb_types.hpp" +#include "pgduckdb/types/pgduckdb_enum.hpp" namespace pgduckdb { @@ -324,43 +323,10 @@ ConvertDuckToPostgresArray(TupleTableSlot *slot, duckdb::Value &value, idx_t col slot->tts_values[col] = PointerGetDatum(arr); } -idx_t -GetEnumPosition(duckdb::Value &val) { - D_ASSERT(val.type().id() == LogicalTypeId::ENUM); - auto physical_type = val.type().InternalType(); - switch (physical_type) { - case PhysicalType::UINT8: - return val.GetValue(); - case PhysicalType::UINT16: - return val.GetValue(); - case PhysicalType::UINT32: - return val.GetValue(); - default: - throw InternalException("Invalid Physical Type for ENUMs"); - } -} - -const Vector & -GetEnumMemberOids(duckdb::Value &val) { - D_ASSERT(val.type().id() == LogicalTypeId::ENUM); - auto physical_type = val.type().InternalType(); - auto &enum_type_info = val.type().AuxInfo()->Cast(); - switch (physical_type) { - case PhysicalType::UINT8: - return enum_type_info.Cast>().GetMemberOids(); - case PhysicalType::UINT16: - return enum_type_info.Cast>().GetMemberOids(); - case PhysicalType::UINT32: - return enum_type_info.Cast>().GetMemberOids(); - default: - throw InternalException("Invalid Physical Type for ENUMs"); - } -} - void ConvertDuckToPostgresEnumValue(TupleTableSlot *slot, duckdb::Value &val, idx_t col) { - auto position = GetEnumPosition(val); - auto &enum_oids = GetEnumMemberOids(val); + auto position = PGDuckDBEnum::GetDuckDBEnumPosition(val); + auto &enum_oids = PGDuckDBEnum::GetMemberOids(val.type()); auto enum_value_oid = duckdb::FlatVector::GetData(enum_oids)[position]; slot->tts_values[col] = ObjectIdGetDatum(enum_value_oid); } @@ -547,27 +513,10 @@ ChildTypeFromArray(Oid array_type) { } } -static bool -IsEnumType(Oid type_oid) { - bool result = false; - auto type_tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type_oid)); - - if (HeapTupleIsValid(type_tuple)) { - auto type_form = (Form_pg_type)GETSTRUCT(type_tuple); - - // Check if the type is an enum - if (type_form->typtype == 'e') { - result = true; - } - ReleaseSysCache(type_tuple); - } - return result; -} - duckdb::LogicalType -ConvertPostgresEnumToDuckEnum(Oid enum_oid) { +ConvertPostgresEnumToDuckEnum(Oid enum_type_oid) { /* Get the list of existing members of the enum */ - auto list = SearchSysCacheList1(ENUMTYPOIDNAME, ObjectIdGetDatum(enum_oid)); + auto list = SearchSysCacheList1(ENUMTYPOIDNAME, ObjectIdGetDatum(enum_type_oid)); auto nelems = list->n_members; /* Sort the existing members by enumsortorder */ @@ -659,7 +608,7 @@ ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute) { case REGCLASSOID: return duckdb::LogicalTypeId::UINTEGER; default: { - if (IsEnumType(type)) { + if (PGDuckDBEnum::IsEnumType(type)) { return ConvertPostgresEnumToDuckEnum(type); } std::string name = "UnsupportedPostgresType (Oid=" + std::to_string(type) + ")"; @@ -668,24 +617,6 @@ ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute) { } } -Oid -GetEnumTypeOid(const Vector &oids) { - /* Get the pg_type tuple for the enum type */ - auto member_oid = duckdb::FlatVector::GetData(oids)[0]; - auto tuple = SearchSysCache1(ENUMOID, ObjectIdGetDatum(member_oid)); - Oid result = InvalidOid; - if (!HeapTupleIsValid(tuple)) { - elog(ERROR, "cache lookup failed for enum member with oid %u", member_oid); - } - - auto enum_form = (Form_pg_enum)GETSTRUCT(tuple); - result = enum_form->enumtypid; - - /* Release the cache tuple */ - ReleaseSysCache(tuple); - return result; -} - Oid GetPostgresDuckDBType(duckdb::LogicalType type) { auto id = type.id(); @@ -747,18 +678,8 @@ GetPostgresDuckDBType(duckdb::LogicalType type) { } } case duckdb::LogicalTypeId::ENUM: { - auto type_info = type.AuxInfo(); - auto &enum_type_info = type_info->Cast(); - switch (enum_type_info.DictType(enum_type_info.GetDictSize())) { - case PhysicalType::UINT8: - return GetEnumTypeOid(enum_type_info.Cast>().GetMemberOids()); - case PhysicalType::UINT16: - return GetEnumTypeOid(enum_type_info.Cast>().GetMemberOids()); - case PhysicalType::UINT32: - return GetEnumTypeOid(enum_type_info.Cast>().GetMemberOids()); - default: - throw InternalException("Invalid Physical Type for ENUMs"); - } + auto &member_oids = PGDuckDBEnum::GetMemberOids(type); + return PGDuckDBEnum::GetEnumTypeOid(member_oids); } default: { elog(ERROR, "Could not convert DuckDB type: %s to Postgres type", type.ToString().c_str()); @@ -850,31 +771,6 @@ ConvertDecimal(const NumericVar &numeric) { return (NumericIsNegative(numeric) ? -base_res : base_res); } -int -GetEnumPosition(Datum enum_oid) { - HeapTuple enum_tuple; - Form_pg_enum enum_entry; - int enum_position; - - // Use SearchSysCache1 to fetch the enum entry from pg_enum using its OID - enum_tuple = SearchSysCache1(ENUMOID, enum_oid); - - if (!HeapTupleIsValid(enum_tuple)) { - elog(ERROR, "could not find enum with OID %u", DatumGetObjectId(enum_oid)); - } - - // Get the pg_enum structure from the tuple - enum_entry = (Form_pg_enum)GETSTRUCT(enum_tuple); - - // The enumsortorder field gives us the position of the enum value - enum_position = (int)enum_entry->enumsortorder; - - // Release the cache to free up memory - ReleaseSysCache(enum_tuple); - - return enum_position; -} - void ConvertPostgresToDuckValue(Datum value, duckdb::Vector &result, idx_t offset) { auto &type = result.GetType(); @@ -1052,7 +948,7 @@ ConvertPostgresToDuckValue(Datum value, duckdb::Vector &result, idx_t offset) { break; } case LogicalTypeId::ENUM: { - auto enum_position = GetEnumPosition(value); + auto enum_position = PGDuckDBEnum::GetEnumPosition(value); auto physical_type = result.GetType().InternalType(); switch (physical_type) { case PhysicalType::UINT8: diff --git a/src/types/pgduckdb_enum.cpp b/src/types/pgduckdb_enum.cpp new file mode 100644 index 00000000..6d74b3e8 --- /dev/null +++ b/src/types/pgduckdb_enum.cpp @@ -0,0 +1,113 @@ +#include "pgduckdb/types/pgduckdb_enum.hpp" + +namespace pgduckdb { + +LogicalType PGDuckDBEnum::CreateEnumType(Vector &ordered_data, idx_t size, Vector &enum_member_oids) { + // Generate EnumTypeInfo + shared_ptr info; + auto enum_internal_type = EnumTypeInfo::DictType(size); + switch (enum_internal_type) { + case PhysicalType::UINT8: + info = make_shared_ptr>(ordered_data, size, enum_member_oids); + break; + case PhysicalType::UINT16: + info = make_shared_ptr>(ordered_data, size, enum_member_oids); + break; + case PhysicalType::UINT32: + info = make_shared_ptr>(ordered_data, size, enum_member_oids); + break; + default: + throw InternalException("Invalid Physical Type for ENUMs"); + } + // Generate Actual Enum Type + return LogicalType(LogicalTypeId::ENUM, info); +} + +idx_t PGDuckDBEnum::GetDuckDBEnumPosition(duckdb::Value &val) { + D_ASSERT(val.type().id() == LogicalTypeId::ENUM); + auto physical_type = val.type().InternalType(); + switch (physical_type) { + case PhysicalType::UINT8: + return val.GetValue(); + case PhysicalType::UINT16: + return val.GetValue(); + case PhysicalType::UINT32: + return val.GetValue(); + default: + throw InternalException("Invalid Physical Type for ENUMs"); + } +} + +int PGDuckDBEnum::GetEnumPosition(Datum enum_member_oid) { + HeapTuple enum_tuple; + Form_pg_enum enum_entry; + int enum_position; + + // Use SearchSysCache1 to fetch the enum entry from pg_enum using its OID + enum_tuple = SearchSysCache1(ENUMOID, enum_member_oid); + + if (!HeapTupleIsValid(enum_tuple)) { + elog(ERROR, "could not find enum with OID %u", DatumGetObjectId(enum_member_oid)); + } + + // Get the pg_enum structure from the tuple + enum_entry = (Form_pg_enum)GETSTRUCT(enum_tuple); + + // The enumsortorder field gives us the position of the enum value + enum_position = (int)enum_entry->enumsortorder; + + // Release the cache to free up memory + ReleaseSysCache(enum_tuple); + + return enum_position; +} + +bool PGDuckDBEnum::IsEnumType(Oid type_oid) { + bool result = false; + auto type_tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type_oid)); + + if (HeapTupleIsValid(type_tuple)) { + auto type_form = (Form_pg_type)GETSTRUCT(type_tuple); + + // Check if the type is an enum + if (type_form->typtype == 'e') { + result = true; + } + ReleaseSysCache(type_tuple); + } + return result; +} + +Oid PGDuckDBEnum::GetEnumTypeOid(const Vector &oids) { + /* Get the pg_type tuple for the enum type */ + auto member_oid = duckdb::FlatVector::GetData(oids)[0]; + auto tuple = SearchSysCache1(ENUMOID, ObjectIdGetDatum(member_oid)); + Oid result = InvalidOid; + if (!HeapTupleIsValid(tuple)) { + elog(ERROR, "cache lookup failed for enum member with oid %u", member_oid); + } + + auto enum_form = (Form_pg_enum)GETSTRUCT(tuple); + result = enum_form->enumtypid; + + /* Release the cache tuple */ + ReleaseSysCache(tuple); + return result; +} + +const Vector &PGDuckDBEnum::GetMemberOids(const duckdb::LogicalType &type) { + auto type_info = type.AuxInfo(); + auto &enum_type_info = type_info->Cast(); + switch (enum_type_info.DictType(enum_type_info.GetDictSize())) { + case PhysicalType::UINT8: + return enum_type_info.Cast>().GetMemberOids(); + case PhysicalType::UINT16: + return enum_type_info.Cast>().GetMemberOids(); + case PhysicalType::UINT32: + return enum_type_info.Cast>().GetMemberOids(); + default: + throw InternalException("Invalid Physical Type for ENUMs"); + } +} + +} // namespace pgduckdb diff --git a/test/regression/sql/enum.sql b/test/regression/sql/enum.sql index ccb92ae0..08786516 100644 --- a/test/regression/sql/enum.sql +++ b/test/regression/sql/enum.sql @@ -1,4 +1,4 @@ -CREATE TYPE mood AS ENUM ('happy', 'sad', 'neutral'); +CREATE TYPE mood AS ENUM ('sad', 'neutral', 'happy'); create table tbl (a mood); insert into tbl select 'happy'; @@ -7,5 +7,16 @@ insert into tbl select 'sad'; select * from tbl; +--select * from tbl where a = 'sad'; -- returns `sad` +--select * from tbl where a > 'neutral'; --- returns `happy` + +ALTER TYPE mood ADD VALUE 'a-bit-happy' BEFORE 'happy'; +ALTER TYPE mood ADD VALUE 'very-happy' AFTER 'happy'; +ALTER TYPE mood ADD VALUE 'very-sad' BEFORE 'sad'; + +insert into tbl values ('very-sad'), ('very-happy'), ('a-bit-happy'); +--select * from tbl where a > 'neutral'; --- returns `a-bit-happy`, `happy` and `very-happy`; +select * from tbl; + drop table tbl; drop type mood; From 5eab0b5018a62d96091e26894369a8f1a0d80e6f Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 19 Sep 2024 21:47:13 +0200 Subject: [PATCH 08/16] add explanation comments --- include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp | 3 +++ include/pgduckdb/types/pgduckdb_enum.hpp | 4 ++++ src/types/pgduckdb_enum.cpp | 2 ++ 3 files changed, 9 insertions(+) diff --git a/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp b/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp index cecc3fee..3f4b5ca9 100644 --- a/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp +++ b/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp @@ -6,6 +6,9 @@ #include "duckdb/common/string_map_set.hpp" #include "duckdb/common/serializer/deserializer.hpp" +// This is copied directly without changes from 'duckdb/src/common/extra_type_info.cpp' +// The reason this is copied is to be able to inherit from it. + namespace duckdb { template struct EnumTypeInfoTemplated : public EnumTypeInfo { diff --git a/include/pgduckdb/types/pgduckdb_enum.hpp b/include/pgduckdb/types/pgduckdb_enum.hpp index 44b0d4ff..791dbdd8 100644 --- a/include/pgduckdb/types/pgduckdb_enum.hpp +++ b/include/pgduckdb/types/pgduckdb_enum.hpp @@ -23,6 +23,10 @@ using duckdb::PhysicalType; using duckdb::shared_ptr; using duckdb::Vector; +// To store additional metadata for a type, DuckDB's LogicalType class contains a 'type_info_' field. +// The type of this is ExtraTypeInfo, ENUMs make use of this in the form of EnumTypeInfo (see duckdb/include/common/extra_type_info.hpp). +// We derive from this further to store the ENUMs connection with the oids of Postgres' enum members. + template class PGDuckDBEnumTypeInfo : public EnumTypeInfoTemplated { public: diff --git a/src/types/pgduckdb_enum.cpp b/src/types/pgduckdb_enum.cpp index 6d74b3e8..5cb56d65 100644 --- a/src/types/pgduckdb_enum.cpp +++ b/src/types/pgduckdb_enum.cpp @@ -2,6 +2,8 @@ namespace pgduckdb { +// This is copied from 'duckdb::EnumTypeInfo::CreateType' with our additional metadata added + LogicalType PGDuckDBEnum::CreateEnumType(Vector &ordered_data, idx_t size, Vector &enum_member_oids) { // Generate EnumTypeInfo shared_ptr info; From d2f1bb0265ed826fef7e9016c5eff9014846edb3 Mon Sep 17 00:00:00 2001 From: Tishj Date: Thu, 19 Sep 2024 22:23:07 +0200 Subject: [PATCH 09/16] added guards --- include/pgduckdb/types/pgduckdb_enum.hpp | 15 ++++++------ src/pgduckdb_types.cpp | 7 ++++-- src/types/pgduckdb_enum.cpp | 31 +++++++++++++++--------- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/include/pgduckdb/types/pgduckdb_enum.hpp b/include/pgduckdb/types/pgduckdb_enum.hpp index 791dbdd8..7cc7e1c4 100644 --- a/include/pgduckdb/types/pgduckdb_enum.hpp +++ b/include/pgduckdb/types/pgduckdb_enum.hpp @@ -3,11 +3,11 @@ #include "pgduckdb/duckdb_vendor/enum_type_info_templated.hpp" extern "C" { - #include "postgres.h" - #include "catalog/pg_enum.h" - #include "catalog/pg_type.h" - #include "utils/syscache.h" - #include "access/htup_details.h" +#include "postgres.h" +#include "catalog/pg_enum.h" +#include "catalog/pg_type.h" +#include "utils/syscache.h" +#include "access/htup_details.h" } namespace pgduckdb { @@ -24,8 +24,9 @@ using duckdb::shared_ptr; using duckdb::Vector; // To store additional metadata for a type, DuckDB's LogicalType class contains a 'type_info_' field. -// The type of this is ExtraTypeInfo, ENUMs make use of this in the form of EnumTypeInfo (see duckdb/include/common/extra_type_info.hpp). -// We derive from this further to store the ENUMs connection with the oids of Postgres' enum members. +// The type of this is ExtraTypeInfo, ENUMs make use of this in the form of EnumTypeInfo (see +// duckdb/include/common/extra_type_info.hpp). We derive from this further to store the ENUMs connection with the oids +// of Postgres' enum members. template class PGDuckDBEnumTypeInfo : public EnumTypeInfoTemplated { diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index 1f43b238..e7609ac7 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -19,6 +19,7 @@ extern "C" { } #include "pgduckdb/pgduckdb.h" +#include "pgduckdb/pgduckdb_utils.hpp" #include "pgduckdb/scan/postgres_scan.hpp" #include "pgduckdb/types/decimal.hpp" #include "pgduckdb/pgduckdb_filter.hpp" @@ -517,7 +518,9 @@ ChildTypeFromArray(Oid array_type) { duckdb::LogicalType ConvertPostgresEnumToDuckEnum(Oid enum_type_oid) { /* Get the list of existing members of the enum */ - auto list = SearchSysCacheList1(ENUMTYPOIDNAME, ObjectIdGetDatum(enum_type_oid)); + auto list = + PostgresFunctionGuard([](int cacheId, Datum key) { return SearchSysCacheList1(cacheId, key); }, + ENUMTYPOIDNAME, ObjectIdGetDatum(enum_type_oid)); auto nelems = list->n_members; /* Sort the existing members by enumsortorder */ @@ -552,7 +555,7 @@ ConvertPostgresEnumToDuckEnum(Oid enum_type_oid) { enum_oid_data[i] = enum_data->oid; } - ReleaseCatCacheList(list); + PostgresFunctionGuard(ReleaseCatCacheList, list); return PGDuckDBEnum::CreateEnumType(duck_enum_vec, enum_members.size(), enum_oid_vec); } diff --git a/src/types/pgduckdb_enum.cpp b/src/types/pgduckdb_enum.cpp index 5cb56d65..9b85ef5a 100644 --- a/src/types/pgduckdb_enum.cpp +++ b/src/types/pgduckdb_enum.cpp @@ -1,10 +1,12 @@ #include "pgduckdb/types/pgduckdb_enum.hpp" +#include "pgduckdb/pgduckdb_utils.hpp" namespace pgduckdb { // This is copied from 'duckdb::EnumTypeInfo::CreateType' with our additional metadata added -LogicalType PGDuckDBEnum::CreateEnumType(Vector &ordered_data, idx_t size, Vector &enum_member_oids) { +LogicalType +PGDuckDBEnum::CreateEnumType(Vector &ordered_data, idx_t size, Vector &enum_member_oids) { // Generate EnumTypeInfo shared_ptr info; auto enum_internal_type = EnumTypeInfo::DictType(size); @@ -25,7 +27,8 @@ LogicalType PGDuckDBEnum::CreateEnumType(Vector &ordered_data, idx_t size, Vecto return LogicalType(LogicalTypeId::ENUM, info); } -idx_t PGDuckDBEnum::GetDuckDBEnumPosition(duckdb::Value &val) { +idx_t +PGDuckDBEnum::GetDuckDBEnumPosition(duckdb::Value &val) { D_ASSERT(val.type().id() == LogicalTypeId::ENUM); auto physical_type = val.type().InternalType(); switch (physical_type) { @@ -40,13 +43,14 @@ idx_t PGDuckDBEnum::GetDuckDBEnumPosition(duckdb::Value &val) { } } -int PGDuckDBEnum::GetEnumPosition(Datum enum_member_oid) { +int +PGDuckDBEnum::GetEnumPosition(Datum enum_member_oid) { HeapTuple enum_tuple; Form_pg_enum enum_entry; int enum_position; // Use SearchSysCache1 to fetch the enum entry from pg_enum using its OID - enum_tuple = SearchSysCache1(ENUMOID, enum_member_oid); + enum_tuple = PostgresFunctionGuard(SearchSysCache1, ENUMOID, enum_member_oid); if (!HeapTupleIsValid(enum_tuple)) { elog(ERROR, "could not find enum with OID %u", DatumGetObjectId(enum_member_oid)); @@ -59,14 +63,15 @@ int PGDuckDBEnum::GetEnumPosition(Datum enum_member_oid) { enum_position = (int)enum_entry->enumsortorder; // Release the cache to free up memory - ReleaseSysCache(enum_tuple); + PostgresFunctionGuard(ReleaseSysCache, enum_tuple); return enum_position; } -bool PGDuckDBEnum::IsEnumType(Oid type_oid) { +bool +PGDuckDBEnum::IsEnumType(Oid type_oid) { bool result = false; - auto type_tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type_oid)); + auto type_tuple = PostgresFunctionGuard(SearchSysCache1, TYPEOID, ObjectIdGetDatum(type_oid)); if (HeapTupleIsValid(type_tuple)) { auto type_form = (Form_pg_type)GETSTRUCT(type_tuple); @@ -75,15 +80,16 @@ bool PGDuckDBEnum::IsEnumType(Oid type_oid) { if (type_form->typtype == 'e') { result = true; } - ReleaseSysCache(type_tuple); + PostgresFunctionGuard(ReleaseSysCache, type_tuple); } return result; } -Oid PGDuckDBEnum::GetEnumTypeOid(const Vector &oids) { +Oid +PGDuckDBEnum::GetEnumTypeOid(const Vector &oids) { /* Get the pg_type tuple for the enum type */ auto member_oid = duckdb::FlatVector::GetData(oids)[0]; - auto tuple = SearchSysCache1(ENUMOID, ObjectIdGetDatum(member_oid)); + auto tuple = PostgresFunctionGuard(SearchSysCache1, ENUMOID, ObjectIdGetDatum(member_oid)); Oid result = InvalidOid; if (!HeapTupleIsValid(tuple)) { elog(ERROR, "cache lookup failed for enum member with oid %u", member_oid); @@ -93,11 +99,12 @@ Oid PGDuckDBEnum::GetEnumTypeOid(const Vector &oids) { result = enum_form->enumtypid; /* Release the cache tuple */ - ReleaseSysCache(tuple); + PostgresFunctionGuard(ReleaseSysCache, tuple); return result; } -const Vector &PGDuckDBEnum::GetMemberOids(const duckdb::LogicalType &type) { +const Vector & +PGDuckDBEnum::GetMemberOids(const duckdb::LogicalType &type) { auto type_info = type.AuxInfo(); auto &enum_type_info = type_info->Cast(); switch (enum_type_info.DictType(enum_type_info.GetDictSize())) { From 0c216c0076fabf64420adea39e281beec50c5f1a Mon Sep 17 00:00:00 2001 From: Tishj Date: Fri, 20 Sep 2024 11:12:44 +0200 Subject: [PATCH 10/16] fix sortorder issue --- include/pgduckdb/duckdb_vendor/.clang-format | 2 - .../enum_type_info_templated.hpp | 57 ------------- include/pgduckdb/types/pgduckdb_enum.hpp | 39 ++------- src/pgduckdb_types.cpp | 29 ++++--- src/types/pgduckdb_enum.cpp | 82 ++++++------------- test/regression/expected/enum.out | 24 +++++- test/regression/sql/enum.sql | 6 +- 7 files changed, 78 insertions(+), 161 deletions(-) delete mode 100644 include/pgduckdb/duckdb_vendor/.clang-format delete mode 100644 include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp diff --git a/include/pgduckdb/duckdb_vendor/.clang-format b/include/pgduckdb/duckdb_vendor/.clang-format deleted file mode 100644 index 9d159247..00000000 --- a/include/pgduckdb/duckdb_vendor/.clang-format +++ /dev/null @@ -1,2 +0,0 @@ -DisableFormat: true -SortIncludes: false diff --git a/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp b/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp deleted file mode 100644 index 3f4b5ca9..00000000 --- a/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp +++ /dev/null @@ -1,57 +0,0 @@ -#pragma once - -#include "duckdb/common/extra_type_info.hpp" -#include "duckdb/common/vector.hpp" -#include "duckdb/common/types/vector.hpp" -#include "duckdb/common/string_map_set.hpp" -#include "duckdb/common/serializer/deserializer.hpp" - -// This is copied directly without changes from 'duckdb/src/common/extra_type_info.cpp' -// The reason this is copied is to be able to inherit from it. - -namespace duckdb { -template -struct EnumTypeInfoTemplated : public EnumTypeInfo { - explicit EnumTypeInfoTemplated(Vector &values_insert_order_p, idx_t size_p) - : EnumTypeInfo(values_insert_order_p, size_p) { - D_ASSERT(values_insert_order_p.GetType().InternalType() == PhysicalType::VARCHAR); - - UnifiedVectorFormat vdata; - values_insert_order.ToUnifiedFormat(size_p, vdata); - - auto data = UnifiedVectorFormat::GetData(vdata); - for (idx_t i = 0; i < size_p; i++) { - auto idx = vdata.sel->get_index(i); - if (!vdata.validity.RowIsValid(idx)) { - throw InternalException("Attempted to create ENUM type with NULL value"); - } - if (values.count(data[idx]) > 0) { - throw InvalidInputException("Attempted to create ENUM type with duplicate value %s", - data[idx].GetString()); - } - values[data[idx]] = UnsafeNumericCast(i); - } - } - - static shared_ptr Deserialize(Deserializer &deserializer, uint32_t size) { - Vector values_insert_order(LogicalType::VARCHAR, size); - auto strings = FlatVector::GetData(values_insert_order); - - deserializer.ReadList(201, "values", [&](Deserializer::List &list, idx_t i) { - strings[i] = StringVector::AddStringOrBlob(values_insert_order, list.ReadElement()); - }); - return make_shared_ptr(values_insert_order, size); - } - - const string_map_t &GetValues() const { - return values; - } - - EnumTypeInfoTemplated(const EnumTypeInfoTemplated &) = delete; - EnumTypeInfoTemplated &operator=(const EnumTypeInfoTemplated &) = delete; - -private: - string_map_t values; -}; - -} // namespace duckdb diff --git a/include/pgduckdb/types/pgduckdb_enum.hpp b/include/pgduckdb/types/pgduckdb_enum.hpp index 7cc7e1c4..b4491074 100644 --- a/include/pgduckdb/types/pgduckdb_enum.hpp +++ b/include/pgduckdb/types/pgduckdb_enum.hpp @@ -1,6 +1,7 @@ #pragma once -#include "pgduckdb/duckdb_vendor/enum_type_info_templated.hpp" +#include "duckdb/common/types/vector.hpp" +#include "duckdb/common/types.hpp" extern "C" { #include "postgres.h" @@ -12,46 +13,16 @@ extern "C" { namespace pgduckdb { -using duckdb::EnumTypeInfo; -using duckdb::EnumTypeInfoTemplated; -using duckdb::ExtraTypeInfo; -using duckdb::InternalException; using duckdb::LogicalType; -using duckdb::LogicalTypeId; -using duckdb::make_shared_ptr; -using duckdb::PhysicalType; -using duckdb::shared_ptr; using duckdb::Vector; - -// To store additional metadata for a type, DuckDB's LogicalType class contains a 'type_info_' field. -// The type of this is ExtraTypeInfo, ENUMs make use of this in the form of EnumTypeInfo (see -// duckdb/include/common/extra_type_info.hpp). We derive from this further to store the ENUMs connection with the oids -// of Postgres' enum members. - -template -class PGDuckDBEnumTypeInfo : public EnumTypeInfoTemplated { -public: - PGDuckDBEnumTypeInfo(Vector &values_insert_order_p, idx_t dict_size_p, Vector &enum_member_oids) - : EnumTypeInfoTemplated(values_insert_order_p, dict_size_p), enum_member_oids(enum_member_oids) { - } - -public: - const Vector & - GetMemberOids() const { - return enum_member_oids; - } - -private: - Vector enum_member_oids; -}; +using duckdb::idx_t; struct PGDuckDBEnum { - static LogicalType CreateEnumType(Vector &ordered_data, idx_t size, Vector &enum_member_oids); static idx_t GetDuckDBEnumPosition(duckdb::Value &val); - static int GetEnumPosition(Datum enum_member_oid); + static idx_t GetEnumPosition(Datum enum_member_oid, const duckdb::LogicalType &type); static bool IsEnumType(Oid type_oid); static Oid GetEnumTypeOid(const Vector &oids); - static const Vector &GetMemberOids(const duckdb::LogicalType &type); + static Vector GetMemberOids(const duckdb::LogicalType &type); }; } // namespace pgduckdb diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index e7609ac7..b9cef0eb 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -29,6 +29,10 @@ extern "C" { namespace pgduckdb { +using duckdb::LogicalTypeId; +using duckdb::PhysicalType; +using duckdb::EnumTypeInfo; + struct BoolArray { public: static ArrayType * @@ -327,7 +331,7 @@ ConvertDuckToPostgresArray(TupleTableSlot *slot, duckdb::Value &value, idx_t col void ConvertDuckToPostgresEnumValue(TupleTableSlot *slot, duckdb::Value &val, idx_t col) { auto position = PGDuckDBEnum::GetDuckDBEnumPosition(val); - auto &enum_oids = PGDuckDBEnum::GetMemberOids(val.type()); + auto enum_oids = PGDuckDBEnum::GetMemberOids(val.type()); auto enum_value_oid = duckdb::FlatVector::GetData(enum_oids)[position]; slot->tts_values[col] = ObjectIdGetDatum(enum_value_oid); } @@ -544,10 +548,13 @@ ConvertPostgresEnumToDuckEnum(Oid enum_type_oid) { std::sort(enum_members.begin(), enum_members.end(), sort_order_cmp); - auto duck_enum_vec = duckdb::Vector(duckdb::LogicalType::VARCHAR, enum_members.size()); - auto enum_oid_vec = duckdb::Vector(duckdb::LogicalType::UINTEGER, enum_members.size()); + idx_t allocation_size = enum_members.size(); + allocation_size += enum_members.size() / 4; + allocation_size += (enum_members.size() % 4) != 0; + + auto duck_enum_vec = duckdb::Vector(duckdb::LogicalType::VARCHAR, allocation_size); auto enum_vec_data = duckdb::FlatVector::GetData(duck_enum_vec); - auto enum_oid_data = duckdb::FlatVector::GetData(enum_oid_vec); + auto enum_oid_data = (uint32_t*)(enum_vec_data + enum_members.size()); for (idx_t i = 0; i < enum_members.size(); i++) { auto &member = enum_members[i]; auto enum_data = (Form_pg_enum)GETSTRUCT(member); @@ -556,7 +563,7 @@ ConvertPostgresEnumToDuckEnum(Oid enum_type_oid) { } PostgresFunctionGuard(ReleaseCatCacheList, list); - return PGDuckDBEnum::CreateEnumType(duck_enum_vec, enum_members.size(), enum_oid_vec); + return EnumTypeInfo::CreateType(duck_enum_vec, enum_members.size()); } duckdb::LogicalType @@ -683,7 +690,7 @@ GetPostgresDuckDBType(duckdb::LogicalType type) { } } case duckdb::LogicalTypeId::ENUM: { - auto &member_oids = PGDuckDBEnum::GetMemberOids(type); + auto member_oids = PGDuckDBEnum::GetMemberOids(type); return PGDuckDBEnum::GetEnumTypeOid(member_oids); } default: { @@ -955,20 +962,20 @@ ConvertPostgresToDuckValue(Datum value, duckdb::Vector &result, idx_t offset) { break; } case LogicalTypeId::ENUM: { - auto enum_position = PGDuckDBEnum::GetEnumPosition(value); + auto enum_position = PGDuckDBEnum::GetEnumPosition(value, result.GetType()); auto physical_type = result.GetType().InternalType(); switch (physical_type) { case PhysicalType::UINT8: - Append(result, static_cast(enum_position - 1), offset); + Append(result, static_cast(enum_position), offset); break; case PhysicalType::UINT16: - Append(result, static_cast(enum_position - 1), offset); + Append(result, static_cast(enum_position), offset); break; case PhysicalType::UINT32: - Append(result, static_cast(enum_position - 1), offset); + Append(result, static_cast(enum_position), offset); break; default: - throw InternalException("Invalid Physical Type for ENUMs"); + throw duckdb::InternalException("Invalid Physical Type for ENUMs"); } break; } diff --git a/src/types/pgduckdb_enum.cpp b/src/types/pgduckdb_enum.cpp index 9b85ef5a..040a221a 100644 --- a/src/types/pgduckdb_enum.cpp +++ b/src/types/pgduckdb_enum.cpp @@ -1,31 +1,12 @@ #include "pgduckdb/types/pgduckdb_enum.hpp" #include "pgduckdb/pgduckdb_utils.hpp" +#include "duckdb/common/extra_type_info.hpp" namespace pgduckdb { -// This is copied from 'duckdb::EnumTypeInfo::CreateType' with our additional metadata added - -LogicalType -PGDuckDBEnum::CreateEnumType(Vector &ordered_data, idx_t size, Vector &enum_member_oids) { - // Generate EnumTypeInfo - shared_ptr info; - auto enum_internal_type = EnumTypeInfo::DictType(size); - switch (enum_internal_type) { - case PhysicalType::UINT8: - info = make_shared_ptr>(ordered_data, size, enum_member_oids); - break; - case PhysicalType::UINT16: - info = make_shared_ptr>(ordered_data, size, enum_member_oids); - break; - case PhysicalType::UINT32: - info = make_shared_ptr>(ordered_data, size, enum_member_oids); - break; - default: - throw InternalException("Invalid Physical Type for ENUMs"); - } - // Generate Actual Enum Type - return LogicalType(LogicalTypeId::ENUM, info); -} +using duckdb::idx_t; +using duckdb::PhysicalType; +using duckdb::LogicalTypeId; idx_t PGDuckDBEnum::GetDuckDBEnumPosition(duckdb::Value &val) { @@ -39,33 +20,28 @@ PGDuckDBEnum::GetDuckDBEnumPosition(duckdb::Value &val) { case PhysicalType::UINT32: return val.GetValue(); default: - throw InternalException("Invalid Physical Type for ENUMs"); + throw duckdb::InternalException("Invalid Physical Type for ENUMs"); } } -int -PGDuckDBEnum::GetEnumPosition(Datum enum_member_oid) { - HeapTuple enum_tuple; - Form_pg_enum enum_entry; - int enum_position; - - // Use SearchSysCache1 to fetch the enum entry from pg_enum using its OID - enum_tuple = PostgresFunctionGuard(SearchSysCache1, ENUMOID, enum_member_oid); - - if (!HeapTupleIsValid(enum_tuple)) { - elog(ERROR, "could not find enum with OID %u", DatumGetObjectId(enum_member_oid)); - } +idx_t +PGDuckDBEnum::GetEnumPosition(Datum enum_member_oid_p, const duckdb::LogicalType &type) { + auto enum_member_oid = DatumGetObjectId(enum_member_oid_p); - // Get the pg_enum structure from the tuple - enum_entry = (Form_pg_enum)GETSTRUCT(enum_tuple); + auto &enum_type_info = type.AuxInfo()->Cast(); + auto dict_size = enum_type_info.GetDictSize(); - // The enumsortorder field gives us the position of the enum value - enum_position = (int)enum_entry->enumsortorder; + auto enum_member_oids = PGDuckDBEnum::GetMemberOids(type); + auto oids_data = duckdb::FlatVector::GetData(enum_member_oids); - // Release the cache to free up memory - PostgresFunctionGuard(ReleaseSysCache, enum_tuple); + uint32_t *begin = oids_data; + uint32_t *end = oids_data + dict_size; + uint32_t *result = std::find(begin, end, enum_member_oid); - return enum_position; + if (result == end) { + throw duckdb::InternalException("Could not find enum_member_oid: %d", enum_member_oid); + } + return (idx_t)(result - begin); } bool @@ -103,20 +79,16 @@ PGDuckDBEnum::GetEnumTypeOid(const Vector &oids) { return result; } -const Vector & -PGDuckDBEnum::GetMemberOids(const duckdb::LogicalType &type) { +Vector PGDuckDBEnum::GetMemberOids(const duckdb::LogicalType &type) { auto type_info = type.AuxInfo(); auto &enum_type_info = type_info->Cast(); - switch (enum_type_info.DictType(enum_type_info.GetDictSize())) { - case PhysicalType::UINT8: - return enum_type_info.Cast>().GetMemberOids(); - case PhysicalType::UINT16: - return enum_type_info.Cast>().GetMemberOids(); - case PhysicalType::UINT32: - return enum_type_info.Cast>().GetMemberOids(); - default: - throw InternalException("Invalid Physical Type for ENUMs"); - } + auto &enum_sort_order = enum_type_info.GetValuesInsertOrder(); + idx_t dict_size = enum_type_info.GetDictSize(); + + // We store the (Postgres) enum member oids behind the string data in the EnumTypeInfo + auto vector_data = duckdb::FlatVector::GetData(enum_sort_order); + // Create a Vector that wraps the existing data, without copying - only valid while the EnumTypeInfo is alive + return Vector(duckdb::LogicalType::UINTEGER, (duckdb::data_ptr_t)(vector_data + dict_size)); } } // namespace pgduckdb diff --git a/test/regression/expected/enum.out b/test/regression/expected/enum.out index 7f50330e..edf09345 100644 --- a/test/regression/expected/enum.out +++ b/test/regression/expected/enum.out @@ -1,4 +1,8 @@ -CREATE TYPE mood AS ENUM ('happy', 'sad', 'neutral'); +CREATE TYPE mood AS ENUM ( + 'sad', + 'neutral', + 'happy' +); create table tbl (a mood); insert into tbl select 'happy'; insert into tbl select 'neutral'; @@ -11,5 +15,23 @@ select * from tbl; sad (3 rows) +--select * from tbl where a = 'sad'; -- returns `sad` +--select * from tbl where a > 'neutral'; --- returns `happy` +ALTER TYPE mood ADD VALUE 'a-bit-happy' BEFORE 'happy'; +ALTER TYPE mood ADD VALUE 'very-happy' AFTER 'happy'; +ALTER TYPE mood ADD VALUE 'very-sad' BEFORE 'sad'; +insert into tbl values ('very-sad'), ('very-happy'), ('a-bit-happy'); +--select * from tbl where a > 'neutral'; --- returns `a-bit-happy`, `happy` and `very-happy`; +select * from tbl; + a +------------- + happy + neutral + sad + very-sad + very-happy + a-bit-happy +(6 rows) + drop table tbl; drop type mood; diff --git a/test/regression/sql/enum.sql b/test/regression/sql/enum.sql index 08786516..2c71fad5 100644 --- a/test/regression/sql/enum.sql +++ b/test/regression/sql/enum.sql @@ -1,4 +1,8 @@ -CREATE TYPE mood AS ENUM ('sad', 'neutral', 'happy'); +CREATE TYPE mood AS ENUM ( + 'sad', + 'neutral', + 'happy' +); create table tbl (a mood); insert into tbl select 'happy'; From 9eed6e662af7a0c59bba1b30f12ebca1b1c8497d Mon Sep 17 00:00:00 2001 From: Tishj Date: Fri, 20 Sep 2024 11:15:34 +0200 Subject: [PATCH 11/16] dont use elog(ERROR, ..) in duckdb execution path --- include/pgduckdb/types/pgduckdb_enum.hpp | 2 +- src/pgduckdb_types.cpp | 4 ++-- src/types/pgduckdb_enum.cpp | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/pgduckdb/types/pgduckdb_enum.hpp b/include/pgduckdb/types/pgduckdb_enum.hpp index b4491074..bf4a5637 100644 --- a/include/pgduckdb/types/pgduckdb_enum.hpp +++ b/include/pgduckdb/types/pgduckdb_enum.hpp @@ -13,9 +13,9 @@ extern "C" { namespace pgduckdb { +using duckdb::idx_t; using duckdb::LogicalType; using duckdb::Vector; -using duckdb::idx_t; struct PGDuckDBEnum { static idx_t GetDuckDBEnumPosition(duckdb::Value &val); diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index b9cef0eb..c7e853a0 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -29,9 +29,9 @@ extern "C" { namespace pgduckdb { +using duckdb::EnumTypeInfo; using duckdb::LogicalTypeId; using duckdb::PhysicalType; -using duckdb::EnumTypeInfo; struct BoolArray { public: @@ -554,7 +554,7 @@ ConvertPostgresEnumToDuckEnum(Oid enum_type_oid) { auto duck_enum_vec = duckdb::Vector(duckdb::LogicalType::VARCHAR, allocation_size); auto enum_vec_data = duckdb::FlatVector::GetData(duck_enum_vec); - auto enum_oid_data = (uint32_t*)(enum_vec_data + enum_members.size()); + auto enum_oid_data = (uint32_t *)(enum_vec_data + enum_members.size()); for (idx_t i = 0; i < enum_members.size(); i++) { auto &member = enum_members[i]; auto enum_data = (Form_pg_enum)GETSTRUCT(member); diff --git a/src/types/pgduckdb_enum.cpp b/src/types/pgduckdb_enum.cpp index 040a221a..68563d5d 100644 --- a/src/types/pgduckdb_enum.cpp +++ b/src/types/pgduckdb_enum.cpp @@ -5,8 +5,8 @@ namespace pgduckdb { using duckdb::idx_t; -using duckdb::PhysicalType; using duckdb::LogicalTypeId; +using duckdb::PhysicalType; idx_t PGDuckDBEnum::GetDuckDBEnumPosition(duckdb::Value &val) { @@ -68,7 +68,7 @@ PGDuckDBEnum::GetEnumTypeOid(const Vector &oids) { auto tuple = PostgresFunctionGuard(SearchSysCache1, ENUMOID, ObjectIdGetDatum(member_oid)); Oid result = InvalidOid; if (!HeapTupleIsValid(tuple)) { - elog(ERROR, "cache lookup failed for enum member with oid %u", member_oid); + throw duckdb::InvalidInputException("Cache lookup failed for enum member with oid %d", member_oid); } auto enum_form = (Form_pg_enum)GETSTRUCT(tuple); @@ -79,7 +79,8 @@ PGDuckDBEnum::GetEnumTypeOid(const Vector &oids) { return result; } -Vector PGDuckDBEnum::GetMemberOids(const duckdb::LogicalType &type) { +Vector +PGDuckDBEnum::GetMemberOids(const duckdb::LogicalType &type) { auto type_info = type.AuxInfo(); auto &enum_type_info = type_info->Cast(); auto &enum_sort_order = enum_type_info.GetValuesInsertOrder(); From bb889f9e31ba6ad7f3c34824af5b3d91934856c3 Mon Sep 17 00:00:00 2001 From: Tishj Date: Fri, 20 Sep 2024 11:19:45 +0200 Subject: [PATCH 12/16] last mentions of 'enum_oid' changed --- src/pgduckdb_types.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index c7e853a0..1795aea2 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -331,9 +331,9 @@ ConvertDuckToPostgresArray(TupleTableSlot *slot, duckdb::Value &value, idx_t col void ConvertDuckToPostgresEnumValue(TupleTableSlot *slot, duckdb::Value &val, idx_t col) { auto position = PGDuckDBEnum::GetDuckDBEnumPosition(val); - auto enum_oids = PGDuckDBEnum::GetMemberOids(val.type()); - auto enum_value_oid = duckdb::FlatVector::GetData(enum_oids)[position]; - slot->tts_values[col] = ObjectIdGetDatum(enum_value_oid); + auto enum_member_oids = PGDuckDBEnum::GetMemberOids(val.type()); + auto enum_member_oid = duckdb::FlatVector::GetData(enum_member_oids)[position]; + slot->tts_values[col] = ObjectIdGetDatum(enum_member_oid); } bool @@ -554,12 +554,12 @@ ConvertPostgresEnumToDuckEnum(Oid enum_type_oid) { auto duck_enum_vec = duckdb::Vector(duckdb::LogicalType::VARCHAR, allocation_size); auto enum_vec_data = duckdb::FlatVector::GetData(duck_enum_vec); - auto enum_oid_data = (uint32_t *)(enum_vec_data + enum_members.size()); + auto enum_member_oid_data = (uint32_t *)(enum_vec_data + enum_members.size()); for (idx_t i = 0; i < enum_members.size(); i++) { auto &member = enum_members[i]; auto enum_data = (Form_pg_enum)GETSTRUCT(member); enum_vec_data[i] = duckdb::StringVector::AddString(duck_enum_vec, enum_data->enumlabel.data); - enum_oid_data[i] = enum_data->oid; + enum_member_oid_data[i] = enum_data->oid; } PostgresFunctionGuard(ReleaseCatCacheList, list); From bdfe79915530b9c616a2974abda0ae794c9034c8 Mon Sep 17 00:00:00 2001 From: Tishj Date: Fri, 20 Sep 2024 13:59:37 +0200 Subject: [PATCH 13/16] move the implementation details to 'pgduckdb_enum.cpp' --- include/pgduckdb/types/pgduckdb_enum.hpp | 1 + src/pgduckdb_types.cpp | 18 ++---------------- src/types/pgduckdb_enum.cpp | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/pgduckdb/types/pgduckdb_enum.hpp b/include/pgduckdb/types/pgduckdb_enum.hpp index bf4a5637..f78a458d 100644 --- a/include/pgduckdb/types/pgduckdb_enum.hpp +++ b/include/pgduckdb/types/pgduckdb_enum.hpp @@ -18,6 +18,7 @@ using duckdb::LogicalType; using duckdb::Vector; struct PGDuckDBEnum { + static LogicalType CreateEnumType(std::vector &enum_members); static idx_t GetDuckDBEnumPosition(duckdb::Value &val); static idx_t GetEnumPosition(Datum enum_member_oid, const duckdb::LogicalType &type); static bool IsEnumType(Oid type_oid); diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index 1795aea2..6c104c20 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -547,23 +547,9 @@ ConvertPostgresEnumToDuckEnum(Oid enum_type_oid) { }; std::sort(enum_members.begin(), enum_members.end(), sort_order_cmp); - - idx_t allocation_size = enum_members.size(); - allocation_size += enum_members.size() / 4; - allocation_size += (enum_members.size() % 4) != 0; - - auto duck_enum_vec = duckdb::Vector(duckdb::LogicalType::VARCHAR, allocation_size); - auto enum_vec_data = duckdb::FlatVector::GetData(duck_enum_vec); - auto enum_member_oid_data = (uint32_t *)(enum_vec_data + enum_members.size()); - for (idx_t i = 0; i < enum_members.size(); i++) { - auto &member = enum_members[i]; - auto enum_data = (Form_pg_enum)GETSTRUCT(member); - enum_vec_data[i] = duckdb::StringVector::AddString(duck_enum_vec, enum_data->enumlabel.data); - enum_member_oid_data[i] = enum_data->oid; - } - + auto enum_type = PGDuckDBEnum::CreateEnumType(enum_members); PostgresFunctionGuard(ReleaseCatCacheList, list); - return EnumTypeInfo::CreateType(duck_enum_vec, enum_members.size()); + return enum_type; } duckdb::LogicalType diff --git a/src/types/pgduckdb_enum.cpp b/src/types/pgduckdb_enum.cpp index 68563d5d..f53d719a 100644 --- a/src/types/pgduckdb_enum.cpp +++ b/src/types/pgduckdb_enum.cpp @@ -8,6 +8,23 @@ using duckdb::idx_t; using duckdb::LogicalTypeId; using duckdb::PhysicalType; +LogicalType PGDuckDBEnum::CreateEnumType(std::vector &enum_members) { + idx_t allocation_size = enum_members.size(); + allocation_size += enum_members.size() / 4; + allocation_size += (enum_members.size() % 4) != 0; + + auto duck_enum_vec = duckdb::Vector(duckdb::LogicalType::VARCHAR, allocation_size); + auto enum_vec_data = duckdb::FlatVector::GetData(duck_enum_vec); + auto enum_member_oid_data = (uint32_t *)(enum_vec_data + enum_members.size()); + for (idx_t i = 0; i < enum_members.size(); i++) { + auto &member = enum_members[i]; + auto enum_data = (Form_pg_enum)GETSTRUCT(member); + enum_vec_data[i] = duckdb::StringVector::AddString(duck_enum_vec, enum_data->enumlabel.data); + enum_member_oid_data[i] = enum_data->oid; + } + return duckdb::EnumTypeInfo::CreateType(duck_enum_vec, enum_members.size()); +} + idx_t PGDuckDBEnum::GetDuckDBEnumPosition(duckdb::Value &val) { D_ASSERT(val.type().id() == LogicalTypeId::ENUM); From b46c9097b586212798d5399fbc8cfd15acdf5a88 Mon Sep 17 00:00:00 2001 From: Tishj Date: Fri, 20 Sep 2024 14:03:03 +0200 Subject: [PATCH 14/16] explain the implementation details --- src/types/pgduckdb_enum.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/types/pgduckdb_enum.cpp b/src/types/pgduckdb_enum.cpp index f53d719a..3006bff7 100644 --- a/src/types/pgduckdb_enum.cpp +++ b/src/types/pgduckdb_enum.cpp @@ -8,6 +8,11 @@ using duckdb::idx_t; using duckdb::LogicalTypeId; using duckdb::PhysicalType; +// This is medium hacky, we create extra space in the Vector that would normally only hold the strings corresponding to the enum's members. +// In that extra space we store the enum member oids. + +// We do this so we can find the OID from the offset (which is what DuckDB stores in a Vector of type ENUM) and return that when converting the result from DuckDB -> Postgres + LogicalType PGDuckDBEnum::CreateEnumType(std::vector &enum_members) { idx_t allocation_size = enum_members.size(); allocation_size += enum_members.size() / 4; From bd7d6985f1dee603ebca2293e9035fa9a9b8de70 Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 24 Sep 2024 14:06:09 +0200 Subject: [PATCH 15/16] add filtering for enums --- Makefile | 1 + .../pgduckdb/catalog/pgduckdb_transaction.hpp | 3 + include/pgduckdb/catalog/pgduckdb_type.hpp | 31 ++++++++ include/pgduckdb/duckdb_vendor/.clang-format | 2 + .../enum_type_info_templated.hpp | 57 ++++++++++++++ include/pgduckdb/types/pgduckdb_enum.hpp | 38 ++++++++- src/catalog/pgduckdb_transaction.cpp | 77 ++++++++++++++++--- src/catalog/pgduckdb_type.cpp | 28 +++++++ src/pgduckdb_filter.cpp | 17 +++- src/pgduckdb_types.cpp | 4 +- src/types/pgduckdb_enum.cpp | 67 ++++++++++------ test/regression/expected/enum.out | 23 +++++- test/regression/sql/enum.sql | 6 +- 13 files changed, 313 insertions(+), 41 deletions(-) create mode 100644 include/pgduckdb/catalog/pgduckdb_type.hpp create mode 100644 include/pgduckdb/duckdb_vendor/.clang-format create mode 100644 include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp create mode 100644 src/catalog/pgduckdb_type.cpp diff --git a/Makefile b/Makefile index 97d6b8b8..960b30fd 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,7 @@ SRCS = src/scan/heap_reader.cpp \ src/catalog/pgduckdb_storage.cpp \ src/catalog/pgduckdb_schema.cpp \ src/catalog/pgduckdb_table.cpp \ + src/catalog/pgduckdb_type.cpp \ src/catalog/pgduckdb_transaction.cpp \ src/catalog/pgduckdb_transaction_manager.cpp \ src/catalog/pgduckdb_catalog.cpp diff --git a/include/pgduckdb/catalog/pgduckdb_transaction.hpp b/include/pgduckdb/catalog/pgduckdb_transaction.hpp index c22362a0..b6bb46af 100644 --- a/include/pgduckdb/catalog/pgduckdb_transaction.hpp +++ b/include/pgduckdb/catalog/pgduckdb_transaction.hpp @@ -2,6 +2,7 @@ #include "duckdb/transaction/transaction.hpp" #include "pgduckdb/catalog/pgduckdb_table.hpp" +#include "pgduckdb/catalog/pgduckdb_type.hpp" #include "pgduckdb/catalog/pgduckdb_schema.hpp" namespace duckdb { @@ -15,11 +16,13 @@ class SchemaItems { public: optional_ptr GetTable(const string &name, PlannerInfo *planner_info); + optional_ptr GetType(const string &name); public: string name; unique_ptr schema; case_insensitive_map_t> tables; + case_insensitive_map_t> types; }; class PostgresTransaction : public Transaction { diff --git a/include/pgduckdb/catalog/pgduckdb_type.hpp b/include/pgduckdb/catalog/pgduckdb_type.hpp new file mode 100644 index 00000000..162d393f --- /dev/null +++ b/include/pgduckdb/catalog/pgduckdb_type.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp" +#include "duckdb/parser/parsed_data/create_type_info.hpp" +#include "duckdb/storage/table_storage_info.hpp" + +extern "C" { +#include "postgres.h" +#include "utils/snapshot.h" +#include "postgres.h" +#include "catalog/namespace.h" +#include "catalog/pg_class.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "utils/builtins.h" +#include "utils/regproc.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "access/htup_details.h" +} + +namespace duckdb { + +class PostgresType : public TypeCatalogEntry { +public: + ~PostgresType() { + } + PostgresType(Catalog &catalog, SchemaCatalogEntry &schema, CreateTypeInfo &info); +}; + +} // namespace duckdb diff --git a/include/pgduckdb/duckdb_vendor/.clang-format b/include/pgduckdb/duckdb_vendor/.clang-format new file mode 100644 index 00000000..9d159247 --- /dev/null +++ b/include/pgduckdb/duckdb_vendor/.clang-format @@ -0,0 +1,2 @@ +DisableFormat: true +SortIncludes: false diff --git a/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp b/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp new file mode 100644 index 00000000..3f4b5ca9 --- /dev/null +++ b/include/pgduckdb/duckdb_vendor/enum_type_info_templated.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include "duckdb/common/extra_type_info.hpp" +#include "duckdb/common/vector.hpp" +#include "duckdb/common/types/vector.hpp" +#include "duckdb/common/string_map_set.hpp" +#include "duckdb/common/serializer/deserializer.hpp" + +// This is copied directly without changes from 'duckdb/src/common/extra_type_info.cpp' +// The reason this is copied is to be able to inherit from it. + +namespace duckdb { +template +struct EnumTypeInfoTemplated : public EnumTypeInfo { + explicit EnumTypeInfoTemplated(Vector &values_insert_order_p, idx_t size_p) + : EnumTypeInfo(values_insert_order_p, size_p) { + D_ASSERT(values_insert_order_p.GetType().InternalType() == PhysicalType::VARCHAR); + + UnifiedVectorFormat vdata; + values_insert_order.ToUnifiedFormat(size_p, vdata); + + auto data = UnifiedVectorFormat::GetData(vdata); + for (idx_t i = 0; i < size_p; i++) { + auto idx = vdata.sel->get_index(i); + if (!vdata.validity.RowIsValid(idx)) { + throw InternalException("Attempted to create ENUM type with NULL value"); + } + if (values.count(data[idx]) > 0) { + throw InvalidInputException("Attempted to create ENUM type with duplicate value %s", + data[idx].GetString()); + } + values[data[idx]] = UnsafeNumericCast(i); + } + } + + static shared_ptr Deserialize(Deserializer &deserializer, uint32_t size) { + Vector values_insert_order(LogicalType::VARCHAR, size); + auto strings = FlatVector::GetData(values_insert_order); + + deserializer.ReadList(201, "values", [&](Deserializer::List &list, idx_t i) { + strings[i] = StringVector::AddStringOrBlob(values_insert_order, list.ReadElement()); + }); + return make_shared_ptr(values_insert_order, size); + } + + const string_map_t &GetValues() const { + return values; + } + + EnumTypeInfoTemplated(const EnumTypeInfoTemplated &) = delete; + EnumTypeInfoTemplated &operator=(const EnumTypeInfoTemplated &) = delete; + +private: + string_map_t values; +}; + +} // namespace duckdb diff --git a/include/pgduckdb/types/pgduckdb_enum.hpp b/include/pgduckdb/types/pgduckdb_enum.hpp index f78a458d..2b903be0 100644 --- a/include/pgduckdb/types/pgduckdb_enum.hpp +++ b/include/pgduckdb/types/pgduckdb_enum.hpp @@ -1,5 +1,6 @@ #pragma once +#include "pgduckdb/duckdb_vendor/enum_type_info_templated.hpp" #include "duckdb/common/types/vector.hpp" #include "duckdb/common/types.hpp" @@ -17,13 +18,48 @@ using duckdb::idx_t; using duckdb::LogicalType; using duckdb::Vector; +// To store additional metadata for a type, DuckDB's LogicalType class contains a 'type_info_' field. +// The type of this is ExtraTypeInfo, ENUMs make use of this in the form of EnumTypeInfo (see +// duckdb/include/common/extra_type_info.hpp). We derive from this further to store the ENUMs connection with the oids +// of Postgres' enum members. + +template +class PGDuckDBEnumTypeInfo : public duckdb::EnumTypeInfoTemplated { +public: + PGDuckDBEnumTypeInfo(Vector &values_insert_order_p, idx_t dict_size_p, Vector &enum_member_oids) + : duckdb::EnumTypeInfoTemplated(values_insert_order_p, dict_size_p), enum_member_oids(enum_member_oids) { + } + +public: + const Vector & + GetMemberOids() const { + return enum_member_oids; + } + + duckdb::shared_ptr + Copy() const override { + auto &insert_order = this->GetValuesInsertOrder(); + Vector values_insert_order_copy(LogicalType::VARCHAR, false, false, 0); + values_insert_order_copy.Reference(insert_order); + + Vector enum_member_oids_copy(LogicalType::UINTEGER, false, false, 0); + enum_member_oids_copy.Reference(enum_member_oids); + + return duckdb::make_shared_ptr(values_insert_order_copy, this->GetDictSize(), + enum_member_oids_copy); + } + +private: + Vector enum_member_oids; +}; + struct PGDuckDBEnum { static LogicalType CreateEnumType(std::vector &enum_members); static idx_t GetDuckDBEnumPosition(duckdb::Value &val); static idx_t GetEnumPosition(Datum enum_member_oid, const duckdb::LogicalType &type); static bool IsEnumType(Oid type_oid); static Oid GetEnumTypeOid(const Vector &oids); - static Vector GetMemberOids(const duckdb::LogicalType &type); + static const Vector &GetMemberOids(const duckdb::LogicalType &type); }; } // namespace pgduckdb diff --git a/src/catalog/pgduckdb_transaction.cpp b/src/catalog/pgduckdb_transaction.cpp index 67258ac4..d0c3f018 100644 --- a/src/catalog/pgduckdb_transaction.cpp +++ b/src/catalog/pgduckdb_transaction.cpp @@ -1,8 +1,11 @@ #include "pgduckdb/catalog/pgduckdb_catalog.hpp" #include "pgduckdb/catalog/pgduckdb_transaction.hpp" #include "pgduckdb/catalog/pgduckdb_table.hpp" +#include "pgduckdb/catalog/pgduckdb_type.hpp" +#include "pgduckdb/pgduckdb_types.hpp" #include "pgduckdb/scan/postgres_scan.hpp" #include "duckdb/parser/parsed_data/create_table_info.hpp" +#include "duckdb/parser/parsed_data/create_type_info.hpp" #include "duckdb/parser/parsed_data/create_schema_info.hpp" #include "duckdb/catalog/catalog.hpp" @@ -10,6 +13,7 @@ extern "C" { #include "postgres.h" #include "catalog/namespace.h" #include "catalog/pg_class.h" +#include "catalog/pg_type.h" #include "optimizer/planmain.h" #include "optimizer/planner.h" #include "utils/builtins.h" @@ -23,6 +27,7 @@ extern "C" { #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "parser/parsetree.h" +#include "parser/parse_type.h" #include "utils/rel.h" } @@ -68,6 +73,56 @@ IsIndexScan(const Path *nodePath) { return false; } +optional_ptr +SchemaItems::GetType(const string &entry_name) { + auto it = types.find(entry_name); + if (it != types.end()) { + return it->second.get(); + } + + auto &catalog = schema->catalog; + + List *name_list = NIL; + name_list = lappend(name_list, makeString(pstrdup(name.c_str()))); + name_list = lappend(name_list, makeString(pstrdup(entry_name.c_str()))); + + TypeName *type_name = makeTypeNameFromNameList(name_list); + + // Try to look up the type by name + Oid type_oid = InvalidOid; + HeapTuple type_entry = LookupTypeName(NULL, type_name, NULL, false); + + if (HeapTupleIsValid(type_entry)) { + // Cast to the correct form to access the fields + Form_pg_type type_form = (Form_pg_type)GETSTRUCT(type_entry); + + // Extract the OID from the type entry + type_oid = type_form->oid; + + // Release the system cache entry + ReleaseSysCache(type_entry); + } + + // If the type could not be found, handle it here + if (type_oid == InvalidOid) { + return nullptr; + } + + FormData_pg_attribute dummy_attr; + dummy_attr.atttypid = type_oid; + dummy_attr.atttypmod = 0; + dummy_attr.attndims = 0; + Form_pg_attribute attribute = &dummy_attr; + + CreateTypeInfo info; + info.name = entry_name; + info.type = pgduckdb::ConvertPostgresToDuckColumnType(attribute); + + auto type = make_uniq(catalog, *schema, info); + types[entry_name] = std::move(type); + return types[entry_name].get(); +} + optional_ptr SchemaItems::GetTable(const string &entry_name, PlannerInfo *planner_info) { auto it = tables.find(entry_name); @@ -157,23 +212,27 @@ PostgresTransaction::GetCatalogEntry(CatalogType type, const string &schema, con throw InternalException("Could not find 'postgres_state' in 'PostgresTransaction::GetCatalogEntry'"); } auto planner_info = scan_data->m_query_planner_info; + if (type == CatalogType::SCHEMA_ENTRY) { + return GetSchema(schema); + } + + auto it = schemas.find(schema); + if (it == schemas.end()) { + return nullptr; + } + auto &schema_entry = it->second; + switch (type) { case CatalogType::TABLE_ENTRY: { - auto it = schemas.find(schema); - if (it == schemas.end()) { - return nullptr; - } - auto &schema_entry = it->second; return schema_entry.GetTable(name, planner_info); } - case CatalogType::SCHEMA_ENTRY: { - return GetSchema(schema); + case CatalogType::TYPE_ENTRY: { + return schema_entry.GetType(name); } default: + D_ASSERT(type != CatalogType::SCHEMA_ENTRY); return nullptr; } } } // namespace duckdb - -// namespace duckdb diff --git a/src/catalog/pgduckdb_type.cpp b/src/catalog/pgduckdb_type.cpp new file mode 100644 index 00000000..582e4b44 --- /dev/null +++ b/src/catalog/pgduckdb_type.cpp @@ -0,0 +1,28 @@ +#include "pgduckdb/pgduckdb_types.hpp" +#include "pgduckdb/catalog/pgduckdb_schema.hpp" +#include "pgduckdb/catalog/pgduckdb_type.hpp" + +extern "C" { +#include "postgres.h" +#include "access/tableam.h" +#include "access/heapam.h" +#include "storage/bufmgr.h" +#include "catalog/namespace.h" +#include "catalog/pg_class.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "utils/builtins.h" +#include "utils/regproc.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "access/htup_details.h" +#include "parser/parsetree.h" +} + +namespace duckdb { + +PostgresType::PostgresType(Catalog &catalog, SchemaCatalogEntry &schema, CreateTypeInfo &info) + : TypeCatalogEntry(catalog, schema, info) { +} + +} // namespace duckdb diff --git a/src/pgduckdb_filter.cpp b/src/pgduckdb_filter.cpp index 081271d2..029c0956 100644 --- a/src/pgduckdb_filter.cpp +++ b/src/pgduckdb_filter.cpp @@ -10,6 +10,7 @@ extern "C" { } #include "pgduckdb/pgduckdb_filter.hpp" +#include "pgduckdb/types/pgduckdb_enum.hpp" #include "pgduckdb/pgduckdb_detoast.hpp" #include "pgduckdb/pgduckdb_types.hpp" @@ -71,8 +72,22 @@ FilterOperationSwitch(Datum &value, duckdb::Value &constant, Oid type_oid) { case VARCHAROID: return StringFilterOperation(value, constant); default: + if (PGDuckDBEnum::IsEnumType(type_oid)) { + auto position = PGDuckDBEnum::GetEnumPosition(value, constant.type()); + auto physical_type = duckdb::EnumType::GetPhysicalType(constant.type()); + switch (physical_type) { + case duckdb::PhysicalType::UINT8: + return TemplatedFilterOperation(static_cast(position), constant); + case duckdb::PhysicalType::UINT16: + return TemplatedFilterOperation(static_cast(position), constant); + case duckdb::PhysicalType::UINT32: + return TemplatedFilterOperation(static_cast(position), constant); + default: + throw duckdb::InternalException("Invalid Physical Type for ENUMs"); + } + } throw duckdb::InvalidTypeException( - duckdb::string("(DuckDB/FilterOperationSwitch) Unsupported duckdb type: %d", type_oid)); + duckdb::StringUtil::Format("(DuckDB/FilterOperationSwitch) Unsupported duckdb type: %d", type_oid)); } } diff --git a/src/pgduckdb_types.cpp b/src/pgduckdb_types.cpp index dfb837db..a74ff948 100644 --- a/src/pgduckdb_types.cpp +++ b/src/pgduckdb_types.cpp @@ -334,7 +334,7 @@ ConvertDuckToPostgresArray(TupleTableSlot *slot, duckdb::Value &value, idx_t col void ConvertDuckToPostgresEnumValue(TupleTableSlot *slot, duckdb::Value &val, idx_t col) { auto position = PGDuckDBEnum::GetDuckDBEnumPosition(val); - auto enum_member_oids = PGDuckDBEnum::GetMemberOids(val.type()); + auto &enum_member_oids = PGDuckDBEnum::GetMemberOids(val.type()); auto enum_member_oid = duckdb::FlatVector::GetData(enum_member_oids)[position]; slot->tts_values[col] = ObjectIdGetDatum(enum_member_oid); } @@ -679,7 +679,7 @@ GetPostgresDuckDBType(duckdb::LogicalType type) { } } case duckdb::LogicalTypeId::ENUM: { - auto member_oids = PGDuckDBEnum::GetMemberOids(type); + auto &member_oids = PGDuckDBEnum::GetMemberOids(type); return PGDuckDBEnum::GetEnumTypeOid(member_oids); } default: { diff --git a/src/types/pgduckdb_enum.cpp b/src/types/pgduckdb_enum.cpp index 3006bff7..ffd587ec 100644 --- a/src/types/pgduckdb_enum.cpp +++ b/src/types/pgduckdb_enum.cpp @@ -8,26 +8,45 @@ using duckdb::idx_t; using duckdb::LogicalTypeId; using duckdb::PhysicalType; -// This is medium hacky, we create extra space in the Vector that would normally only hold the strings corresponding to the enum's members. -// In that extra space we store the enum member oids. +// This is medium hacky, we create extra space in the Vector that would normally only hold the strings corresponding to +// the enum's members. In that extra space we store the enum member oids. -// We do this so we can find the OID from the offset (which is what DuckDB stores in a Vector of type ENUM) and return that when converting the result from DuckDB -> Postgres +// We do this so we can find the OID from the offset (which is what DuckDB stores in a Vector of type ENUM) and return +// that when converting the result from DuckDB -> Postgres -LogicalType PGDuckDBEnum::CreateEnumType(std::vector &enum_members) { - idx_t allocation_size = enum_members.size(); - allocation_size += enum_members.size() / 4; - allocation_size += (enum_members.size() % 4) != 0; +LogicalType +PGDuckDBEnum::CreateEnumType(std::vector &enum_members) { + auto size = enum_members.size(); - auto duck_enum_vec = duckdb::Vector(duckdb::LogicalType::VARCHAR, allocation_size); + auto duck_enum_vec = duckdb::Vector(duckdb::LogicalType::VARCHAR, size); + auto enum_member_oid_vec = duckdb::Vector(duckdb::LogicalType::UINTEGER, size); auto enum_vec_data = duckdb::FlatVector::GetData(duck_enum_vec); - auto enum_member_oid_data = (uint32_t *)(enum_vec_data + enum_members.size()); - for (idx_t i = 0; i < enum_members.size(); i++) { + auto enum_member_oid_data = duckdb::FlatVector::GetData(enum_member_oid_vec); + for (idx_t i = 0; i < size; i++) { auto &member = enum_members[i]; auto enum_data = (Form_pg_enum)GETSTRUCT(member); enum_vec_data[i] = duckdb::StringVector::AddString(duck_enum_vec, enum_data->enumlabel.data); enum_member_oid_data[i] = enum_data->oid; } - return duckdb::EnumTypeInfo::CreateType(duck_enum_vec, enum_members.size()); + + // Generate EnumTypeInfo + duckdb::shared_ptr info; + auto enum_internal_type = duckdb::EnumTypeInfo::DictType(size); + switch (enum_internal_type) { + case PhysicalType::UINT8: + info = duckdb::make_shared_ptr>(duck_enum_vec, size, enum_member_oid_vec); + break; + case PhysicalType::UINT16: + info = duckdb::make_shared_ptr>(duck_enum_vec, size, enum_member_oid_vec); + break; + case PhysicalType::UINT32: + info = duckdb::make_shared_ptr>(duck_enum_vec, size, enum_member_oid_vec); + break; + default: + throw duckdb::InternalException("Invalid Physical Type for ENUMs"); + } + // Generate Actual Enum Type + return LogicalType(LogicalTypeId::ENUM, info); } idx_t @@ -53,12 +72,12 @@ PGDuckDBEnum::GetEnumPosition(Datum enum_member_oid_p, const duckdb::LogicalType auto &enum_type_info = type.AuxInfo()->Cast(); auto dict_size = enum_type_info.GetDictSize(); - auto enum_member_oids = PGDuckDBEnum::GetMemberOids(type); + auto &enum_member_oids = PGDuckDBEnum::GetMemberOids(type); auto oids_data = duckdb::FlatVector::GetData(enum_member_oids); - uint32_t *begin = oids_data; - uint32_t *end = oids_data + dict_size; - uint32_t *result = std::find(begin, end, enum_member_oid); + const uint32_t *begin = oids_data; + const uint32_t *end = oids_data + dict_size; + const uint32_t *result = std::find(begin, end, enum_member_oid); if (result == end) { throw duckdb::InternalException("Could not find enum_member_oid: %d", enum_member_oid); @@ -101,17 +120,21 @@ PGDuckDBEnum::GetEnumTypeOid(const Vector &oids) { return result; } -Vector +const Vector & PGDuckDBEnum::GetMemberOids(const duckdb::LogicalType &type) { auto type_info = type.AuxInfo(); auto &enum_type_info = type_info->Cast(); - auto &enum_sort_order = enum_type_info.GetValuesInsertOrder(); - idx_t dict_size = enum_type_info.GetDictSize(); - // We store the (Postgres) enum member oids behind the string data in the EnumTypeInfo - auto vector_data = duckdb::FlatVector::GetData(enum_sort_order); - // Create a Vector that wraps the existing data, without copying - only valid while the EnumTypeInfo is alive - return Vector(duckdb::LogicalType::UINTEGER, (duckdb::data_ptr_t)(vector_data + dict_size)); + switch (enum_type_info.DictType(enum_type_info.GetDictSize())) { + case PhysicalType::UINT8: + return enum_type_info.Cast>().GetMemberOids(); + case PhysicalType::UINT16: + return enum_type_info.Cast>().GetMemberOids(); + case PhysicalType::UINT32: + return enum_type_info.Cast>().GetMemberOids(); + default: + throw duckdb::InternalException("Invalid Physical Type for ENUMs"); + } } } // namespace pgduckdb diff --git a/test/regression/expected/enum.out b/test/regression/expected/enum.out index edf09345..7d55378c 100644 --- a/test/regression/expected/enum.out +++ b/test/regression/expected/enum.out @@ -15,13 +15,30 @@ select * from tbl; sad (3 rows) ---select * from tbl where a = 'sad'; -- returns `sad` ---select * from tbl where a > 'neutral'; --- returns `happy` +select * from tbl where a = 'sad'; + a +----- + sad +(1 row) + +select * from tbl where a > 'neutral'; + a +------- + happy +(1 row) + ALTER TYPE mood ADD VALUE 'a-bit-happy' BEFORE 'happy'; ALTER TYPE mood ADD VALUE 'very-happy' AFTER 'happy'; ALTER TYPE mood ADD VALUE 'very-sad' BEFORE 'sad'; insert into tbl values ('very-sad'), ('very-happy'), ('a-bit-happy'); ---select * from tbl where a > 'neutral'; --- returns `a-bit-happy`, `happy` and `very-happy`; +select * from tbl where a > 'neutral'; + a +------------- + happy + very-happy + a-bit-happy +(3 rows) + select * from tbl; a ------------- diff --git a/test/regression/sql/enum.sql b/test/regression/sql/enum.sql index 2c71fad5..17ab9830 100644 --- a/test/regression/sql/enum.sql +++ b/test/regression/sql/enum.sql @@ -11,15 +11,15 @@ insert into tbl select 'sad'; select * from tbl; ---select * from tbl where a = 'sad'; -- returns `sad` ---select * from tbl where a > 'neutral'; --- returns `happy` +select * from tbl where a = 'sad'; +select * from tbl where a > 'neutral'; ALTER TYPE mood ADD VALUE 'a-bit-happy' BEFORE 'happy'; ALTER TYPE mood ADD VALUE 'very-happy' AFTER 'happy'; ALTER TYPE mood ADD VALUE 'very-sad' BEFORE 'sad'; insert into tbl values ('very-sad'), ('very-happy'), ('a-bit-happy'); ---select * from tbl where a > 'neutral'; --- returns `a-bit-happy`, `happy` and `very-happy`; +select * from tbl where a > 'neutral'; select * from tbl; drop table tbl; From 0ef8cc27f17376bf48798e198194e3072506a9bd Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 24 Sep 2024 14:15:38 +0200 Subject: [PATCH 16/16] added an id column for verification purposes --- test/regression/expected/enum.out | 64 +++++++++++++++---------------- test/regression/sql/enum.sql | 16 ++++---- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/test/regression/expected/enum.out b/test/regression/expected/enum.out index 7d55378c..9145cca0 100644 --- a/test/regression/expected/enum.out +++ b/test/regression/expected/enum.out @@ -3,51 +3,51 @@ CREATE TYPE mood AS ENUM ( 'neutral', 'happy' ); -create table tbl (a mood); -insert into tbl select 'happy'; -insert into tbl select 'neutral'; -insert into tbl select 'sad'; +create table tbl (id serial primary key, a mood); +insert into tbl(a) select 'happy'; +insert into tbl(a) select 'neutral'; +insert into tbl(a) select 'sad'; select * from tbl; - a ---------- - happy - neutral - sad + id | a +----+--------- + 1 | happy + 2 | neutral + 3 | sad (3 rows) -select * from tbl where a = 'sad'; - a ------ - sad +select * from tbl where a = 'sad'; -- returns `sad` + id | a +----+----- + 3 | sad (1 row) -select * from tbl where a > 'neutral'; - a -------- - happy +select * from tbl where a > 'neutral'; --- returns `happy` + id | a +----+------- + 1 | happy (1 row) ALTER TYPE mood ADD VALUE 'a-bit-happy' BEFORE 'happy'; ALTER TYPE mood ADD VALUE 'very-happy' AFTER 'happy'; ALTER TYPE mood ADD VALUE 'very-sad' BEFORE 'sad'; -insert into tbl values ('very-sad'), ('very-happy'), ('a-bit-happy'); -select * from tbl where a > 'neutral'; - a -------------- - happy - very-happy - a-bit-happy +insert into tbl(a) values ('very-sad'), ('very-happy'), ('a-bit-happy'); +select * from tbl where a > 'neutral'; --- returns `a-bit-happy`, `happy` and `very-happy`; + id | a +----+------------- + 1 | happy + 5 | very-happy + 6 | a-bit-happy (3 rows) select * from tbl; - a -------------- - happy - neutral - sad - very-sad - very-happy - a-bit-happy + id | a +----+------------- + 1 | happy + 2 | neutral + 3 | sad + 4 | very-sad + 5 | very-happy + 6 | a-bit-happy (6 rows) drop table tbl; diff --git a/test/regression/sql/enum.sql b/test/regression/sql/enum.sql index 17ab9830..f445ec45 100644 --- a/test/regression/sql/enum.sql +++ b/test/regression/sql/enum.sql @@ -3,23 +3,23 @@ CREATE TYPE mood AS ENUM ( 'neutral', 'happy' ); -create table tbl (a mood); +create table tbl (id serial primary key, a mood); -insert into tbl select 'happy'; -insert into tbl select 'neutral'; -insert into tbl select 'sad'; +insert into tbl(a) select 'happy'; +insert into tbl(a) select 'neutral'; +insert into tbl(a) select 'sad'; select * from tbl; -select * from tbl where a = 'sad'; -select * from tbl where a > 'neutral'; +select * from tbl where a = 'sad'; -- returns `sad` +select * from tbl where a > 'neutral'; --- returns `happy` ALTER TYPE mood ADD VALUE 'a-bit-happy' BEFORE 'happy'; ALTER TYPE mood ADD VALUE 'very-happy' AFTER 'happy'; ALTER TYPE mood ADD VALUE 'very-sad' BEFORE 'sad'; -insert into tbl values ('very-sad'), ('very-happy'), ('a-bit-happy'); -select * from tbl where a > 'neutral'; +insert into tbl(a) values ('very-sad'), ('very-happy'), ('a-bit-happy'); +select * from tbl where a > 'neutral'; --- returns `a-bit-happy`, `happy` and `very-happy`; select * from tbl; drop table tbl;