Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enum type support #193

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ SRCS = src/scan/heap_reader.cpp \
src/scan/postgres_seq_scan.cpp \
src/utility/copy.cpp \
src/vendor/pg_explain.cpp \
src/types/pgduckdb_enum.cpp \
src/pgduckdb_metadata_cache.cpp \
src/pgduckdb_detoast.cpp \
src/pgduckdb_duckdb.cpp \
Expand Down
28 changes: 28 additions & 0 deletions include/pgduckdb/types/pgduckdb_enum.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#pragma once

#include "duckdb/common/types/vector.hpp"
#include "duckdb/common/types.hpp"

extern "C" {
#include "postgres.h"
#include "catalog/pg_enum.h"
#include "catalog/pg_type.h"
#include "utils/syscache.h"
#include "access/htup_details.h"
}

namespace pgduckdb {

using duckdb::idx_t;
using duckdb::LogicalType;
using duckdb::Vector;

struct PGDuckDBEnum {
Tishj marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be changed to a namespace? Now that this struct doesn't have any fields anymore?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea possibly, in our codebase we're not too strict on static-membered-structs vs namespaces, I think when we also need a constexpr value here we can't use a namespace but can use a struct, so I almost always go for using a struct

static idx_t GetDuckDBEnumPosition(duckdb::Value &val);
static idx_t GetEnumPosition(Datum enum_member_oid, const duckdb::LogicalType &type);
static bool IsEnumType(Oid type_oid);
static Oid GetEnumTypeOid(const Vector &oids);
static Vector GetMemberOids(const duckdb::LogicalType &type);
};

} // namespace pgduckdb
102 changes: 99 additions & 3 deletions src/pgduckdb_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ extern "C" {
#include "catalog/pg_type.h"
#include "executor/tuptable.h"
#include "utils/numeric.h"
#include "utils/catcache.h"
#include "utils/uuid.h"
#include "utils/array.h"
#include "fmgr.h"
Expand All @@ -18,14 +19,20 @@ extern "C" {
}

#include "pgduckdb/pgduckdb.h"
#include "pgduckdb/pgduckdb_utils.hpp"
#include "pgduckdb/scan/postgres_scan.hpp"
#include "pgduckdb/types/decimal.hpp"
#include "pgduckdb/pgduckdb_filter.hpp"
#include "pgduckdb/pgduckdb_detoast.hpp"
#include "pgduckdb/pgduckdb_types.hpp"
#include "pgduckdb/types/pgduckdb_enum.hpp"

namespace pgduckdb {

using duckdb::EnumTypeInfo;
using duckdb::LogicalTypeId;
using duckdb::PhysicalType;

struct BoolArray {
public:
static ArrayType *
Expand Down Expand Up @@ -321,6 +328,14 @@ ConvertDuckToPostgresArray(TupleTableSlot *slot, duckdb::Value &value, idx_t col
slot->tts_values[col] = PointerGetDatum(arr);
}

void
ConvertDuckToPostgresEnumValue(TupleTableSlot *slot, duckdb::Value &val, idx_t col) {
auto position = PGDuckDBEnum::GetDuckDBEnumPosition(val);
auto enum_member_oids = PGDuckDBEnum::GetMemberOids(val.type());
auto enum_member_oid = duckdb::FlatVector::GetData<Oid>(enum_member_oids)[position];
slot->tts_values[col] = ObjectIdGetDatum(enum_member_oid);
}

bool
ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col) {
Oid oid = slot->tts_tupleDescriptor->attrs[col].atttypid;
Expand Down Expand Up @@ -462,9 +477,18 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
ConvertDuckToPostgresArray<PODArray<PostgresIntegerOIDMapping<INT8OID>>>(slot, value, col);
break;
}
default:
elog(WARNING, "(PGDuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", oid);
return false;
default: {
auto type_id = value.type().id();
switch (type_id) {
case LogicalTypeId::ENUM: {
ConvertDuckToPostgresEnumValue(slot, value, col);
break;
default:
elog(WARNING, "(PGDuckDB/ConvertDuckToPostgresValue) Unsuported pgduckdb type: %d", oid);
return false;
}
}
}
}
return true;
}
Expand Down Expand Up @@ -495,6 +519,53 @@ ChildTypeFromArray(Oid array_type) {
}
}

duckdb::LogicalType
ConvertPostgresEnumToDuckEnum(Oid enum_type_oid) {
/* Get the list of existing members of the enum */
auto list =
PostgresFunctionGuard<CatCList *>([](int cacheId, Datum key) { return SearchSysCacheList1(cacheId, key); },
ENUMTYPOIDNAME, ObjectIdGetDatum(enum_type_oid));
auto nelems = list->n_members;

/* Sort the existing members by enumsortorder */
std::vector<HeapTuple> enum_members(nelems);
for (int i = 0; i < nelems; ++i) {
enum_members[i] = &list->members[i]->tuple;
}

auto sort_order_cmp = [](const HeapTuple &v1, const HeapTuple &v2) {
Form_pg_enum en1 = (Form_pg_enum)GETSTRUCT(v1);
Form_pg_enum en2 = (Form_pg_enum)GETSTRUCT(v2);

if (en1->enumsortorder < en2->enumsortorder) {
return true; // v1 < v2
} else if (en1->enumsortorder > en2->enumsortorder) {
return false; // v1 > v2
} else {
return false; // v1 == v2
}
Comment on lines +543 to +549
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can be simplified like this:

Suggested change
if (en1->enumsortorder < en2->enumsortorder) {
return true; // v1 < v2
} else if (en1->enumsortorder > en2->enumsortorder) {
return false; // v1 > v2
} else {
return false; // v1 == v2
}
return en1->enumsortorder < en2->enumsortorder;

};

std::sort(enum_members.begin(), enum_members.end(), sort_order_cmp);

idx_t allocation_size = enum_members.size();
allocation_size += enum_members.size() / 4;
allocation_size += (enum_members.size() % 4) != 0;
Tishj marked this conversation as resolved.
Show resolved Hide resolved

auto duck_enum_vec = duckdb::Vector(duckdb::LogicalType::VARCHAR, allocation_size);
auto enum_vec_data = duckdb::FlatVector::GetData<duckdb::string_t>(duck_enum_vec);
auto enum_member_oid_data = (uint32_t *)(enum_vec_data + enum_members.size());
for (idx_t i = 0; i < enum_members.size(); i++) {
auto &member = enum_members[i];
auto enum_data = (Form_pg_enum)GETSTRUCT(member);
enum_vec_data[i] = duckdb::StringVector::AddString(duck_enum_vec, enum_data->enumlabel.data);
enum_member_oid_data[i] = enum_data->oid;
}

PostgresFunctionGuard(ReleaseCatCacheList, list);
return EnumTypeInfo::CreateType(duck_enum_vec, enum_members.size());
}

duckdb::LogicalType
ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute) {
auto &type = attribute->atttypid;
Expand Down Expand Up @@ -548,6 +619,9 @@ ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute) {
case REGCLASSOID:
return duckdb::LogicalTypeId::UINTEGER;
default: {
if (PGDuckDBEnum::IsEnumType(type)) {
return ConvertPostgresEnumToDuckEnum(type);
}
std::string name = "UnsupportedPostgresType (Oid=" + std::to_string(type) + ")";
return duckdb::LogicalType::USER(name);
}
Expand Down Expand Up @@ -615,6 +689,10 @@ GetPostgresDuckDBType(duckdb::LogicalType type) {
}
}
}
case duckdb::LogicalTypeId::ENUM: {
auto member_oids = PGDuckDBEnum::GetMemberOids(type);
return PGDuckDBEnum::GetEnumTypeOid(member_oids);
}
default: {
elog(WARNING, "(PGDuckDB/GetPostgresDuckDBType) Could not convert DuckDB type: %s to Postgres type",
type.ToString().c_str());
Expand Down Expand Up @@ -883,6 +961,24 @@ ConvertPostgresToDuckValue(Datum value, duckdb::Vector &result, idx_t offset) {
}
break;
}
case LogicalTypeId::ENUM: {
auto enum_position = PGDuckDBEnum::GetEnumPosition(value, result.GetType());
auto physical_type = result.GetType().InternalType();
switch (physical_type) {
case PhysicalType::UINT8:
Append(result, static_cast<uint8_t>(enum_position), offset);
break;
case PhysicalType::UINT16:
Append(result, static_cast<uint16_t>(enum_position), offset);
break;
case PhysicalType::UINT32:
Append(result, static_cast<uint32_t>(enum_position), offset);
break;
default:
throw duckdb::InternalException("Invalid Physical Type for ENUMs");
}
break;
}
default:
throw duckdb::NotImplementedException("(DuckDB/ConvertPostgresToDuckValue) Unsupported pgduckdb type: %s",
result.GetType().ToString().c_str());
Expand Down
95 changes: 95 additions & 0 deletions src/types/pgduckdb_enum.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#include "pgduckdb/types/pgduckdb_enum.hpp"
#include "pgduckdb/pgduckdb_utils.hpp"
#include "duckdb/common/extra_type_info.hpp"

namespace pgduckdb {

using duckdb::idx_t;
using duckdb::LogicalTypeId;
using duckdb::PhysicalType;

idx_t
PGDuckDBEnum::GetDuckDBEnumPosition(duckdb::Value &val) {
D_ASSERT(val.type().id() == LogicalTypeId::ENUM);
auto physical_type = val.type().InternalType();
switch (physical_type) {
case PhysicalType::UINT8:
return val.GetValue<uint8_t>();
case PhysicalType::UINT16:
return val.GetValue<uint16_t>();
case PhysicalType::UINT32:
return val.GetValue<uint32_t>();
default:
throw duckdb::InternalException("Invalid Physical Type for ENUMs");
}
}

idx_t
PGDuckDBEnum::GetEnumPosition(Datum enum_member_oid_p, const duckdb::LogicalType &type) {
auto enum_member_oid = DatumGetObjectId(enum_member_oid_p);

auto &enum_type_info = type.AuxInfo()->Cast<duckdb::EnumTypeInfo>();
auto dict_size = enum_type_info.GetDictSize();

auto enum_member_oids = PGDuckDBEnum::GetMemberOids(type);
auto oids_data = duckdb::FlatVector::GetData<uint32_t>(enum_member_oids);

uint32_t *begin = oids_data;
uint32_t *end = oids_data + dict_size;
uint32_t *result = std::find(begin, end, enum_member_oid);

if (result == end) {
throw duckdb::InternalException("Could not find enum_member_oid: %d", enum_member_oid);
}
return (idx_t)(result - begin);
}

bool
PGDuckDBEnum::IsEnumType(Oid type_oid) {
bool result = false;
auto type_tuple = PostgresFunctionGuard<HeapTuple>(SearchSysCache1, TYPEOID, ObjectIdGetDatum(type_oid));

if (HeapTupleIsValid(type_tuple)) {
auto type_form = (Form_pg_type)GETSTRUCT(type_tuple);

// Check if the type is an enum
if (type_form->typtype == 'e') {
result = true;
}
PostgresFunctionGuard(ReleaseSysCache, type_tuple);
}
return result;
}

Oid
PGDuckDBEnum::GetEnumTypeOid(const Vector &oids) {
/* Get the pg_type tuple for the enum type */
auto member_oid = duckdb::FlatVector::GetData<uint32_t>(oids)[0];
auto tuple = PostgresFunctionGuard<HeapTuple>(SearchSysCache1, ENUMOID, ObjectIdGetDatum(member_oid));
Oid result = InvalidOid;
if (!HeapTupleIsValid(tuple)) {
throw duckdb::InvalidInputException("Cache lookup failed for enum member with oid %d", member_oid);
}

auto enum_form = (Form_pg_enum)GETSTRUCT(tuple);
result = enum_form->enumtypid;

/* Release the cache tuple */
PostgresFunctionGuard(ReleaseSysCache, tuple);
return result;
}

Vector
PGDuckDBEnum::GetMemberOids(const duckdb::LogicalType &type) {
auto type_info = type.AuxInfo();
auto &enum_type_info = type_info->Cast<duckdb::EnumTypeInfo>();
auto &enum_sort_order = enum_type_info.GetValuesInsertOrder();
idx_t dict_size = enum_type_info.GetDictSize();

// We store the (Postgres) enum member oids behind the string data in the EnumTypeInfo
auto vector_data = duckdb::FlatVector::GetData<duckdb::string_t>(enum_sort_order);
// Create a Vector that wraps the existing data, without copying - only valid while the EnumTypeInfo is alive
return Vector(duckdb::LogicalType::UINTEGER, (duckdb::data_ptr_t)(vector_data + dict_size));
}

} // namespace pgduckdb
37 changes: 37 additions & 0 deletions test/regression/expected/enum.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
CREATE TYPE mood AS ENUM (
'sad',
'neutral',
'happy'
);
create table tbl (a mood);
insert into tbl select 'happy';
insert into tbl select 'neutral';
insert into tbl select 'sad';
select * from tbl;
a
---------
happy
neutral
sad
(3 rows)

--select * from tbl where a = 'sad'; -- returns `sad`
--select * from tbl where a > 'neutral'; --- returns `happy`
ALTER TYPE mood ADD VALUE 'a-bit-happy' BEFORE 'happy';
ALTER TYPE mood ADD VALUE 'very-happy' AFTER 'happy';
ALTER TYPE mood ADD VALUE 'very-sad' BEFORE 'sad';
insert into tbl values ('very-sad'), ('very-happy'), ('a-bit-happy');
--select * from tbl where a > 'neutral'; --- returns `a-bit-happy`, `happy` and `very-happy`;
select * from tbl;
a
-------------
happy
neutral
sad
very-sad
very-happy
a-bit-happy
(6 rows)

drop table tbl;
drop type mood;
1 change: 1 addition & 0 deletions test/regression/schedule
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ test: duckdb_only_functions
test: cte
test: create_table_as
test: standard_conforming_strings
test: enum
26 changes: 26 additions & 0 deletions test/regression/sql/enum.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
CREATE TYPE mood AS ENUM (
'sad',
'neutral',
'happy'
);
create table tbl (a mood);
Tishj marked this conversation as resolved.
Show resolved Hide resolved

insert into tbl select 'happy';
insert into tbl select 'neutral';
insert into tbl select 'sad';

select * from tbl;
Tishj marked this conversation as resolved.
Show resolved Hide resolved

--select * from tbl where a = 'sad'; -- returns `sad`
--select * from tbl where a > 'neutral'; --- returns `happy`

ALTER TYPE mood ADD VALUE 'a-bit-happy' BEFORE 'happy';
ALTER TYPE mood ADD VALUE 'very-happy' AFTER 'happy';
ALTER TYPE mood ADD VALUE 'very-sad' BEFORE 'sad';

insert into tbl values ('very-sad'), ('very-happy'), ('a-bit-happy');
--select * from tbl where a > 'neutral'; --- returns `a-bit-happy`, `happy` and `very-happy`;
select * from tbl;

drop table tbl;
drop type mood;