From c82a4562b67b1cae042a7dbd950fcd4b3fde8413 Mon Sep 17 00:00:00 2001
From: DuckDB Labs GitHub Bot <github_bot@duckdblabs.com>
Date: Wed, 15 Jan 2025 00:33:18 +0000
Subject: [PATCH] Update vendored DuckDB sources to 442deb32

---
 .../extension/json/buffered_json_reader.cpp   |   2 +-
 .../json/json_functions/read_json_objects.cpp |   4 +-
 src/duckdb/src/common/enum_util.cpp           |  14 +-
 src/duckdb/src/common/enums/metric_type.cpp   |   6 +
 .../src/common/enums/optimizer_type.cpp       |   1 +
 src/duckdb/src/common/local_file_system.cpp   |  17 +-
 .../csv_scanner/sniffer/header_detection.cpp  |   7 +
 .../csv_scanner/sniffer/type_detection.cpp    |   1 +
 .../src/function/scalar/strftime_format.cpp   |  10 +
 src/duckdb/src/function/table/table_scan.cpp  |   2 +-
 .../function/table/version/pragma_version.cpp |   6 +-
 .../duckdb/common/enums/metric_type.hpp       |   1 +
 .../duckdb/common/enums/optimizer_type.hpp    |   3 +-
 .../src/include/duckdb/common/helper.hpp      |   6 +
 .../csv_scanner/sniffer/csv_sniffer.hpp       |   2 +
 .../function/scalar/strftime_format.hpp       |   1 +
 .../duckdb/function/table/table_scan.hpp      |   7 +
 .../src/include/duckdb/main/client_config.hpp |   3 +
 src/duckdb/src/include/duckdb/main/config.hpp |   3 +
 .../src/include/duckdb/main/database.hpp      |   2 -
 .../include/duckdb/main/db_instance_cache.hpp |   1 +
 .../include/duckdb/main/extension_entries.hpp |   1 +
 .../src/include/duckdb/main/settings.hpp      |  11 +
 .../duckdb/optimizer/late_materialization.hpp |  45 ++
 .../optimizer/remove_unused_columns.hpp       |  48 +-
 .../duckdb/planner/operator/logical_get.hpp   |   2 +-
 .../storage/compression/roaring/roaring.hpp   |   6 +-
 src/duckdb/src/main/config.cpp                |   1 +
 src/duckdb/src/main/database.cpp              |  11 +-
 src/duckdb/src/main/db_instance_cache.cpp     |  21 +-
 .../main/settings/autogenerated_settings.cpp  |  17 +
 .../src/optimizer/late_materialization.cpp    | 414 ++++++++++++++++++
 src/duckdb/src/optimizer/optimizer.cpp        |   7 +
 .../src/optimizer/remove_unused_columns.cpp   |  23 +-
 .../storage/compression/roaring/compress.cpp  |  17 +-
 .../src/storage/compression/roaring/scan.cpp  |   6 +
 src/duckdb/src/storage/table/row_group.cpp    |  16 +-
 src/duckdb/ub_src_optimizer.cpp               |   2 +
 38 files changed, 664 insertions(+), 83 deletions(-)
 create mode 100644 src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp
 create mode 100644 src/duckdb/src/optimizer/late_materialization.cpp
diff --git a/src/duckdb/extension/json/buffered_json_reader.cpp b/src/duckdb/extension/json/buffered_json_reader.cpp
index f99fe032..68a830c7 100644
--- a/src/duckdb/extension/json/buffered_json_reader.cpp
+++ b/src/duckdb/extension/json/buffered_json_reader.cpp
@@ -35,7 +35,7 @@ void JSONFileHandle::Reset() {
 	requested_reads = 0;
 	actual_reads = 0;
 	last_read_requested = false;
-	if (IsOpen() && CanSeek()) {
+	if (IsOpen() && !file_handle->IsPipe()) {
 		file_handle->Reset();
 	}
 }
diff --git a/src/duckdb/extension/json/json_functions/read_json_objects.cpp b/src/duckdb/extension/json/json_functions/read_json_objects.cpp
index a0e6e6b8..c58b85fd 100644
--- a/src/duckdb/extension/json/json_functions/read_json_objects.cpp
+++ b/src/duckdb/extension/json/json_functions/read_json_objects.cpp
@@ -64,8 +64,8 @@ TableFunction GetReadJSONObjectsTableFunction(bool list_parameter, shared_ptr<JS
 
 TableFunctionSet JSONFunctions::GetReadJSONObjectsFunction() {
 	TableFunctionSet function_set("read_json_objects");
-	auto function_info =
-	    make_shared_ptr<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::ARRAY, JSONRecordType::RECORDS);
+	auto function_info = make_shared_ptr<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::AUTO_DETECT,
+	                                                   JSONRecordType::RECORDS);
 	function_set.AddFunction(GetReadJSONObjectsTableFunction(false, function_info));
 	function_set.AddFunction(GetReadJSONObjectsTableFunction(true, function_info));
 	return function_set;
diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp
index 88660f98..c3a99f65 100644
--- a/src/duckdb/src/common/enum_util.cpp
+++ b/src/duckdb/src/common/enum_util.cpp
@@ -2399,19 +2399,20 @@ const StringUtil::EnumStringLiteral *GetMetricsTypeValues() {
 		{ static_cast<uint32_t>(MetricsType::OPTIMIZER_JOIN_FILTER_PUSHDOWN), "OPTIMIZER_JOIN_FILTER_PUSHDOWN" },
 		{ static_cast<uint32_t>(MetricsType::OPTIMIZER_EXTENSION), "OPTIMIZER_EXTENSION" },
 		{ static_cast<uint32_t>(MetricsType::OPTIMIZER_MATERIALIZED_CTE), "OPTIMIZER_MATERIALIZED_CTE" },
-		{ static_cast<uint32_t>(MetricsType::OPTIMIZER_SUM_REWRITER), "OPTIMIZER_SUM_REWRITER" }
+		{ static_cast<uint32_t>(MetricsType::OPTIMIZER_SUM_REWRITER), "OPTIMIZER_SUM_REWRITER" },
+		{ static_cast<uint32_t>(MetricsType::OPTIMIZER_LATE_MATERIALIZATION), "OPTIMIZER_LATE_MATERIALIZATION" }
 	};
 	return values;
 }
 
 template<>
 const char* EnumUtil::ToChars<MetricsType>(MetricsType value) {
-	return StringUtil::EnumToString(GetMetricsTypeValues(), 48, "MetricsType", static_cast<uint32_t>(value));
+	return StringUtil::EnumToString(GetMetricsTypeValues(), 49, "MetricsType", static_cast<uint32_t>(value));
 }
 
 template<>
 MetricsType EnumUtil::FromString<MetricsType>(const char *value) {
-	return static_cast<MetricsType>(StringUtil::StringToEnum(GetMetricsTypeValues(), 48, "MetricsType", value));
+	return static_cast<MetricsType>(StringUtil::StringToEnum(GetMetricsTypeValues(), 49, "MetricsType", value));
 }
 
 const StringUtil::EnumStringLiteral *GetNTypeValues() {
@@ -2584,19 +2585,20 @@ const StringUtil::EnumStringLiteral *GetOptimizerTypeValues() {
 		{ static_cast<uint32_t>(OptimizerType::JOIN_FILTER_PUSHDOWN), "JOIN_FILTER_PUSHDOWN" },
 		{ static_cast<uint32_t>(OptimizerType::EXTENSION), "EXTENSION" },
 		{ static_cast<uint32_t>(OptimizerType::MATERIALIZED_CTE), "MATERIALIZED_CTE" },
-		{ static_cast<uint32_t>(OptimizerType::SUM_REWRITER), "SUM_REWRITER" }
+		{ static_cast<uint32_t>(OptimizerType::SUM_REWRITER), "SUM_REWRITER" },
+		{ static_cast<uint32_t>(OptimizerType::LATE_MATERIALIZATION), "LATE_MATERIALIZATION" }
 	};
 	return values;
 }
 
 template<>
 const char* EnumUtil::ToChars<OptimizerType>(OptimizerType value) {
-	return StringUtil::EnumToString(GetOptimizerTypeValues(), 27, "OptimizerType", static_cast<uint32_t>(value));
+	return StringUtil::EnumToString(GetOptimizerTypeValues(), 28, "OptimizerType", static_cast<uint32_t>(value));
 }
 
 template<>
 OptimizerType EnumUtil::FromString<OptimizerType>(const char *value) {
-	return static_cast<OptimizerType>(StringUtil::StringToEnum(GetOptimizerTypeValues(), 27, "OptimizerType", value));
+	return static_cast<OptimizerType>(StringUtil::StringToEnum(GetOptimizerTypeValues(), 28, "OptimizerType", value));
 }
 
 const StringUtil::EnumStringLiteral *GetOrderByNullTypeValues() {
diff --git a/src/duckdb/src/common/enums/metric_type.cpp b/src/duckdb/src/common/enums/metric_type.cpp
index 94a75cc1..d97579c2 100644
--- a/src/duckdb/src/common/enums/metric_type.cpp
+++ b/src/duckdb/src/common/enums/metric_type.cpp
@@ -39,6 +39,7 @@ profiler_settings_t MetricsUtils::GetOptimizerMetrics() {
         MetricsType::OPTIMIZER_EXTENSION,
         MetricsType::OPTIMIZER_MATERIALIZED_CTE,
         MetricsType::OPTIMIZER_SUM_REWRITER,
+        MetricsType::OPTIMIZER_LATE_MATERIALIZATION,
     };
 }
 
@@ -109,6 +110,8 @@ MetricsType MetricsUtils::GetOptimizerMetricByType(OptimizerType type) {
             return MetricsType::OPTIMIZER_MATERIALIZED_CTE;
         case OptimizerType::SUM_REWRITER:
             return MetricsType::OPTIMIZER_SUM_REWRITER;
+        case OptimizerType::LATE_MATERIALIZATION:
+            return MetricsType::OPTIMIZER_LATE_MATERIALIZATION;
        default:
             throw InternalException("OptimizerType %s cannot be converted to a MetricsType", EnumUtil::ToString(type));
     };
@@ -168,6 +171,8 @@ OptimizerType MetricsUtils::GetOptimizerTypeByMetric(MetricsType type) {
             return OptimizerType::MATERIALIZED_CTE;
         case MetricsType::OPTIMIZER_SUM_REWRITER:
             return OptimizerType::SUM_REWRITER;
+        case MetricsType::OPTIMIZER_LATE_MATERIALIZATION:
+            return OptimizerType::LATE_MATERIALIZATION;
     default:
             return OptimizerType::INVALID;
     };
@@ -201,6 +206,7 @@ bool MetricsUtils::IsOptimizerMetric(MetricsType type) {
         case MetricsType::OPTIMIZER_EXTENSION:
         case MetricsType::OPTIMIZER_MATERIALIZED_CTE:
         case MetricsType::OPTIMIZER_SUM_REWRITER:
+        case MetricsType::OPTIMIZER_LATE_MATERIALIZATION:
             return true;
         default:
             return false;
diff --git a/src/duckdb/src/common/enums/optimizer_type.cpp b/src/duckdb/src/common/enums/optimizer_type.cpp
index f2555dc5..f4d02d68 100644
--- a/src/duckdb/src/common/enums/optimizer_type.cpp
+++ b/src/duckdb/src/common/enums/optimizer_type.cpp
@@ -38,6 +38,7 @@ static const DefaultOptimizerType internal_optimizer_types[] = {
     {"extension", OptimizerType::EXTENSION},
     {"materialized_cte", OptimizerType::MATERIALIZED_CTE},
     {"sum_rewriter", OptimizerType::SUM_REWRITER},
+    {"late_materialization", OptimizerType::LATE_MATERIALIZATION},
     {nullptr, OptimizerType::INVALID}};
 
 string OptimizerTypeToString(OptimizerType type) {
diff --git a/src/duckdb/src/common/local_file_system.cpp b/src/duckdb/src/common/local_file_system.cpp
index 7136dc71..89990c76 100644
--- a/src/duckdb/src/common/local_file_system.cpp
+++ b/src/duckdb/src/common/local_file_system.cpp
@@ -853,16 +853,27 @@ unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &path_p, FileOpenF
 	bool open_write = flags.OpenForWriting();
 	if (open_read && open_write) {
 		desired_access = GENERIC_READ | GENERIC_WRITE;
-		share_mode = 0;
 	} else if (open_read) {
 		desired_access = GENERIC_READ;
-		share_mode = FILE_SHARE_READ;
 	} else if (open_write) {
 		desired_access = GENERIC_WRITE;
-		share_mode = 0;
 	} else {
 		throw InternalException("READ, WRITE or both should be specified when opening a file");
 	}
+	switch (flags.Lock()) {
+	case FileLockType::NO_LOCK:
+		share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
+		break;
+	case FileLockType::READ_LOCK:
+		share_mode = FILE_SHARE_READ;
+		break;
+	case FileLockType::WRITE_LOCK:
+		share_mode = 0;
+		break;
+	default:
+		throw InternalException("Unknown FileLockType");
+	}
+
 	if (open_write) {
 		if (flags.CreateFileIfNotExists()) {
 			creation_disposition = OPEN_ALWAYS;
diff --git a/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp b/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp
index 424468c5..6581018c 100644
--- a/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp
+++ b/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp
@@ -335,6 +335,13 @@ void CSVSniffer::DetectHeader() {
 	auto &sniffer_state_machine = best_candidate->GetStateMachine();
 	names = DetectHeaderInternal(buffer_manager->context, best_header_row, sniffer_state_machine, set_columns,
 	                             best_sql_types_candidates_per_column_idx, options, *error_handler);
+	if (single_row_file && sniffer_state_machine.dialect_options.header.GetValue()) {
+		// This file only contains a header, lets default to the lowest type of all.
+		detected_types.clear();
+		for (idx_t i = 0; i < names.size(); i++) {
+			detected_types.push_back(LogicalType::BOOLEAN);
+		}
+	}
 	for (idx_t i = max_columns_found; i < names.size(); i++) {
 		detected_types.push_back(LogicalType::VARCHAR);
 	}
diff --git a/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp b/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp
index d6b2b1a6..e58b6643 100644
--- a/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp
+++ b/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp
@@ -467,6 +467,7 @@ void CSVSniffer::DetectTypes() {
 				best_format_candidates[format_candidate.first] = format_candidate.second.format;
 			}
 			if (chunk_size > 0) {
+				single_row_file = chunk_size == 1;
 				for (idx_t col_idx = 0; col_idx < data_chunk.ColumnCount(); col_idx++) {
 					auto &cur_vector = data_chunk.data[col_idx];
 					auto vector_data = FlatVector::GetData<string_t>(cur_vector);
diff --git a/src/duckdb/src/function/scalar/strftime_format.cpp b/src/duckdb/src/function/scalar/strftime_format.cpp
index 8ab46ace..58ccd995 100644
--- a/src/duckdb/src/function/scalar/strftime_format.cpp
+++ b/src/duckdb/src/function/scalar/strftime_format.cpp
@@ -1412,6 +1412,16 @@ StrpTimeFormat::ParseResult StrpTimeFormat::Parse(const string &format_string, c
 	return result;
 }
 
+bool StrpTimeFormat::TryParse(const string &format_string, const string &text, ParseResult &result) {
+	StrpTimeFormat format;
+	format.format_specifier = format_string;
+	string error = StrTimeFormat::ParseFormatSpecifier(format_string, format);
+	if (!error.empty()) {
+		throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error);
+	}
+	return format.Parse(text, result);
+}
+
 bool StrTimeFormat::Empty() const {
 	return format_specifier.empty();
 }
diff --git a/src/duckdb/src/function/table/table_scan.cpp b/src/duckdb/src/function/table/table_scan.cpp
index dafe070d..1b67d3b9 100644
--- a/src/duckdb/src/function/table/table_scan.cpp
+++ b/src/duckdb/src/function/table/table_scan.cpp
@@ -105,7 +105,7 @@ class DuckIndexScanState : public TableScanGlobalState {
 
 	//! The batch index of the next Sink.
 	//! Also determines the offset of the next chunk. I.e., offset = next_batch_index * STANDARD_VECTOR_SIZE.
-	idx_t next_batch_index;
+	atomic<idx_t> next_batch_index;
 	//! The total scanned row IDs.
 	unsafe_vector<row_t> row_ids;
 	//! The column IDs of the to-be-scanned columns.
diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp
index 0b422410..de1ba727 100644
--- a/src/duckdb/src/function/table/version/pragma_version.cpp
+++ b/src/duckdb/src/function/table/version/pragma_version.cpp
@@ -1,5 +1,5 @@
 #ifndef DUCKDB_PATCH_VERSION
-#define DUCKDB_PATCH_VERSION "4-dev4516"
+#define DUCKDB_PATCH_VERSION "4-dev4570"
 #endif
 #ifndef DUCKDB_MINOR_VERSION
 #define DUCKDB_MINOR_VERSION 1
@@ -8,10 +8,10 @@
 #define DUCKDB_MAJOR_VERSION 1
 #endif
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v1.1.4-dev4516"
+#define DUCKDB_VERSION "v1.1.4-dev4570"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "2e533ec9df"
+#define DUCKDB_SOURCE_ID "d1740d6cf7"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
diff --git a/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp b/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp
index c133faad..14389bf4 100644
--- a/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp
+++ b/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp
@@ -68,6 +68,7 @@ enum class MetricsType : uint8_t {
     OPTIMIZER_EXTENSION,
     OPTIMIZER_MATERIALIZED_CTE,
     OPTIMIZER_SUM_REWRITER,
+    OPTIMIZER_LATE_MATERIALIZATION,
 };
 
 struct MetricsTypeHashFunction {
diff --git a/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp b/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp
index cfb7acb6..adabacec 100644
--- a/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp
+++ b/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp
@@ -40,7 +40,8 @@ enum class OptimizerType : uint32_t {
 	JOIN_FILTER_PUSHDOWN,
 	EXTENSION,
 	MATERIALIZED_CTE,
-	SUM_REWRITER
+	SUM_REWRITER,
+	LATE_MATERIALIZATION
 };
 
 string OptimizerTypeToString(OptimizerType type);
diff --git a/src/duckdb/src/include/duckdb/common/helper.hpp b/src/duckdb/src/include/duckdb/common/helper.hpp
index 758207c8..5aad45ab 100644
--- a/src/duckdb/src/include/duckdb/common/helper.hpp
+++ b/src/duckdb/src/include/duckdb/common/helper.hpp
@@ -195,6 +195,12 @@ static inline T AlignValue(T n) {
 	return ((n + (val - 1)) / val) * val;
 }
 
+template<uintptr_t alignment>
+inline data_ptr_t AlignValue(data_ptr_t addr) {
+	static_assert((alignment & (alignment - 1)) == 0, "'alignment' has to be a power of 2");
+	return reinterpret_cast<data_ptr_t>((reinterpret_cast<uintptr_t>(addr) + alignment - 1) & ~(alignment - 1));
+}
+
 template<class T, T val=8>
 constexpr inline T AlignValueFloor(T n) {
 	return (n / val) * val;
diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp
index 5715d25d..5e4e6235 100644
--- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp
+++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp
@@ -236,6 +236,8 @@ class CSVSniffer {
 	                     unordered_map<idx_t, vector<LogicalType>> &best_sql_types_candidates_per_column_idx,
 	                     CSVReaderOptions &options, CSVErrorHandler &error_handler);
 	vector<string> names;
+	//! If the file only has a header
+	bool single_row_file = false;
 
 	//! ------------------------------------------------------//
 	//! ------------------ Type Replacement ----------------- //
diff --git a/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp b/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp
index 7392be25..8353ee9a 100644
--- a/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp
+++ b/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp
@@ -169,6 +169,7 @@ struct StrpTimeFormat : public StrTimeFormat { // NOLINT: work-around bug in cla
 		return format_specifier != other.format_specifier;
 	}
 	DUCKDB_API static ParseResult Parse(const string &format, const string &text);
+	DUCKDB_API static bool TryParse(const string &format, const string &text, ParseResult &result);
 
 	DUCKDB_API bool Parse(string_t str, ParseResult &result, bool strict = false) const;
 
diff --git a/src/duckdb/src/include/duckdb/function/table/table_scan.hpp b/src/duckdb/src/include/duckdb/function/table/table_scan.hpp
index 2a888a75..df4c829d 100644
--- a/src/duckdb/src/include/duckdb/function/table/table_scan.hpp
+++ b/src/duckdb/src/include/duckdb/function/table/table_scan.hpp
@@ -34,6 +34,13 @@ struct TableScanBindData : public TableFunctionData {
 		auto &other = other_p.Cast<TableScanBindData>();
 		return &other.table == &table;
 	}
+	unique_ptr<FunctionData> Copy() const override {
+		auto bind_data = make_uniq<TableScanBindData>(table);
+		bind_data->is_index_scan = is_index_scan;
+		bind_data->is_create_index = is_create_index;
+		bind_data->column_ids = column_ids;
+		return std::move(bind_data);
+	}
 };
 
 //! The table scan function represents a sequential or index scan over one of DuckDB's base tables.
diff --git a/src/duckdb/src/include/duckdb/main/client_config.hpp b/src/duckdb/src/include/duckdb/main/client_config.hpp
index 304d844f..9cedb407 100644
--- a/src/duckdb/src/include/duckdb/main/client_config.hpp
+++ b/src/duckdb/src/include/duckdb/main/client_config.hpp
@@ -120,6 +120,9 @@ struct ClientConfig {
 	//! The maximum amount of OR filters we generate dynamically from a hash join
 	idx_t dynamic_or_filter_threshold = 50;
 
+	//! The maximum amount of rows in the LIMIT/SAMPLE for which we trigger late materialization
+	idx_t late_materialization_max_rows = 50;
+
 	//! Whether the "/" division operator defaults to integer division or floating point division
 	bool integer_division = false;
 	//! When a scalar subquery returns multiple rows - return a random row instead of returning an error
diff --git a/src/duckdb/src/include/duckdb/main/config.hpp b/src/duckdb/src/include/duckdb/main/config.hpp
index 27e4b4b0..ed88f334 100644
--- a/src/duckdb/src/include/duckdb/main/config.hpp
+++ b/src/duckdb/src/include/duckdb/main/config.hpp
@@ -56,6 +56,7 @@ class CompressionInfo;
 class EncryptionUtil;
 
 struct CompressionFunctionSet;
+struct DatabaseCacheEntry;
 struct DBConfig;
 
 enum class CheckpointAbort : uint8_t {
@@ -331,6 +332,8 @@ struct DBConfig {
 	vector<unique_ptr<ExtensionCallback>> extension_callbacks;
 	//! Encryption Util for OpenSSL
 	shared_ptr<EncryptionUtil> encryption_util;
+	//! Reference to the database cache entry (if any)
+	shared_ptr<DatabaseCacheEntry> db_cache_entry;
 
 public:
 	DUCKDB_API static DBConfig &GetConfig(ClientContext &context);
diff --git a/src/duckdb/src/include/duckdb/main/database.hpp b/src/duckdb/src/include/duckdb/main/database.hpp
index 54884c56..ed956daa 100644
--- a/src/duckdb/src/include/duckdb/main/database.hpp
+++ b/src/duckdb/src/include/duckdb/main/database.hpp
@@ -77,7 +77,6 @@ class DatabaseInstance : public enable_shared_from_this<DatabaseInstance> {
 	                                                    const AttachOptions &options);
 
 	void AddExtensionInfo(const string &name, const ExtensionLoadedInfo &info);
-	void SetDatabaseCacheEntry(shared_ptr<DatabaseCacheEntry> entry);
 
 private:
 	void Initialize(const char *path, DBConfig *config);
@@ -95,7 +94,6 @@ class DatabaseInstance : public enable_shared_from_this<DatabaseInstance> {
 	unordered_map<string, ExtensionInfo> loaded_extensions_info;
 	ValidChecker db_validity;
 	unique_ptr<DatabaseFileSystem> db_file_system;
-	shared_ptr<DatabaseCacheEntry> db_cache_entry;
 	shared_ptr<LogManager> log_manager;
 
 	duckdb_ext_api_v1 (*create_api_v1)();
diff --git a/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp b/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp
index b71d07e7..5eff8254 100644
--- a/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp
+++ b/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp
@@ -18,6 +18,7 @@ namespace duckdb {
 class DBInstanceCache;
 
 struct DatabaseCacheEntry {
+	DatabaseCacheEntry();
 	explicit DatabaseCacheEntry(const shared_ptr<DuckDB> &database);
 	~DatabaseCacheEntry();
 
diff --git a/src/duckdb/src/include/duckdb/main/extension_entries.hpp b/src/duckdb/src/include/duckdb/main/extension_entries.hpp
index b0d0c0c4..037e9227 100644
--- a/src/duckdb/src/include/duckdb/main/extension_entries.hpp
+++ b/src/duckdb/src/include/duckdb/main/extension_entries.hpp
@@ -645,6 +645,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {
     {"st_zmax", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
     {"st_zmflag", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
     {"st_zmin", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
+    {"start_ui", "motherduck", CatalogType::TABLE_FUNCTION_ENTRY},
     {"starts_with", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY},
     {"stats", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY},
     {"stddev", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY},
diff --git a/src/duckdb/src/include/duckdb/main/settings.hpp b/src/duckdb/src/include/duckdb/main/settings.hpp
index 98d4d4eb..e5fe2452 100644
--- a/src/duckdb/src/include/duckdb/main/settings.hpp
+++ b/src/duckdb/src/include/duckdb/main/settings.hpp
@@ -783,6 +783,17 @@ struct IntegerDivisionSetting {
 	static Value GetSetting(const ClientContext &context);
 };
 
+struct LateMaterializationMaxRowsSetting {
+	using RETURN_TYPE = idx_t;
+	static constexpr const char *Name = "late_materialization_max_rows";
+	static constexpr const char *Description =
+	    "The maximum amount of rows in the LIMIT/SAMPLE for which we trigger late materialization";
+	static constexpr const char *InputType = "UBIGINT";
+	static void SetLocal(ClientContext &context, const Value &parameter);
+	static void ResetLocal(ClientContext &context);
+	static Value GetSetting(const ClientContext &context);
+};
+
 struct LockConfigurationSetting {
 	using RETURN_TYPE = bool;
 	static constexpr const char *Name = "lock_configuration";
diff --git a/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp b/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp
new file mode 100644
index 00000000..76f4f05e
--- /dev/null
+++ b/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp
@@ -0,0 +1,45 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// duckdb/optimizer/late_materialization.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "duckdb/common/constants.hpp"
+#include "duckdb/optimizer/remove_unused_columns.hpp"
+
+namespace duckdb {
+class LogicalOperator;
+class LogicalGet;
+class Optimizer;
+
+//! Transform
+class LateMaterialization : public BaseColumnPruner {
+public:
+	explicit LateMaterialization(Optimizer &optimizer);
+
+	unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> op);
+
+private:
+	bool TryLateMaterialization(unique_ptr<LogicalOperator> &op);
+
+	unique_ptr<LogicalGet> ConstructLHS(LogicalGet &get);
+	ColumnBinding ConstructRHS(unique_ptr<LogicalOperator> &op);
+	idx_t GetOrInsertRowId(LogicalGet &get);
+
+	void ReplaceTopLevelTableIndex(LogicalOperator &op, idx_t new_index);
+	void ReplaceTableReferences(Expression &expr, idx_t new_table_index);
+	unique_ptr<Expression> GetExpression(LogicalOperator &op, idx_t column_index);
+	void ReplaceExpressionReferences(LogicalOperator &next_op, unique_ptr<Expression> &expr);
+	bool OptimizeLargeLimit(LogicalOperator &child);
+
+private:
+	Optimizer &optimizer;
+	//! The max row count for which we will consider late materialization
+	idx_t max_row_count;
+};
+
+} // namespace duckdb
diff --git a/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp b/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp
index a05a1d72..c2e5b1fc 100644
--- a/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp
+++ b/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp
@@ -23,33 +23,18 @@ struct ReferencedColumn {
 	vector<ColumnIndex> child_columns;
 };
 
-//! The RemoveUnusedColumns optimizer traverses the logical operator tree and removes any columns that are not required
-class RemoveUnusedColumns : public LogicalOperatorVisitor {
-public:
-	RemoveUnusedColumns(Binder &binder, ClientContext &context, bool is_root = false)
-	    : binder(binder), context(context), everything_referenced(is_root) {
-	}
+class BaseColumnPruner : public LogicalOperatorVisitor {
+protected:
+	//! The map of column references
+	column_binding_map_t<ReferencedColumn> column_references;
 
-	void VisitOperator(LogicalOperator &op) override;
+protected:
 	void VisitExpression(unique_ptr<Expression> *expression) override;
 
-protected:
 	unique_ptr<Expression> VisitReplace(BoundColumnRefExpression &expr, unique_ptr<Expression> *expr_ptr) override;
 	unique_ptr<Expression> VisitReplace(BoundReferenceExpression &expr, unique_ptr<Expression> *expr_ptr) override;
 
-private:
-	Binder &binder;
-	ClientContext &context;
-	//! Whether or not all the columns are referenced. This happens in the case of the root expression (because the
-	//! output implicitly refers all the columns below it)
-	bool everything_referenced;
-	//! The map of column references
-	column_binding_map_t<ReferencedColumn> column_references;
-
-private:
-	template <class T>
-	void ClearUnusedExpressions(vector<T> &list, idx_t table_idx, bool replace = true);
-
+protected:
 	//! Add a reference to the column in its entirey
 	void AddBinding(BoundColumnRefExpression &col);
 	//! Add a reference to a sub-section of the column
@@ -63,4 +48,25 @@ class RemoveUnusedColumns : public LogicalOperatorVisitor {
 	bool HandleStructExtractRecursive(Expression &expr, optional_ptr<BoundColumnRefExpression> &colref,
 	                                  vector<idx_t> &indexes);
 };
+
+//! The RemoveUnusedColumns optimizer traverses the logical operator tree and removes any columns that are not required
+class RemoveUnusedColumns : public BaseColumnPruner {
+public:
+	RemoveUnusedColumns(Binder &binder, ClientContext &context, bool is_root = false)
+	    : binder(binder), context(context), everything_referenced(is_root) {
+	}
+
+	void VisitOperator(LogicalOperator &op) override;
+
+private:
+	Binder &binder;
+	ClientContext &context;
+	//! Whether or not all the columns are referenced. This happens in the case of the root expression (because the
+	//! output implicitly refers all the columns below it)
+	bool everything_referenced;
+
+private:
+	template <class T>
+	void ClearUnusedExpressions(vector<T> &list, idx_t table_idx, bool replace = true);
+};
 } // namespace duckdb
diff --git a/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp b/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp
index 0a73d12c..81b93acc 100644
--- a/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp
+++ b/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp
@@ -80,7 +80,7 @@ class LogicalGet : public LogicalOperator {
 	void Serialize(Serializer &serializer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(Deserializer &deserializer);
 
-	LogicalType GetRowIdType() const {
+	const LogicalType &GetRowIdType() const {
 		return rowid_type;
 	}
 
diff --git a/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp b/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp
index c2061374..da2fb571 100644
--- a/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp
+++ b/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp
@@ -273,9 +273,9 @@ struct ContainerCompressionState {
 
 public:
 	void Append(bool null, uint16_t amount = 1);
-	void OverrideArray(data_ptr_t destination, bool nulls, idx_t count);
-	void OverrideRun(data_ptr_t destination, idx_t count);
-	void OverrideUncompressed(data_ptr_t destination);
+	void OverrideArray(data_ptr_t &destination, bool nulls, idx_t count);
+	void OverrideRun(data_ptr_t &destination, idx_t count);
+	void OverrideUncompressed(data_ptr_t &destination);
 	void Finalize();
 	ContainerMetadata GetResult();
 	void Reset();
diff --git a/src/duckdb/src/main/config.cpp b/src/duckdb/src/main/config.cpp
index 286c72a3..52a9ac2d 100644
--- a/src/duckdb/src/main/config.cpp
+++ b/src/duckdb/src/main/config.cpp
@@ -126,6 +126,7 @@ static const ConfigurationOption internal_options[] = {
     DUCKDB_GLOBAL(IndexScanMaxCountSetting),
     DUCKDB_GLOBAL(IndexScanPercentageSetting),
     DUCKDB_LOCAL(IntegerDivisionSetting),
+    DUCKDB_LOCAL(LateMaterializationMaxRowsSetting),
     DUCKDB_GLOBAL(LockConfigurationSetting),
     DUCKDB_LOCAL(LogQueryPathSetting),
     DUCKDB_GLOBAL(LoggingLevel),
diff --git a/src/duckdb/src/main/database.cpp b/src/duckdb/src/main/database.cpp
index f33b48c1..bed4269d 100644
--- a/src/duckdb/src/main/database.cpp
+++ b/src/duckdb/src/main/database.cpp
@@ -68,7 +68,9 @@ DatabaseInstance::DatabaseInstance() {
 
 DatabaseInstance::~DatabaseInstance() {
 	// destroy all attached databases
-	GetDatabaseManager().ResetDatabases(scheduler);
+	if (db_manager) {
+		db_manager->ResetDatabases(scheduler);
+	}
 	// destroy child elements
 	connection_manager.reset();
 	object_cache.reset();
@@ -86,7 +88,7 @@ DatabaseInstance::~DatabaseInstance() {
 	}
 	Allocator::SetBackgroundThreads(false);
 	// after all destruction is complete clear the cache entry
-	db_cache_entry.reset();
+	config.db_cache_entry.reset();
 }
 
 BufferManager &BufferManager::GetBufferManager(DatabaseInstance &db) {
@@ -109,10 +111,6 @@ const DatabaseInstance &DatabaseInstance::GetDatabase(const ClientContext &conte
 	return *context.db;
 }
 
-void DatabaseInstance::SetDatabaseCacheEntry(shared_ptr<DatabaseCacheEntry> entry) {
-	db_cache_entry = std::move(entry);
-}
-
 DatabaseManager &DatabaseInstance::GetDatabaseManager() {
 	if (!db_manager) {
 		throw InternalException("Missing DB manager");
@@ -464,6 +462,7 @@ void DatabaseInstance::Configure(DBConfig &new_config, const char *database_path
 		                                                 config.options.buffer_manager_track_eviction_timestamps,
 		                                                 config.options.allocator_bulk_deallocation_flush_threshold);
 	}
+	config.db_cache_entry = new_config.db_cache_entry;
 }
 
 DBConfig &DBConfig::GetConfig(ClientContext &context) {
diff --git a/src/duckdb/src/main/db_instance_cache.cpp b/src/duckdb/src/main/db_instance_cache.cpp
index 062765b0..fa605979 100644
--- a/src/duckdb/src/main/db_instance_cache.cpp
+++ b/src/duckdb/src/main/db_instance_cache.cpp
@@ -3,6 +3,9 @@
 
 namespace duckdb {
 
+DatabaseCacheEntry::DatabaseCacheEntry() {
+}
+
 DatabaseCacheEntry::DatabaseCacheEntry(const shared_ptr<DuckDB> &database_p) : database(database_p) {
 }
 
@@ -91,18 +94,22 @@ shared_ptr<DuckDB> DBInstanceCache::CreateInstanceInternal(const string &databas
 	if (abs_database_path.rfind(IN_MEMORY_PATH, 0) == 0) {
 		instance_path = IN_MEMORY_PATH;
 	}
-	auto db_instance = make_shared_ptr<DuckDB>(instance_path, &config);
-	if (on_create) {
-		on_create(*db_instance);
-	}
+	shared_ptr<DatabaseCacheEntry> cache_entry;
 	if (cache_instance) {
-		// create the cache entry and attach it to the database
-		auto cache_entry = make_shared_ptr<DatabaseCacheEntry>(db_instance);
-		db_instance->instance->SetDatabaseCacheEntry(cache_entry);
+		cache_entry = make_shared_ptr<DatabaseCacheEntry>();
+		config.db_cache_entry = cache_entry;
+	}
+	auto db_instance = make_shared_ptr<DuckDB>(instance_path, &config);
+	if (cache_entry) {
+		// attach cache entry to the database
+		cache_entry->database = db_instance;
 
 		// cache the entry in the db_instances map
 		db_instances[abs_database_path] = cache_entry;
 	}
+	if (on_create) {
+		on_create(*db_instance);
+	}
 	return db_instance;
 }
 
diff --git a/src/duckdb/src/main/settings/autogenerated_settings.cpp b/src/duckdb/src/main/settings/autogenerated_settings.cpp
index 6a710696..d007da71 100644
--- a/src/duckdb/src/main/settings/autogenerated_settings.cpp
+++ b/src/duckdb/src/main/settings/autogenerated_settings.cpp
@@ -770,6 +770,23 @@ Value IntegerDivisionSetting::GetSetting(const ClientContext &context) {
 	return Value::BOOLEAN(config.integer_division);
 }
 
+//===----------------------------------------------------------------------===//
+// Late Materialization Max Rows
+//===----------------------------------------------------------------------===//
+void LateMaterializationMaxRowsSetting::SetLocal(ClientContext &context, const Value &input) {
+	auto &config = ClientConfig::GetConfig(context);
+	config.late_materialization_max_rows = input.GetValue<idx_t>();
+}
+
+void LateMaterializationMaxRowsSetting::ResetLocal(ClientContext &context) {
+	ClientConfig::GetConfig(context).late_materialization_max_rows = ClientConfig().late_materialization_max_rows;
+}
+
+Value LateMaterializationMaxRowsSetting::GetSetting(const ClientContext &context) {
+	auto &config = ClientConfig::GetConfig(context);
+	return Value::UBIGINT(config.late_materialization_max_rows);
+}
+
 //===----------------------------------------------------------------------===//
 // Lock Configuration
 //===----------------------------------------------------------------------===//
diff --git a/src/duckdb/src/optimizer/late_materialization.cpp b/src/duckdb/src/optimizer/late_materialization.cpp
new file mode 100644
index 00000000..81d4881c
--- /dev/null
+++ b/src/duckdb/src/optimizer/late_materialization.cpp
@@ -0,0 +1,414 @@
+#include "duckdb/optimizer/late_materialization.hpp"
+#include "duckdb/planner/operator/logical_comparison_join.hpp"
+#include "duckdb/planner/operator/logical_get.hpp"
+#include "duckdb/planner/operator/logical_limit.hpp"
+#include "duckdb/planner/operator/logical_order.hpp"
+#include "duckdb/planner/operator/logical_projection.hpp"
+#include "duckdb/planner/operator/logical_sample.hpp"
+#include "duckdb/planner/operator/logical_top_n.hpp"
+#include "duckdb/planner/expression/bound_columnref_expression.hpp"
+#include "duckdb/planner/binder.hpp"
+#include "duckdb/optimizer/optimizer.hpp"
+#include "duckdb/planner/expression_iterator.hpp"
+#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
+#include "duckdb/main/client_config.hpp"
+
+namespace duckdb {
+
+LateMaterialization::LateMaterialization(Optimizer &optimizer) : optimizer(optimizer) {
+	max_row_count = ClientConfig::GetConfig(optimizer.context).late_materialization_max_rows;
+}
+
+idx_t LateMaterialization::GetOrInsertRowId(LogicalGet &get) {
+	auto &column_ids = get.GetMutableColumnIds();
+	// check if it is already projected
+	for (idx_t i = 0; i < column_ids.size(); ++i) {
+		if (column_ids[i].IsRowIdColumn()) {
+			// already projected - return the id
+			return i;
+		}
+	}
+	// row id is not yet projected - push it and return the new index
+	column_ids.push_back(ColumnIndex(COLUMN_IDENTIFIER_ROW_ID));
+	if (!get.projection_ids.empty()) {
+		get.projection_ids.push_back(column_ids.size() - 1);
+	}
+	if (!get.types.empty()) {
+		get.types.push_back(get.GetRowIdType());
+	}
+	return column_ids.size() - 1;
+}
+
+unique_ptr<LogicalGet> LateMaterialization::ConstructLHS(LogicalGet &get) {
+	// we need to construct a new scan of the same table
+	auto table_index = optimizer.binder.GenerateTableIndex();
+	auto new_get = make_uniq<LogicalGet>(table_index, get.function, get.bind_data->Copy(), get.returned_types,
+	                                     get.names, get.GetRowIdType());
+	new_get->GetMutableColumnIds() = get.GetColumnIds();
+	new_get->projection_ids = get.projection_ids;
+	return new_get;
+}
+
+ColumnBinding LateMaterialization::ConstructRHS(unique_ptr<LogicalOperator> &op) {
+	// traverse down until we reach the LogicalGet
+	vector<reference<LogicalOperator>> stack;
+	reference<LogicalOperator> child = *op->children[0];
+	while (child.get().type != LogicalOperatorType::LOGICAL_GET) {
+		stack.push_back(child);
+		D_ASSERT(child.get().children.size() == 1);
+		child = *child.get().children[0];
+	}
+	// we have reached the logical get - now we need to push the row-id column (if it is not yet projected out)
+	auto &get = child.get().Cast<LogicalGet>();
+	auto row_id_idx = GetOrInsertRowId(get);
+
+	// the row id has been projected - now project it up the stack
+	ColumnBinding row_id_binding(get.table_index, row_id_idx);
+	for (idx_t i = stack.size(); i > 0; i--) {
+		auto &op = stack[i - 1].get();
+		switch (op.type) {
+		case LogicalOperatorType::LOGICAL_PROJECTION: {
+			auto &proj = op.Cast<LogicalProjection>();
+			// push a projection of the row-id column
+			proj.expressions.push_back(
+			    make_uniq<BoundColumnRefExpression>("rowid", get.GetRowIdType(), row_id_binding));
+			// modify the row-id-binding to push to the new projection
+			row_id_binding = ColumnBinding(proj.table_index, proj.expressions.size() - 1);
+			break;
+		}
+		case LogicalOperatorType::LOGICAL_FILTER:
+			// column bindings pass-through this operator as-is
+			break;
+		default:
+			throw InternalException("Unsupported logical operator in LateMaterialization::ConstructRHS");
+		}
+	}
+	return row_id_binding;
+}
+
+void LateMaterialization::ReplaceTopLevelTableIndex(LogicalOperator &root, idx_t new_index) {
+	reference<LogicalOperator> current_op = root;
+	while (true) {
+		auto &op = current_op.get();
+		switch (op.type) {
+		case LogicalOperatorType::LOGICAL_PROJECTION: {
+			// reached a projection - modify the table index and return
+			auto &proj = op.Cast<LogicalProjection>();
+			proj.table_index = new_index;
+			return;
+		}
+		case LogicalOperatorType::LOGICAL_GET: {
+			// reached the root get - modify the table index and return
+			auto &get = op.Cast<LogicalGet>();
+			get.table_index = new_index;
+			return;
+		}
+		case LogicalOperatorType::LOGICAL_TOP_N: {
+			// visit the expressions of the operator and continue into the child node
+			auto &top_n = op.Cast<LogicalTopN>();
+			for (auto &order : top_n.orders) {
+				ReplaceTableReferences(*order.expression, new_index);
+			}
+			current_op = *op.children[0];
+			break;
+		}
+		case LogicalOperatorType::LOGICAL_FILTER:
+		case LogicalOperatorType::LOGICAL_SAMPLE:
+		case LogicalOperatorType::LOGICAL_LIMIT: {
+			// visit the expressions of the operator and continue into the child node
+			for (auto &expr : op.expressions) {
+				ReplaceTableReferences(*expr, new_index);
+			}
+			current_op = *op.children[0];
+			break;
+		}
+		default:
+			throw InternalException("Unsupported operator type in LateMaterialization::ReplaceTopLevelTableIndex");
+		}
+	}
+}
+
+void LateMaterialization::ReplaceTableReferences(Expression &expr, idx_t new_table_index) {
+	if (expr.GetExpressionType() == ExpressionType::BOUND_COLUMN_REF) {
+		auto &bound_column_ref = expr.Cast<BoundColumnRefExpression>();
+		bound_column_ref.binding.table_index = new_table_index;
+	}
+
+	ExpressionIterator::EnumerateChildren(expr,
+	                                      [&](Expression &child) { ReplaceTableReferences(child, new_table_index); });
+}
+
+unique_ptr<Expression> LateMaterialization::GetExpression(LogicalOperator &op, idx_t column_index) {
+	switch (op.type) {
+	case LogicalOperatorType::LOGICAL_GET: {
+		auto &get = op.Cast<LogicalGet>();
+		auto &column_id = get.GetColumnIds()[column_index];
+		auto is_row_id = column_id.IsRowIdColumn();
+		auto column_name = is_row_id ? "rowid" : get.names[column_id.GetPrimaryIndex()];
+		auto &column_type = is_row_id ? get.GetRowIdType() : get.returned_types[column_id.GetPrimaryIndex()];
+		auto expr =
+		    make_uniq<BoundColumnRefExpression>(column_name, column_type, ColumnBinding(get.table_index, column_index));
+		return std::move(expr);
+	}
+	case LogicalOperatorType::LOGICAL_PROJECTION:
+		return op.expressions[column_index]->Copy();
+	default:
+		throw InternalException("Unsupported operator type for LateMaterialization::GetExpression");
+	}
+}
+
+void LateMaterialization::ReplaceExpressionReferences(LogicalOperator &next_op, unique_ptr<Expression> &expr) {
+	if (expr->GetExpressionType() == ExpressionType::BOUND_COLUMN_REF) {
+		auto &bound_column_ref = expr->Cast<BoundColumnRefExpression>();
+		expr = GetExpression(next_op, bound_column_ref.binding.column_index);
+		return;
+	}
+
+	ExpressionIterator::EnumerateChildren(
+	    *expr, [&](unique_ptr<Expression> &child) { ReplaceExpressionReferences(next_op, child); });
+}
+
+bool LateMaterialization::TryLateMaterialization(unique_ptr<LogicalOperator> &op) {
+	// check if we can benefit from late materialization
+	// we need to see how many columns we require in the pipeline versus how many columns we emit in the scan
+	// for example, in a query like SELECT * FROM tbl ORDER BY ts LIMIT 5, the top-n only needs the "ts" column
+	// the other columns can be fetched later on using late materialization
+	// we can only push late materialization through a subset of operators
+	// and we can only do it for scans that support the row-id pushdown (currently only DuckDB table scans)
+
+	// visit the expressions for each operator in the chain
+	vector<reference<LogicalOperator>> source_operators;
+
+	VisitOperatorExpressions(*op);
+	reference<LogicalOperator> child = *op->children[0];
+	while (child.get().type != LogicalOperatorType::LOGICAL_GET) {
+		switch (child.get().type) {
+		case LogicalOperatorType::LOGICAL_PROJECTION: {
+			// recurse into the child node - but ONLY visit expressions that are referenced
+			auto &proj = child.get().Cast<LogicalProjection>();
+			source_operators.push_back(child);
+
+			for (auto &expr : proj.expressions) {
+				if (expr->IsVolatile()) {
+					// we cannot do this optimization if any of the columns are volatile
+					return false;
+				}
+			}
+
+			// figure out which projection expressions we are currently referencing
+			set<idx_t> referenced_columns;
+			for (auto &entry : column_references) {
+				auto &column_binding = entry.first;
+				if (column_binding.table_index == proj.table_index) {
+					referenced_columns.insert(column_binding.column_index);
+				}
+			}
+			// clear the list of referenced expressions and visit those columns
+			column_references.clear();
+			for (auto &col_idx : referenced_columns) {
+				VisitExpression(&proj.expressions[col_idx]);
+			}
+			// continue into child
+			child = *child.get().children[0];
+			break;
+		}
+		case LogicalOperatorType::LOGICAL_FILTER:
+			// visit filter expressions - we need these columns
+			VisitOperatorExpressions(child.get());
+			// continue into child
+			child = *child.get().children[0];
+			break;
+		default:
+			// unsupported operator for late materialization
+			return false;
+		}
+	}
+	auto &get = child.get().Cast<LogicalGet>();
+	auto table = get.GetTable();
+	if (!table || !table->IsDuckTable()) {
+		// we can only do the late-materialization optimization for DuckDB tables currently
+		return false;
+	}
+	if (column_references.size() >= get.GetColumnIds().size()) {
+		// we do not benefit from late materialization
+		// we need all of the columns to compute the root node anyway (Top-N/Limit/etc)
+		return false;
+	}
+	// we benefit from late materialization
+	// we need to transform this plan into a semi-join with the row-id
+	// we need to ensure the operator returns exactly the same column bindings as before
+
+	// construct the LHS from the LogicalGet
+	auto lhs = ConstructLHS(get);
+	// insert the row-id column on the left hand side
+	auto &lhs_get = *lhs;
+	auto lhs_index = lhs_get.table_index;
+	auto lhs_columns = lhs_get.GetColumnIds().size();
+	auto lhs_row_idx = GetOrInsertRowId(lhs_get);
+	ColumnBinding lhs_binding(lhs_index, lhs_row_idx);
+
+	auto &row_id_type = get.GetRowIdType();
+
+	// after constructing the LHS but before constructing the RHS we construct the final projections/orders
+	// - we do this before constructing the RHS because that alter the original plan
+	vector<unique_ptr<Expression>> final_proj_list;
+	// construct the final projection list from either (1) the root projection, or (2) the logical get
+	if (!source_operators.empty()) {
+		// construct the columns from the root projection
+		auto &root_proj = source_operators[0].get();
+		for (auto &expr : root_proj.expressions) {
+			final_proj_list.push_back(expr->Copy());
+		}
+		// now we need to "flatten" the projection list by traversing the set of projections and inlining them
+		for (idx_t i = 0; i < source_operators.size(); i++) {
+			auto &next_operator = i + 1 < source_operators.size() ? source_operators[i + 1].get() : lhs_get;
+			for (auto &expr : final_proj_list) {
+				ReplaceExpressionReferences(next_operator, expr);
+			}
+		}
+	} else {
+		// if we have no projection directly construct the columns from the root get
+		for (idx_t i = 0; i < lhs_columns; i++) {
+			final_proj_list.push_back(GetExpression(lhs_get, i));
+		}
+	}
+
+	// we need to re-order again at the end
+	vector<BoundOrderByNode> final_orders;
+	auto root_type = op->type;
+	if (root_type == LogicalOperatorType::LOGICAL_TOP_N) {
+		// for top-n we need to re-order by the top-n conditions
+		auto &top_n = op->Cast<LogicalTopN>();
+		for (auto &order : top_n.orders) {
+			auto expr = order.expression->Copy();
+			final_orders.emplace_back(order.type, order.null_order, std::move(expr));
+		}
+	} else {
+		// for limit/sample we order by row-id
+		auto row_id_expr = make_uniq<BoundColumnRefExpression>("rowid", row_id_type, lhs_binding);
+		final_orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(row_id_expr));
+	}
+
+	// construct the RHS for the join
+	// this is essentially the old pipeline, but with the `rowid` column added
+	// note that the purpose of this optimization is to remove columns from the RHS
+	// we don't do that here yet though - we do this in a later step using the RemoveUnusedColumns optimizer
+	auto rhs_binding = ConstructRHS(op);
+
+	// the final table index emitted must be the table index of the original operator
+	// this ensures any upstream operators that refer to the original get will keep on referring to the correct columns
+	auto final_index = rhs_binding.table_index;
+
+	// we need to replace any references to "rhs_binding.table_index" in the rhs to a new table index
+	rhs_binding.table_index = optimizer.binder.GenerateTableIndex();
+	ReplaceTopLevelTableIndex(*op, rhs_binding.table_index);
+
+	// construct a semi join between the lhs and rhs
+	auto join = make_uniq<LogicalComparisonJoin>(JoinType::SEMI);
+	join->children.push_back(std::move(lhs));
+	join->children.push_back(std::move(op));
+	JoinCondition condition;
+	condition.comparison = ExpressionType::COMPARE_EQUAL;
+	condition.left = make_uniq<BoundColumnRefExpression>("rowid", row_id_type, lhs_binding);
+	condition.right = make_uniq<BoundColumnRefExpression>("rowid", row_id_type, rhs_binding);
+	join->conditions.push_back(std::move(condition));
+
+	// push a projection that removes the row id again from the lhs
+	// this is the final projection - so it should have the final table index
+	auto proj_index = final_index;
+	if (root_type == LogicalOperatorType::LOGICAL_TOP_N) {
+		// for top-n we need to order on expressions, so we need to order AFTER the final projection
+		auto proj = make_uniq<LogicalProjection>(proj_index, std::move(final_proj_list));
+		proj->children.push_back(std::move(join));
+
+		for (auto &order : final_orders) {
+			ReplaceTableReferences(*order.expression, proj_index);
+		}
+		auto order = make_uniq<LogicalOrder>(std::move(final_orders));
+		order->children.push_back(std::move(proj));
+
+		op = std::move(order);
+	} else {
+		// for limit/sample we order on row-id, so we need to order BEFORE the final projection
+		// because the final projection removes row-ids
+		auto order = make_uniq<LogicalOrder>(std::move(final_orders));
+		order->children.push_back(std::move(join));
+
+		auto proj = make_uniq<LogicalProjection>(proj_index, std::move(final_proj_list));
+		proj->children.push_back(std::move(order));
+
+		op = std::move(proj);
+	}
+
+	// run the RemoveUnusedColumns optimizer to prune the (now) unused columns the plan
+	RemoveUnusedColumns unused_optimizer(optimizer.binder, optimizer.context, true);
+	unused_optimizer.VisitOperator(*op);
+	return true;
+}
+
+bool LateMaterialization::OptimizeLargeLimit(LogicalOperator &child) {
+	// we only support large limits if the only
+	reference<LogicalOperator> current_op = child;
+	while (current_op.get().type != LogicalOperatorType::LOGICAL_GET) {
+		if (current_op.get().type != LogicalOperatorType::LOGICAL_PROJECTION) {
+			return false;
+		}
+		current_op = *current_op.get().children[0];
+	}
+	return true;
+}
+
+unique_ptr<LogicalOperator> LateMaterialization::Optimize(unique_ptr<LogicalOperator> op) {
+	switch (op->type) {
+	case LogicalOperatorType::LOGICAL_LIMIT: {
+		auto &limit = op->Cast<LogicalLimit>();
+		if (limit.limit_val.Type() != LimitNodeType::CONSTANT_VALUE) {
+			break;
+		}
+		if (limit.limit_val.GetConstantValue() > max_row_count) {
+			// for large limits - we may still want to do this optimization if the limit is consecutive
+			// this is the case if there are only projections/get below the limit
+			// if the row-ids are not consecutive doing the join can worsen performance
+			if (!OptimizeLargeLimit(*limit.children[0])) {
+				break;
+			}
+		}
+		if (TryLateMaterialization(op)) {
+			return op;
+		}
+		break;
+	}
+	case LogicalOperatorType::LOGICAL_TOP_N: {
+		auto &top_n = op->Cast<LogicalTopN>();
+		if (top_n.limit > max_row_count) {
+			break;
+		}
+		// for the top-n we need to visit the order elements
+		if (TryLateMaterialization(op)) {
+			return op;
+		}
+		break;
+	}
+	case LogicalOperatorType::LOGICAL_SAMPLE: {
+		auto &sample = op->Cast<LogicalSample>();
+		if (sample.sample_options->is_percentage) {
+			break;
+		}
+		if (sample.sample_options->sample_size.GetValue<uint64_t>() > max_row_count) {
+			break;
+		}
+		if (TryLateMaterialization(op)) {
+			return op;
+		}
+		break;
+	}
+	default:
+		break;
+	}
+	for (auto &child : op->children) {
+		child = Optimize(std::move(child));
+	}
+	return op;
+}
+
+} // namespace duckdb
diff --git a/src/duckdb/src/optimizer/optimizer.cpp b/src/duckdb/src/optimizer/optimizer.cpp
index d5c46621..8ac4cdd8 100644
--- a/src/duckdb/src/optimizer/optimizer.cpp
+++ b/src/duckdb/src/optimizer/optimizer.cpp
@@ -32,6 +32,7 @@
 #include "duckdb/optimizer/sum_rewriter.hpp"
 #include "duckdb/optimizer/topn_optimizer.hpp"
 #include "duckdb/optimizer/unnest_rewriter.hpp"
+#include "duckdb/optimizer/late_materialization.hpp"
 #include "duckdb/planner/binder.hpp"
 #include "duckdb/planner/planner.hpp"
 
@@ -227,6 +228,12 @@ void Optimizer::RunBuiltInOptimizers() {
 		plan = topn.Optimize(std::move(plan));
 	});
 
+	// try to use late materialization
+	RunOptimizer(OptimizerType::LATE_MATERIALIZATION, [&]() {
+		LateMaterialization late_materialization(*this);
+		plan = late_materialization.Optimize(std::move(plan));
+	});
+
 	// perform statistics propagation
 	column_binding_map_t<unique_ptr<BaseStatistics>> statistics_map;
 	RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() {
diff --git a/src/duckdb/src/optimizer/remove_unused_columns.cpp b/src/duckdb/src/optimizer/remove_unused_columns.cpp
index 3a867bdf..b197a4f5 100644
--- a/src/duckdb/src/optimizer/remove_unused_columns.cpp
+++ b/src/duckdb/src/optimizer/remove_unused_columns.cpp
@@ -23,7 +23,7 @@
 
 namespace duckdb {
 
-void RemoveUnusedColumns::ReplaceBinding(ColumnBinding current_binding, ColumnBinding new_binding) {
+void BaseColumnPruner::ReplaceBinding(ColumnBinding current_binding, ColumnBinding new_binding) {
 	auto colrefs = column_references.find(current_binding);
 	if (colrefs != column_references.end()) {
 		for (auto &colref_p : colrefs->second.bindings) {
@@ -279,6 +279,7 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
 				// Now set the projection cols by matching the "selection vector" that excludes filter columns
 				// with the "selection vector" that includes filter columns
 				idx_t col_idx = 0;
+				get.projection_ids.clear();
 				for (auto proj_sel_idx : proj_sel) {
 					for (; col_idx < col_sel.size(); col_idx++) {
 						if (proj_sel_idx == col_sel[col_idx]) {
@@ -347,8 +348,8 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
 	}
 }
 
-bool RemoveUnusedColumns::HandleStructExtractRecursive(Expression &expr, optional_ptr<BoundColumnRefExpression> &colref,
-                                                       vector<idx_t> &indexes) {
+bool BaseColumnPruner::HandleStructExtractRecursive(Expression &expr, optional_ptr<BoundColumnRefExpression> &colref,
+                                                    vector<idx_t> &indexes) {
 	if (expr.GetExpressionClass() != ExpressionClass::BOUND_FUNCTION) {
 		return false;
 	}
@@ -382,7 +383,7 @@ bool RemoveUnusedColumns::HandleStructExtractRecursive(Expression &expr, optiona
 	return true;
 }
 
-bool RemoveUnusedColumns::HandleStructExtract(Expression &expr) {
+bool BaseColumnPruner::HandleStructExtract(Expression &expr) {
 	optional_ptr<BoundColumnRefExpression> colref;
 	vector<idx_t> indexes;
 	if (!HandleStructExtractRecursive(expr, colref, indexes)) {
@@ -428,7 +429,7 @@ void MergeChildColumns(vector<ColumnIndex> &current_child_columns, ColumnIndex &
 	current_child_columns.push_back(std::move(new_child_column));
 }
 
-void RemoveUnusedColumns::AddBinding(BoundColumnRefExpression &col, ColumnIndex child_column) {
+void BaseColumnPruner::AddBinding(BoundColumnRefExpression &col, ColumnIndex child_column) {
 	auto entry = column_references.find(col.binding);
 	if (entry == column_references.end()) {
 		// column not referenced yet - add a binding to it entirely
@@ -445,7 +446,7 @@ void RemoveUnusedColumns::AddBinding(BoundColumnRefExpression &col, ColumnIndex
 	}
 }
 
-void RemoveUnusedColumns::AddBinding(BoundColumnRefExpression &col) {
+void BaseColumnPruner::AddBinding(BoundColumnRefExpression &col) {
 	auto entry = column_references.find(col.binding);
 	if (entry == column_references.end()) {
 		// column not referenced yet - add a binding to it entirely
@@ -458,7 +459,7 @@ void RemoveUnusedColumns::AddBinding(BoundColumnRefExpression &col) {
 	}
 }
 
-void RemoveUnusedColumns::VisitExpression(unique_ptr<Expression> *expression) {
+void BaseColumnPruner::VisitExpression(unique_ptr<Expression> *expression) {
 	auto &expr = **expression;
 	if (HandleStructExtract(expr)) {
 		// already handled
@@ -468,15 +469,15 @@ void RemoveUnusedColumns::VisitExpression(unique_ptr<Expression> *expression) {
 	LogicalOperatorVisitor::VisitExpression(expression);
 }
 
-unique_ptr<Expression> RemoveUnusedColumns::VisitReplace(BoundColumnRefExpression &expr,
-                                                         unique_ptr<Expression> *expr_ptr) {
+unique_ptr<Expression> BaseColumnPruner::VisitReplace(BoundColumnRefExpression &expr,
+                                                      unique_ptr<Expression> *expr_ptr) {
 	// add a reference to the entire column
 	AddBinding(expr);
 	return nullptr;
 }
 
-unique_ptr<Expression> RemoveUnusedColumns::VisitReplace(BoundReferenceExpression &expr,
-                                                         unique_ptr<Expression> *expr_ptr) {
+unique_ptr<Expression> BaseColumnPruner::VisitReplace(BoundReferenceExpression &expr,
+                                                      unique_ptr<Expression> *expr_ptr) {
 	// BoundReferenceExpression should not be used here yet, they only belong in the physical plan
 	throw InternalException("BoundReferenceExpression should not be used here yet!");
 }
diff --git a/src/duckdb/src/storage/compression/roaring/compress.cpp b/src/duckdb/src/storage/compression/roaring/compress.cpp
index 77463d9e..a7454d44 100644
--- a/src/duckdb/src/storage/compression/roaring/compress.cpp
+++ b/src/duckdb/src/storage/compression/roaring/compress.cpp
@@ -100,7 +100,7 @@ void ContainerCompressionState::Append(bool null, uint16_t amount) {
 	appended_count += amount;
 }
 
-void ContainerCompressionState::OverrideArray(data_ptr_t destination, bool nulls, idx_t count) {
+void ContainerCompressionState::OverrideArray(data_ptr_t &destination, bool nulls, idx_t count) {
 	if (nulls) {
 		append_function = AppendToArray<true>;
 	} else {
@@ -110,28 +110,31 @@ void ContainerCompressionState::OverrideArray(data_ptr_t destination, bool nulls
 	if (count >= COMPRESSED_ARRAY_THRESHOLD) {
 		memset(destination, 0, sizeof(uint8_t) * COMPRESSED_SEGMENT_COUNT);
 		array_counts[nulls] = reinterpret_cast<uint8_t *>(destination);
-		destination += sizeof(uint8_t) * COMPRESSED_SEGMENT_COUNT;
-		compressed_arrays[nulls] = reinterpret_cast<uint8_t *>(destination);
+		auto data_start = destination + sizeof(uint8_t) * COMPRESSED_SEGMENT_COUNT;
+		compressed_arrays[nulls] = reinterpret_cast<uint8_t *>(data_start);
 	} else {
+		destination = AlignValue<sizeof(uint16_t)>(destination);
 		arrays[nulls] = reinterpret_cast<uint16_t *>(destination);
 	}
 }
 
-void ContainerCompressionState::OverrideRun(data_ptr_t destination, idx_t count) {
+void ContainerCompressionState::OverrideRun(data_ptr_t &destination, idx_t count) {
 	append_function = AppendRun;
 
 	if (count >= COMPRESSED_RUN_THRESHOLD) {
 		memset(destination, 0, sizeof(uint8_t) * COMPRESSED_SEGMENT_COUNT);
 		run_counts = reinterpret_cast<uint8_t *>(destination);
-		destination += sizeof(uint8_t) * COMPRESSED_SEGMENT_COUNT;
-		compressed_runs = reinterpret_cast<uint8_t *>(destination);
+		auto data_start = destination + sizeof(uint8_t) * COMPRESSED_SEGMENT_COUNT;
+		compressed_runs = reinterpret_cast<uint8_t *>(data_start);
 	} else {
+		destination = AlignValue<sizeof(RunContainerRLEPair)>(destination);
 		runs = reinterpret_cast<RunContainerRLEPair *>(destination);
 	}
 }
 
-void ContainerCompressionState::OverrideUncompressed(data_ptr_t destination) {
+void ContainerCompressionState::OverrideUncompressed(data_ptr_t &destination) {
 	append_function = AppendBitset;
+	destination = AlignValue<sizeof(idx_t)>(destination);
 	uncompressed = reinterpret_cast<validity_t *>(destination);
 }
 
diff --git a/src/duckdb/src/storage/compression/roaring/scan.cpp b/src/duckdb/src/storage/compression/roaring/scan.cpp
index 73ef83bf..559f9677 100644
--- a/src/duckdb/src/storage/compression/roaring/scan.cpp
+++ b/src/duckdb/src/storage/compression/roaring/scan.cpp
@@ -224,6 +224,10 @@ RoaringScanState::RoaringScanState(ColumnSegment &segment) : segment(segment) {
 		container_metadata.push_back(metadata);
 		if (metadata.IsUncompressed()) {
 			position = AlignValue<idx_t>(position);
+		} else if (metadata.IsArray() && metadata.Cardinality() < COMPRESSED_ARRAY_THRESHOLD) {
+			position = AlignValue<idx_t, sizeof(uint16_t)>(position);
+		} else if (metadata.IsRun() && metadata.NumberOfRuns() < COMPRESSED_RUN_THRESHOLD) {
+			position = AlignValue<idx_t, sizeof(RunContainerRLEPair)>(position);
 		}
 		data_start_position.push_back(position);
 		position += SkipVector(metadata);
@@ -281,6 +285,7 @@ ContainerScanState &RoaringScanState::LoadContainer(idx_t container_index, idx_t
 			current_container = make_uniq<CompressedRunContainerScanState>(container_index, container_size,
 			                                                               number_of_runs, segments, data_ptr);
 		} else {
+			D_ASSERT(AlignValue<sizeof(RunContainerRLEPair)>(data_ptr) == data_ptr);
 			current_container =
 			    make_uniq<RunContainerScanState>(container_index, container_size, number_of_runs, data_ptr);
 		}
@@ -297,6 +302,7 @@ ContainerScanState &RoaringScanState::LoadContainer(idx_t container_index, idx_t
 				    container_index, container_size, cardinality, segments, data_ptr);
 			}
 		} else {
+			D_ASSERT(AlignValue<sizeof(uint16_t)>(data_ptr) == data_ptr);
 			if (metadata.IsInverted()) {
 				current_container =
 				    make_uniq<ArrayContainerScanState<NULLS>>(container_index, container_size, cardinality, data_ptr);
diff --git a/src/duckdb/src/storage/table/row_group.cpp b/src/duckdb/src/storage/table/row_group.cpp
index 38e173e8..d5250387 100644
--- a/src/duckdb/src/storage/table/row_group.cpp
+++ b/src/duckdb/src/storage/table/row_group.cpp
@@ -610,8 +610,8 @@ void RowGroup::TemplatedScan(TransactionData transaction, CollectionScanState &s
 					const auto scan_idx = filter.scan_column_index;
 					const auto column_idx = filter.table_column_index;
 
+					auto &result_vector = result.data[scan_idx];
 					if (column_idx == COLUMN_IDENTIFIER_ROW_ID) {
-
 						// We do another quick statistics scan for row ids here
 						const auto rowid_start = this->start + current_row;
 						const auto rowid_end = this->start + current_row + max_count;
@@ -624,9 +624,9 @@ void RowGroup::TemplatedScan(TransactionData transaction, CollectionScanState &s
 
 						// Generate row ids
 						// Create sequence for row ids
-						D_ASSERT(result.data[i].GetType().InternalType() == ROW_TYPE);
-						result.data[i].SetVectorType(VectorType::FLAT_VECTOR);
-						auto result_data = FlatVector::GetData<int64_t>(result.data[i]);
+						D_ASSERT(result_vector.GetType().InternalType() == ROW_TYPE);
+						result_vector.SetVectorType(VectorType::FLAT_VECTOR);
+						auto result_data = FlatVector::GetData<int64_t>(result_vector);
 						for (size_t sel_idx = 0; sel_idx < approved_tuple_count; sel_idx++) {
 							result_data[sel.get_index(sel_idx)] =
 							    UnsafeNumericCast<int64_t>(this->start + current_row + sel.get_index(sel_idx));
@@ -639,14 +639,14 @@ void RowGroup::TemplatedScan(TransactionData transaction, CollectionScanState &s
 
 						// Now apply the filter
 						UnifiedVectorFormat vdata;
-						result.data[i].ToUnifiedFormat(approved_tuple_count, vdata);
-						ColumnSegment::FilterSelection(sel, result.data[i], vdata, filter.filter, approved_tuple_count,
+						result_vector.ToUnifiedFormat(approved_tuple_count, vdata);
+						ColumnSegment::FilterSelection(sel, result_vector, vdata, filter.filter, approved_tuple_count,
 						                               approved_tuple_count);
 
 					} else {
 						auto &col_data = GetColumn(filter.table_column_index);
-						col_data.Filter(transaction, state.vector_index, state.column_scans[scan_idx],
-						                result.data[scan_idx], sel, approved_tuple_count, filter.filter);
+						col_data.Filter(transaction, state.vector_index, state.column_scans[scan_idx], result_vector,
+						                sel, approved_tuple_count, filter.filter);
 					}
 				}
 				for (auto &table_filter : filter_list) {
diff --git a/src/duckdb/ub_src_optimizer.cpp b/src/duckdb/ub_src_optimizer.cpp
index 26f4abe5..05c18f99 100644
--- a/src/duckdb/ub_src_optimizer.cpp
+++ b/src/duckdb/ub_src_optimizer.cpp
@@ -30,6 +30,8 @@
 
 #include "src/optimizer/join_filter_pushdown_optimizer.cpp"
 
+#include "src/optimizer/late_materialization.cpp"
+
 #include "src/optimizer/optimizer.cpp"
 
 #include "src/optimizer/regex_range_filter.cpp"