impl sort limit, topn and sort limit as topn

chagelo · Jan 18, 2024 · 70d8be3 · 70d8be3
1 parent c2041dc
commit 70d8be3
Show file tree

Hide file tree

Showing 7 changed files with 179 additions and 13 deletions.
diff --git a/src/execution/limit_executor.cpp b/src/execution/limit_executor.cpp
@@ -16,10 +16,23 @@ namespace bustub {
 
 LimitExecutor::LimitExecutor(ExecutorContext *exec_ctx, const LimitPlanNode *plan,
                              std::unique_ptr<AbstractExecutor> &&child_executor)
-    : AbstractExecutor(exec_ctx) {}
+    // release() hands the raw child pointer over to child_executor_, leaving the caller's unique_ptr empty.
+    : AbstractExecutor(exec_ctx), plan_(plan), child_executor_(child_executor.release()) {}
 
-void LimitExecutor::Init() { throw NotImplementedException("LimitExecutor is not implemented"); }
+/** Initializes the child executor and resets the number of tuples emitted so far to zero. */
+void LimitExecutor::Init() {
+  child_executor_->Init();
+  // Restart counting so that a repeated Init() allows a fresh run of up to GetLimit() tuples.
+  count_ = 0;
+}
 
-auto LimitExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; }
+/**
+ * Forwards the next tuple of the child executor until plan_->GetLimit() tuples have been emitted.
+ * @param[out] tuple Filled in by the child executor
+ * @param[out] rid Filled in by the child executor
+ * @return `true` if a tuple was produced, `false` once the limit is reached or the child is exhausted
+ */
+auto LimitExecutor::Next(Tuple *tuple, RID *rid) -> bool {
+  const bool limit_reached = count_ >= plan_->GetLimit();
+  // Short-circuit: the child is not pulled from at all once the limit has been reached.
+  if (limit_reached || !child_executor_->Next(tuple, rid)) {
+    return false;
+  }
+
+  ++count_;
+  return true;
+}
 
 }  // namespace bustub
diff --git a/src/execution/sort_executor.cpp b/src/execution/sort_executor.cpp
@@ -1,13 +1,59 @@
 #include "execution/executors/sort_executor.h"
+#include <algorithm>
+#include "binder/bound_order_by.h"
+#include "execution/expressions/comparison_expression.h"
+#include "storage/table/tuple.h"
 
 namespace bustub {
 
 SortExecutor::SortExecutor(ExecutorContext *exec_ctx, const SortPlanNode *plan,
                            std::unique_ptr<AbstractExecutor> &&child_executor)
-    : AbstractExecutor(exec_ctx) {}
+    // release() hands the raw child pointer over to child_executor_, leaving the caller's unique_ptr empty.
+    : AbstractExecutor(exec_ctx), plan_(plan), child_executor_(child_executor.release()) {}
 
-void SortExecutor::Init() { throw NotImplementedException("SortExecutor is not implemented"); }
+/**
+ * Initializes the child, rewinds the output cursor and, on the first call that sees any input,
+ * materializes every child tuple into sorted_tuples_ and sorts them by the plan's ORDER BY keys.
+ *
+ * A later Init() finds sorted_tuples_ non-empty and reuses the already sorted buffer.
+ */
+void SortExecutor::Init() {
+  child_executor_->Init();
+  index_ = 0;
+
+  // Already materialized and sorted by an earlier Init(): only the cursor needed resetting.
+  if (!sorted_tuples_.empty()) {
+    return;
+  }
 
-auto SortExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; }
+  Tuple tuple{};
+  RID rid{};
+  while (child_executor_->Next(&tuple, &rid)) {
+    sorted_tuples_.emplace_back(std::move(tuple));
+  }
+
+  const auto &schema = child_executor_->GetOutputSchema();
+  std::sort(sorted_tuples_.begin(), sorted_tuples_.end(), [this, &schema](const Tuple &a, const Tuple &b) {
+    for (const auto &[order_by_type, expr] : plan_->GetOrderBy()) {
+      // Every order type other than DEFAULT/ASC is treated as descending.
+      const bool ascending = (order_by_type == OrderByType::DEFAULT || order_by_type == OrderByType::ASC);
+      // Evaluate each key once per side instead of once per comparison.
+      const auto lhs = expr->Evaluate(&a, schema);
+      const auto rhs = expr->Evaluate(&b, schema);
+      if (lhs.CompareLessThan(rhs) == CmpBool::CmpTrue) {
+        return ascending;
+      }
+      if (lhs.CompareGreaterThan(rhs) == CmpBool::CmpTrue) {
+        return !ascending;
+      }
+      // Neither side compared strictly smaller on this key: fall through to the next one.
+    }
+    // No key ordered a strictly before b. std::sort requires a strict weak ordering, so the
+    // comparator must return false here; returning true for equivalent elements is undefined behaviour.
+    return false;
+  });
+}
+
+/**
+ * Emits the buffered tuples one at a time, in sorted order.
+ * @param[out] tuple Receives a copy of the next sorted tuple
+ * @param rid Not written by this executor
+ * @return `true` if a tuple was produced, `false` when nothing is buffered or all tuples were emitted
+ */
+auto SortExecutor::Next(Tuple *tuple, RID *rid) -> bool {
+  const bool nothing_buffered = sorted_tuples_.empty();
+  const bool cursor_at_end = index_ == sorted_tuples_.size();
+  if (nothing_buffered || cursor_at_end) {
+    return false;
+  }
+
+  *tuple = sorted_tuples_[index_];
+  ++index_;
+  return true;
+}
 
 }  // namespace bustub
diff --git a/src/execution/topn_executor.cpp b/src/execution/topn_executor.cpp
@@ -1,15 +1,84 @@
 #include "execution/executors/topn_executor.h"
+#include <algorithm>
+#include "storage/table/tuple.h"
 
 namespace bustub {
 
 TopNExecutor::TopNExecutor(ExecutorContext *exec_ctx, const TopNPlanNode *plan,
                            std::unique_ptr<AbstractExecutor> &&child_executor)
-    : AbstractExecutor(exec_ctx) {}
+    // release() hands the raw child pointer over to child_executor_, leaving the caller's unique_ptr empty.
+    : AbstractExecutor(exec_ctx), plan_(plan), child_executor_(child_executor.release()) {}
 
-void TopNExecutor::Init() { throw NotImplementedException("TopNExecutor is not implemented"); }
+/**
+ * Initializes the child, rewinds the output cursor and, on the first call that sees any input,
+ * collects the first plan_->GetN() tuples of the child's output under the plan's ORDER BY keys.
+ *
+ * The tuples are kept in sorted_tuples_ as a bounded heap while the child is drained, so at most
+ * GetN() + 1 tuples are buffered at any time; the heap is turned into output order at the end.
+ */
+void TopNExecutor::Init() {
+  child_executor_->Init();
+  index_ = 0;
 
-auto TopNExecutor::Next(Tuple *tuple, RID *rid) -> bool { return false; }
+  const auto limit = plan_->GetN();
+  // A parent (e.g. a join) may call Init() repeatedly. A non-empty buffer means the result was
+  // already computed, even when the child produced fewer than N tuples, so reuse it instead of
+  // appending the child's output a second time. With N == 0 there is nothing to collect.
+  if (!sorted_tuples_.empty() || limit == 0) {
+    return;
+  }
 
-auto TopNExecutor::GetNumInHeap() -> size_t { throw NotImplementedException("TopNExecutor is not implemented"); };
+  const auto &schema = child_executor_->GetOutputSchema();
+  // Returns true iff a must be emitted strictly before b.
+  const auto comes_before = [this, &schema](const Tuple &a, const Tuple &b) {
+    for (const auto &[order_by_type, expr] : plan_->GetOrderBy()) {
+      // Every order type other than DEFAULT/ASC is treated as descending.
+      const bool ascending = (order_by_type == OrderByType::DEFAULT || order_by_type == OrderByType::ASC);
+      const auto lhs = expr->Evaluate(&a, schema);
+      const auto rhs = expr->Evaluate(&b, schema);
+      if (lhs.CompareLessThan(rhs) == CmpBool::CmpTrue) {
+        return ascending;
+      }
+      if (lhs.CompareGreaterThan(rhs) == CmpBool::CmpTrue) {
+        return !ascending;
+      }
+    }
+    // Equivalent on every key: a strict weak ordering must report false, otherwise the
+    // standard heap/sort algorithms have undefined behaviour.
+    return false;
+  };
+
+  Tuple tuple{};
+  RID rid{};
+  while (child_executor_->Next(&tuple, &rid)) {
+    sorted_tuples_.emplace_back(std::move(tuple));
+    // Max-heap under comes_before: the front is the tuple that would be emitted last.
+    std::push_heap(sorted_tuples_.begin(), sorted_tuples_.end(), comes_before);
+    if (sorted_tuples_.size() > limit) {
+      // Evict the worst-ranked tuple so that only the best N remain.
+      std::pop_heap(sorted_tuples_.begin(), sorted_tuples_.end(), comes_before);
+      sorted_tuples_.pop_back();
+    }
+  }
+  // Convert the heap into ascending comes_before order, i.e. the order Next() emits.
+  std::sort_heap(sorted_tuples_.begin(), sorted_tuples_.end(), comes_before);
+}
+
+/**
+ * Emits the buffered top-N tuples one at a time, in output order.
+ * @param[out] tuple Receives a copy of the next buffered tuple
+ * @param rid Not written by this executor
+ * @return `true` if a tuple was produced, `false` when the buffer is empty or fully consumed
+ */
+auto TopNExecutor::Next(Tuple *tuple, RID *rid) -> bool {
+  // Also covers the empty buffer: index_ (0) >= size (0).
+  if (index_ >= sorted_tuples_.size()) {
+    return false;
+  }
+
+  *tuple = sorted_tuples_[index_++];
+  return true;
+}
+
+/** @return The number of tuples currently held in sorted_tuples_. */
+auto TopNExecutor::GetNumInHeap() -> size_t { return sorted_tuples_.size(); }
+
+// Commented-out comparator for a std::priority_queue-based variant (not compiled).
+// TopNExecutor::Compare::Compare(const std::vector<std::pair<OrderByType, AbstractExpressionRef>> *order_bys,
+//                                const Schema *schema)
+//     : order_bys_(order_bys), schema_(schema) {}
+
+// auto TopNExecutor::Compare::operator()(const Tuple &a, const Tuple &b) -> bool {
+//   for (auto [order_by_type, expr] : *order_bys_) {
+//     bool default_order_by = (order_by_type == OrderByType::DEFAULT || order_by_type == OrderByType::ASC);
+//     if (expr->Evaluate(&a, *schema_).CompareLessThan(expr->Evaluate(&b, *schema_)) == CmpBool::CmpTrue) {
+//       return default_order_by;
+//     }
+//     if (expr->Evaluate(&a, *schema_).CompareGreaterThan(expr->Evaluate(&b, *schema_)) == CmpBool::CmpTrue) {
+//       return !default_order_by;
+//     }
+//   }
+//   return true;
+// }
 
 }  // namespace bustub
diff --git a/src/include/execution/executors/limit_executor.h b/src/include/execution/executors/limit_executor.h
@@ -53,5 +53,6 @@ class LimitExecutor : public AbstractExecutor {
   const LimitPlanNode *plan_;
   /** The child executor from which tuples are obtained */
   std::unique_ptr<AbstractExecutor> child_executor_;
+  std::size_t count_{0};
 };
 }  // namespace bustub
diff --git a/src/include/execution/executors/sort_executor.h b/src/include/execution/executors/sort_executor.h
@@ -52,5 +52,8 @@ class SortExecutor : public AbstractExecutor {
  private:
   /** The sort plan node to be executed */
   const SortPlanNode *plan_;
+  std::unique_ptr<AbstractExecutor> child_executor_;
+  std::vector<Tuple> sorted_tuples_{};
+  uint32_t index_{0};
 };
 }  // namespace bustub
diff --git a/src/include/execution/executors/topn_executor.h b/src/include/execution/executors/topn_executor.h
@@ -12,18 +12,22 @@
 
 #pragma once
 
+#include <functional>
 #include <memory>
+#include <queue>
 #include <utility>
 #include <vector>
 
+#include "binder/bound_order_by.h"
+#include "catalog/schema.h"
 #include "execution/executor_context.h"
 #include "execution/executors/abstract_executor.h"
 #include "execution/plans/seq_scan_plan.h"
 #include "execution/plans/topn_plan.h"
 #include "storage/table/tuple.h"
 
 namespace bustub {
-
+  
 /**
  * The TopNExecutor executor executes a topn.
  */
@@ -58,10 +62,23 @@ class TopNExecutor : public AbstractExecutor {
   /** @return The size of top_entries_ container, which will be called on each child_executor->Next(). */
   auto GetNumInHeap() -> size_t;
 
+  // auto Com(const Tuple &a, const Tuple &b) -> bool;
+  // class Compare {
+  //  public:
+  //   const std::vector<std::pair<OrderByType, AbstractExpressionRef>> *order_bys_;
+  //   const Schema *schema_;
+  //   Compare(const std::vector<std::pair<OrderByType, AbstractExpressionRef>> *order_bys, const Schema *schema);
+  //   auto operator()(const Tuple &a, const Tuple &b) -> bool;
+  // };
+
  private:
   /** The topn plan node to be executed */
   const TopNPlanNode *plan_;
   /** The child executor from which tuples are obtained */
   std::unique_ptr<AbstractExecutor> child_executor_;
+  std::vector<Tuple>sorted_tuples_;
+  // std::priority_queue<Tuple, std::vector<Tuple>, Compare> tuple_queue_;
+  uint32_t index_{0};
 };
+
 }  // namespace bustub
diff --git a/src/optimizer/sort_limit_as_topn.cpp b/src/optimizer/sort_limit_as_topn.cpp
@@ -1,10 +1,27 @@
+#include "execution/plans/sort_plan.h"
+#include "execution/plans/topn_plan.h"
+#include "execution/plans/limit_plan.h"
 #include "optimizer/optimizer.h"
 
 namespace bustub {
 
 auto Optimizer::OptimizeSortLimitAsTopN(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef {
-  // TODO(student): implement sort + limit -> top N optimizer rule
-  return plan;
+  // Apply the rule bottom-up: rewrite every subtree first, then rebuild this node on top of them.
+  std::vector<AbstractPlanNodeRef> rewritten_children;
+  for (const auto &subplan : plan->GetChildren()) {
+    rewritten_children.emplace_back(OptimizeSortLimitAsTopN(subplan));
+  }
+  auto rewritten = plan->CloneWithChildren(std::move(rewritten_children));
+
+  // Only a Limit node is a candidate for the rewrite.
+  if (rewritten->GetType() != PlanType::Limit) {
+    return rewritten;
+  }
+  const auto &limit_node = dynamic_cast<const LimitPlanNode &>(*rewritten);
+  BUSTUB_ENSURE(limit_node.children_.size() == 1, "SLAT should have exactly 1 children.");
+
+  // ... and only when it sits directly on top of a Sort node.
+  const auto limit_input = limit_node.GetChildAt(0);
+  if (limit_input->GetType() != PlanType::Sort) {
+    return rewritten;
+  }
+  const auto &sort_node = dynamic_cast<const SortPlanNode &>(*limit_input);
+
+  // Limit(Sort(x)) -> TopN(x): keep the limit's output schema, the sort's keys and the limit's count.
+  return std::make_shared<TopNPlanNode>(limit_node.output_schema_, sort_node.GetChildPlan(), sort_node.GetOrderBy(),
+                                        limit_node.GetLimit());
 }
 
 }  // namespace bustub