relational_operators/SortMergeRunOperatorHelpers.hpp

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 **/

#ifndef QUICKSTEP_RELATIONAL_OPERATORS_SORT_MERGE_RUN_OPERATOR_HELPERS_HPP_
#define QUICKSTEP_RELATIONAL_OPERATORS_SORT_MERGE_RUN_OPERATOR_HELPERS_HPP_

#include <algorithm>
#include <cstddef>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "catalog/CatalogTypedefs.hpp"
#include "expressions/scalar/Scalar.hpp"
#include "storage/InsertDestination.hpp"
#include "storage/StorageBlock.hpp"
#include "storage/StorageBlockInfo.hpp"
#include "storage/StorageManager.hpp"
#include "storage/TupleStorageSubBlock.hpp"
#include "storage/ValueAccessor.hpp"
#include "storage/ValueAccessorUtil.hpp"
#include "threading/SpinMutex.hpp"
#include "types/Type.hpp"
#include "types/containers/Tuple.hpp"
#include "types/operations/comparisons/Comparison.hpp"
#include "types/operations/comparisons/ComparisonFactory.hpp"
#include "types/operations/comparisons/ComparisonID.hpp"
#include "utility/Macros.hpp"
#include "utility/PtrVector.hpp"
#include "utility/SortConfiguration.hpp"

#include "glog/logging.h"

namespace quickstep {

class CatalogRelationSchema;

namespace merge_run_operator {

/**
 * @addtogroup SortMergeRun
 * @{
 */

/**
 * @brief A run of sorted blocks, represented (for now) as an ordered list of
 * block_ids.
 **/
using Run = std::vector<block_id>;

/**
 * @brief Class to store the merge tree of the sorting process, and to produce
 * merge jobs.
 *
 * @note addInputBlocks() and writeOutputRun() guard \c pending_ with an
 *       internal spin mutex.
 **/
class MergeTree {
 public:
  /**
   * @brief Structure to communicate the merge jobs with the merge operator.
   **/
  struct MergeJob {
    /**
     * @brief Constructor.
     *
     * @param _level Merge level of this job.
     * @param _is_final_level Whether this job belongs to the final level.
     * @param _runs Input runs of the job. Moved into the job.
     **/
    MergeJob(std::size_t _level, bool _is_final_level, std::vector<Run> &&_runs)
        : runs(std::move(_runs)),
          level(_level),
          is_final_level(_is_final_level) {}

    std::vector<Run> runs;
    std::size_t level;
    bool is_final_level;
  };

  /**
   * @brief Constructor for merge tree.
   *
   * The level bookkeeping starts in a well-defined "not yet initialized"
   * state (no levels, unknown final level) instead of holding indeterminate
   * values until initializeTree() or initializeForPipeline() is called.
   *
   * @param merge_factor Merge factor of the merge tree.
   **/
  explicit MergeTree(const std::size_t merge_factor)
      : num_levels_(0),
        final_level_(kFinalLevelUninitialized),
        cur_level_(0),
        merge_factor_(merge_factor) {}

  /**
   * @brief Initialize the merge tree. Merge tree is dependent on the size of
   * the input runs.
   *
   * @param initial_runs Number of runs in the input. Currently, the number of
   *        blocks in the input relation.
   *
   * @note This can be called after \c initializeForPipeline when the input
   *       runs size is finalized.
   **/
  void initializeTree(const std::size_t initial_runs);

  /**
   * @brief Initialize the merge tree for pipelining. When the number of input
   * blocks is unknown upfront (in the case of pipelining), we initialize a
   * minimal merge tree supporting the first pass of merge alone.
   **/
  void initializeForPipeline();

  /**
   * @brief Check if the final merge for the merge tree is already scheduled,
   * and, if so, update the tree to generate a job to copy the final run into
   * correct output destination.
   *
   * @warning This is only supposed to be invoked when the final size of input
   * relation is computed, and the tree is initialized to this known size.
   **/
  void checkAndFixFinalMerge();

  /**
   * @brief Get merge jobs that can be executed at the moment. In a multi-pass
   * merge, there are situations where the merge tree can only schedule more
   * work once the currently executing merge jobs have produced their output.
   *
   * @param jobs The generated merge jobs are written to this vector.
   *
   * @return \c true if the final job was scheduled.
   **/
  bool getMergeJobs(std::vector<MergeJob> *jobs);

  /**
   * @brief Add input blocks to the merge tree. Each block becomes a
   * single-block run pending at level 0.
   *
   * @param blocks Blocks to add as input to merge tree.
   **/
  inline void addInputBlocks(const std::vector<block_id> &blocks) {
    SpinMutexLock lock(pending_mutex_);
    // Level 0 must exist, i.e. the tree must have been initialized.
    DCHECK(!pending_.empty());
    for (const block_id block : blocks) {
      // Constructs a Run holding exactly one element: 'block'.
      pending_[0].emplace_back(1, block);
    }
  }

  /**
   * @brief Write the output run of a merge job to the merge tree. This run
   * could be input to further merge jobs.
   *
   * @param merge_level Merge level that generated this output run. (It is
   *        0-indexed, and 0 indicates the first pass of the merge.)
   * @param output_run Output run of the merge. Its contents are moved into
   *        the tree.
   **/
  inline void writeOutputRun(const std::size_t merge_level, Run *output_run) {
    DCHECK(output_run != nullptr);
    SpinMutexLock lock(pending_mutex_);
    // The output of level L becomes pending input of level L + 1.
    DCHECK(merge_level + 1 < pending_.size());
    pending_[merge_level + 1].emplace_back(std::move(*output_run));
  }

 private:
  // Value to signify final level is unknown.
  static constexpr std::size_t kFinalLevelUninitialized =
      std::numeric_limits<std::size_t>::max();

  // Moves 'num_runs' runs from the back of the pending list of 'level' into
  // '*runs'. Does not acquire pending_mutex_ itself.
  // NOTE(review): presumably callers already hold pending_mutex_ -- confirm
  // against the out-of-line callers.
  void getRuns(const std::size_t level,
               const std::size_t num_runs,
               std::vector<Run> *runs) {
    DCHECK(runs != nullptr);
    DCHECK(level < pending_.size());
    DCHECK(num_runs <= pending_[level].size());
    for (std::size_t i = 0; i < num_runs; ++i) {
      runs->push_back(std::move(pending_[level].back()));
      pending_[level].pop_back();
    }
  }

  // Merge tree is computed bottom up. Level-0 has the initial input runs to the
  // operator (at present each run is a block). Subsequent levels are used to
  // store runs obtained by merging the previous level.

  std::vector<std::size_t> runs_scheduled_;  // Runs scheduled in each level.
  std::vector<std::size_t> runs_expected_;   // Runs expected in each level.
  mutable SpinMutex pending_mutex_;  // Mutex to control access to pending_.
  std::vector<std::vector<merge_run_operator::Run>>
      pending_;              // Runs pending scheduling in each level.
  std::size_t num_levels_;   // Number of levels in the merge tree.
  std::size_t final_level_;  // Index (0-based) of the final merge level.
  std::size_t cur_level_;    // Current merge level where jobs are unscheduled.
  const std::size_t merge_factor_;  // Merge factor of the merges.

  DISALLOW_COPY_AND_ASSIGN(MergeTree);
};

/**
 * @brief Builds a run by appending tuples, in order, to blocks obtained from
 * an InsertDestination.
 *
 * @warning Assumes the InsertDestination hands out StorageBlocks that keep
 *          tuples in the order in which they were appended.
 **/
class RunCreator {
 public:
  /**
   * @brief Constructor. Immediately fetches the first output block.
   *
   * @param run Run to which the IDs of the created blocks are appended.
   * @param output_destination The InsertDestination that holds the blocks of
   *        the run.
   **/
  RunCreator(Run *run, InsertDestination *output_destination)
      : run_(run), output_destination_(output_destination) {
    createNewBlock();
  }

  /**
   * @brief Destructor. Flushes the current block, if any.
   **/
  ~RunCreator() {
    flushBlock();
  }

  /**
   * @brief Append tuple to run, moving on to a fresh block whenever the
   * current one cannot take the tuple.
   *
   * @param tuple Tuple to insert into the run.
   *
   * @return true if a new block was created.
   **/
  inline bool appendTuple(const Tuple &tuple) {
    // TODO(shoban): We should use a non-virtual call to insert tuple to storage
    // block for better performance. Note that we will know the storage
    // implementation apriori.
    if (storage_block_->insertTupleInBatch(tuple)) {
      // Common case: the tuple fit into the current block.
      return false;
    }
    do {
      // Hand the current block back as full and retry on a new one.
      output_destination_->returnBlock(std::move(storage_block_), true);
      createNewBlock();
    } while (!storage_block_->insertTupleInBatch(tuple));
    return true;
  }

  /**
   * @brief Flush the block to destination.
   *
   * @warning No appends can happen after this.
   **/
  void flushBlock() {
    if (!storage_block_.valid()) {
      // Already flushed; nothing to hand back.
      return;
    }

    const bool has_tuples =
        !storage_block_->getTupleStorageSubBlock().isEmpty();
    // A block without tuples is returned as "not full" so that the
    // InsertDestination can reuse it.
    output_destination_->returnBlock(std::move(storage_block_), has_tuples);
    if (!has_tuples) {
      // The unused block must not be part of the run.
      run_->pop_back();
    }
    storage_block_.release();
    DCHECK(!storage_block_.valid());
  }

 private:
  // Fetches a new block from the destination, makes it the current block and
  // records its ID at the end of the run.
  inline void createNewBlock() {
    storage_block_ = output_destination_->getBlockForInsertion();
    DCHECK(storage_block_->getTupleStorageSubBlock().isInsertOrderPreserving())
        << kTupleStorageSubBlockTypeNames[
               storage_block_->getTupleStorageSubBlock()
                   .getTupleStorageSubBlockType()]
        << " is not insert order preserving.";
    run_->push_back(storage_block_->getID());
  }

  Run *run_;  // Output run.
  InsertDestination *output_destination_;  // Insert destination to generate
                                           // output blocks.
  MutableBlockReference storage_block_;  // Reference to current block.

  DISALLOW_COPY_AND_ASSIGN(RunCreator);
};

/**
 * @brief Iterator over the tuples of a run, walking its blocks in order.
 *
 * @warning Assumes all blocks in the run belong to same TupleStorageSubBlock
 *          type so that the specific ValueAccessor implementation can be
 *          directly used.
 * @warning Assumes no wrappers (like TupleIdSequence or OrderedTupleIdSequence)
 *          are to be applied.
 **/
template <typename ValueAccessorT>
class RunIterator {
 public:
  /**
   * @brief Constructor. Loads the accessor of the first block, if any.
   *
   * @param run Run to iterate on. Held by reference.
   * @param storage_manager Storage manager.
   * @param input_relation Relation that the blocks in run belong to.
   **/
  RunIterator(const Run &run,
              StorageManager *storage_manager,
              const CatalogRelationSchema &input_relation)
      : run_(run),
        run_it_(run_.begin()),
        accessor_(nullptr),
        input_relation_(input_relation),
        storage_manager_(storage_manager) {
    loadAccessor();
  }

  /**
   * @brief Const ValueAccessor to read the tuple at the current position.
   **/
  inline const ValueAccessorT* getValueAccessor() {
    return accessor_.get();
  }

  /**
   * @brief Move to the next tuple, crossing over to the following block(s)
   * of the run when the current block is exhausted.
   *
   * @return \c true if there is a next tuple, \c false otherwise.
   **/
  bool next() {
    while (run_it_ != run_.end()) {
      if (accessor_->next()) {
        return true;
      }
      // Current block is exhausted: step to the next block of the run.
      ++run_it_;
      loadAccessor();
    }
    return false;
  }

 private:
  // Points the accessor at the block under 'run_it_', or drops the accessor
  // once the end of the run has been reached.
  void loadAccessor() {
    if (run_it_ != run_.end()) {
      block_ =
          BlockReference(storage_manager_->getBlock(*run_it_, input_relation_));
      accessor_.reset(static_cast<ValueAccessorT *>(
          block_->getTupleStorageSubBlock().createValueAccessor()));
      return;
    }
    accessor_.reset();
  }

  const Run &run_;  // Run being iterated upon.
  Run::const_iterator run_it_;  // Position in the run (current block).
  BlockReference block_;  // Reference to current block (i.e., run iterator).
  std::unique_ptr<ValueAccessorT> accessor_;  // ValueAccessor of the current
                                              // block.
  const CatalogRelationSchema &input_relation_;  // Schema of input relation.
  StorageManager *storage_manager_;  // Storage manager.

  DISALLOW_COPY_AND_ASSIGN(RunIterator);
};

/**
 * @brief Merges a set of runs into a single output run.
 **/
class RunMerger {
 public:
  /**
   * @brief Constructor.
   *
   * @param sort_config Sort configuration. Held by reference.
   * @param runs Vector of runs. R-valued. Moves and takes ownership of runs.
   * @param top_k Only keep the top-k results of the merge. All the tuples are
   *        kept if \c top_k is 0.
   * @param run_relation Relation that runs belong to.
   * @param output_destination The InsertDestination that stores the merge run.
   * @param level Merge level of this merge.
   * @param storage_manager Storage manager to use.
   **/
  RunMerger(const SortConfiguration &sort_config,
            std::vector<Run> &&runs,
            std::size_t top_k,
            const CatalogRelationSchema &run_relation,
            InsertDestination *output_destination,
            const std::size_t level,
            StorageManager *storage_manager)
      : sort_config_(sort_config),
        input_runs_(std::move(runs)),
        top_k_(top_k),
        // output_run_ is default-constructed (empty); it is declared before
        // output_run_creator_, which records block IDs into it.
        output_run_creator_(&output_run_, output_destination),
        run_relation_(run_relation),
        level_(level),
        storage_manager_(storage_manager) {}

  /**
   * @brief Merges the runs. This will use a specialized merge implementation
   * depending on the sort configuration.
   **/
  void doMerge();

  /**
   * @brief Get a mutable pointer to the merged output run.
   **/
  Run* getOutputRunMutable() {
    return &output_run_;
  }

  /**
   * @brief Get the merge level.
   **/
  std::size_t getMergeLevel() const {
    return level_;
  }

 private:
  // Generic heap-based merge implementation. Comparator takes care of NULL.
  // This is the default for any number of ORDER BY columns.
  // '*first_accessor' is an accessor on the first input block, used only to
  // determine the ValueAccessor type used to access all the blocks in the
  // runs.
  template <bool check_top_k>
  void mergeGeneric(ValueAccessor *first_accessor);

  // Merge implementation specialized for a single-column ORDER BY with NULL
  // FIRST. Since the NULLs (if any) occur at the beginning, we cycle through
  // all the runs first and write out the NULLs, before doing the merge. This
  // makes the comparator have no branches. As above, '*first_accessor' is used
  // only to determine the ValueAccessor type.
  template <bool check_top_k>
  void mergeSingleColumnNullFirst(ValueAccessor *first_accessor);

  // Merge implementation specialized for a single-column ORDER BY with NULL
  // LAST. Since the NULLs (if any) occur at the end of the runs, we use the
  // heap to sort all non-NULL values first and then cycle through the runs to
  // write out the NULLs. As above, '*first_accessor' is used only to determine
  // the ValueAccessor type.
  template <bool check_top_k>
  void mergeSingleColumnNullLast(ValueAccessor *first_accessor);

  // Trivial implementation to copy a run.
  template <bool check_top_k>
  void copyToOutput(const Run &run, ValueAccessor *first_accessor);

  const SortConfiguration &sort_config_;
  std::vector<Run> input_runs_;
  const std::size_t top_k_;
  Run output_run_;
  RunCreator output_run_creator_;
  const CatalogRelationSchema &run_relation_;
  const std::size_t level_;
  StorageManager *storage_manager_;

  DISALLOW_COPY_AND_ASSIGN(RunMerger);
};

// ----------------------------------------------------------------------------
// Helper types (heap nodes and comparators) used by the RunMerger merge
// methods follow.

/**
 * @brief Heap node referring to one run during a heap-based merge. Holds the
 * ID of the run and the accessor through which the tuple currently at the head
 * of that run is read.
 *
 * @note The accessor is held as a non-owning pointer to const.
 **/
template <typename ValueAccessorT>
struct GenericHeapNode {
  // ID of the run this node stands for.
  std::size_t run_id;
  // Accessor used by the heap comparator to read the ORDER BY values of the
  // run's current tuple.
  const ValueAccessorT *value_accessor;
};

/**
 * @brief Simple struct to hold the comparators and sort configuration of a
 * single ORDER BY column.
 **/
struct ColumnComparator {
  /**
   * @brief Constructor for ColumnComparator.
   *
   * @param comp_id Comparison type of the column. (kGreater/kLess).
   * @param null_ordering NULL value ordering of this column.
   * @param type Type of this column.
   * @param attr_id Attribute ID of this column in the value-accessor.
   **/
  ColumnComparator(ComparisonID comp_id,
                   const bool null_ordering,
                   const Type &type,
                   const attribute_id attr_id)
      : comparator_(ComparisonFactory::GetComparison(comp_id)
                        .makeUncheckedComparatorForTypes(type, type)),
        equal_(ComparisonFactory::GetComparison(ComparisonID::kEqual)
                   .makeUncheckedComparatorForTypes(type, type)),
        null_ordering_(null_ordering),
        attr_id_(attr_id) {}

  const std::unique_ptr<UncheckedComparator> comparator_;
  const std::unique_ptr<UncheckedComparator> equal_;
  const bool null_ordering_;
  const attribute_id attr_id_;
};

/**
 * @brief Internal data structure for generic comparator to be used in a
 * max-heap to merge sorted runs. This internal structure exists so that we can
 * pass a const reference to this instead of deep copying comparators of all
 * columns.
 **/
class GenericHeapComparatorInternal {
 public:
  /**
   * @brief Constructor.
   *
   * @param sort_config Sort configuration.
   **/
  explicit GenericHeapComparatorInternal(const SortConfiguration &sort_config) {
    DCHECK(sort_config.isValid());

    const PtrVector<Scalar> &order_by = sort_config.getOrderByList();
    const std::vector<bool> &ordering = sort_config.getOrdering();
    const std::vector<bool> &null_ordering = sort_config.getNullOrdering();

    PtrVector<Scalar>::const_iterator order_by_it = order_by.begin();
    std::vector<bool>::const_iterator ordering_it = ordering.begin();
    std::vector<bool>::const_iterator null_ordering_it = null_ordering.begin();

    for (; order_by_it != order_by.end();
         ++order_by_it, ++ordering_it, ++null_ordering_it) {
      const attribute_id attr_id =
          order_by_it->getAttributeIdForValueAccessor();
      const Type &type = order_by_it->getType();
      DCHECK_NE(attr_id, -1);
      if (*ordering_it == kSortAscending) {
        columns_.push_back(new ColumnComparator(
            ComparisonID::kLess, *null_ordering_it, type, attr_id));
      } else {
        columns_.push_back(new ColumnComparator(
            ComparisonID::kGreater, *null_ordering_it, type, attr_id));
      }
    }
  }

  /**
   * @brief Constructor.
   *
   * @param sort_config Sort configuration.
   * @param attr_ids Attribute IDs for ORDER BY columns. This can be used when
   *        ORDER BY columns are ScalarExpression, and they are mapped to
   *        ColumnValueAccessor.
   **/
  GenericHeapComparatorInternal(const SortConfiguration &sort_config,
                                const std::vector<attribute_id> &attr_ids) {
    DCHECK(sort_config.isValid());
    DCHECK(sort_config.getOrderByList().size() == attr_ids.size());

    const PtrVector<Scalar> &order_by = sort_config.getOrderByList();
    const std::vector<bool> &ordering = sort_config.getOrdering();
    const std::vector<bool> &null_ordering = sort_config.getNullOrdering();

    PtrVector<Scalar>::const_iterator order_by_it = order_by.begin();
    std::vector<attribute_id>::const_iterator attr_id_it = attr_ids.begin();
    std::vector<bool>::const_iterator ordering_it = ordering.begin();
    std::vector<bool>::const_iterator null_ordering_it = null_ordering.begin();

    for (; order_by_it != order_by.end();
         ++order_by_it, ++attr_id_it, ++ordering_it, ++null_ordering_it) {
      const Type &type = order_by_it->getType();
      if (*ordering_it == kSortAscending) {
        columns_.push_back(new ColumnComparator(
            ComparisonID::kLess, *null_ordering_it, type, *attr_id_it));
      } else {
        columns_.push_back(new ColumnComparator(
            ComparisonID::kGreater, *null_ordering_it, type, *attr_id_it));
      }
    }
  }

  /**
   * @brief Get the vector of column comparators.
   **/
  inline const PtrVector<ColumnComparator> &getColumnComparators() const {
    return columns_;
  }

 private:
  PtrVector<ColumnComparator> columns_;

  DISALLOW_COPY_AND_ASSIGN(GenericHeapComparatorInternal);
};

/**
 * @brief Generic comparator for the heap used when merging runs. It orders
 * nodes with a "greater than" relation, so that the standard heap algorithms
 * (which build a max-heap with respect to the comparator) behave as a
 * min-heap in sort order.
 **/
template <typename ValueAccessorT>
class GenericHeapComparator {
 public:
  /**
   * @brief Constructor.
   *
   * @param internal Internal generic comparator instance initialized with sort
   *        configuration. Only a reference to its columns is kept.
   **/
  explicit GenericHeapComparator(const GenericHeapComparatorInternal &internal)
      : columns_(internal.getColumnComparators()) {}

  /**
   * @brief Comparison operator().
   *
   * @param left Left heap node operand in the comparison.
   * @param right Right heap node operand in the comparison.
   *
   * @return \c true if the tuple of \p left sorts after the tuple of
   *         \p right, \c false otherwise (including when all ORDER BY
   *         columns are equal).
   **/
  bool operator()(const GenericHeapNode<ValueAccessorT> &left,
                  const GenericHeapNode<ValueAccessorT> &right) {
    // This needs greater than comparator to implement min-heap using
    // std::make_heap, std::push_heap, std::pop_heap.

    // TODO(quickstep-team): This class is not specialized for
    // nullable/non-nullable attributes, since we do not know at compile time
    // the nullability of ORDER BY expressions. If all the ORDER BY expressions
    // are non-nullable, we can have a specialization.
    for (const ColumnComparator &column : columns_) {
      const void *lhs = left.value_accessor->getUntypedValue(column.attr_id_);
      const void *rhs = right.value_accessor->getUntypedValue(column.attr_id_);

      if ((lhs == nullptr) && (rhs == nullptr)) {
        // Both are NULL: this column does not decide.
        continue;
      }

      if ((lhs == nullptr) || (rhs == nullptr)) {
        // Exactly one side is NULL. With NULL LAST, 'left' sorts after
        // 'right' iff 'left' is the NULL one (i.e. 'right' has a value); with
        // NULL FIRST, iff 'right' is the NULL one (i.e. 'left' has a value).
        const bool after_with_null_last =
            (column.null_ordering_ == kSortNullLast) && (rhs != nullptr);
        const bool after_with_null_first =
            (column.null_ordering_ == kSortNullFirst) && (lhs != nullptr);
        return after_with_null_last || after_with_null_first;
      }

      // Both values are present.
      if (column.comparator_->compareDataPtrs(lhs, rhs)) {
        return false;
      }
      if (!column.equal_->compareDataPtrs(lhs, rhs)) {
        return true;
      }
      // Equal on this column: fall through to the next one.
    }
    // All ORDER BY columns are equal.
    return false;
  }

 private:
  const PtrVector<ColumnComparator> &columns_;
};

/**
 * @brief Heap node referring to one run during a single-column heap-based
 * merge. Holds the column value of the tuple currently at the head of the run,
 * and the ID of that run.
 *
 * @note The value is held as a non-owning untyped pointer.
 **/
struct SingleColumnHeapNode {
  // ID of the run this node stands for.
  std::size_t run_id;
  // Untyped pointer to the ORDER BY column value of the run's current tuple.
  // The single-column comparator expects it to be non-NULL.
  const void *value;
};

/**
 * @brief Internal data structure for single column comparator to be used in a
 * max-heap to merge sorted runs. This internal structure exists so that we can
 * pass a const reference to this instead of deep copying comparator.
 **/
class SingleColumnHeapComparatorInternal {
 public:
  /**
   * @brief Constructor.
   *
   * @param sort_config Sort configuration.
   **/
  explicit SingleColumnHeapComparatorInternal(
      const SortConfiguration &sort_config) {
    DCHECK(sort_config.isValid());
    DCHECK_EQ(1u, sort_config.getOrderByList().size());

    const bool ordering = sort_config.getOrdering()[0];
    const Scalar &order_by = sort_config.getOrderByList()[0];
    const Type &type = order_by.getType();
    if (ordering == kSortAscending) {
      comparator_.reset(
          ComparisonFactory::GetComparison(ComparisonID::kGreater)
              .makeUncheckedComparatorForTypes(type.getNonNullableVersion(),
                                               type.getNonNullableVersion()));
    } else {
      comparator_.reset(
          ComparisonFactory::GetComparison(ComparisonID::kLess)
              .makeUncheckedComparatorForTypes(type.getNonNullableVersion(),
                                               type.getNonNullableVersion()));
    }
  }

  /**
   * @brief Get the column comparator.
   **/
  inline const UncheckedComparator &getComparator() const {
    return *comparator_;
  }

 private:
  std::unique_ptr<UncheckedComparator> comparator_;
};

/**
 * @brief Single column comparator for the heap used when merging runs.
 *
 * @warning This comparator assumes that the values compared are never NULL. The
 *          merger is expected to handle NULLs outside of the heap.
 **/
class SingleColumnHeapComparator {
 public:
  /**
   * @brief Constructor.
   *
   * @param internal Internal single-column comparator instance initialized
   *        with sort configuration. Only a reference to its comparator is
   *        kept.
   **/
  explicit SingleColumnHeapComparator(
      const SingleColumnHeapComparatorInternal &internal)
      : comparator_(internal.getComparator()) {}

  /**
   * @brief Comparison operator().
   *
   * @param left Left heap node operand in the comparison.
   * @param right Right heap node operand in the comparison.
   *
   * @return Result of applying the configured comparator to the values of
   *         \p left and \p right.
   **/
  bool operator()(const SingleColumnHeapNode &left,
                  const SingleColumnHeapNode &right) {
    // The wrapped comparator must act as "greater than" in sort order so that
    // std::make_heap, std::push_heap and std::pop_heap implement a min-heap.
    const void *lhs = left.value;
    const void *rhs = right.value;
    return comparator_.compareDataPtrs(lhs, rhs);
  }

 private:
  const UncheckedComparator &comparator_;
};

/**
 * @}
 */

}  // namespace merge_run_operator

}  // namespace quickstep

#endif  // QUICKSTEP_RELATIONAL_OPERATORS_SORT_MERGE_RUN_OPERATOR_HELPERS_HPP_