diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index a9947e938b..8702b4c1f3 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -307,6 +307,7 @@ Permutation::IdTableGenerator IndexScan::lazyScanForJoinOfColumnWithScan( std::span joinColumn) const { AD_EXPENSIVE_CHECK(std::ranges::is_sorted(joinColumn)); AD_CORRECTNESS_CHECK(numVariables_ <= 3 && numVariables_ > 0); + AD_CONTRACT_CHECK(joinColumn.empty() || !joinColumn[0].isUndefined()); auto metaBlocks1 = getMetadataForScan(); diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index 5a0292af3e..f2c5e96311 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -26,7 +26,7 @@ using std::string; // _____________________________________________________________________________ Join::Join(QueryExecutionContext* qec, std::shared_ptr t1, std::shared_ptr t2, ColumnIndex t1JoinCol, - ColumnIndex t2JoinCol, bool keepJoinColumn) + ColumnIndex t2JoinCol) : Operation(qec) { AD_CONTRACT_CHECK(t1 && t2); // Currently all join algorithms require both inputs to be sorted, so we @@ -55,7 +55,6 @@ Join::Join(QueryExecutionContext* qec, std::shared_ptr t1, _leftJoinCol = t1JoinCol; _right = std::move(t2); _rightJoinCol = t2JoinCol; - _keepJoinColumn = keepJoinColumn; _sizeEstimate = 0; _sizeEstimateComputed = false; _multiplicities.clear(); @@ -92,10 +91,7 @@ string Join::getDescriptor() const { return "Join on " + _joinVar.name(); } // _____________________________________________________________________________ ProtoResult Join::computeResult([[maybe_unused]] bool requestLaziness) { LOG(DEBUG) << "Getting sub-results for join result computation..." << endl; - size_t leftWidth = _left->getResultWidth(); - size_t rightWidth = _right->getResultWidth(); - IdTable idTable{getExecutionContext()->getAllocator()}; - idTable.setNumColumns(leftWidth + rightWidth - 1); + IdTable idTable{getResultWidth(), getExecutionContext()->getAllocator()}; if (_left->knownEmptyResult() || _right->knownEmptyResult()) { _left->getRootOperation()->updateRuntimeInformationWhenOptimizedOut(); @@ -158,7 +154,7 @@ ProtoResult Join::computeResult([[maybe_unused]] bool requestLaziness) { std::shared_ptr leftRes = leftResIfCached ? leftResIfCached : _left->getResult(); checkCancellation(); - if (leftRes->idTable().size() == 0) { + if (leftRes->idTable().empty()) { _right->getRootOperation()->updateRuntimeInformationWhenOptimizedOut(); // TODO When we add triples to the // index, the vocabularies of index scans will not necessarily be empty and @@ -206,8 +202,7 @@ VariableToColumnMap Join::computeVariableToColumnMap() const { // _____________________________________________________________________________ size_t Join::getResultWidth() const { - size_t res = _left->getResultWidth() + _right->getResultWidth() - - (_keepJoinColumn ? 1 : 2); + size_t res = _left->getResultWidth() + _right->getResultWidth() - 1; AD_CONTRACT_CHECK(res > 0); return res; } diff --git a/src/engine/Join.h b/src/engine/Join.h index d8938dff0f..d23a9e4f5d 100644 --- a/src/engine/Join.h +++ b/src/engine/Join.h @@ -23,8 +23,6 @@ class Join : public Operation { Variable _joinVar{"?notSet"}; - bool _keepJoinColumn; - bool _sizeEstimateComputed; size_t _sizeEstimate; @@ -33,7 +31,7 @@ class Join : public Operation { public: Join(QueryExecutionContext* qec, std::shared_ptr t1, std::shared_ptr t2, ColumnIndex t1JoinCol, - ColumnIndex t2JoinCol, bool keepJoinColumn = true); + ColumnIndex t2JoinCol); // A very explicit constructor, which initializes an invalid join object (it // has no subtrees, which violates class invariants). These invalid Join @@ -108,8 +106,8 @@ class Join : public Operation { * @return The result is only sorted, if the bigger table is sorted. * Otherwise it is not sorted. **/ - void hashJoin(const IdTable& dynA, ColumnIndex jc1, const IdTable& dynB, - ColumnIndex jc2, IdTable* dynRes); + static void hashJoin(const IdTable& dynA, ColumnIndex jc1, + const IdTable& dynB, ColumnIndex jc2, IdTable* dynRes); protected: virtual string getCacheKeyImpl() const override; @@ -134,8 +132,6 @@ class Join : public Operation { IndexScan& scan, ColumnIndex joinColScan); - using ScanMethodType = std::function; - /* * @brief Combines 2 rows like in a join and inserts the result in the * given table. diff --git a/src/util/JoinAlgorithms/JoinAlgorithms.h b/src/util/JoinAlgorithms/JoinAlgorithms.h index 5b3e9de8d9..13d52aa2d8 100644 --- a/src/util/JoinAlgorithms/JoinAlgorithms.h +++ b/src/util/JoinAlgorithms/JoinAlgorithms.h @@ -20,14 +20,6 @@ namespace ad_utility { // Some helper concepts. -// A predicate type `Pred` fulfills `BinaryRangePredicate` if it can be -// called with two values of the `Range`'s `value_type` and produces a result -// that can be converted to `bool`. -template -concept BinaryRangePredicate = - std::indirect_binary_predicate, - std::ranges::iterator_t>; - // A function `F` fulfills `UnaryIteratorFunction` if it can be called with a // single argument of the `Range`'s iterator type (NOT value type). template @@ -667,14 +659,14 @@ class BlockAndSubrange { template struct JoinSide { using CurrentBlocks = - std::vector>; + std::vector>>; Iterator it_; [[no_unique_address]] const End end_; const Projection& projection_; CurrentBlocks currentBlocks_{}; // Type aliases for a single element from a block from the left/right input. - using value_type = std::ranges::range_value_t; + using value_type = std::ranges::range_value_t>; // Type alias for the result of the projection. using ProjectedEl = std::decay_t>; @@ -691,7 +683,7 @@ template auto makeJoinSide(Blocks& blocks, const auto& projection) { return JoinSide{std::ranges::begin(blocks), std::ranges::end(blocks), projection}; -}; +} // A concept to identify instantiations of the `JoinSide` template. template @@ -759,14 +751,10 @@ struct BlockZipperJoinImpl { using ProjectedEl = LeftSide::ProjectedEl; static_assert(std::same_as); - // The largest element for which all blocks are currently stored in the buffer - // and processed. - std::optional currentMinEl_ = std::nullopt; - // Create an equality comparison from the `lessThan` predicate. bool eq(const auto& el1, const auto& el2) { return !lessThan_(el1, el2) && !lessThan_(el2, el1); - }; + } // Recompute the `currentEl`. It is the minimum of the last element in the // first block of either of the join sides. @@ -775,7 +763,7 @@ struct BlockZipperJoinImpl { return side.projection_(side.currentBlocks_.front().back()); }; return std::min(getFirst(leftSide_), getFirst(rightSide_), lessThan_); - }; + } // Fill `side.currentBlocks_` with blocks from the range `[side.it_, // side.end_)` and advance `side.it_` for each read buffer until all elements @@ -823,7 +811,7 @@ struct BlockZipperJoinImpl { } else { return BlockStatus::allFilled; } - }; + } // Remove all elements from `blocks` (either `leftSide_.currentBlocks_` or // `rightSide_.currentBlocks`) s.t. only elements `> lastProcessedElement` @@ -848,7 +836,7 @@ struct BlockZipperJoinImpl { if (blocks.at(0).empty()) { blocks.clear(); } - }; + } // For one of the inputs (`leftSide_.currentBlocks_` or // `rightSide_.currentBlocks_`) obtain a tuple of the following elements: @@ -860,7 +848,7 @@ struct BlockZipperJoinImpl { const auto& first = currentBlocks.at(0); auto it = std::ranges::lower_bound(first.subrange(), currentEl, lessThan_); return std::tuple{std::ref(first.fullBlock()), first.subrange(), it}; - }; + } // Call `compatibleRowAction` for all pairs of elements in the Cartesian // product of the blocks in `blocksLeft` and `blocksRight`. @@ -892,7 +880,7 @@ struct BlockZipperJoinImpl { } } compatibleRowAction_.flush(); - }; + } // Return a vector of subranges of all elements in `blocks` that are equal to // `currentEl`. Effectively, these subranges cover all the blocks completely @@ -907,7 +895,7 @@ struct BlockZipperJoinImpl { last.setSubrange( std::ranges::equal_range(last.subrange(), currentEl, lessThan_)); return result; - }; + } // Join the first block in `currentBlocksLeft` with the first block in // `currentBlocksRight`, but ignore all elements that are `>= currentEl` @@ -949,7 +937,7 @@ struct BlockZipperJoinImpl { // Remove the joined elements. currentBlocksLeft.at(0).setSubrange(currentElItL, subrangeLeft.end()); currentBlocksRight.at(0).setSubrange(currentElItR, subrangeRight.end()); - }; + } // If the `targetBuffer` is empty, read the next nonempty block from `[it, // end)` if there is one. @@ -965,7 +953,7 @@ struct BlockZipperJoinImpl { } ++it; } - }; + } // Fill both buffers (left and right) until they contain at least one block. // Then recompute the `currentEl()` and keep on filling the buffers until at @@ -988,9 +976,7 @@ struct BlockZipperJoinImpl { } // Add the remaining blocks such that condition 3 from above is fulfilled. - auto blockStatus = fillEqualToCurrentElBothSides(getCurrentEl()); - currentMinEl_ = getCurrentEl(); - return blockStatus; + return fillEqualToCurrentElBothSides(getCurrentEl()); } // Combine the above functionality and perform one round of joining. @@ -999,11 +985,10 @@ struct BlockZipperJoinImpl { void joinBuffers(BlockStatus& blockStatus) { auto& currentBlocksLeft = leftSide_.currentBlocks_; auto& currentBlocksRight = rightSide_.currentBlocks_; + ProjectedEl currentEl = getCurrentEl(); joinAndRemoveLessThanCurrentEl( - currentBlocksLeft, currentBlocksRight, getCurrentEl()); + currentBlocksLeft, currentBlocksRight, currentEl); - // TODO This should still be the same. - ProjectedEl currentEl = getCurrentEl(); // At this point the `currentBlocksLeft/Right` only consist of elements `>= // currentEl`. We now obtain a view on the elements `== currentEl` which are // needed for the next step (the Cartesian product). In the last block, diff --git a/src/util/JoinAlgorithms/JoinColumnMapping.h b/src/util/JoinAlgorithms/JoinColumnMapping.h index dc38627cd0..bb01c24a9b 100644 --- a/src/util/JoinAlgorithms/JoinColumnMapping.h +++ b/src/util/JoinAlgorithms/JoinColumnMapping.h @@ -128,7 +128,7 @@ struct IdTableAndFirstCol { auto begin() const { return col().begin(); } auto end() const { return col().end(); } - bool empty() { return col().empty(); } + bool empty() const { return col().empty(); } const Id& operator[](size_t idx) const { return col()[idx]; } diff --git a/test/engine/SpatialJoinTestHelpers.h b/test/engine/SpatialJoinTestHelpers.h index 2b4b8913ae..15274fe5be 100644 --- a/test/engine/SpatialJoinTestHelpers.h +++ b/test/engine/SpatialJoinTestHelpers.h @@ -126,8 +126,7 @@ inline std::shared_ptr buildJoin( auto varCol2 = tree2->getVariableColumns(); size_t col1 = varCol1[joinVariable].columnIndex_; size_t col2 = varCol2[joinVariable].columnIndex_; - return ad_utility::makeExecutionTree(qec, tree1, tree2, col1, col2, - true); + return ad_utility::makeExecutionTree(qec, tree1, tree2, col1, col2); } inline std::shared_ptr buildMediumChild( diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index 3db9c6ffee..70ee183540 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -149,7 +149,9 @@ class ValuesForTesting : public Operation { } size_t getResultWidth() const override { - return tables_.empty() ? 0 : tables_.at(0).numColumns(); + // Assume a width of 1 if we have no tables and no other information to base + // it on because 0 would otherwise cause stuff to break. + return tables_.empty() ? 1 : tables_.at(0).numColumns(); } vector resultSortedOn() const override { diff --git a/test/util/JoinHelpers.h b/test/util/JoinHelpers.h index 2034cf03a0..32f9346234 100644 --- a/test/util/JoinHelpers.h +++ b/test/util/JoinHelpers.h @@ -58,9 +58,8 @@ IdTable useJoinFunctionOnIdTables(const IdTableAndJoinColumn& tableA, * `ad_utility::callFixedSize`. */ auto makeHashJoinLambda() { - Join J{Join::InvalidOnlyForTestingJoinTag{}, ad_utility::testing::getQec()}; - return [J = std::move(J)](auto&&... args) mutable { - return J.hashJoin(AD_FWD(args)...); + return [](auto&&... args) { + return Join::hashJoin(AD_FWD(args)...); }; } @@ -70,7 +69,7 @@ auto makeHashJoinLambda() { */ auto makeJoinLambda() { Join J{Join::InvalidOnlyForTestingJoinTag{}, ad_utility::testing::getQec()}; - return [J = std::move(J)](auto&&... args) mutable { + return [J = std::move(J)](auto&&... args) { return J.join(AD_FWD(args)...); }; }