Skip to content

Commit

Permalink
Merge pull request vespa-engine#32672 from vespa-engine/toregge/trim-…
Browse files Browse the repository at this point in the history
…down-searchlib-posting-list-handle

Trim down search::index::PostingListHandle
  • Loading branch information
geirst authored Oct 28, 2024
2 parents 63d8be1 + c9c9933 commit de18da1
Show file tree
Hide file tree
Showing 13 changed files with 79 additions and 78 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -570,14 +570,13 @@ ShowPostingListSubApp::showPostingList()
handle->first = offsetAndCounts._counts;
handle->second._bitOffset = offsetAndCounts._offset;
handle->second._bitLength = handle->first._bitLength;
handle->second._file = postingfile.get();
handle->second._file->readPostingList(handle->second);
postingfile->readPostingList(handle->second);
std::vector<TermFieldMatchData> tfmdv(numFields);
TermFieldMatchDataArray tfmda;
for (auto& tfmd : tfmdv) {
tfmda.add(&tfmd);
}
auto sb = handle->second.createIterator(handle->first, tfmda);
auto sb = postingfile->createIterator(handle->first, handle->second, tfmda);
sb->initFullRange();
uint32_t docId = 0;
bool first = true;
Expand Down
4 changes: 2 additions & 2 deletions searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,8 @@ DiskIndexTest::requireThatWeCanReadPostingList()
TermFieldMatchDataArray mda;
{ // field 'f1'
LookupResult::UP r = _index->lookup(0, "w1");
PostingListHandle::UP h = _index->readPostingList(*r);
auto sb = h->createIterator(r->counts, mda);
auto h = _index->readPostingList(*r);
auto sb = _index->create_iterator(*r, *h, mda);
EXPECT_EQ(SimpleResult({1,3}), SimpleResult().search(*sb));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,6 @@ randReadField(FakeWordSet &wordSet,
search::index::PostingListHandle handle;

handle._bitLength = counts._bitLength;
handle._file = postingFile;
handle._bitOffset = offsetAndCounts._offset;

postingFile->readPostingList(handle);
Expand All @@ -514,8 +513,7 @@ randReadField(FakeWordSet &wordSet,
TermFieldMatchDataArray tfmda;
tfmda.add(&mdfield1);

std::unique_ptr<SearchIterator>
sb(handle.createIterator(counts, tfmda));
auto sb(postingFile->createIterator(counts, handle, tfmda));

// LOG(info, "loop=%d, wordNum=%u", loop, wordNum);
word->validate(sb.get(), tfmda, true, decode_interleaved_features, verbose);
Expand Down
37 changes: 16 additions & 21 deletions searchlib/src/tests/diskindex/fusion/fusion_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,6 @@ assert_interleaved_features(DiskIndex &d, const std::string &field, const std::s
{
using LookupResult = DiskIndex::LookupResult;
using PostingListHandle = index::PostingListHandle;
using SearchIterator = search::queryeval::SearchIterator;

const Schema &schema = d.getSchema();
uint32_t field_id(schema.getIndexFieldId(field));
Expand All @@ -170,7 +169,7 @@ assert_interleaved_features(DiskIndex &d, const std::string &field, const std::s
TermFieldMatchData tfmd;
TermFieldMatchDataArray tfmda;
tfmda.add(&tfmd);
std::unique_ptr<SearchIterator> sbap(handle->createIterator(lookup_result->counts, tfmda));
auto sbap(d.create_iterator(*lookup_result, *handle, tfmda));
sbap->initFullRange();
EXPECT_TRUE(sbap->seek(doc_id));
sbap->unpack(doc_id);
Expand All @@ -181,22 +180,18 @@ assert_interleaved_features(DiskIndex &d, const std::string &field, const std::s
void
validateDiskIndex(DiskIndex &dw, bool f2HasElements, bool f3HasWeights)
{
using LR = DiskIndex::LookupResult;
using PH = index::PostingListHandle;
using SB = search::queryeval::SearchIterator;

const Schema &schema(dw.getSchema());

{
uint32_t id1(schema.getIndexFieldId("f0"));
LR::UP lr1(dw.lookup(id1, "c"));
auto lr1(dw.lookup(id1, "c"));
ASSERT_TRUE(lr1);
PH::UP wh1(dw.readPostingList(*lr1));
auto wh1(dw.readPostingList(*lr1));
ASSERT_TRUE(wh1);
TermFieldMatchData f0;
TermFieldMatchDataArray a;
a.add(&f0);
SB::UP sbap(wh1->createIterator(lr1->counts, a));
auto sbap(dw.create_iterator(*lr1, *wh1, a));
sbap->initFullRange();
EXPECT_EQ(std::string("{1000000:}"), toString(f0.getIterator()));
EXPECT_TRUE(sbap->seek(10));
Expand All @@ -205,14 +200,14 @@ validateDiskIndex(DiskIndex &dw, bool f2HasElements, bool f3HasWeights)
}
{
uint32_t id1(schema.getIndexFieldId("f2"));
LR::UP lr1(dw.lookup(id1, "ax"));
auto lr1(dw.lookup(id1, "ax"));
ASSERT_TRUE(lr1);
PH::UP wh1(dw.readPostingList(*lr1));
auto wh1(dw.readPostingList(*lr1));
ASSERT_TRUE(wh1);
TermFieldMatchData f2;
TermFieldMatchDataArray a;
a.add(&f2);
SB::UP sbap(wh1->createIterator(lr1->counts, a));
auto sbap(dw.create_iterator(*lr1, *wh1, a));
sbap->initFullRange();
EXPECT_EQ(std::string("{1000000:}"), toString(f2.getIterator()));
EXPECT_TRUE(sbap->seek(10));
Expand All @@ -227,14 +222,14 @@ validateDiskIndex(DiskIndex &dw, bool f2HasElements, bool f3HasWeights)
}
{
uint32_t id1(schema.getIndexFieldId("f3"));
LR::UP lr1(dw.lookup(id1, "wx"));
auto lr1(dw.lookup(id1, "wx"));
ASSERT_TRUE(lr1);
PH::UP wh1(dw.readPostingList(*lr1));
auto wh1(dw.readPostingList(*lr1));
ASSERT_TRUE(wh1);
TermFieldMatchData f3;
TermFieldMatchDataArray a;
a.add(&f3);
SB::UP sbap(wh1->createIterator(lr1->counts, a));
auto sbap(dw.create_iterator(*lr1, *wh1, a));
sbap->initFullRange();
EXPECT_EQ(std::string("{1000000:}"), toString(f3.getIterator()));
EXPECT_TRUE(sbap->seek(10));
Expand All @@ -249,14 +244,14 @@ validateDiskIndex(DiskIndex &dw, bool f2HasElements, bool f3HasWeights)
}
{
uint32_t id1(schema.getIndexFieldId("f3"));;
LR::UP lr1(dw.lookup(id1, "zz"));
auto lr1(dw.lookup(id1, "zz"));
ASSERT_TRUE(lr1);
PH::UP wh1(dw.readPostingList(*lr1));
auto wh1(dw.readPostingList(*lr1));
ASSERT_TRUE(wh1);
TermFieldMatchData f3;
TermFieldMatchDataArray a;
a.add(&f3);
SB::UP sbap(wh1->createIterator(lr1->counts, a));
auto sbap(dw.create_iterator(*lr1, *wh1, a));
sbap->initFullRange();
EXPECT_EQ(std::string("{1000000:}"), toString(f3.getIterator()));
EXPECT_TRUE(sbap->seek(11));
Expand All @@ -271,14 +266,14 @@ validateDiskIndex(DiskIndex &dw, bool f2HasElements, bool f3HasWeights)
}
{
uint32_t id1(schema.getIndexFieldId("f3"));;
LR::UP lr1(dw.lookup(id1, "zz0"));
auto lr1(dw.lookup(id1, "zz0"));
ASSERT_TRUE(lr1);
PH::UP wh1(dw.readPostingList(*lr1));
auto wh1(dw.readPostingList(*lr1));
ASSERT_TRUE(wh1);
TermFieldMatchData f3;
TermFieldMatchDataArray a;
a.add(&f3);
SB::UP sbap(wh1->createIterator(lr1->counts, a));
auto sbap(dw.create_iterator(*lr1, *wh1, a));
sbap->initFullRange();
EXPECT_EQ(std::string("{1000000:}"), toString(f3.getIterator()));
EXPECT_TRUE(sbap->seek(12));
Expand Down
11 changes: 10 additions & 1 deletion searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ DiskIndex::read(const Key & key, LookupResultVector & result)
return true;
}

index::PostingListHandle::UP
std::unique_ptr<index::PostingListHandle>
DiskIndex::readPostingList(const LookupResult &lookupRes) const
{
auto& field_index = _field_indexes[lookupRes.indexId];
Expand All @@ -248,6 +248,15 @@ DiskIndex::readBitVector(const LookupResult &lookupRes) const
return field_index.read_bit_vector(lookupRes);
}

std::unique_ptr<search::queryeval::SearchIterator>
DiskIndex::create_iterator(const LookupResult& lookup_result,
const index::PostingListHandle& handle,
const search::fef::TermFieldMatchDataArray& tfmda) const
{
auto& field_index = _field_indexes[lookup_result.indexId];
return field_index.create_iterator(lookup_result, handle, tfmda);
}

namespace {

const std::vector<std::string> nonfield_file_names{
Expand Down
7 changes: 6 additions & 1 deletion searchlib/src/vespa/searchlib/diskindex/diskindex.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,12 @@ class DiskIndex : public queryeval::Searchable {
* @param lookupRes the result of the previous dictionary lookup.
* @return a handle for the posting list in memory.
*/
index::PostingListHandle::UP readPostingList(const LookupResult &lookupRes) const;
std::unique_ptr<index::PostingListHandle> readPostingList(const LookupResult &lookupRes) const;

std::unique_ptr<search::queryeval::SearchIterator>
create_iterator(const LookupResult& lookup_result,
const index::PostingListHandle& handle,
const search::fef::TermFieldMatchDataArray& tfmda) const;

/**
* Read the bit vector corresponding to the given lookup result.
Expand Down
4 changes: 2 additions & 2 deletions searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ DiskTermBlueprint::createLeafSearch(const TermFieldMatchDataArray & tfmda) const
getName(_lookupRes->indexId).c_str(), _lookupRes->wordNum, _lookupRes->counts._numDocs);
return BitVectorIterator::create(_bitVector.get(), *tfmda[0], strict());
}
SearchIterator::UP search(_postingHandle->createIterator(_lookupRes->counts, tfmda));
auto search(_diskIndex.create_iterator(*_lookupRes, *_postingHandle, tfmda));
if (_useBitVector) {
LOG(debug, "Return BooleanMatchIteratorWrapper: %s, wordNum(%" PRIu64 "), docCount(%" PRIu64 ")",
getName(_lookupRes->indexId).c_str(), _lookupRes->wordNum, _lookupRes->counts._numDocs);
Expand All @@ -102,7 +102,7 @@ DiskTermBlueprint::createFilterSearch(FilterConstraint) const
if (_bitVector) {
wrapper->wrap(BitVectorIterator::create(_bitVector.get(), *tfmda[0], strict()));
} else {
wrapper->wrap(_postingHandle->createIterator(_lookupRes->counts, tfmda));
wrapper->wrap(_diskIndex.create_iterator(*_lookupRes, *_postingHandle, tfmda));
}
return wrapper;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class DiskTermBlueprint : public queryeval::SimpleLeafBlueprint
DiskIndex::LookupResult::UP _lookupRes;
bool _useBitVector;
bool _fetchPostingsDone;
index::PostingListHandle::UP _postingHandle;
std::unique_ptr<index::PostingListHandle> _postingHandle;
BitVector::UP _bitVector;

public:
Expand Down
16 changes: 13 additions & 3 deletions searchlib/src/vespa/searchlib/diskindex/field_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "field_index.h"
#include "fileheader.h"
#include "pagedict4randread.h"
#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/searchlib/util/disk_space_calculator.h>
#include <filesystem>

Expand Down Expand Up @@ -149,11 +150,11 @@ FieldIndex::read_posting_list(const DictionaryLookupResult& lookup_result) const
auto handle = std::make_unique<PostingListHandle>();
handle->_bitOffset = lookup_result.bitOffset;
handle->_bitLength = lookup_result.counts._bitLength;
handle->_file = _posting_file.get();
if (handle->_file == nullptr) {
auto file = _posting_file.get();
if (file == nullptr) {
return {};
}
handle->_file->readPostingList(*handle);
file->readPostingList(*handle);
if (handle->_read_bytes != 0) {
_disk_io_stats->add_read_operation(handle->_read_bytes);
}
Expand All @@ -169,6 +170,15 @@ FieldIndex::read_bit_vector(const DictionaryLookupResult& lookup_result) const
return _bit_vector_dict->lookup(lookup_result.wordNum);
}

std::unique_ptr<search::queryeval::SearchIterator>
FieldIndex::create_iterator(const search::index::DictionaryLookupResult& lookup_result,
const index::PostingListHandle& handle,
const search::fef::TermFieldMatchDataArray& tfmda) const
{
return _posting_file->createIterator(lookup_result.counts, handle, tfmda);
}


index::FieldLengthInfo
FieldIndex::get_field_length_info() const
{
Expand Down
3 changes: 3 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/field_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ class FieldIndex {
void reuse_files(const FieldIndex& rhs);
std::unique_ptr<index::PostingListHandle> read_posting_list(const search::index::DictionaryLookupResult& lookup_result) const;
std::unique_ptr<BitVector> read_bit_vector(const search::index::DictionaryLookupResult& lookup_result) const;
std::unique_ptr<search::queryeval::SearchIterator> create_iterator(const search::index::DictionaryLookupResult& lookup_result,
const index::PostingListHandle& handle,
const search::fef::TermFieldMatchDataArray& tfmda) const;
index::FieldLengthInfo get_field_length_info() const;

index::DictionaryFileRandRead* get_dictionary() noexcept { return _dict.get(); }
Expand Down
2 changes: 2 additions & 0 deletions searchlib/src/vespa/searchlib/index/postinglistfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
class FastOS_FileInterface;

namespace search::common { class FileHeaderContext; }
namespace search::fef { class TermFieldMatchDataArray; }
namespace search::queryeval { class SearchIterator; }

namespace search::index {

Expand Down
24 changes: 18 additions & 6 deletions searchlib/src/vespa/searchlib/index/postinglisthandle.cpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,28 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "postinglisthandle.h"
#include "postinglistfile.h"
#include <vespa/searchlib/queryeval/searchiterator.h>

namespace search::index {

std::unique_ptr<search::queryeval::SearchIterator>
PostingListHandle::createIterator(const PostingListCounts &counts,
const search::fef::TermFieldMatchDataArray &matchData) const
PostingListHandle::~PostingListHandle()
{
return _file->createIterator(counts, *this, matchData);
if (_allocMem != nullptr) {
free(_allocMem);
}
}

void
PostingListHandle::drop()
{
_bitOffsetMem = 0;
_mem = nullptr;
if (_allocMem != nullptr) {
free(_allocMem);
_allocMem = nullptr;
}
_allocSize = 0;
_read_bytes = 0;
}


}
38 changes: 3 additions & 35 deletions searchlib/src/vespa/searchlib/index/postinglisthandle.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,18 @@
#pragma once

#include "postinglistcounts.h"
#include <memory>
#include <cstdlib>

namespace search { class BitVector; }
namespace search::queryeval { class SearchIterator; }
namespace search::fef { class TermFieldMatchDataArray; }

namespace search::index {

class PostingListFileRandRead;

/**
* Class for owning a posting list in memory after having read it from
* posting list file, or referencing a chunk of memory containing the
* posting list (if the file was memory mapped).
*/
class PostingListHandle {
public:
using UP = std::unique_ptr<PostingListHandle>;
// Key portion
PostingListFileRandRead *_file; // File containing posting list
uint64_t _bitOffset; // posting list start relative to start of file
uint64_t _bitLength; // Length of posting list, in bits

Expand All @@ -34,8 +25,7 @@ class PostingListHandle {
uint64_t _read_bytes; // Bytes read from disk (used by disk io stats)

PostingListHandle()
: _file(nullptr),
_bitOffset(0),
: _bitOffset(0),
_bitLength(0),
_bitOffsetMem(0),
_mem(nullptr),
Expand All @@ -44,34 +34,12 @@ class PostingListHandle {
_read_bytes(0)
{ }

~PostingListHandle()
{
if (_allocMem != nullptr) {
free(_allocMem);
}
}

/**
* Create iterator for single word. Semantic lifetime of counts and
* handle must exceed lifetime of iterator.
*/
std::unique_ptr<search::queryeval::SearchIterator>
createIterator(const PostingListCounts &counts,
const search::fef::TermFieldMatchDataArray &matchData) const;
~PostingListHandle();

/**
* Drop value portion of handle.
*/
void drop() {
_bitOffsetMem = 0;
_mem = nullptr;
if (_allocMem != nullptr) {
free(_allocMem);
_allocMem = nullptr;
}
_allocSize = 0;
_read_bytes = 0;
}
void drop();
};

}

0 comments on commit de18da1

Please sign in to comment.