Skip to content

Commit

Permalink
A better way to optimize LF(i)
Browse files Browse the repository at this point in the history
  • Loading branch information
jltsiren committed Apr 20, 2018
1 parent 9c3e389 commit ee2b723
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 30 deletions.
79 changes: 79 additions & 0 deletions include/gbwt/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,8 @@ struct Sequence
- CompressedRecordRankIterator keeps track of the rank for one successor node.
- CompressedRecordFullIterator is the slowest, as it keeps track of the ranks
for all successor nodes.
- CompressedRecordArrayIterator is a faster version of the full iterator for
records with outdegree <= MAX_OUTDEGREE.
FIXME a single iterator with a RankCalculator as a template parameter.
*/
Expand Down Expand Up @@ -438,6 +440,83 @@ struct CompressedRecordFullIterator
}
};

struct CompressedRecordArrayIterator
{
const static size_type MAX_OUTDEGREE = 4;

explicit CompressedRecordArrayIterator(const CompressedRecord& source) :
record(source), decoder(source.outdegree()),
record_offset(0), curr_offset(0), next_offset(0)
{
for(size_type i = 0; i < source.outdegree(); i++) { this->ranks[i] = source.outgoing[i]; }
this->read();
}

bool end() const { return (this->curr_offset >= this->record.data_size); }
void operator++() { this->curr_offset = this->next_offset; this->read(); }

run_type operator*() const { return this->run; }
const run_type* operator->() { return &(this->run); }

// After the current run.
size_type offset() const { return this->record_offset; }
size_type rank() const { return this->rank(this->run.first); }
size_type rank(rank_type outrank) const { return this->ranks[outrank].second; }
edge_type edge() const { return this->edge(this->run.first); }
edge_type edge(rank_type outrank) const { return this->ranks[outrank]; }

// Intended for positions i covered by or after the current run. May advance the iterator.
size_type rankAt(size_type i)
{
while(this->offset() <= i) // We need <= to get BWT[i].
{
if(this->end()) { return invalid_offset(); }
this->curr_offset = this->next_offset;
this->run = this->decoder.read(this->record.body, this->next_offset);
this->record_offset += this->run.second;
this->ranks[this->run.first].second += this->run.second;
}

return this->rank() - (this->offset() - i);
}

// Intended for positions i covered by or after the current run. May advance the iterator.
edge_type edgeAt(size_type i)
{
while(this->offset() <= i) // We need <= to get BWT[i].
{
if(this->end()) { return invalid_edge(); }
this->curr_offset = this->next_offset;
this->run = this->decoder.read(this->record.body, this->next_offset);
this->record_offset += this->run.second;
this->ranks[this->run.first].second += this->run.second;
}

edge_type temp = this->edge();
temp.second -= (this->offset() - i);
return temp;
}

const CompressedRecord& record;
Run decoder;
edge_type ranks[MAX_OUTDEGREE];

size_type record_offset;
size_type curr_offset, next_offset;
run_type run;

private:
void read()
{
if(!(this->end()))
{
this->run = this->decoder.read(this->record.body, this->next_offset);
this->record_offset += this->run.second;
this->ranks[this->run.first].second += this->run.second;
}
}
};

//------------------------------------------------------------------------------

/*
Expand Down
67 changes: 37 additions & 30 deletions support.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,34 +103,40 @@ DynamicRecord::LF(size_type i) const
return this->runLF(i, run_end);
}

/*
  Shared loop for computing LF(i) over a run-length encoded body.

  result must be indexable by outrank and hold the initial edge for each outgoing
  edge; the loop adds the length of every scanned run to the corresponding entry.
  Scanning stops after the first run that extends past offset i. The entry for that
  run is then adjusted back to offset i and returned, and run_end is set to the last
  offset of that run.

  The caller is expected to ensure that offset i is covered by the body.
*/
template<class Array>
edge_type LFLoop(Array& result, const std::vector<edge_type>& body, size_type i, size_type& run_end)
{
  rank_type current = 0;  // Outrank of the most recently scanned run.
  size_type covered = 0;  // Number of offsets covered by the scanned runs.

  for(auto iter = body.begin(); iter != body.end() && covered <= i; ++iter)
  {
    run_type run = *iter;
    current = run.first;
    result[run.first].second += run.second;
    covered += run.second;
  }

  // The last scanned run ends at covered - 1; step back to offset i.
  run_end = covered - 1;
  result[current].second -= (covered - i);
  return result[current];
}

/*
  Returns the edge for offset i of this record, or invalid_edge() if i >= size().
  On success, run_end receives the last offset of the run that covers offset i.

  NOTE(review): this span comes from a diff rendered without +/- markers. Lines of
  the earlier two-pass implementation (find the outrank, then recompute the rank)
  and of the LFLoop-based implementation appear interleaved, so the text is not
  compilable as shown -- confirm against the repository before editing.
*/
edge_type
DynamicRecord::runLF(size_type i, size_type& run_end) const
{
if(i >= this->size()) { return invalid_edge(); }

// Find the outrank at offset i.
size_type outrank = this->outdegree();
size_type offset = 0;
for(run_type run : this->body)
// Small outdegree: accumulate the ranks in a fixed-size array.
if(this->outdegree() <= CompressedRecordArrayIterator::MAX_OUTDEGREE)
{
offset += run.second;
if(offset > i) { outrank = run.first; break; }
edge_type result[CompressedRecordArrayIterator::MAX_OUTDEGREE];
for(size_type i = 0; i < this->outdegree(); i++) { result[i] = this->outgoing[i]; }
return LFLoop(result, this->body, i, run_end);
}
if(outrank >= this->outdegree()) { return invalid_edge(); }

// Compute LF(i, successor(outrank)).
offset = 0;
edge_type result(this->successor(outrank), this->offset(outrank));
for(run_type run : this->body)
// Otherwise: accumulate the ranks in a copy of the outgoing edges.
else
{
if(run.first == outrank) { result.second += run.second; }
offset += run.second;
if(offset > i) { break;}
std::vector<edge_type> result(this->outgoing);
return LFLoop(result, this->body, i, run_end);
}

result.second -= (offset - i);
run_end = offset - 1;
return result;
}

size_type
Expand Down Expand Up @@ -352,19 +358,20 @@ CompressedRecord::runLF(size_type i, size_type& run_end) const
{
if(this->outdegree() == 0) { return invalid_edge(); }

// Find the outrank at offset i.
size_type outrank = this->outdegree();
for(CompressedRecordIterator iter(*this); !(iter.end()); ++iter)
if(this->outdegree() <= CompressedRecordArrayIterator::MAX_OUTDEGREE)
{
if(iter.offset() > i) { outrank = iter->first; break; }
CompressedRecordArrayIterator iter(*this);
edge_type result = iter.edgeAt(i);
if(result != invalid_edge()) { run_end = iter.offset() - 1; }
return result;
}
else
{
CompressedRecordFullIterator iter(*this);
edge_type result = iter.edgeAt(i);
if(result != invalid_edge()) { run_end = iter.offset() - 1; }
return result;
}
if(outrank >= this->outdegree()) { return invalid_edge(); }

// Compute LF(i, successor(outrank)).
CompressedRecordRankIterator result_iter(*this, outrank);
edge_type result(this->successor(outrank), result_iter.rankAt(i));
run_end = result_iter.offset() - 1;
return result;
}

size_type
Expand Down

0 comments on commit ee2b723

Please sign in to comment.