Skip to content

Commit

Permalink
updated PTHash: fixed handling of empty free array for tiny files
Browse files Browse the repository at this point in the history
  • Loading branch information
jermp committed Dec 2, 2023
1 parent 52a4ce0 commit 3d112e1
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 1 deletion.
57 changes: 57 additions & 0 deletions include/builder/parse_file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,14 @@ void parse_file(std::istream& is, parse_data& data, build_configuration const& b
}
};

// uint64_t less = 0;
// uint64_t total = 0;
// std::ofstream arrows("arrows.txt");

// std::vector<uint64_t> minimizers;

std::vector<uint64_t> num_minimizers_per_unitigs(100 + 1, 0);

while (!is.eof()) {
std::getline(is, sequence); // header sequence
if (build_config.weighted) parse_header();
Expand All @@ -158,6 +166,8 @@ void parse_file(std::istream& is, parse_data& data, build_configuration const& b
throw std::runtime_error("file is malformed");
}

uint64_t num_minimizers_per_unitig = 1;

while (end != sequence.size() - k + 1) {
char const* kmer = sequence.data() + end;
assert(util::is_valid(kmer, k));
Expand All @@ -174,17 +184,64 @@ void parse_file(std::istream& is, parse_data& data, build_configuration const& b
if (minimizer != prev_minimizer) {
append_super_kmer();
begin = end;
// minimizers.push_back(prev_minimizer);
// if (minimizer < prev_minimizer) {
// // arrows << "<";
// less += 1;
// } else { // minimizer > prev_minimizer
// // arrows << ">";
// }
prev_minimizer = minimizer;
glue = true;

num_minimizers_per_unitig += 1;
// total += 1;
}
// else {
// std::cerr << "=";
// }

++data.num_kmers;
++end;
}

append_super_kmer();

if (num_minimizers_per_unitig <= 16) {
num_minimizers_per_unitigs[num_minimizers_per_unitig] += 1;
// total += 1;
}
}

std::cout << "k=" << k << " m=" << m << std::endl;
for (uint64_t i = 1; i <= 16; ++i) {
std::cout << "num. unitigs with " << i << " minimizers: " << num_minimizers_per_unitigs[i]
<< "/" << num_sequences << "("
<< (num_minimizers_per_unitigs[i] * 100.0) / num_sequences << "%)" << std::endl;
}

// std::cout << "total " << total << std::endl;
// std::cout << "less " << less << std::endl;
// std::cout << "greater " << total - less << std::endl;

// std::sort(minimizers.begin(), minimizers.end());
// for (auto x : minimizers) { arrows << x << '\n'; }
// prev_minimizer = uint64_t(-1);
// uint64_t count = 0;
// for (uint64_t i = 0; i != minimizers.size(); ++i) {
// if (minimizers[i] != prev_minimizer) {
// if (prev_minimizer != uint64_t(-1)) {
// arrows << prev_minimizer << ' ' << count << '\n';
// }
// count = 1;
// prev_minimizer = minimizers[i];
// } else {
// count += 1;
// }
// }

// arrows.close();

data.minimizers.finalize();
builder.finalize();
builder.build(data.strings);
Expand Down
6 changes: 6 additions & 0 deletions test/test_alphabet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

using namespace sshash;

/// Writes the decimal representation of a 128-bit unsigned integer to `os`.
///
/// The digits are produced arithmetically (repeated division by 10), so the
/// output is the actual base-10 value of `x` and does not depend on the byte
/// order of the host or on reinterpreting the object's storage.
///
/// @param os  destination stream
/// @param x   value to print
/// @return    `os`, to allow chaining
std::ostream& operator<<(std::ostream& os, __uint128_t x) {
    // 2^128 - 1 has 39 decimal digits; one extra slot for the terminator.
    char digits[40];
    int pos = static_cast<int>(sizeof(digits)) - 1;
    digits[pos] = '\0';

    // Fill the buffer from the least significant digit backwards.
    // do/while so that x == 0 still yields a single "0".
    do {
        --pos;
        digits[pos] = static_cast<char>('0' + static_cast<int>(x % 10));
        x /= 10;
    } while (x != 0);

    return os << (digits + pos);
}

template <typename T>
void expect(T got, T expected) {
if (got != expected) {
Expand Down

0 comments on commit 3d112e1

Please sign in to comment.