Skip to content

Commit

Permalink
Makes HDF5 use optional
Browse files Browse the repository at this point in the history
  • Loading branch information
pmelsted committed Dec 10, 2019
1 parent e7954e0 commit 079a670
Show file tree
Hide file tree
Showing 10 changed files with 87 additions and 31 deletions.
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ project(kallisto)

include(GNUInstallDirs)


# Build-time switch for HDF5 support; it stays off unless the user enables it.
option(USE_HDF5 "Compile with HDF5 support" OFF)

# Forward the choice to the compiler as a preprocessor definition so that
# sources guarded by `#ifdef USE_HDF5` are compiled in.
if (USE_HDF5)
  add_compile_definitions(USE_HDF5=ON)
endif ()

set(EXT_PROJECTS_DIR ${PROJECT_SOURCE_DIR}/ext)
set(CMAKE_CXX_FLAGS_PROFILE "-g")

Expand Down
7 changes: 4 additions & 3 deletions src/Bootstrap.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "Bootstrap.h"
#include "PlaintextWriter.h"

EMAlgorithm Bootstrap::run_em() {
auto counts = mult_.sample();
Expand All @@ -19,7 +20,7 @@ BootstrapThreadPool::BootstrapThreadPool(
const MinCollector& tc,
const std::vector<double>& eff_lens,
const ProgramOptions& p_opts,
H5Writer& h5writer,
BootstrapWriter *bswriter,
const std::vector<double>& mean_fls
) :
n_threads_(n_threads),
Expand All @@ -30,7 +31,7 @@ BootstrapThreadPool::BootstrapThreadPool(
tc_(tc),
eff_lens_(eff_lens),
opt_(p_opts),
writer_(h5writer),
writer_(bswriter),
mean_fls_(mean_fls)
{
for (size_t i = 0; i < n_threads_; ++i) {
Expand Down Expand Up @@ -78,7 +79,7 @@ void BootstrapWorker::operator() (){
std::unique_lock<std::mutex> lock(pool_.write_lock_);
++pool_.n_complete_;
std::cerr << "[bstrp] number of EM bootstraps complete: " << pool_.n_complete_ << "\r";
pool_.writer_.write_bootstrap(res, cur_id);
pool_.writer_->write_bootstrap(res, cur_id);
// release write lock
} else {
// can write out plaintext in parallel
Expand Down
21 changes: 18 additions & 3 deletions src/Bootstrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include "weights.h"
#include "EMAlgorithm.h"
#include "Multinomial.hpp"
#include "H5Writer.h"


class Bootstrap {
// needs:
Expand Down Expand Up @@ -49,6 +49,21 @@ class Bootstrap {
const ProgramOptions& opt;
};

// Abstract interface for objects that write out the main EM result and the
// per-bootstrap EM results. H5Writer derives from it, and BootstrapThreadPool
// stores a pointer to one so that its workers can call write_bootstrap()
// without depending on a concrete writer type.
class BootstrapWriter {
public:
  // Virtual so that an implementation can be destroyed through a base pointer.
  virtual ~BootstrapWriter() = default;

  // Sets the writer up for output to `fname`; the remaining arguments are run
  // metadata supplied by the caller (bootstrap count, number of processed
  // reads, bias vectors, compression level, index version, the shell call and
  // the start time).
  // NOTE(review): `uint` is not a standard C++ type name; it is kept here so
  // that existing overrides keep matching this signature — confirm it is
  // available on every supported platform.
  virtual void init(const std::string& fname, int num_bootstrap, int num_processed,
                    const std::vector<int>& fld, const std::vector<int>& preBias,
                    const std::vector<double>& postBias, uint compression, size_t index_version,
                    const std::string& shell_call, const std::string& start_time) = 0;

  // Writes the result of the main EM run together with the target ids and
  // target lengths it refers to.
  virtual void write_main(const EMAlgorithm& em,
                          const std::vector<std::string>& targ_ids,
                          const std::vector<int>& lengths) = 0;

  // Writes the EM result of the bootstrap numbered `bs_id`. BootstrapWorker
  // calls this while holding the pool's write lock.
  virtual void write_bootstrap(const EMAlgorithm& em, int bs_id) = 0;
};

class BootstrapThreadPool {
friend class BootstrapWorker;

Expand All @@ -61,7 +76,7 @@ class BootstrapThreadPool {
const MinCollector& tc,
const std::vector<double>& eff_lens,
const ProgramOptions& p_opts,
H5Writer& h5writer,
BootstrapWriter *bswriter,
const std::vector<double>& mean_fls
);

Expand All @@ -84,7 +99,7 @@ class BootstrapThreadPool {
const MinCollector& tc_;
const std::vector<double>& eff_lens_;
const ProgramOptions& opt_;
H5Writer& writer_;
BootstrapWriter *writer_;
const std::vector<double>& mean_fls_;
};

Expand Down
24 changes: 15 additions & 9 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,29 @@ if(LINK MATCHES static)
SET_TARGET_PROPERTIES(kallisto kallisto_core PROPERTIES LINK_SEARCH_END_STATIC 1)
endif(LINK MATCHES static)

find_package( HDF5 REQUIRED )

# HDF5 is only searched for when the USE_HDF5 option is on; once requested it
# is a mandatory dependency.
if (USE_HDF5)
  find_package(HDF5 REQUIRED)
endif ()

# zlib is always required: locate it, then add its include directories and
# link libraries to the build.
find_package( ZLIB REQUIRED )

if ( ZLIB_FOUND )
include_directories( ${ZLIB_INCLUDE_DIRS} )
target_link_libraries(kallisto kallisto_core ${ZLIB_LIBRARIES})
else()
# Message wording kept parallel to the HDF5 error in this file.
message(FATAL_ERROR "zlib not found. Required to output files" )
endif( ZLIB_FOUND )

if(HDF5_FOUND)
include_directories( ${HDF5_INCLUDE_DIRS} )
target_link_libraries( kallisto_core ${HDF5_LIBRARIES} )
target_link_libraries( kallisto ${HDF5_LIBRARIES} )
else()
message(FATAL_ERROR "HDF5 not found. Required to output files")
endif()

# With HDF5 support requested, stop configuring if the library was not found;
# otherwise add its headers and link it into kallisto_core and kallisto.
if (USE_HDF5)
  if (NOT HDF5_FOUND)
    message(FATAL_ERROR "HDF5 not found. Required to output files")
  endif ()

  include_directories(${HDF5_INCLUDE_DIRS})
  target_link_libraries(kallisto_core ${HDF5_LIBRARIES})
  target_link_libraries(kallisto ${HDF5_LIBRARIES})
endif ()

if(LINK MATCHES static)
if (UNIX AND NOT APPLE)
Expand Down
3 changes: 3 additions & 0 deletions src/H5Writer.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#ifdef USE_HDF5
#include "H5Writer.h"

void H5Writer::init(const std::string& fname, int num_bootstrap, int num_processed,
Expand Down Expand Up @@ -201,3 +202,5 @@ void H5Converter::rw_from_counts(hid_t group_id, const std::string& count_name,

plaintext_writer(out_fname, targ_ids_, alpha, eff_lengths_, lengths_);
}

#endif // USE_HDF5
11 changes: 7 additions & 4 deletions src/H5Writer.h
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
#ifdef USE_HDF5
#ifndef KALLISTO_H5WRITER_H
#define KALLISTO_H5WRITER_H

#include "EMAlgorithm.h"

#include "h5utils.h"
#include "PlaintextWriter.h"
#include "Bootstrap.h"

class H5Writer {
class H5Writer : public BootstrapWriter {
public:
H5Writer() : primed_(false) {}
~H5Writer();

void init(const std::string& fname, int num_bootstrap, int num_processed,
virtual void init(const std::string& fname, int num_bootstrap, int num_processed,
const std::vector<int>& fld, const std::vector<int>& preBias, const std::vector<double>& postBias, uint compression, size_t index_version,
const std::string& shell_call, const std::string& start_time);

void write_main(const EMAlgorithm& em,
virtual void write_main(const EMAlgorithm& em,
const std::vector<std::string>& targ_ids,
const std::vector<int>& lengths);

void write_bootstrap(const EMAlgorithm& em, int bs_id);
virtual void write_bootstrap(const EMAlgorithm& em, int bs_id);

private:
bool primed_;
Expand Down Expand Up @@ -76,3 +78,4 @@ class H5Converter {
};

#endif // KALLISTO_H5WRITER_H
#endif // USE_HDF5
2 changes: 2 additions & 0 deletions src/h5utils.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#ifdef USE_HDF5
#include "h5utils.h"

// allocate a contiguous block of memory, dependent on the largest string
Expand Down Expand Up @@ -138,3 +139,4 @@ void read_vector(

delete [] pool;
}
#endif // USE_HDF5
2 changes: 2 additions & 0 deletions src/h5utils.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#ifdef USE_HDF5
#ifndef KALLISTO_H5_UTILS
#define KALLISTO_H5_UTILS

Expand Down Expand Up @@ -144,3 +145,4 @@ void read_dataset(hid_t group_id,
// end: reading utils

#endif // KALLISTO_H5_UTILS
#endif // USE_HDF5
38 changes: 28 additions & 10 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "Inspect.h"
#include "Bootstrap.h"
#include "H5Writer.h"
#include "PlaintextWriter.h"
#include "GeneModel.h"
#include "Merge.h"

Expand Down Expand Up @@ -1269,6 +1270,14 @@ bool CheckOptionsEM(ProgramOptions& opt, bool emonly = false) {
ret = false;
}

#ifndef USE_HDF5
if (opt.bootstrap > 0 && !opt.plaintext) {
cerr << "Warning: kallisto was not compiled with HDF5 support so no bootstrapping" << endl
<< "will be performed. Run quant with --plaintext option or recompile with" << endl
<< "HDF5 support to obtain bootstrap estimates." << endl;
opt.bootstrap = 0;
}
#endif
return ret;
}

Expand Down Expand Up @@ -2129,12 +2138,15 @@ int main(int argc, char *argv[]) {

std::string call = argv_to_string(argc, argv);


#ifdef USE_HDF5
H5Writer writer;
if (!opt.plaintext) {
writer.init(opt.output + "/abundance.h5", opt.bootstrap, num_processed, fld, preBias, em.post_bias_, 6,
index.INDEX_VERSION, call, start_time);
writer.write_main(em, index.target_names_, index.target_lens_);
}
#endif // USE_HDF5

for (int i = 0; i < index.num_trans; i++) {
num_unique += collection.counts[i];
Expand Down Expand Up @@ -2166,7 +2178,9 @@ int main(int argc, char *argv[]) {
// this happens if nothing aligns, then we write an empty bootstrap file
for (int b = 0; b < opt.bootstrap; b++) {
if (!opt.plaintext) {
#ifdef USE_HDF5
writer.write_bootstrap(em, b); // em is empty
#endif
} else {
plaintext_writer(opt.output + "/bs_abundance_" + std::to_string(b) + ".tsv",
em.target_names_, em.alpha_, em.eff_lens_, index.target_lens_); // re-use empty input
Expand All @@ -2192,17 +2206,20 @@ int main(int argc, char *argv[]) {
<< opt.bootstrap << endl;
n_threads = opt.bootstrap;
}

#ifdef USE_HDF5
BootstrapThreadPool pool(opt.threads, seeds, collection.counts, index,
collection, em.eff_lens_, opt, writer, fl_means);
collection, em.eff_lens_, opt, &writer, fl_means);
#endif
} else {
for (auto b = 0; b < B; ++b) {
Bootstrap bs(collection.counts, index, collection, em.eff_lens_, seeds[b], fl_means, opt);
cerr << "[bstrp] running EM for the bootstrap: " << b + 1 << "\r";
auto res = bs.run_em();

if (!opt.plaintext) {
#ifdef USE_HDF5
writer.write_bootstrap(res, b);
#endif
} else {
plaintext_writer(opt.output + "/bs_abundance_" + std::to_string(b) + ".tsv",
em.target_names_, res.alpha_, em.eff_lens_, index.target_lens_);
Expand Down Expand Up @@ -2269,13 +2286,13 @@ int main(int argc, char *argv[]) {
em.run(10000, 50, true, opt.bias);

std::string call = argv_to_string(argc, argv);
H5Writer writer;
//H5Writer writer;

if (!opt.plaintext) {
// setting num_processed to 0 because quant-only is for debugging/special ops
writer.init(opt.output + "/abundance.h5", opt.bootstrap, 0, fld, preBias, em.post_bias_, 6,
/* writer.init(opt.output + "/abundance.h5", opt.bootstrap, 0, fld, preBias, em.post_bias_, 6,
index.INDEX_VERSION, call, start_time);
writer.write_main(em, index.target_names_, index.target_lens_);
writer.write_main(em, index.target_names_, index.target_lens_);*/
} else {
plaintext_aux(
opt.output + "/run_info.json",
Expand Down Expand Up @@ -2307,7 +2324,7 @@ int main(int argc, char *argv[]) {
// this happens if nothing aligns, then we write an empty bootstrap file
for (int b = 0; b < opt.bootstrap; b++) {
if (!opt.plaintext) {
writer.write_bootstrap(em, b); // em is empty
//writer.write_bootstrap(em, b); // em is empty
} else {
plaintext_writer(opt.output + "/bs_abundance_" + std::to_string(b) + ".tsv",
em.target_names_, em.alpha_, em.eff_lens_, index.target_lens_); // re-use empty input
Expand All @@ -2334,16 +2351,16 @@ int main(int argc, char *argv[]) {
n_threads = opt.bootstrap;
}

BootstrapThreadPool pool(n_threads, seeds, collection.counts, index,
collection, em.eff_lens_, opt, writer, fl_means);
/*BootstrapThreadPool pool(n_threads, seeds, collection.counts, index,
collection, em.eff_lens_, opt, writer, fl_means);*/
} else {
for (auto b = 0; b < B; ++b) {
Bootstrap bs(collection.counts, index, collection, em.eff_lens_, seeds[b], fl_means, opt);
cerr << "[bstrp] running EM for the bootstrap: " << b + 1 << "\r";
auto res = bs.run_em();

if (!opt.plaintext) {
writer.write_bootstrap(res, b);
//writer.write_bootstrap(res, b);
} else {
plaintext_writer(opt.output + "/bs_abundance_" + std::to_string(b) + ".tsv",
em.target_names_, res.alpha_, em.eff_lens_, index.target_lens_);
Expand Down Expand Up @@ -2587,12 +2604,13 @@ int main(int argc, char *argv[]) {
usageh5dump();
exit(1);
}

#ifdef USE_HDF5
H5Converter h5conv(opt.files[0], opt.output);
if (!opt.peek) {
h5conv.write_aux();
h5conv.convert();
}
#endif
} else {
cerr << "Error: invalid command " << cmd << endl;
usage();
Expand Down
3 changes: 1 addition & 2 deletions test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ INDEX = "{0}.kidx".format(PRE)

rule all:
input:
"quant_out/abundance.h5",
"quant_out/abundance.tsv",
"bus_out/output.bus"

rule index:
Expand All @@ -25,7 +25,6 @@ rule kallisto_quant:
"reads_2.fastq.gz",
INDEX
output:
"quant_out/abundance.h5",
"quant_out/abundance.tsv",
"quant_out/run_info.json"
shell:
Expand Down

0 comments on commit 079a670

Please sign in to comment.