From 70a6dd53f3641b75e4ba44ebcb696f87fe1ab465 Mon Sep 17 00:00:00 2001
From: Pall Melsted <pmelsted@gmail.com>
Date: Fri, 6 Feb 2015 10:28:13 -0800
Subject: [PATCH] Reformatting using astyle

---
 .gitignore           |   1 +
 astyle.txt           |   8 +
 src/EMAlgorithm.h    | 264 +++++++--------
 src/Kmer.cpp         | 206 ++++++------
 src/Kmer.hpp         |  22 +-
 src/KmerHashTable.h  | 430 ++++++++++++------------
 src/KmerIndex.h      | 772 +++++++++++++++++++++----------------------
 src/KmerIterator.cpp |  58 ++--
 src/KmerIterator.hpp |  14 +-
 src/MinCollector.h   | 166 +++++-----
 src/ProcessReads.h   | 144 ++++----
 src/common.h         |  24 +-
 src/hash.cpp         | 176 +++++-----
 src/hash.hpp         |   4 +-
 src/kseq.h           |   4 +-
 src/main.cpp         | 645 +++++++++++++++++-------------------
 src/weights.cpp      |   0
 src/weights.h        |  57 ++--
 18 files changed, 1485 insertions(+), 1510 deletions(-)
 create mode 100644 astyle.txt
 delete mode 100644 src/weights.cpp
diff --git a/.gitignore b/.gitignore
index e39dee7f..f855116a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ src/kallisto
 src/*.a
 CTestTestfile.cmake
 *~
+*.orig
diff --git a/astyle.txt b/astyle.txt
new file mode 100644
index 00000000..2320d6eb
--- /dev/null
+++ b/astyle.txt
@@ -0,0 +1,8 @@
+close-templates  
+add-brackets 
+align-reference=type 
+align-pointer=name 
+style=google
+indent=spaces=2
+keep-one-line-blocks
+keep-one-line-statements
diff --git a/src/EMAlgorithm.h b/src/EMAlgorithm.h
index f44abd99..164ab36c 100644
--- a/src/EMAlgorithm.h
+++ b/src/EMAlgorithm.h
@@ -14,153 +14,153 @@ const double TOLERANCE = 1e-5;
 template <typename Index>
 struct EMAlgorithm {
 
-    // counts is vector from collector, with indices corresponding to ec ids
-    // TODO: initialize alpha a bit more intelligently
-    // TODO: refactor to remove dependence on Index
-	EMAlgorithm(const ProgramOptions& opt, const Index& idx,
-	        const std::vector<int>& counts,
-	        const std::vector<double>& eff_lens,
-	        const WeightMap& wm) :
-	    idx_(idx),
-	    num_trans_(idx.num_trans),
-	    counts_(counts),
-	    eff_lens_(eff_lens),
-	    weight_map_(wm),
-	    alpha_(idx.num_trans, 1.0/idx.num_trans), // uniform distribution over transcripts
-	    rho_(idx.num_trans, 0.0),
-	    rho_set_(false)
-	{}
-
-	~EMAlgorithm() {}
-
-	void run(size_t n_iter = 500) {
-        std::vector<double> next_alpha(alpha_.size(), 0.0);
-
-        assert(weight_map_.size() == counts_.size());
-
-        double denom;
-
-        std::cout << "[em]\tfishing for the right mixture (. = 50 rounds)" <<
-            std::endl;
-
-	    for (auto i = 0; i < n_iter; ++i) {
-            if (i % 50 == 0) {
-                std::cout << ".";
-                if (i % 500 == 0 && i > 0) {
-                    std::cout << std::endl;
-                }
-            }
-
-            for (auto& ec_kv : idx_.ecmap ) {
-                denom = 0.0;
-
-                // first, compute the denominator: a normalizer
-                // iterate over transcripts in EC map
-                auto w_search = weight_map_.find(ec_kv.first);
-
-                // everything in ecmap should be in weight_map
-                assert( w_search != weight_map_.end() );
-                assert( w_search->second.size() == ec_kv.second.size() );
-
-                for (auto t_it = 0; t_it < ec_kv.second.size(); ++t_it) {
-                    denom += alpha_[ec_kv.second[t_it]] * w_search->second[t_it];
-                }
-
-                if (denom < TOLERANCE) {
-                    continue;
-                }
-
-                /* std::cout << "denom: " << denom << std::endl; */
-
-                // compute the update step
-                for (auto t_it = 0; t_it < ec_kv.second.size(); ++t_it) {
-                    next_alpha[ec_kv.second[t_it]] += counts_[ec_kv.first] *
-                        (w_search->second[t_it] * alpha_[ec_kv.second[t_it]] / denom);
-                }
-            }
-
-            // TODO: check for relative difference for convergence in EM
-
-            // reassign alpha_ to next_alpha
-            std::copy(next_alpha.begin(), next_alpha.end(), alpha_.begin());
-
-            // clear all next_alpha values 0 for next iteration
-            std::fill(next_alpha.begin(), next_alpha.end(), 0.0);
-	    }
-
-        std::cout << std::endl;
-        std::cout.flush();
-	}
-
-    void compute_rho() {
-        if (rho_set_) {
-            // rho has already been set, let's clear it
-            std::fill(rho_.begin(), rho_.end(), 0.0);
+  // counts is vector from collector, with indices corresponding to ec ids
+  // TODO: initialize alpha a bit more intelligently
+  // TODO: refactor to remove dependence on Index
+  EMAlgorithm(const ProgramOptions& opt, const Index& idx,
+              const std::vector<int>& counts,
+              const std::vector<double>& eff_lens,
+              const WeightMap& wm) :
+    idx_(idx),
+    num_trans_(idx.num_trans),
+    counts_(counts),
+    eff_lens_(eff_lens),
+    weight_map_(wm),
+    alpha_(idx.num_trans, 1.0/idx.num_trans), // uniform distribution over transcripts
+    rho_(idx.num_trans, 0.0),
+    rho_set_(false)
+  {}
+
+  ~EMAlgorithm() {}
+
+  void run(size_t n_iter = 500) {
+    std::vector<double> next_alpha(alpha_.size(), 0.0);
+
+    assert(weight_map_.size() == counts_.size());
+
+    double denom;
+
+    std::cout << "[em]\tfishing for the right mixture (. = 50 rounds)" <<
+              std::endl;
+
+    for (auto i = 0; i < n_iter; ++i) {
+      if (i % 50 == 0) {
+        std::cout << ".";
+        if (i % 500 == 0 && i > 0) {
+          std::cout << std::endl;
         }
+      }
 
-        double total {0.0};
-        for (auto i = 0; i < alpha_.size(); ++i) {
-            // TODO: consider what the right tolerance is
-            if (eff_lens_[i] < TOLERANCE) {
-                continue;
-            }
-            rho_[i] = alpha_[i] / eff_lens_[i];
-            total += rho_[i];
+      for (auto& ec_kv : idx_.ecmap ) {
+        denom = 0.0;
+
+        // first, compute the denominator: a normalizer
+        // iterate over transcripts in EC map
+        auto w_search = weight_map_.find(ec_kv.first);
+
+        // everything in ecmap should be in weight_map
+        assert( w_search != weight_map_.end() );
+        assert( w_search->second.size() == ec_kv.second.size() );
+
+        for (auto t_it = 0; t_it < ec_kv.second.size(); ++t_it) {
+          denom += alpha_[ec_kv.second[t_it]] * w_search->second[t_it];
+        }
+
+        if (denom < TOLERANCE) {
+          continue;
         }
 
-        for (auto& r : rho_) {
-            r /= total;
+        /* std::cout << "denom: " << denom << std::endl; */
+
+        // compute the update step
+        for (auto t_it = 0; t_it < ec_kv.second.size(); ++t_it) {
+          next_alpha[ec_kv.second[t_it]] += counts_[ec_kv.first] *
+                                            (w_search->second[t_it] * alpha_[ec_kv.second[t_it]] / denom);
         }
+      }
+
+      // TODO: check for relative difference for convergence in EM
+
+      // reassign alpha_ to next_alpha
+      std::copy(next_alpha.begin(), next_alpha.end(), alpha_.begin());
 
-        rho_set_ = true;
+      // reset all next_alpha values to 0 for the next iteration
+      std::fill(next_alpha.begin(), next_alpha.end(), 0.0);
     }
 
-    void write(const std::string& dir_out) const {
-        const std::string out_fname = "/expression.txt";
+    std::cout << std::endl;
+    std::cout.flush();
+  }
 
-        std::ofstream out;
-        out.open(dir_out + out_fname, std::ios::out);
+  void compute_rho() {
+    if (rho_set_) {
+      // rho has already been set, let's clear it
+      std::fill(rho_.begin(), rho_.end(), 0.0);
+    }
 
-        if (!out.is_open()) {
-            std::cerr << "Error opening '" << dir_out + out_fname << "'" <<
-                std::endl;
-            exit(1);
-        }
+    double total {0.0};
+    for (auto i = 0; i < alpha_.size(); ++i) {
+      // TODO: consider what the right tolerance is
+      if (eff_lens_[i] < TOLERANCE) {
+        continue;
+      }
+      rho_[i] = alpha_[i] / eff_lens_[i];
+      total += rho_[i];
+    }
+
+    for (auto& r : rho_) {
+      r /= total;
+    }
+
+    rho_set_ = true;
+  }
 
-        out.precision(15);
-
-        out <<
-            "target_id" << "\t" <<
-            "kallisto_id" << "\t" <<
-            "rho" << "\t" <<
-            "tpm" << "\t" <<
-            "est_counts" <<
-            std::endl;
-
-        const double MILLION = 1e6;
-
-        for (auto i = 0; i < rho_.size(); ++i) {
-            out <<
-                idx_.target_names_[i] << "\t" <<
-                i << "\t" <<
-                rho_[i] << "\t" <<
-                rho_[i] * MILLION << "\t" <<
-                alpha_[i] <<
+  void write(const std::string& dir_out) const {
+    const std::string out_fname = "/expression.txt";
+
+    std::ofstream out;
+    out.open(dir_out + out_fname, std::ios::out);
+
+    if (!out.is_open()) {
+      std::cerr << "Error opening '" << dir_out + out_fname << "'" <<
                 std::endl;
-        }
+      exit(1);
+    }
 
-        out.flush();
-        out.close();
+    out.precision(15);
+
+    out <<
+        "target_id" << "\t" <<
+        "kallisto_id" << "\t" <<
+        "rho" << "\t" <<
+        "tpm" << "\t" <<
+        "est_counts" <<
+        std::endl;
+
+    const double MILLION = 1e6;
+
+    for (auto i = 0; i < rho_.size(); ++i) {
+      out <<
+          idx_.target_names_[i] << "\t" <<
+          i << "\t" <<
+          rho_[i] << "\t" <<
+          rho_[i] * MILLION << "\t" <<
+          alpha_[i] <<
+          std::endl;
     }
 
-	int num_trans_;
-	const Index &idx_;
-	const std::vector<int>& counts_;
-	const std::vector<double>& eff_lens_;
-	const WeightMap& weight_map_;
-    std::vector<double> alpha_;
-    std::vector<double> rho_;
-    bool rho_set_;
+    out.flush();
+    out.close();
+  }
+
+  int num_trans_;
+  const Index& idx_;
+  const std::vector<int>& counts_;
+  const std::vector<double>& eff_lens_;
+  const WeightMap& weight_map_;
+  std::vector<double> alpha_;
+  std::vector<double> rho_;
+  bool rho_set_;
 };
 
 #endif // KALLISTO_EMALGORITHM_H
diff --git a/src/Kmer.cpp b/src/Kmer.cpp
index 3ce1f47e..2810a431 100644
--- a/src/Kmer.cpp
+++ b/src/Kmer.cpp
@@ -19,38 +19,38 @@ void int2bin(uint32_t a, char *buffer, int buf_size) {
 */
 
 static const uint64_t twin_table[256] = {
-0xFF, 0xBF, 0x7F, 0x3F, 0xEF, 0xAF, 0x6F, 0x2F,
-0xDF, 0x9F, 0x5F, 0x1F, 0xCF, 0x8F, 0x4F, 0x0F,
-0xFB, 0xBB, 0x7B, 0x3B, 0xEB, 0xAB, 0x6B, 0x2B,
-0xDB, 0x9B, 0x5B, 0x1B, 0xCB, 0x8B, 0x4B, 0x0B,
-0xF7, 0xB7, 0x77, 0x37, 0xE7, 0xA7, 0x67, 0x27,
-0xD7, 0x97, 0x57, 0x17, 0xC7, 0x87, 0x47, 0x07,
-0xF3, 0xB3, 0x73, 0x33, 0xE3, 0xA3, 0x63, 0x23,
-0xD3, 0x93, 0x53, 0x13, 0xC3, 0x83, 0x43, 0x03,
-0xFE, 0xBE, 0x7E, 0x3E, 0xEE, 0xAE, 0x6E, 0x2E,
-0xDE, 0x9E, 0x5E, 0x1E, 0xCE, 0x8E, 0x4E, 0x0E,
-0xFA, 0xBA, 0x7A, 0x3A, 0xEA, 0xAA, 0x6A, 0x2A,
-0xDA, 0x9A, 0x5A, 0x1A, 0xCA, 0x8A, 0x4A, 0x0A,
-0xF6, 0xB6, 0x76, 0x36, 0xE6, 0xA6, 0x66, 0x26,
-0xD6, 0x96, 0x56, 0x16, 0xC6, 0x86, 0x46, 0x06,
-0xF2, 0xB2, 0x72, 0x32, 0xE2, 0xA2, 0x62, 0x22,
-0xD2, 0x92, 0x52, 0x12, 0xC2, 0x82, 0x42, 0x02,
-0xFD, 0xBD, 0x7D, 0x3D, 0xED, 0xAD, 0x6D, 0x2D,
-0xDD, 0x9D, 0x5D, 0x1D, 0xCD, 0x8D, 0x4D, 0x0D,
-0xF9, 0xB9, 0x79, 0x39, 0xE9, 0xA9, 0x69, 0x29,
-0xD9, 0x99, 0x59, 0x19, 0xC9, 0x89, 0x49, 0x09,
-0xF5, 0xB5, 0x75, 0x35, 0xE5, 0xA5, 0x65, 0x25,
-0xD5, 0x95, 0x55, 0x15, 0xC5, 0x85, 0x45, 0x05,
-0xF1, 0xB1, 0x71, 0x31, 0xE1, 0xA1, 0x61, 0x21,
-0xD1, 0x91, 0x51, 0x11, 0xC1, 0x81, 0x41, 0x01,
-0xFC, 0xBC, 0x7C, 0x3C, 0xEC, 0xAC, 0x6C, 0x2C,
-0xDC, 0x9C, 0x5C, 0x1C, 0xCC, 0x8C, 0x4C, 0x0C,
-0xF8, 0xB8, 0x78, 0x38, 0xE8, 0xA8, 0x68, 0x28,
-0xD8, 0x98, 0x58, 0x18, 0xC8, 0x88, 0x48, 0x08,
-0xF4, 0xB4, 0x74, 0x34, 0xE4, 0xA4, 0x64, 0x24,
-0xD4, 0x94, 0x54, 0x14, 0xC4, 0x84, 0x44, 0x04,
-0xF0, 0xB0, 0x70, 0x30, 0xE0, 0xA0, 0x60, 0x20,
-0xD0, 0x90, 0x50, 0x10, 0xC0, 0x80, 0x40, 0x00
+  0xFF, 0xBF, 0x7F, 0x3F, 0xEF, 0xAF, 0x6F, 0x2F,
+  0xDF, 0x9F, 0x5F, 0x1F, 0xCF, 0x8F, 0x4F, 0x0F,
+  0xFB, 0xBB, 0x7B, 0x3B, 0xEB, 0xAB, 0x6B, 0x2B,
+  0xDB, 0x9B, 0x5B, 0x1B, 0xCB, 0x8B, 0x4B, 0x0B,
+  0xF7, 0xB7, 0x77, 0x37, 0xE7, 0xA7, 0x67, 0x27,
+  0xD7, 0x97, 0x57, 0x17, 0xC7, 0x87, 0x47, 0x07,
+  0xF3, 0xB3, 0x73, 0x33, 0xE3, 0xA3, 0x63, 0x23,
+  0xD3, 0x93, 0x53, 0x13, 0xC3, 0x83, 0x43, 0x03,
+  0xFE, 0xBE, 0x7E, 0x3E, 0xEE, 0xAE, 0x6E, 0x2E,
+  0xDE, 0x9E, 0x5E, 0x1E, 0xCE, 0x8E, 0x4E, 0x0E,
+  0xFA, 0xBA, 0x7A, 0x3A, 0xEA, 0xAA, 0x6A, 0x2A,
+  0xDA, 0x9A, 0x5A, 0x1A, 0xCA, 0x8A, 0x4A, 0x0A,
+  0xF6, 0xB6, 0x76, 0x36, 0xE6, 0xA6, 0x66, 0x26,
+  0xD6, 0x96, 0x56, 0x16, 0xC6, 0x86, 0x46, 0x06,
+  0xF2, 0xB2, 0x72, 0x32, 0xE2, 0xA2, 0x62, 0x22,
+  0xD2, 0x92, 0x52, 0x12, 0xC2, 0x82, 0x42, 0x02,
+  0xFD, 0xBD, 0x7D, 0x3D, 0xED, 0xAD, 0x6D, 0x2D,
+  0xDD, 0x9D, 0x5D, 0x1D, 0xCD, 0x8D, 0x4D, 0x0D,
+  0xF9, 0xB9, 0x79, 0x39, 0xE9, 0xA9, 0x69, 0x29,
+  0xD9, 0x99, 0x59, 0x19, 0xC9, 0x89, 0x49, 0x09,
+  0xF5, 0xB5, 0x75, 0x35, 0xE5, 0xA5, 0x65, 0x25,
+  0xD5, 0x95, 0x55, 0x15, 0xC5, 0x85, 0x45, 0x05,
+  0xF1, 0xB1, 0x71, 0x31, 0xE1, 0xA1, 0x61, 0x21,
+  0xD1, 0x91, 0x51, 0x11, 0xC1, 0x81, 0x41, 0x01,
+  0xFC, 0xBC, 0x7C, 0x3C, 0xEC, 0xAC, 0x6C, 0x2C,
+  0xDC, 0x9C, 0x5C, 0x1C, 0xCC, 0x8C, 0x4C, 0x0C,
+  0xF8, 0xB8, 0x78, 0x38, 0xE8, 0xA8, 0x68, 0x28,
+  0xD8, 0x98, 0x58, 0x18, 0xC8, 0x88, 0x48, 0x08,
+  0xF4, 0xB4, 0x74, 0x34, 0xE4, 0xA4, 0x64, 0x24,
+  0xD4, 0x94, 0x54, 0x14, 0xC4, 0x84, 0x44, 0x04,
+  0xF0, 0xB0, 0x70, 0x30, 0xE0, 0xA0, 0x60, 0x20,
+  0xD0, 0x90, 0x50, 0x10, 0xC0, 0x80, 0x40, 0x00
 };
 
 
@@ -91,8 +91,8 @@ static const uint64_t twin_table[256] = {
 
 */
 // use:  km = Kmer();
-// pre:  
-// post: the DNA string in km is AA....AAA (k times A) 
+// pre:
+// post: the DNA string in km is AA....AAA (k times A)
 Kmer::Kmer() {
   //memset(bytes,0,MAX_K/4);
   for (size_t i = 0; i < MAX_K/32; i++) {
@@ -103,7 +103,7 @@ Kmer::Kmer() {
 
 // use:  _km = Kmer(km);
 // pre:  s[0],...,s[k] are all equal to 'A','C','G' or 'T'
-// post: the DNA string in _km and is the same as in km 
+// post: the DNA string in _km is the same as in km
 Kmer::Kmer(const Kmer& o) {
   //memcpy(bytes,o.bytes,MAX_K/4);
   for (size_t i = 0; i < MAX_K/32; i++) {
@@ -121,8 +121,8 @@ Kmer::Kmer(const char *s) {
 
 
 // use:  _km = km;
-// pre:  
-// post: the DNA string in _km and is the same as in km 
+// pre:
+// post: the DNA string in _km is the same as in km
 Kmer& Kmer::operator=(const Kmer& o) {
   if (this != &o) {
     for (size_t i = 0; i < MAX_K/32; i++) {
@@ -135,7 +135,7 @@ Kmer& Kmer::operator=(const Kmer& o) {
 
 
 // use:  km = Kmer();
-// pre:  
+// pre:
 // post: The last bit in the bit array which stores the DNA string has been set to 1
 //       which indicates that the km is invalid
 void Kmer::set_deleted() {
@@ -144,17 +144,19 @@ void Kmer::set_deleted() {
 
 
 // use:  b = (km1 < km2);
-// pre:  
+// pre:
 // post: b is true <==> the DNA strings in km1 is alphabetically smaller than
-//                      the DNA string in km2 
+//                      the DNA string in km2
 bool Kmer::operator<(const Kmer& o) const {
 
   bool r = false;
   for (size_t i = 0; i < MAX_K/32; ++i) {
-    if (longs[i] < o.longs[i])
+    if (longs[i] < o.longs[i]) {
       return true;
-    if (longs[i] > o.longs[i])
+    }
+    if (longs[i] > o.longs[i]) {
       return false;
+    }
   }
   return false;
 
@@ -176,7 +178,7 @@ bool Kmer::operator<(const Kmer& o) const {
 
 
 // use:  b = (km1 == km2);
-// pre:  
+// pre:
 // post: b is true <==> the DNA strings in km1 and km2 are equal
 bool Kmer::operator==(const Kmer& o) const {
   for (size_t i = 0; i < MAX_K/32; i++) {
@@ -191,16 +193,16 @@ bool Kmer::operator==(const Kmer& o) const {
 
 // use:  km.set_kmer(s);
 // pre:  s[0],...,s[k-1] are all 'A','C','G' or 'T'
-// post: The DNA string in km is now equal to s 
+// post: The DNA string in km is now equal to s
 void Kmer::set_kmer(const char *s)  {
   size_t i,j,l;
   memset(bytes,0,MAX_K/4);
-    
+
   for (i = 0; i < k; ++i) {
     j = i % 32;
     l = i/32;
     assert(*s != '\0');
-    
+
     size_t x = ((*s) & 4) >> 1;
     longs[l] |= ((x + ((x ^ (*s & 2)) >>1)) << (2*(31-j)));
     /*
@@ -210,24 +212,24 @@ void Kmer::set_kmer(const char *s)  {
       case 'G': longs[l] |= (0x02 << (2*j)); break;
       case 'T': longs[l] |= (0x03 << (2*j)); break;
       }*/
-    
-    s++; 
+
+    s++;
   }
 }
 
 
 // use:  i = km.hash();
-// pre:   
-// post: i is the hash value of km 
+// pre:
+// post: i is the hash value of km
 uint64_t Kmer::hash() const {
   uint64_t ret;
-  MurmurHash3_x64_64((const void*)bytes,k_bytes,0,&ret);
+  MurmurHash3_x64_64((const void *)bytes,k_bytes,0,&ret);
   return ret;
 }
 
 
 // use:  rep = km.rep();
-// pre:   
+// pre:
 // post: rep is km.twin() if the DNA string in km.twin() is alphabetically smaller than
 //       the DNA string in km, else rep is km
 Kmer Kmer::rep() const {
@@ -237,7 +239,7 @@ Kmer Kmer::rep() const {
 
 
 // use:  tw = km.twin();
-// pre:   
+// pre:
 // post: tw is the twin kmer with respect to km,
 //       i.e. if the DNA string in km is 'GTCA'
 //          then the DNA string in tw is 'TGAC'
@@ -245,22 +247,22 @@ Kmer Kmer::twin() const {
   Kmer km(*this);
 
   size_t nlongs = (k+31)/32;
-  
+
   /*cout << "debugging twin for" << endl;
   cout << toString() << endl;
   cout << getBinary() << endl;
   cout << "nlongs " << nlongs << endl;
   cout << "flipping bits" << endl;*/
- 
+
   for (size_t i = 0; i < nlongs; i++) {
     uint64_t v = longs[i];
-    km.longs[nlongs-1-i] =  
-      (twin_table[v & 0xFF] << 56) | 
-      (twin_table[(v>>8) & 0xFF] << 48) | 
-      (twin_table[(v>>16) & 0xFF] << 40) | 
+    km.longs[nlongs-1-i] =
+      (twin_table[v & 0xFF] << 56) |
+      (twin_table[(v>>8) & 0xFF] << 48) |
+      (twin_table[(v>>16) & 0xFF] << 40) |
       (twin_table[(v>>24) & 0xFF] << 32) |
-      (twin_table[(v>>32) & 0xFF] << 24) | 
-      (twin_table[(v>>40) & 0xFF] << 16) | 
+      (twin_table[(v>>32) & 0xFF] << 24) |
+      (twin_table[(v>>40) & 0xFF] << 16) |
       (twin_table[(v>>48) & 0xFF] << 8)  |
       (twin_table[(v>>56)]);
   }
@@ -274,17 +276,17 @@ Kmer Kmer::twin() const {
   //cout << "shift: " << shift << endl;
   //cout << "shiftmask" << endl << bitset<64>(shiftmask) << endl;
 
-  
+
   km.longs[0] = km.longs[0] << shift;
   //cout << km.getBinary() << endl;
   for (size_t i = 1; i < nlongs; i++) {
     //cout << "forloop " << i << endl;
     km.longs[i-1] |= (km.longs[i] & shiftmask) >> (64-shift);
     //cout << km.getBinary() << endl;
-    km.longs[i] = km.longs[i] << shift;    
+    km.longs[i] = km.longs[i] << shift;
     //cout << km.getBinary() << endl;
   }
-  
+
   /*
   for (size_t i = (k+31)/32; i < nlongs; i++) {
     km.longs[i] = 0;
@@ -301,7 +303,7 @@ Kmer Kmer::twin() const {
     uint64_t v = ~longs[i]; // flip bits
     // swap 2 bits
     v = ((v >> 2)  & 0x3333333333333333ULL) | ((v & 0x3333333333333333ULL) << 2);
-    // swap nibbles ... 
+    // swap nibbles ...
     v = ((v >> 4)  & 0x0F0F0F0F0F0F0F0FULL) | ((v & 0x0F0F0F0F0F0F0F0FULL) << 4);
     // swap bytes
     v = ((v >> 8)  & 0x00FF00FF00FF00FFULL) | ((v & 0x00FF00FF00FF00FFULL) << 8);
@@ -317,7 +319,7 @@ Kmer Kmer::twin() const {
   for (size_t i = 0; i < k_bytes; i++) {
     km.bytes[i] = ~bytes[i];
   }
-  
+
   km.bytes[k_bytes-1] ^= ~k_modmask;
   km.shiftForward(8*k_bytes-2*k);
   uint8_t tmp;
@@ -331,7 +333,7 @@ Kmer Kmer::twin() const {
   if ((k_bytes %2) == 1) {
     km.bytes[k_bytes/2] = base_swap[km.bytes[k_bytes/2]];
   }
-  
+
   return km;
   */
 }
@@ -346,18 +348,18 @@ Kmer Kmer::getLink(const size_t index) const {
   char c;
 
   switch (index % 4) {
-    case 0: c = 'A'; break;
-    case 1: c = 'C'; break;
-    case 2: c = 'G'; break;
-    case 3: c = 'T'; break;
+  case 0: c = 'A'; break;
+  case 1: c = 'C'; break;
+  case 2: c = 'G'; break;
+  case 3: c = 'T'; break;
   }
-  
+
   return (index < 4) ? forwardBase(c) : backwardBase(c);
 }
 
 
 // use:  fw = km.forwardBase(c)
-// pre:  
+// pre:
 // post: fw is the forward kmer from km with last character c,
 //       i.e. if the DNA string in km is 'ACGT' and c equals 'T' then
 //       the DNA string in fw is 'CGTT'
@@ -374,30 +376,30 @@ Kmer Kmer::forwardBase(const char b) const {
   km.longs[nlongs-1] |= (x + ((x ^ (b & 2)) >>1 )) << (2*(31-((k-1)%32)));
 
   return km;
-/********
-  km.shiftBackward(2);
-  km.bytes[k_bytes-1] &= Kmer::k_modmask;
-
-  switch(b) {
-    case 'A': km.bytes[k_bytes-1] |= 0x00 << s; break;
-    case 'C': km.bytes[k_bytes-1] |= 0x01 << s; break;
-    case 'G': km.bytes[k_bytes-1] |= 0x02 << s; break;
-    case 'T': km.bytes[k_bytes-1] |= 0x03 << s; break;
-  }
+  /********
+    km.shiftBackward(2);
+    km.bytes[k_bytes-1] &= Kmer::k_modmask;
+
+    switch(b) {
+      case 'A': km.bytes[k_bytes-1] |= 0x00 << s; break;
+      case 'C': km.bytes[k_bytes-1] |= 0x01 << s; break;
+      case 'G': km.bytes[k_bytes-1] |= 0x02 << s; break;
+      case 'T': km.bytes[k_bytes-1] |= 0x03 << s; break;
+    }
 
-  return km;
-*/
+    return km;
+  */
 }
 
 
 // use:  bw = km.backwardBase(c)
-// pre:  
+// pre:
 // post: bw is the backward kmer from km with first character c,
 //       i.e. if the DNA string in km is 'ACGT' and c equals 'T' then
 //       the DNA string in bw is 'TACG'
 Kmer Kmer::backwardBase(const char b) const {
   Kmer km(*this);
-  
+
   size_t nlongs = (k+31)/32;
   km.longs[nlongs-1] = km.longs[nlongs-1] >>2;
   km.longs[nlongs-1] &= (k%32) ? (((1ULL << (2*(k%32)))-1) << 2*(32-(k%32))) : ~0ULL;
@@ -432,16 +434,16 @@ Kmer Kmer::backwardBase(const char b) const {
 
 
 // use:  km.printBinary();
-// pre:   
-// post: The bits in the binary representation of the 
+// pre:
+// post: The bits in the binary representation of the
 //       DNA string for km has been printed to stdout
 std::string Kmer::getBinary() const {
-  
+
   size_t nlongs = MAX_K/32;
   std::string r;
   r.reserve(64*nlongs);
   for (size_t i = 0; i < nlongs; i++) {
-    r.append(std::bitset<64>(longs[i]).to_string<char,std::char_traits<char>,std::allocator<char> >());
+    r.append(std::bitset<64>(longs[i]).to_string<char,std::char_traits<char>,std::allocator<char>>());
   }
   return r;
   /*
@@ -449,7 +451,7 @@ std::string Kmer::getBinary() const {
     int2bin(bytes[i],buff,8);
     printf("%s",buff);
   }
-  
+
   printf("\n");
   */
 }
@@ -458,18 +460,18 @@ std::string Kmer::getBinary() const {
 // use:  km.toString(s);
 // pre:  s has space for k+1 elements
 // post: s[0,...,k-1] is the DNA string for the Kmer km and s[k] = '\0'
-void Kmer::toString(char * s) const {
+void Kmer::toString(char *s) const {
   size_t i,j,l;
-  
+
   for (i = 0; i < k; i++) {
     j = i % 32;
     l = i / 32;
 
     switch(((longs[l]) >> (2*(31-j)) )& 0x03 ) {
-      case 0x00: *s = 'A'; ++s; break;
-      case 0x01: *s = 'C'; ++s; break;
-      case 0x02: *s = 'G'; ++s; break;
-      case 0x03: *s = 'T'; ++s; break;  
+    case 0x00: *s = 'A'; ++s; break;
+    case 0x01: *s = 'C'; ++s; break;
+    case 0x02: *s = 'G'; ++s; break;
+    case 0x03: *s = 'T'; ++s; break;
     }
   }
 
@@ -490,8 +492,8 @@ std::string Kmer::toString() const {
 //       if i=2 then ACGT becomes XACG and X is A,C,G or T
 /*
 void Kmer::shiftForward(int shift) {
-  
-  size_t shiftmask = 
+
+  size_t shiftmask =
 
 
   if (shift>0) {
@@ -510,7 +512,7 @@ void Kmer::shiftForward(int shift) {
 */
 
 // use:  km.shiftBackward(i);
-// pre:  i = 2,4,6 
+// pre:  i = 2,4,6
 // post: The DNA string in km has been shifted i/2 positions backward i.e.
 //       if i=2 then ACGT becomes CGTX and X is A,C,G or T
 /*
@@ -521,7 +523,7 @@ void Kmer::shiftBackward(int shift) {
         bytes[i] >>= shift;
         bytes[i] |= (uint8_t) ( bytes[i+1] << (8-shift));
       }
-      
+
       bytes[Kmer::k_bytes-1] >>= shift;
     } else {
       assert(0); // bad
diff --git a/src/Kmer.hpp b/src/Kmer.hpp
index 77820074..729793dd 100644
--- a/src/Kmer.hpp
+++ b/src/Kmer.hpp
@@ -2,7 +2,7 @@
 #define BFG_KMER_HPP
 
 #ifndef MAX_KMER_SIZE
- #define MAX_KMER_SIZE 32
+#define MAX_KMER_SIZE 32
 #endif
 
 #include <stdio.h>
@@ -16,8 +16,8 @@
 
 
 
-/* Short description: 
- *  - Store kmer strings by using 2 bits per base instead of 8 
+/* Short description:
+ *  - Store kmer strings by using 2 bits per base instead of 8
  *  - Easily return reverse complements of kmers, e.g. TTGG -> CCAA
  *  - Easily compare kmers
  *  - Provide hash of kmers
@@ -30,10 +30,10 @@ class Kmer {
   Kmer(const Kmer& o);
   explicit Kmer(const char *s);
 
-  
+
 
   Kmer& operator=(const Kmer& o);
-  
+
   void set_deleted();
 
   bool operator<(const Kmer& o) const;
@@ -48,7 +48,7 @@ class Kmer {
 
   uint64_t hash() const;
 
-  
+
 
   Kmer twin() const;
   Kmer rep() const;
@@ -58,10 +58,10 @@ class Kmer {
   Kmer forwardBase(const char b) const;
 
   Kmer backwardBase(const char b) const;
-  
+
   std::string getBinary() const;
-  
-  void toString(char * s) const;
+
+  void toString(char *s) const;
   std::string toString() const;
 
   // static functions
@@ -87,14 +87,14 @@ class Kmer {
 
   // private functions
 //void shiftForward(int shift);
-  
+
 //void shiftBackward(int shift);
 
 };
 
 
 struct KmerHash {
-  size_t operator()(const Kmer &km) const {
+  size_t operator()(const Kmer& km) const {
     return km.hash();
   }
 };
diff --git a/src/KmerHashTable.h b/src/KmerHashTable.h
index 4a1642c9..81e0ad6a 100644
--- a/src/KmerHashTable.h
+++ b/src/KmerHashTable.h
@@ -11,229 +11,229 @@
 
 template<typename T, typename Hash>
 struct KmerHashTable {
-	using value_type = std::pair<Kmer, T>;
-	using key_type = Kmer;
-	using mapped_type = T;
+  using value_type = std::pair<Kmer, T>;
+  using key_type = Kmer;
+  using mapped_type = T;
 
-	Hash hasher;
-	value_type* table;
-	size_t size_, pop;
-	value_type empty;
+  Hash hasher;
+  value_type *table;
+  size_t size_, pop;
+  value_type empty;
 
 
 // ---- iterator ----
 
-	template<bool is_const_iterator = true> 
-	class iterator_ : public std::iterator<std::bidirectional_iterator_tag, value_type> {
-	public:
-	
-  	typedef typename std::conditional<is_const_iterator, const KmerHashTable*, KmerHashTable*>::type DataStructurePointerType;
-	  typedef typename std::conditional<is_const_iterator, const value_type&, value_type&>::type ValueReferenceType;
-	  typedef typename std::conditional<is_const_iterator, const value_type*, value_type*>::type ValuePointerType;
- 
- 
-  	DataStructurePointerType ht;
-		size_t h;
-
-  	iterator_(DataStructurePointerType ht_) : ht(ht_), h(ht_->size_) {}
-  	iterator_(DataStructurePointerType ht_, size_t h_) :  ht(ht_), h(h_) {}
-	
-  	iterator_(const iterator_<false>& o) : ht(o.ht), h(o.h) {}
-		iterator_& operator=(const iterator_& o) {ht=o.ht; h=o.h;}
-			
-		ValueReferenceType operator*() const {return ht->table[h];}
-	  ValuePointerType operator->() const {return &(ht->table[h]);}
-
-		void find_first() {
-			h = 0;
-			if (ht->table != nullptr && ht->size_>0) {
-				if (ht->table[h].first == ht->empty.first) {
-					operator++();
-				}
-			}
-		}
-		
-		iterator_& operator++() {
-			if (h == ht->size_) {
-				return *this;
-			}
-			++h;
-			for (; h < ht->size_; ++h) {
-				if (ht->table[h].first != ht->empty.first) {
-					break;
-				}
-			}
-			return *this;
-		}
-		bool operator==(const iterator_ &o) const {return (ht->table == o.ht->table) && (h == o.h);}
-		bool operator!=(const iterator_ &o) const {return !(this->operator==(o));}
-  	friend class iterator_<true>;
-	};
-	
-	typedef iterator_<true> const_iterator;
-	typedef iterator_<false> iterator;
-
-
-	// --- hash table
-	
-	
+  template<bool is_const_iterator = true>
+  class iterator_ : public std::iterator<std::bidirectional_iterator_tag, value_type> {
+   public:
+
+    typedef typename std::conditional<is_const_iterator, const KmerHashTable *, KmerHashTable *>::type DataStructurePointerType;
+    typedef typename std::conditional<is_const_iterator, const value_type&, value_type&>::type ValueReferenceType;
+    typedef typename std::conditional<is_const_iterator, const value_type *, value_type *>::type ValuePointerType;
+
+
+    DataStructurePointerType ht;
+    size_t h;
+
+    iterator_(DataStructurePointerType ht_) : ht(ht_), h(ht_->size_) {}
+    iterator_(DataStructurePointerType ht_, size_t h_) :  ht(ht_), h(h_) {}
+
+    iterator_(const iterator_<false>& o) : ht(o.ht), h(o.h) {}
+    iterator_& operator=(const iterator_& o) {ht=o.ht; h=o.h;}
+
+    ValueReferenceType operator*() const {return ht->table[h];}
+    ValuePointerType operator->() const {return &(ht->table[h]);}
+
+    void find_first() {
+      h = 0;
+      if (ht->table != nullptr && ht->size_>0) {
+        if (ht->table[h].first == ht->empty.first) {
+          operator++();
+        }
+      }
+    }
+
+    iterator_& operator++() {
+      if (h == ht->size_) {
+        return *this;
+      }
+      ++h;
+      for (; h < ht->size_; ++h) {
+        if (ht->table[h].first != ht->empty.first) {
+          break;
+        }
+      }
+      return *this;
+    }
+    bool operator==(const iterator_ &o) const {return (ht->table == o.ht->table) && (h == o.h);}
+    bool operator!=(const iterator_ &o) const {return !(this->operator==(o));}
+    friend class iterator_<true>;
+  };
+
+  typedef iterator_<true> const_iterator;
+  typedef iterator_<false> iterator;
+
+
+  // --- hash table
+
+
   KmerHashTable(const Hash& h = Hash() ) : hasher(h), table(nullptr), size_(0), pop(0) {
-		empty.first.set_deleted();
-		init_table(1024);
-	}
+    empty.first.set_deleted();
+    init_table(1024);
+  }
 
   KmerHashTable(size_t sz, const Hash& h = Hash() ) : hasher(h), table(nullptr), size_(0), pop(0) {
- 		empty.first.set_deleted();
-		init_table((size_t) (1.2*sz));
-	}
-
-	~KmerHashTable() {
-		clear_table();
-	}
-
-	void clear_table() {
-		if (table != nullptr) {
-			delete[] table;
-			table = nullptr;
-		}
-		size_ = 0;
-		pop  = 0;
-	}
-
-	size_t size() const {
-		return pop;
-	}
-
-	void clear() {
-		std::fill(table, table+size_, empty);
-		pop = 0;
-	}
-	
-	void init_table(size_t sz) {
-		clear_table();
-		size_ = rndup(sz);
-		//cerr << "init table of size " << size_ << endl;
-		table = new value_type[size_];
-		std::fill(table, table+size_, empty);
-	}
-
-	iterator find(const Kmer& key) {
-		size_t h = hasher(key) & (size_-1);
-
-		for (;; h =  (h+1!=size_ ? h+1 : 0)) {
-			if (table[h].first == empty.first) {
-				// empty slot, insert here
-				return iterator(this);
-			} else if (table[h].first == key) {
-				// same key, found
-				return iterator(this, h);
-			}
-		}
-	}
-
-	const_iterator find(const Kmer& key) const {
-		
-		size_t h = hasher(key) & (size_-1);
-
-		for (;; h =  (h+1!=size_ ? h+1 : 0)) {
-			if (table[h].first == empty.first) {
-				// empty slot, insert here
-				return const_iterator(this);
-			} else if (table[h].first == key) {
-				// same key, found
-				return const_iterator(this, h);
-			}
-		}
-	}
-	
-
-	std::pair<iterator,bool> insert(const value_type &val) {
-		//cerr << "inserting " << val.first.toString() << " = " << val.second << endl;
-		if ((pop + (pop>>4))> size_) { // if more than 80% full
-			//cerr << "-- triggered resize--" << endl;
-			reserve(2*size_);
-		}
-
-		size_t h = hasher(val.first) & (size_-1);
-		//cerr << " hash value = " << h << endl;
-		for (;; h = (h+1!=size_ ? h+1 : 0)) {
-			//cerr << "  lookup at " << h << endl;
-			if (table[h].first == empty.first) {
-				//cerr << "   found empty slot" << endl;
-				// empty slot, insert here
-				table[h] = val;
-				++pop; // new table
-				return {iterator(this, h), true};
-			} else if (table[h].first == val.first) {
-				// same key, update value
-				//cerr << "   found key already here " << table[h].first.toString() << " = " << table[h].second <<  endl;
-				return {iterator(this, h), false};
-			}
-		}
-		
-	}
-	
-	void reserve(size_t sz) {
-		
-		if (sz <= size_) {
-			return;
-		}
-
-		value_type* old_table = table;
-		size_t old_size_ = size_;
-
-		
-		size_ = rndup(sz);
-		pop = 0;
-		
-		table = new value_type[size_];
-		std::fill(table, table+size_, empty);
-		for (size_t i = 0; i < old_size_; i++) {
-			if (old_table[i].first != empty.first) {
-				insert(old_table[i]);
-			}
-		}
-		delete[] old_table;
-		old_table = nullptr;
-		
-	}
-	
-	size_t rndup(size_t v) {
-		v--;
-		v |= v >> 1;
-		v |= v >> 2;
-		v |= v >> 4;
-		v |= v >> 8;
-		v |= v >> 16;
-		v |= v >> 32;
-		v++;
-		return v;
-	}
-
-	iterator begin() {
-		iterator it(this);
-		it.find_first();
-		return it;
-	}
-
-	const_iterator begin() const {
-		const_iterator it(this);
-		it.find_first();
-		return it;
-	}
-
-	iterator end() {
-		return iterator(this);
-	}
-
-	const_iterator end() const {
-		return const_iterator(this);
-	}
-
-
-	
-
-	
+    empty.first.set_deleted();
+    init_table((size_t) (1.2*sz));
+  }
+
+  ~KmerHashTable() {
+    clear_table();
+  }
+
+  void clear_table() {
+    if (table != nullptr) {
+      delete[] table;
+      table = nullptr;
+    }
+    size_ = 0;
+    pop  = 0;
+  }
+
+  size_t size() const {
+    return pop;
+  }
+
+  void clear() {
+    std::fill(table, table+size_, empty);
+    pop = 0;
+  }
+
+  void init_table(size_t sz) {
+    clear_table();
+    size_ = rndup(sz);
+    //cerr << "init table of size " << size_ << endl;
+    table = new value_type[size_];
+    std::fill(table, table+size_, empty);
+  }
+
+  iterator find(const Kmer& key) {
+    size_t h = hasher(key) & (size_-1);
+
+    for (;; h =  (h+1!=size_ ? h+1 : 0)) {
+      if (table[h].first == empty.first) {
+        // empty slot reached: key is not in the table, return end()
+        return iterator(this);
+      } else if (table[h].first == key) {
+        // same key, found
+        return iterator(this, h);
+      }
+    }
+  }
+
+  const_iterator find(const Kmer& key) const {
+
+    size_t h = hasher(key) & (size_-1);
+
+    for (;; h =  (h+1!=size_ ? h+1 : 0)) {
+      if (table[h].first == empty.first) {
+        // empty slot reached: key is not in the table, return end()
+        return const_iterator(this);
+      } else if (table[h].first == key) {
+        // same key, found
+        return const_iterator(this, h);
+      }
+    }
+  }
+
+
+  std::pair<iterator,bool> insert(const value_type& val) {
+    //cerr << "inserting " << val.first.toString() << " = " << val.second << endl;
+    if ((pop + (pop>>4))> size_) { // grow once pop*17/16 exceeds size_ (about 94% full, not 80%)
+      //cerr << "-- triggered resize--" << endl;
+      reserve(2*size_);
+    }
+
+    size_t h = hasher(val.first) & (size_-1);
+    //cerr << " hash value = " << h << endl;
+    for (;; h = (h+1!=size_ ? h+1 : 0)) {
+      //cerr << "  lookup at " << h << endl;
+      if (table[h].first == empty.first) {
+        //cerr << "   found empty slot" << endl;
+        // empty slot, insert here
+        table[h] = val;
+        ++pop; // one more occupied slot
+        return {iterator(this, h), true};
+      } else if (table[h].first == val.first) {
+        // same key already present: value is left unchanged, report no insertion
+        //cerr << "   found key already here " << table[h].first.toString() << " = " << table[h].second <<  endl;
+        return {iterator(this, h), false};
+      }
+    }
+
+  }
+
+  void reserve(size_t sz) {
+
+    if (sz <= size_) {
+      return;
+    }
+
+    value_type *old_table = table;
+    size_t old_size_ = size_;
+
+
+    size_ = rndup(sz);
+    pop = 0;
+
+    table = new value_type[size_];
+    std::fill(table, table+size_, empty);
+    for (size_t i = 0; i < old_size_; i++) {
+      if (old_table[i].first != empty.first) {
+        insert(old_table[i]);
+      }
+    }
+    delete[] old_table;
+    old_table = nullptr;
+
+  }
+
+  size_t rndup(size_t v) {
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v |= v >> 32;
+    v++;
+    return v;
+  }
+
+  iterator begin() {
+    iterator it(this);
+    it.find_first();
+    return it;
+  }
+
+  const_iterator begin() const {
+    const_iterator it(this);
+    it.find_first();
+    return it;
+  }
+
+  iterator end() {
+    return iterator(this);
+  }
+
+  const_iterator end() const {
+    return const_iterator(this);
+  }
+
+
+
+
+
 };
 
 #endif // KALLISTO_KMERHASHTABLE_H
diff --git a/src/KmerIndex.h b/src/KmerIndex.h
index 250305aa..8752532e 100644
--- a/src/KmerIndex.h
+++ b/src/KmerIndex.h
@@ -28,407 +28,403 @@ KSEQ_INIT(gzFile, gzread)
 using EcMap = std::unordered_map<int, std::vector<int>>;
 
 struct SortedVectorHasher {
-        size_t operator()(const std::vector<int> &v) const {
-                uint64_t r = 0;
-                int i=0;
-                for (auto x : v) {
-                        uint64_t t;
-                        MurmurHash3_x64_64(&x,sizeof(x), 0,&t);
-                        t = (x>>i) | (x<<(64-i));
-                        r = r ^ t;
-                        i = (i+1)%64;
-                }
-                return r;
-        }
+  size_t operator()(const std::vector<int>& v) const {
+    uint64_t r = 0;
+    int i=0;
+    for (auto x : v) {
+      uint64_t t;
+      MurmurHash3_x64_64(&x,sizeof(x), 0,&t);
+      t = (x>>i) | (x<<(64-i));
+      r = r ^ t;
+      i = (i+1)%64;
+    }
+    return r;
+  }
 };
 
-struct KmerIndex
-{
-	KmerIndex(const ProgramOptions& opt) : k(opt.k), num_trans(0), skip(opt.skip) {
-		//LoadTranscripts(opt.transfasta);
-	}
-
-	~KmerIndex() {}
-
-
-	// use:  match(s,l,v)
-	// pre:  v is initialized
-	// post: v contains all equiv classes for the k-mers in s
-	void match(const char *s, int l, std::vector<int> & v) const {
-		KmerIterator kit(s), kit_end;
-		for (int i = 0;kit != kit_end; ++kit,++i) {
-			if (i==skip) {
-				i=0;
-			}
-			if (i==0) {
-				Kmer rep = kit->first.rep();
-				auto search = kmap.find(rep);
-				if (search != kmap.end()) {
-					// if k-mer found
-					v.push_back(search->second); // add equivalence class
-				}
-			}
-		}
-	}
-
-	// use:  res = intersect(ec,v)
-	// pre:  ec is in ecmap, v is a vector of valid transcripts
-	//       v is sorted in increasing order
-	// post: res contains the intersection  of ecmap[ec] and v sorted increasing
-	//       res is empty if ec is not in ecmap
-	std::vector<int> intersect(int ec, const std::vector<int>& v) const {
-		std::vector<int> res;
-		auto search = ecmap.find(ec);
-		if (search != ecmap.end()) {
-			auto &u = search->second;
-			res.reserve(v.size());
-
-			auto a = u.begin();
-			auto b = v.begin();
-
-			while (a != u.end() && b != v.end()) {
-				if (*a < *b) {
-					++a;
-				} else if (*b < *a) {
-					++b;
-				} else {
-					// match
-					res.push_back(*a);
-					++a;
-					++b;
-				}
-			}
-		}
-		return res;
-	}
-
-
-	void BuildTranscripts(const std::string& fasta) {
-		// TODO: add code to check if binary file exists and load it directly
-		// FIXME: check if FASTA file actually exists
-		// If it doesn't, will just hang
-		int l;
-		std::cerr << "Loading fasta file " << fasta
-							<< std::endl;
-        std::cerr << "k: " << k << std::endl;
-		gzFile fp = gzopen(fasta.c_str(),"r");
-		kseq_t *seq = kseq_init(fp);
-
-		int transid = 0;
-		std::unordered_map<Kmer, int, KmerHash> kmcount; // temporary
-
-		// maps kmers to set of transcript ids that contain them
-		std::unordered_map<Kmer, std::vector<int>, KmerHash> all_kmap;
-
-		// for each transcript in fasta file
-		while ((l = kseq_read(seq)) > 0) {
-			bool added = false;
-
-			target_names_.push_back(seq->name.s);
-
-			// if it is long enough
-			if (seq->seq.l >= k) {
-				KmerIterator kit(seq->seq.s), kit_end;
-				// for each k-mer add to map
-				for(;kit != kit_end; ++kit) {
-					Kmer rep = kit->first.rep();
-					kmcount[rep]++;
-					auto search = all_kmap.find(rep);
-					if (search == all_kmap.end()) {
-						// new k-mer
-						all_kmap.insert({rep, {transid}});
-					} else {
-						// seen before
-						std::vector<int> &v = search->second;
-						if (*v.rbegin() < transid) {
-							// but new transcript
-							v.push_back(transid);
-						}
-					}
-					added = true;
-				}
-			}
-			if (added) {
-				trans_lens_.push_back(seq->seq.l);
-				transid++;
-				if (transid % 1000 == 1) {
-					std::cerr << " " << transid << " size of k-mer map " << all_kmap.size() << std::endl;
-				}
-			}
-		}
-
-		num_trans = transid;
-		std::cerr << "Found " << num_trans << " transcripts"
-							<< std::endl
-							<< "Size of k-mer map " << all_kmap.size() << std::endl;
-
-
-		// for each transcript
-		for (int i = 0; i < num_trans; i++ ) {
-			// create its own eqs
-			std::vector<int> single(1,i);
-			ecmap.insert({i,single});
-			ecmapinv.insert({single,i});
-		}
-
-
-		int eqs_id = num_trans;
-
-
-		for (auto& kv : all_kmap) {
-			auto search = ecmapinv.find(kv.second);
-			// if we have seen this equivalence class
-			if (search != ecmapinv.end()) {
-				// update kmap
-				kmap.insert({kv.first, search->second});
-			} else {
-				// else create a new equivalence class and update kmap
-				ecmapinv.insert({kv.second,eqs_id});
-				ecmap.insert({eqs_id, kv.second});
-				kmap.insert({kv.first, eqs_id});
-				eqs_id++;
-			}
-		}
-
-		std::cerr << "Created " << ecmap.size() << " equivalence classes from " << num_trans << " transcripts" << std::endl;
-
-		/* std::cout << "EqId\tTransIdList\n"; */
-		/* for (auto &ekv : ecmap) { */
-		/* 	std::cout << ekv.first; */
-		/* 	for (auto el : ekv.second) { */
-		/* 		std::cout << "\t" << el; */
-		/* 	} */
-		/* 	std::cout << "\n"; */
-		/* } */
-		/* std::cout.flush(); */
-
-
-		std::cerr << "K-mer map has " << kmap.size() << " k-mers and " << std::endl;
-		kseq_destroy(seq);
-		gzclose(fp);
-	}
-
-	void write(const std::string& index_out, bool writeKmerTable = true)
-    {
-        std::ofstream out;
-        out.open(index_out, std::ios::out | std::ios::binary);
-
-        if (!out.is_open()) {
-            // TODO: better handling
-            std::cerr << "Error: index output file could not be opened!";
-            exit(1);
+struct KmerIndex {
+  KmerIndex(const ProgramOptions& opt) : k(opt.k), num_trans(0), skip(opt.skip) {
+    //LoadTranscripts(opt.transfasta);
+  }
+
+  ~KmerIndex() {}
+
+
+  // use:  match(s,l,v)
+  // pre:  v is initialized
+  // post: v contains all equiv classes for the k-mers in s
+  void match(const char *s, int l, std::vector<int>& v) const {
+    KmerIterator kit(s), kit_end;
+    for (int i = 0; kit != kit_end; ++kit,++i) {
+      if (i==skip) {
+        i=0;
+      }
+      if (i==0) {
+        Kmer rep = kit->first.rep();
+        auto search = kmap.find(rep);
+        if (search != kmap.end()) {
+          // if k-mer found
+          v.push_back(search->second); // add equivalence class
         }
-
-				// 1. write index
-				out.write((char*)&INDEX_VERSION, sizeof(INDEX_VERSION));
-
-				// 2. write k
-				out.write((char*)&k, sizeof(k));
-
-				// 3. write number of transcripts
-        out.write((char*)&num_trans, sizeof(num_trans));
-
-				// 4. write out transcript lengths
-				for (int tlen : trans_lens_) {
-					out.write((char*)&tlen, sizeof(tlen));
-				}
-
-        size_t kmap_size = kmap.size();
-
-				if (writeKmerTable) {
-					// 5. write number of k-mers in map
-					out.write((char*)&kmap_size, sizeof(kmap_size));
-
-					// 6. write kmer->ec values
-					for (auto& kv : kmap) {
-            out.write((char*)&kv.first, sizeof(kv.first));
-            out.write((char*)&kv.second, sizeof(kv.second));
-					}
-				} else {
-					// 5. write fake k-mer size
-					kmap_size = 0;
-					out.write((char*)&kmap_size, sizeof(kmap_size));
-
-					// 6. write none of the kmer->ec values
-				}
-				// 7. write number of equivalence classes
-        size_t tmp_size;
-        tmp_size = ecmap.size();
-        out.write((char*)&tmp_size, sizeof(tmp_size));
-
-				// 8. write out each equiv class
-        for (auto& kv : ecmap) {
-            out.write((char*)&kv.first, sizeof(kv.first));
-
-						// 8.1 write out the size of equiv class
-            tmp_size = kv.second.size();
-            out.write((char*)&tmp_size, sizeof(tmp_size));
-						// 8.2 write each member
-            for (auto& val: kv.second) {
-                out.write((char*)&val, sizeof(val));
+      }
+    }
+  }
+
+  // use:  res = intersect(ec,v)
+  // pre:  ec is in ecmap, v is a vector of valid transcripts
+  //       v is sorted in increasing order
+  // post: res contains the intersection  of ecmap[ec] and v sorted increasing
+  //       res is empty if ec is not in ecmap
+  std::vector<int> intersect(int ec, const std::vector<int>& v) const {
+    std::vector<int> res;
+    auto search = ecmap.find(ec);
+    if (search != ecmap.end()) {
+      auto& u = search->second;
+      res.reserve(v.size());
+
+      auto a = u.begin();
+      auto b = v.begin();
+
+      while (a != u.end() && b != v.end()) {
+        if (*a < *b) {
+          ++a;
+        } else if (*b < *a) {
+          ++b;
+        } else {
+          // match
+          res.push_back(*a);
+          ++a;
+          ++b;
+        }
+      }
+    }
+    return res;
+  }
+
+
+  void BuildTranscripts(const std::string& fasta) {
+    // TODO: add code to check if binary file exists and load it directly
+    // FIXME: check if FASTA file actually exists
+    // If it doesn't, will just hang
+    int l;
+    std::cerr << "Loading fasta file " << fasta
+              << std::endl;
+    std::cerr << "k: " << k << std::endl;
+    gzFile fp = gzopen(fasta.c_str(),"r");
+    kseq_t *seq = kseq_init(fp);
+
+    int transid = 0;
+    std::unordered_map<Kmer, int, KmerHash> kmcount; // temporary
+
+    // maps kmers to set of transcript ids that contain them
+    std::unordered_map<Kmer, std::vector<int>, KmerHash> all_kmap;
+
+    // for each transcript in fasta file
+    while ((l = kseq_read(seq)) > 0) {
+      bool added = false;
+
+      target_names_.push_back(seq->name.s);
+
+      // if it is long enough
+      if (seq->seq.l >= k) {
+        KmerIterator kit(seq->seq.s), kit_end;
+        // for each k-mer add to map
+        for(; kit != kit_end; ++kit) {
+          Kmer rep = kit->first.rep();
+          kmcount[rep]++;
+          auto search = all_kmap.find(rep);
+          if (search == all_kmap.end()) {
+            // new k-mer
+            all_kmap.insert({rep, {transid}});
+          } else {
+            // seen before
+            std::vector<int>& v = search->second;
+            if (*v.rbegin() < transid) {
+              // but new transcript
+              v.push_back(transid);
             }
+          }
+          added = true;
         }
-
-        // 9. Write out target ids
-        // XXX: num_trans should equal to target_names_.size(), so don't need
-        // to write out again.
-        assert(num_trans == target_names_.size());
-        for (auto& tid : target_names_) {
-            // 9.1 write out how many bytes
-            // XXX: Note: this doesn't actually encore the max targ id size.
-            // might cause problems in the future
-            tmp_size = tid.size();
-            out.write((char*)&tmp_size, sizeof(tmp_size));
-
-            // 9.2 write out the actual string
-            out.write(tid.c_str(), tid.size());
+      }
+      if (added) {
+        trans_lens_.push_back(seq->seq.l);
+        transid++;
+        if (transid % 1000 == 1) {
+          std::cerr << " " << transid << " size of k-mer map " << all_kmap.size() << std::endl;
         }
+      }
+    }
+
+    num_trans = transid;
+    std::cerr << "Found " << num_trans << " transcripts"
+              << std::endl
+              << "Size of k-mer map " << all_kmap.size() << std::endl;
 
-        out.flush();
-        out.close();
+
+    // for each transcript
+    for (int i = 0; i < num_trans; i++ ) {
+      // create its own eqs
+      std::vector<int> single(1,i);
+      ecmap.insert({i,single});
+      ecmapinv.insert({single,i});
     }
 
-	// note opt is not const
-	void load(ProgramOptions &opt, bool loadKmerTable = true) {
-
-		std::string& index_in = opt.index;
-		std::ifstream in;
-		
-		in.open(index_in, std::ios::in | std::ios::binary);
-
-		if (!in.is_open()) {
-			// TODO: better handling
-			std::cerr << "Error: index input file could not be opened!";
-			exit(1);
-		}
-
-		// 1. read version
-		size_t header_version = 0;
-		in.read((char*)&header_version, sizeof(header_version));
-
-		if (header_version != INDEX_VERSION) {
-			std::cerr << "Error: Incompatiple indices. Found version " << header_version << ", expected version " << INDEX_VERSION << std::endl
-								<< "Rerun with index to regenerate!";
-			exit(1);
-		}
-
-		// 2. read k
-		in.read((char*)&k, sizeof(k));
-		if (Kmer::k == 0) {
-			//std::cerr << "[index] no k has been set, setting k = " << k << std::endl;
-			Kmer::set_k(k);
-			opt.k = k;
-		} else if (Kmer::k == k) {
-			//std::cerr << "[index] Kmer::k has been set and matches" << k << std::endl;
-			opt.k = k;
-		} else {
-			std::cerr << "Error: Kmer::k was already set to = " << Kmer::k << std::endl
-								<< "       conflicts with value of k  = " << k << std::endl;
-			exit(1);
-		}
-
-		// 3. read number of transcripts
-		in.read((char*)&num_trans, sizeof(num_trans));
-
-		// 4. read number of transcripts
-		trans_lens_.clear();
-		trans_lens_.reserve(num_trans);
-
-		for (int i = 0; i < num_trans; i++) {
-			int tlen;
-			in.read((char*)&tlen, sizeof(tlen));
-			trans_lens_.push_back(tlen);
-		}
-
-		// 5. read number of k-mers
-		size_t kmap_size;
-		in.read((char*)&kmap_size, sizeof(kmap_size));
-		
-		std::cerr << "[index] k: " << k << std::endl;
-		std::cerr << "[index] num_trans read: " << num_trans << std::endl;
-		std::cerr << "[index] kmap size: " << kmap_size << std::endl;
-		
-		kmap.clear();
-		if (loadKmerTable) {
-			kmap.reserve(kmap_size);
-		}
-
-		// 6. read kmer->ec values
-		Kmer tmp_kmer;
-		int tmp_val;
-		for (size_t i = 0; i < kmap_size; ++i)
-		{
-			in.read((char*)&tmp_kmer, sizeof(tmp_kmer));
-			in.read((char*)&tmp_val, sizeof(tmp_val));
-			
-			if (loadKmerTable) {
-				kmap.insert({tmp_kmer, tmp_val});
-			}
-		}
-
-		// 7. read number of equivalence classes
-		size_t ecmap_size;
-		in.read((char*)&ecmap_size, sizeof(ecmap_size));
-
-		std::cerr << "[index] ecmap size: " << ecmap_size << std::endl;
-
-		int tmp_id;
-		size_t vec_size;
-		// 8. read each equiv class
-		for (size_t i = 0; i < ecmap_size; ++i) {
-			in.read((char*)&tmp_id, sizeof(tmp_id));
-
-			// 8.1 read size of equiv class
-			in.read((char*)&vec_size, sizeof(vec_size));
-			
-			// 8.2 read each member
-			std::vector<int> tmp_vec;
-			tmp_vec.reserve(vec_size);
-			for (size_t j = 0; j < vec_size; ++j )
-			{
-				in.read((char*)&tmp_val, sizeof(tmp_val));
-				tmp_vec.push_back(tmp_val);
-			}
-			ecmap.insert({tmp_id, tmp_vec});
-			ecmapinv.insert({tmp_vec, tmp_id});
-		}
-
-        // 9. read in target ids
-		target_names_.clear();
-		target_names_.reserve(num_trans);
-
-        size_t tmp_size;
-        char buffer[1024]; // if your target_name is longer than this, screw you.
-        for (auto i = 0; i < num_trans; ++i) {
-            // 9.1 read in the size
-            in.read((char*)&tmp_size, sizeof(tmp_size));
-
-            // 9.2 read in the character string
-            in.read(buffer, tmp_size);
-
-            std::string tmp_targ_id( buffer );
-            target_names_.push_back(std::string( buffer ));
-
-            // clear the buffer for next string
-            memset(buffer,0,strlen(buffer));
-        }
 
-		in.close();
-	}
+    int eqs_id = num_trans;
+
+
+    for (auto& kv : all_kmap) {
+      auto search = ecmapinv.find(kv.second);
+      // if we have seen this equivalence class
+      if (search != ecmapinv.end()) {
+        // update kmap
+        kmap.insert({kv.first, search->second});
+      } else {
+        // else create a new equivalence class and update kmap
+        ecmapinv.insert({kv.second,eqs_id});
+        ecmap.insert({eqs_id, kv.second});
+        kmap.insert({kv.first, eqs_id});
+        eqs_id++;
+      }
+    }
+
+    std::cerr << "Created " << ecmap.size() << " equivalence classes from " << num_trans << " transcripts" << std::endl;
+
+    /* std::cout << "EqId\tTransIdList\n"; */
+    /* for (auto &ekv : ecmap) { */
+    /* 	std::cout << ekv.first; */
+    /* 	for (auto el : ekv.second) { */
+    /* 		std::cout << "\t" << el; */
+    /* 	} */
+    /* 	std::cout << "\n"; */
+    /* } */
+    /* std::cout.flush(); */
+
+
+    std::cerr << "K-mer map has " << kmap.size() << " k-mers and " << std::endl;
+    kseq_destroy(seq);
+    gzclose(fp);
+  }
+
+  void write(const std::string& index_out, bool writeKmerTable = true) {
+    std::ofstream out;
+    out.open(index_out, std::ios::out | std::ios::binary);
+
+    if (!out.is_open()) {
+      // TODO: better handling
+      std::cerr << "Error: index output file could not be opened!";
+      exit(1);
+    }
+
+    // 1. write index version
+    out.write((char *)&INDEX_VERSION, sizeof(INDEX_VERSION));
+
+    // 2. write k
+    out.write((char *)&k, sizeof(k));
+
+    // 3. write number of transcripts
+    out.write((char *)&num_trans, sizeof(num_trans));
+
+    // 4. write out transcript lengths
+    for (int tlen : trans_lens_) {
+      out.write((char *)&tlen, sizeof(tlen));
+    }
+
+    size_t kmap_size = kmap.size();
+
+    if (writeKmerTable) {
+      // 5. write number of k-mers in map
+      out.write((char *)&kmap_size, sizeof(kmap_size));
+
+      // 6. write kmer->ec values
+      for (auto& kv : kmap) {
+        out.write((char *)&kv.first, sizeof(kv.first));
+        out.write((char *)&kv.second, sizeof(kv.second));
+      }
+    } else {
+      // 5. write fake k-mer size
+      kmap_size = 0;
+      out.write((char *)&kmap_size, sizeof(kmap_size));
+
+      // 6. write none of the kmer->ec values
+    }
+    // 7. write number of equivalence classes
+    size_t tmp_size;
+    tmp_size = ecmap.size();
+    out.write((char *)&tmp_size, sizeof(tmp_size));
+
+    // 8. write out each equiv class
+    for (auto& kv : ecmap) {
+      out.write((char *)&kv.first, sizeof(kv.first));
+
+      // 8.1 write out the size of equiv class
+      tmp_size = kv.second.size();
+      out.write((char *)&tmp_size, sizeof(tmp_size));
+      // 8.2 write each member
+      for (auto& val: kv.second) {
+        out.write((char *)&val, sizeof(val));
+      }
+    }
+
+    // 9. Write out target ids
+    // XXX: num_trans should equal to target_names_.size(), so don't need
+    // to write out again.
+    assert(num_trans == target_names_.size());
+    for (auto& tid : target_names_) {
+      // 9.1 write out how many bytes
+      // XXX: Note: this doesn't actually encode the max targ id size.
+      // might cause problems in the future
+      tmp_size = tid.size();
+      out.write((char *)&tmp_size, sizeof(tmp_size));
+
+      // 9.2 write out the actual string
+      out.write(tid.c_str(), tid.size());
+    }
+
+    out.flush();
+    out.close();
+  }
+
+  // note opt is not const
+  void load(ProgramOptions& opt, bool loadKmerTable = true) {
+
+    std::string& index_in = opt.index;
+    std::ifstream in;
+
+    in.open(index_in, std::ios::in | std::ios::binary);
+
+    if (!in.is_open()) {
+      // TODO: better handling
+      std::cerr << "Error: index input file could not be opened!";
+      exit(1);
+    }
+
+    // 1. read version
+    size_t header_version = 0;
+    in.read((char *)&header_version, sizeof(header_version));
+
+    if (header_version != INDEX_VERSION) {
+      std::cerr << "Error: Incompatiple indices. Found version " << header_version << ", expected version " << INDEX_VERSION << std::endl
+                << "Rerun with index to regenerate!";
+      exit(1);
+    }
+
+    // 2. read k
+    in.read((char *)&k, sizeof(k));
+    if (Kmer::k == 0) {
+      //std::cerr << "[index] no k has been set, setting k = " << k << std::endl;
+      Kmer::set_k(k);
+      opt.k = k;
+    } else if (Kmer::k == k) {
+      //std::cerr << "[index] Kmer::k has been set and matches" << k << std::endl;
+      opt.k = k;
+    } else {
+      std::cerr << "Error: Kmer::k was already set to = " << Kmer::k << std::endl
+                << "       conflicts with value of k  = " << k << std::endl;
+      exit(1);
+    }
+
+    // 3. read number of transcripts
+    in.read((char *)&num_trans, sizeof(num_trans));
+
+    // 4. read transcript lengths
+    trans_lens_.clear();
+    trans_lens_.reserve(num_trans);
+
+    for (int i = 0; i < num_trans; i++) {
+      int tlen;
+      in.read((char *)&tlen, sizeof(tlen));
+      trans_lens_.push_back(tlen);
+    }
+
+    // 5. read number of k-mers
+    size_t kmap_size;
+    in.read((char *)&kmap_size, sizeof(kmap_size));
+
+    std::cerr << "[index] k: " << k << std::endl;
+    std::cerr << "[index] num_trans read: " << num_trans << std::endl;
+    std::cerr << "[index] kmap size: " << kmap_size << std::endl;
+
+    kmap.clear();
+    if (loadKmerTable) {
+      kmap.reserve(kmap_size);
+    }
+
+    // 6. read kmer->ec values
+    Kmer tmp_kmer;
+    int tmp_val;
+    for (size_t i = 0; i < kmap_size; ++i) {
+      in.read((char *)&tmp_kmer, sizeof(tmp_kmer));
+      in.read((char *)&tmp_val, sizeof(tmp_val));
+
+      if (loadKmerTable) {
+        kmap.insert({tmp_kmer, tmp_val});
+      }
+    }
+
+    // 7. read number of equivalence classes
+    size_t ecmap_size;
+    in.read((char *)&ecmap_size, sizeof(ecmap_size));
+
+    std::cerr << "[index] ecmap size: " << ecmap_size << std::endl;
+
+    int tmp_id;
+    size_t vec_size;
+    // 8. read each equiv class
+    for (size_t i = 0; i < ecmap_size; ++i) {
+      in.read((char *)&tmp_id, sizeof(tmp_id));
+
+      // 8.1 read size of equiv class
+      in.read((char *)&vec_size, sizeof(vec_size));
+
+      // 8.2 read each member
+      std::vector<int> tmp_vec;
+      tmp_vec.reserve(vec_size);
+      for (size_t j = 0; j < vec_size; ++j ) {
+        in.read((char *)&tmp_val, sizeof(tmp_val));
+        tmp_vec.push_back(tmp_val);
+      }
+      ecmap.insert({tmp_id, tmp_vec});
+      ecmapinv.insert({tmp_vec, tmp_id});
+    }
+
+    // 9. read in target ids
+    target_names_.clear();
+    target_names_.reserve(num_trans);
+
+    size_t tmp_size;
+    char buffer[1024]; // NOTE: names longer than this overflow the buffer; tmp_size is not checked
+    for (auto i = 0; i < num_trans; ++i) {
+      // 9.1 read in the size
+      in.read((char *)&tmp_size, sizeof(tmp_size));
+
+      // 9.2 read in the character string
+      in.read(buffer, tmp_size);
+
+      std::string tmp_targ_id( buffer );
+      target_names_.push_back(std::string( buffer ));
+
+      // clear the buffer for next string
+      memset(buffer,0,strlen(buffer));
+    }
+
+    in.close();
+  }
+
+  int k; // k-mer size used
+  int num_trans; // number of transcripts
+  int skip;
+  //std::unordered_map<Kmer, int, KmerHash> kmap;
+  KmerHashTable<int, KmerHash> kmap;
 
-	int k; // k-mer size used
-	int num_trans; // number of transcripts
-	int skip;
-	//std::unordered_map<Kmer, int, KmerHash> kmap;
-	KmerHashTable<int, KmerHash> kmap;
-	
-	EcMap ecmap;
-	std::unordered_map<std::vector<int>, int, SortedVectorHasher> ecmapinv;
-	const size_t INDEX_VERSION = 4; // increase this every time you change the fileformat
+  EcMap ecmap;
+  std::unordered_map<std::vector<int>, int, SortedVectorHasher> ecmapinv;
+  const size_t INDEX_VERSION = 4; // increase this every time you change the fileformat
 
-	std::vector<int> trans_lens_;
+  std::vector<int> trans_lens_;
 
-    std::vector<std::string> target_names_;
+  std::vector<std::string> target_names_;
 };
 
 #endif // KALLISTO_KMERINDEX_H
diff --git a/src/KmerIterator.cpp b/src/KmerIterator.cpp
index 0a089934..0f053fb3 100644
--- a/src/KmerIterator.cpp
+++ b/src/KmerIterator.cpp
@@ -7,7 +7,7 @@
 /* Note: That an iter is exhausted means that (iter._invalid == true) */
 
 // use:  ++iter;
-// pre:  
+// pre:
 // post: *iter is now exhausted
 //       OR *iter is the next valid pair of kmer and location
 KmerIterator& KmerIterator::operator++() {
@@ -26,17 +26,17 @@ KmerIterator& KmerIterator::operator++() {
 
 
 // use:  iter++;
-// pre:  
+// pre:
 // post: iter has been incremented by one
 KmerIterator KmerIterator::operator++(int) {
-  KmerIterator tmp(*this); 
-  operator++(); 
+  KmerIterator tmp(*this);
+  operator++();
   return tmp;
 }
 
 
 // use:  val = (a == b);
-// pre:   
+// pre:
 // post: (val == true) if a and b are both exhausted
 //       OR a and b are in the same location of the same string.
 //       (val == false) otherwise.
@@ -50,18 +50,18 @@ bool KmerIterator::operator==(const KmerIterator& o) {
 
 
 // use:  p = *iter;
-// pre:   
+// pre:
 // post: p is NULL or a pair of Kmer and int
 std::pair<Kmer, int>& KmerIterator::operator*() {
   return p_;
 }
 
 
-// use:  example 1: km = iter->first; 
+// use:  example 1: km = iter->first;
 //       example 2:  i = iter->second;
 // pre:  *iter is not NULL
 // post: km will be (*iter).first, i will be (*iter).second
-std::pair<Kmer, int>* KmerIterator::operator->() {
+std::pair<Kmer, int> *KmerIterator::operator->() {
   return &(operator*());
 }
 
@@ -69,7 +69,7 @@ std::pair<Kmer, int>* KmerIterator::operator->() {
 // use:  iter.raise(km, rep);
 // post: iter has been incremented by one
 //       if iter is not invalid, km is iter->first and rep is km.rep()
-void KmerIterator::raise(Kmer &km, Kmer &rep) {
+void KmerIterator::raise(Kmer& km, Kmer& rep) {
   operator++();
   if (!invalid_) {
     km = p_.first;
@@ -77,8 +77,8 @@ void KmerIterator::raise(Kmer &km, Kmer &rep) {
   }
 }
 
-// use:  find_next(i,j, last_valid); 
-// pre:  
+// use:  find_next(i,j, last_valid);
+// pre:
 // post: *iter is either invalid or is a pair of:
 //       1) the next valid kmer in the string that does not have any 'N'
 //       2) the location of that kmer in the string
@@ -87,29 +87,29 @@ void KmerIterator::find_next(size_t i, size_t j, bool last_valid) {
   ++j;
 
   while (s_[j] != 0) {
-      char c = s_[j];
-      if (c == 'A' || c == 'C' || c == 'G' || c == 'T') {
-          if (last_valid) {
-              p_.first = p_.first.forwardBase(c);
-              break; // default case, 
-          } else {
-              if (i + Kmer::k - 1 == j) {
-                  p_.first = Kmer(s_+i);
-                  last_valid = true;
-                  break; // create k-mer from scratch
-              } else {
-                  ++j;
-              }
-          }
+    char c = s_[j];
+    if (c == 'A' || c == 'C' || c == 'G' || c == 'T') {
+      if (last_valid) {
+        p_.first = p_.first.forwardBase(c);
+        break; // default case,
       } else {
+        if (i + Kmer::k - 1 == j) {
+          p_.first = Kmer(s_+i);
+          last_valid = true;
+          break; // create k-mer from scratch
+        } else {
           ++j;
-          i = j;
-          last_valid = false;
+        }
       }
+    } else {
+      ++j;
+      i = j;
+      last_valid = false;
+    }
   }
   if (i+Kmer::k-1 == j && s_[j] != 0) {
-      p_.second = i;
+    p_.second = i;
   } else {
-      invalid_ = true;
+    invalid_ = true;
   }
 }
diff --git a/src/KmerIterator.hpp b/src/KmerIterator.hpp
index f90dd60e..b9705354 100644
--- a/src/KmerIterator.hpp
+++ b/src/KmerIterator.hpp
@@ -5,30 +5,30 @@
 #include "Kmer.hpp"
 
 
-/* Short description: 
+/* Short description:
  *  - Easily iterate through kmers in a read
  *  - If the read contains any N, then the N is skipped and checked whether
  *    there is a kmer to the right of the N
  * */
 class KmerIterator : public std::iterator<std::input_iterator_tag, std::pair<Kmer, int>, int> {
-public:
+ public:
   KmerIterator() : s_(NULL), p_(), invalid_(true) {}
-  KmerIterator(const char* s) : s_(s), p_(), invalid_(false) { find_next(-1,-1,false);}
+  KmerIterator(const char *s) : s_(s), p_(), invalid_(false) { find_next(-1,-1,false);}
   KmerIterator(const KmerIterator& o) : s_(o.s_), p_(o.p_), invalid_(o.invalid_) {}
 
   KmerIterator& operator++();
   KmerIterator operator++(int);
-  void raise(Kmer &km, Kmer &rep);
+  void raise(Kmer& km, Kmer& rep);
 
   bool operator==(const KmerIterator& o);
   bool operator!=(const KmerIterator& o) { return !this->operator==(o);}
 
   std::pair<Kmer, int>& operator*();
-  std::pair<Kmer, int>* operator->();
+  std::pair<Kmer, int> *operator->();
 
-private:
+ private:
   void find_next(size_t i, size_t j, bool last_valid);
-  
+
   const char *s_;
   std::pair<Kmer, int> p_;
   bool invalid_;
diff --git a/src/MinCollector.h b/src/MinCollector.h
index e351473e..c42ae535 100644
--- a/src/MinCollector.h
+++ b/src/MinCollector.h
@@ -12,89 +12,89 @@
 
 template <typename Index>
 struct MinCollector {
-	
-MinCollector(Index &ind, const ProgramOptions& opt) : index(ind), counts(index.ecmap.size(), 0) {}
-
-
-	
-	void collect(std::vector<int>& v) {
-		if (v.empty()) {
-			return;
-		}
-		sort(v.begin(), v.end()); // sort by increasing order
-
-		int count = 1; // how many k-mer support the ec
-		std::vector<int> u = index.ecmap[v[0]];
-		
-		for (int i = 1; i < v.size(); i++) {
-			if (v[i] != v[i-1]) {
-				u = index.intersect(v[i],u);
-				if (u.empty()) {
-					break;
-				}
-			}
-			count++; // increase the count
-		}
-		// if u is empty do nothing
-		if (u.empty()) {
-			return;
-		}
-		
-		auto search = index.ecmapinv.find(u);
-		if (search != index.ecmapinv.end()) {
-			// ec class already exists, update count
-			++counts[search->second];
-		} else {
-			// new ec class, update the index and count
-			auto necs = counts.size();
-			index.ecmap.insert({necs,u});
-			index.ecmapinv.insert({u,necs});
-			counts.push_back(1);
-		}
-	}
-	
-	void write(std::ostream& o) {
-		for (int id = 0; id < counts.size(); id++) {
-			o << id << "\t" << counts[id] << "\n";
-		}
-	}
-
-	void loadCounts(ProgramOptions& opt) {
-		int num_ecs = counts.size();
-		counts.clear();
-		std::ifstream in((opt.output + "/counts.txt"));
-		int i = 0;
-		if (in.is_open()) {
-			std::string line;
-			while (getline(in, line)) {
-				std::stringstream ss(line);
-				int j,c;
-				ss >> j;
-				ss >> c;
-				if (j != i) {
-					std::cerr << "Error: equivalence class does not match index. Found "
-										<< j << ", expected " << i << std::endl;
-					exit(1);
-				}
-				counts.push_back(c);
-				i++;
-			}
-			
-			if (i != num_ecs) {
-				std::cerr << "Error: number of equivalence classes does not match index. Found "
-									<< i << ", expected " << num_ecs << std::endl;
-				exit(1);
-			}
-		} else {
-			std::cerr << "Error: Could not open file " << opt.output << "/counts.txt" << std::endl;
-			exit(1);
-			
-		}
-	}
-
-	Index &index;
-	std::vector<int> counts;
-	
+
+  MinCollector(Index& ind, const ProgramOptions& opt) : index(ind), counts(index.ecmap.size(), 0) {} // one zeroed counter per entry already in index.ecmap; opt is not used here
+
+
+
+  void collect(std::vector<int>& v) { // Folds the ec ids in v into one ec and bumps its counter; sorts v in place.
+    if (v.empty()) {
+      return;
+    }
+    sort(v.begin(), v.end()); // ascending order, so equal ids become adjacent
+
+    int count = 1; // number of entries of v visited; NOTE(review): written but never read in this method
+    std::vector<int> u = index.ecmap[v[0]]; // running result, seeded with a copy of the entry for the smallest id
+
+    for (int i = 1; i < v.size(); i++) { // NOTE(review): signed i compared against unsigned size()
+      if (v[i] != v[i-1]) { // only a new id can change u; repeats are skipped
+        u = index.intersect(v[i],u); // presumably narrows u to members shared with ec v[i] -- intersect is defined outside this view
+        if (u.empty()) {
+          break;
+        }
+      }
+      count++; // one more entry of v visited
+    }
+    // nothing left after intersecting: leave counts and the index untouched
+    if (u.empty()) {
+      return;
+    }
+
+    auto search = index.ecmapinv.find(u);
+    if (search != index.ecmapinv.end()) {
+      // u is already a known equivalence class: increment its counter
+      ++counts[search->second];
+    } else {
+      // u is new: register it in both maps and start its counter at 1
+      auto necs = counts.size(); // the new id is the current number of counters
+      index.ecmap.insert({necs,u});
+      index.ecmapinv.insert({u,necs});
+      counts.push_back(1);
+    }
+  }
+
+  void write(std::ostream& o) { // Writes one "<id>\t<count>" line to o for every entry of counts.
+    for (int id = 0; id < counts.size(); id++) { // the id is simply the position in counts
+      o << id << "\t" << counts[id] << "\n";
+    }
+  }
+
+  void loadCounts(ProgramOptions& opt) { // Replaces counts with the values read from <opt.output>/counts.txt; exits(1) on any mismatch.
+    int num_ecs = counts.size(); // remembered so the number of lines read can be checked against it
+    counts.clear();
+    std::ifstream in((opt.output + "/counts.txt"));
+    int i = 0; // id expected on the next line
+    if (in.is_open()) {
+      std::string line;
+      while (getline(in, line)) {
+        std::stringstream ss(line);
+        int j,c; // j: id found on the line, c: its count
+        ss >> j; // NOTE(review): neither extraction is checked for failure
+        ss >> c;
+        if (j != i) { // ids must appear as 0,1,2,... in file order
+          std::cerr << "Error: equivalence class does not match index. Found "
+                    << j << ", expected " << i << std::endl;
+          exit(1);
+        }
+        counts.push_back(c);
+        i++;
+      }
+
+      if (i != num_ecs) { // the file must hold exactly as many lines as counts had entries before clear()
+        std::cerr << "Error: number of equivalence classes does not match index. Found "
+                  << i << ", expected " << num_ecs << std::endl;
+        exit(1);
+      }
+    } else {
+      std::cerr << "Error: Could not open file " << opt.output << "/counts.txt" << std::endl;
+      exit(1);
+
+    }
+  }
+
+  Index& index;
+  std::vector<int> counts;
+
 };
 
 #endif // KALLISTO_MINCOLLECTOR_H
diff --git a/src/ProcessReads.h b/src/ProcessReads.h
index 74d71a07..4f7849da 100644
--- a/src/ProcessReads.h
+++ b/src/ProcessReads.h
@@ -13,78 +13,78 @@
 
 template<typename Index, typename TranscriptCollector>
 TranscriptCollector ProcessReads(Index& index, const ProgramOptions& opt) {
-	
-	// need to receive an index map
-	std::ios_base::sync_with_stdio(false);
-
-
-	bool paired = (opt.files.size() == 2);
-	
-	gzFile fp1 = 0, fp2 = 0;
-	kseq_t *seq1 = 0, *seq2;
-	std::vector<int> v;
-	v.reserve(1000);
-
-	int l1,l2; // length of read
-	size_t nreads = 0;
-
-	TranscriptCollector tc(index, opt);
-
-	// for each file
-	
-	fp1 = gzopen(opt.files[0].c_str(), "r");
-	seq1 = kseq_init(fp1);
-	if (paired) {
-		fp2 = gzopen(opt.files[1].c_str(),"r");
-		seq2 = kseq_init(fp2);
-	}
-
-
-	// for each read
-	while (true) {
-		l1 = kseq_read(seq1);
-		if (paired) {
-			l2 = kseq_read(seq2);
-		}
-		if (l1 <= 0) {
-			break;
-		}
-		if (paired && l2 <= 0) {
-			break;
-		}
-
-		nreads++;
-		v.clear();
-		// process read
-		index.match(seq1->seq.s, seq1->seq.l, v);
-		if (paired) {
-			index.match(seq2->seq.s, seq2->seq.l, v);
-		}
-		
-		// collect the transcript information
-		tc.collect(v);
-		if (opt.verbose && nreads % 10000 == 0 ) {
-			std::cerr << "Processed " << nreads << std::endl;
-		}
-	}
-	gzclose(fp1);
-	if (paired) {
-		gzclose(fp2);
-	}
-
-	kseq_destroy(seq1);
-	if (paired) {
-		kseq_destroy(seq2);
-	}
-
-	// write output to outdir
-	std::string outfile = opt.output + "/counts.txt"; // figure out filenaming scheme
-	std::ofstream of;
-	of.open(outfile.c_str(), std::ios::out);
-	tc.write(of);
-	of.close();
-	
-	return tc;
+
+  // need to receive an index map
+  std::ios_base::sync_with_stdio(false);
+
+
+  bool paired = (opt.files.size() == 2);
+
+  gzFile fp1 = 0, fp2 = 0;
+  kseq_t *seq1 = 0, *seq2;
+  std::vector<int> v;
+  v.reserve(1000);
+
+  int l1,l2; // length of read
+  size_t nreads = 0;
+
+  TranscriptCollector tc(index, opt);
+
+  // for each file
+
+  fp1 = gzopen(opt.files[0].c_str(), "r");
+  seq1 = kseq_init(fp1);
+  if (paired) {
+    fp2 = gzopen(opt.files[1].c_str(),"r");
+    seq2 = kseq_init(fp2);
+  }
+
+
+  // for each read
+  while (true) {
+    l1 = kseq_read(seq1);
+    if (paired) {
+      l2 = kseq_read(seq2);
+    }
+    if (l1 <= 0) {
+      break;
+    }
+    if (paired && l2 <= 0) {
+      break;
+    }
+
+    nreads++;
+    v.clear();
+    // process read
+    index.match(seq1->seq.s, seq1->seq.l, v);
+    if (paired) {
+      index.match(seq2->seq.s, seq2->seq.l, v);
+    }
+
+    // collect the transcript information
+    tc.collect(v);
+    if (opt.verbose && nreads % 10000 == 0 ) {
+      std::cerr << "Processed " << nreads << std::endl;
+    }
+  }
+  gzclose(fp1);
+  if (paired) {
+    gzclose(fp2);
+  }
+
+  kseq_destroy(seq1);
+  if (paired) {
+    kseq_destroy(seq2);
+  }
+
+  // write output to outdir
+  std::string outfile = opt.output + "/counts.txt"; // figure out filenaming scheme
+  std::ofstream of;
+  of.open(outfile.c_str(), std::ios::out);
+  tc.write(of);
+  of.close();
+
+  return tc;
 }
 
 
diff --git a/src/common.h b/src/common.h
index aabcf409..443ecbcd 100644
--- a/src/common.h
+++ b/src/common.h
@@ -12,18 +12,18 @@
 
 
 struct ProgramOptions {
-	bool verbose;
-	int threads;
-	std::string index;
-	int k;
-	int iterations;
-	std::string output;
-	int skip;
-	size_t seed;
-	std::string transfasta;
-	std::vector<std::string> files;
-
-ProgramOptions() : verbose(false), seed(0), threads(1), k(21), iterations(500), skip(1)  {}
+  bool verbose;
+  int threads;
+  std::string index;
+  int k;
+  int iterations;
+  std::string output;
+  int skip;
+  size_t seed;
+  std::string transfasta;
+  std::vector<std::string> files;
+
+  ProgramOptions() : verbose(false), seed(0), threads(1), k(21), iterations(500), skip(1)  {}
 };
 
 #endif // KALLISTO_COMMON_H
diff --git a/src/hash.cpp b/src/hash.cpp
index 39ae90f3..d52eee30 100644
--- a/src/hash.cpp
+++ b/src/hash.cpp
@@ -6,49 +6,49 @@ uint64_t inline _rotl64(uint64_t value, int8_t amount) {
   return ((value) << (amount)) | ((value) >> (64 - (amount)));
 }
 
-uint32_t SuperFastHash (const char * data, int len) {
-uint32_t hash = len, tmp;
-int rem;
-
-    if (len <= 0 || data == NULL) return 0;
-
-    rem = len & 3;
-    len >>= 2;
-
-    /* Main loop */
-    for (;len > 0; len--) {
-        hash  += get16bits (data);
-        tmp    = (get16bits (data+2) << 11) ^ hash;
-        hash   = (hash << 16) ^ tmp;
-        data  += 2*sizeof (uint16_t);
-        hash  += hash >> 11;
-    }
-
-    /* Handle end cases */
-    switch (rem) {
-        case 3: hash += get16bits (data);
-                hash ^= hash << 16;
-                hash ^= data[sizeof (uint16_t)] << 18;
-                hash += hash >> 11;
-                break;
-        case 2: hash += get16bits (data);
-                hash ^= hash << 11;
-                hash += hash >> 17;
-                break;
-        case 1: hash += *data;
-                hash ^= hash << 10;
-                hash += hash >> 1;
-    }
-
-    /* Force "avalanching" of final 127 bits */
-    hash ^= hash << 3;
-    hash += hash >> 5;
-    hash ^= hash << 4;
+uint32_t SuperFastHash (const char *data, int len) {
+  uint32_t hash = len, tmp;
+  int rem;
+
+  if (len <= 0 || data == NULL) { return 0; }
+
+  rem = len & 3;
+  len >>= 2;
+
+  /* Main loop */
+  for (; len > 0; len--) {
+    hash  += get16bits (data);
+    tmp    = (get16bits (data+2) << 11) ^ hash;
+    hash   = (hash << 16) ^ tmp;
+    data  += 2*sizeof (uint16_t);
+    hash  += hash >> 11;
+  }
+
+  /* Handle end cases */
+  switch (rem) {
+  case 3: hash += get16bits (data);
+    hash ^= hash << 16;
+    hash ^= data[sizeof (uint16_t)] << 18;
+    hash += hash >> 11;
+    break;
+  case 2: hash += get16bits (data);
+    hash ^= hash << 11;
     hash += hash >> 17;
-    hash ^= hash << 25;
-    hash += hash >> 6;
-
-    return hash;
+    break;
+  case 1: hash += *data;
+    hash ^= hash << 10;
+    hash += hash >> 1;
+  }
+
+  /* Force "avalanching" of final 127 bits */
+  hash ^= hash << 3;
+  hash += hash >> 5;
+  hash ^= hash << 4;
+  hash += hash >> 17;
+  hash ^= hash << 25;
+  hash += hash >> 6;
+
+  return hash;
 }
 
 
@@ -58,25 +58,23 @@ int rem;
 // Block read - if your platform needs to do endian-swapping or can only
 // handle aligned reads, do the conversion here
 
-inline uint64_t getblock ( const uint64_t * p, int i )
-{
+inline uint64_t getblock ( const uint64_t *p, int i ) {
   return p[i];
 }
 
 //----------
 // Block mix - combine the key bits with the hash bits and scramble everything
 
-inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2, uint64_t & c1, uint64_t & c2 )
-{
-  k1 *= c1; 
-  k1  = _rotl64(k1,23); 
+inline void bmix64 ( uint64_t& h1, uint64_t& h2, uint64_t& k1, uint64_t& k2, uint64_t& c1, uint64_t& c2 ) {
+  k1 *= c1;
+  k1  = _rotl64(k1,23);
   k1 *= c2;
   h1 ^= k1;
   h1 += h2;
 
   h2 = _rotl64(h2,41);
 
-  k2 *= c2; 
+  k2 *= c2;
   k2  = _rotl64(k2,23);
   k2 *= c1;
   h2 ^= k2;
@@ -92,8 +90,7 @@ inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2,
 //----------
 // Finalization mix - avalanches all bits to within 0.05% bias
 
-inline uint64_t fmix64 ( uint64_t k )
-{
+inline uint64_t fmix64 ( uint64_t k ) {
   k ^= k >> 33;
   k *= 0xff51afd7ed558ccd;
   k ^= k >> 33;
@@ -103,9 +100,8 @@ inline uint64_t fmix64 ( uint64_t k )
   return k;
 }
 
-void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed, void * out )
-{
-  const uint8_t * data = (const uint8_t*)key;
+void MurmurHash3_x64_128 ( const void *key, const int len, const uint32_t seed, void *out ) {
+  const uint8_t *data = (const uint8_t *)key;
   const int nblocks = len / 16;
 
   uint64_t h1 = 0x9368e53c2f6af274 ^ seed;
@@ -117,44 +113,42 @@ void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed,
   //----------
   // body
 
-  const uint64_t * blocks = (const uint64_t *)(data);
+  const uint64_t *blocks = (const uint64_t *)(data);
 
-  for(int i = 0; i < nblocks; i++)
-    {
-      uint64_t k1 = getblock(blocks,i*2+0);
-      uint64_t k2 = getblock(blocks,i*2+1);
+  for(int i = 0; i < nblocks; i++) {
+    uint64_t k1 = getblock(blocks,i*2+0);
+    uint64_t k2 = getblock(blocks,i*2+1);
 
-      bmix64(h1,h2,k1,k2,c1,c2);
-    }
+    bmix64(h1,h2,k1,k2,c1,c2);
+  }
 
   //----------
   // tail
 
-  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+  const uint8_t *tail = (const uint8_t *)(data + nblocks*16);
 
   uint64_t k1 = 0;
   uint64_t k2 = 0;
 
-  switch(len & 15)
-    {
-    case 15: k2 ^= uint64_t(tail[14]) << 48;
-    case 14: k2 ^= uint64_t(tail[13]) << 40;
-    case 13: k2 ^= uint64_t(tail[12]) << 32;
-    case 12: k2 ^= uint64_t(tail[11]) << 24;
-    case 11: k2 ^= uint64_t(tail[10]) << 16;
-    case 10: k2 ^= uint64_t(tail[ 9]) << 8;
-    case  9: k2 ^= uint64_t(tail[ 8]) << 0;
-
-    case  8: k1 ^= uint64_t(tail[ 7]) << 56;
-    case  7: k1 ^= uint64_t(tail[ 6]) << 48;
-    case  6: k1 ^= uint64_t(tail[ 5]) << 40;
-    case  5: k1 ^= uint64_t(tail[ 4]) << 32;
-    case  4: k1 ^= uint64_t(tail[ 3]) << 24;
-    case  3: k1 ^= uint64_t(tail[ 2]) << 16;
-    case  2: k1 ^= uint64_t(tail[ 1]) << 8;
-    case  1: k1 ^= uint64_t(tail[ 0]) << 0;
-      bmix64(h1,h2,k1,k2,c1,c2);
-    };
+  switch(len & 15) {
+  case 15: k2 ^= uint64_t(tail[14]) << 48;
+  case 14: k2 ^= uint64_t(tail[13]) << 40;
+  case 13: k2 ^= uint64_t(tail[12]) << 32;
+  case 12: k2 ^= uint64_t(tail[11]) << 24;
+  case 11: k2 ^= uint64_t(tail[10]) << 16;
+  case 10: k2 ^= uint64_t(tail[ 9]) << 8;
+  case  9: k2 ^= uint64_t(tail[ 8]) << 0;
+
+  case  8: k1 ^= uint64_t(tail[ 7]) << 56;
+  case  7: k1 ^= uint64_t(tail[ 6]) << 48;
+  case  6: k1 ^= uint64_t(tail[ 5]) << 40;
+  case  5: k1 ^= uint64_t(tail[ 4]) << 32;
+  case  4: k1 ^= uint64_t(tail[ 3]) << 24;
+  case  3: k1 ^= uint64_t(tail[ 2]) << 16;
+  case  2: k1 ^= uint64_t(tail[ 1]) << 8;
+  case  1: k1 ^= uint64_t(tail[ 0]) << 0;
+    bmix64(h1,h2,k1,k2,c1,c2);
+  };
 
   //----------
   // finalization
@@ -170,32 +164,30 @@ void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed,
   h1 += h2;
   h2 += h1;
 
-  ((uint64_t*)out)[0] = h1;
-  ((uint64_t*)out)[1] = h2;
+  ((uint64_t *)out)[0] = h1;
+  ((uint64_t *)out)[1] = h2;
 }
 
 //-----------------------------------------------------------------------------
-// If we need a smaller hash value, it's faster to just use a portion of the 
+// If we need a smaller hash value, it's faster to just use a portion of the
 // 128-bit hash
 
-void MurmurHash3_x64_32 ( const void * key, int len, uint32_t seed, void * out )
-{
+void MurmurHash3_x64_32 ( const void *key, int len, uint32_t seed, void *out ) {
   uint32_t temp[4];
 
   MurmurHash3_x64_128(key,len,seed,temp);
 
-  *(uint32_t*)out = temp[0];
+  *(uint32_t *)out = temp[0];
 }
 
 //----------
 
-void MurmurHash3_x64_64 ( const void * key, int len, uint32_t seed, void * out )
-{
+void MurmurHash3_x64_64 ( const void *key, int len, uint32_t seed, void *out ) {
   uint64_t temp[2];
 
   MurmurHash3_x64_128(key,len,seed,temp);
 
-  *(uint64_t*)out = temp[0];
-} 
+  *(uint64_t *)out = temp[0];
+}
 
 //-----------------------------------------------------------------------------
diff --git a/src/hash.hpp b/src/hash.hpp
index 0b33f477..2f83c6c7 100644
--- a/src/hash.hpp
+++ b/src/hash.hpp
@@ -13,9 +13,9 @@
                        +(uint32_t)(((const uint8_t *)(d))[0]) )
 #endif
 
-uint32_t SuperFastHash (const char * data, int len);
+uint32_t SuperFastHash (const char *data, int len);
 
 //void MurmurHash3_x64_32 ( const void * key, int len, uint32_t seed, void * out );
-void MurmurHash3_x64_64 ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash3_x64_64 ( const void *key, int len, uint32_t seed, void *out );
 
 #endif
diff --git a/src/kseq.h b/src/kseq.h
index b2238d1d..9645bc43 100644
--- a/src/kseq.h
+++ b/src/kseq.h
@@ -78,8 +78,8 @@
 #ifndef KSTRING_T
 #define KSTRING_T kstring_t
 typedef struct __kstring_t {
-	size_t l, m;
-	char *s;
+  size_t l, m;
+  char *s;
 } kstring_t;
 #endif
 
diff --git a/src/main.cpp b/src/main.cpp
index b375c4c7..f864a056 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -27,23 +27,22 @@ KSEQ_INIT(gzFile, gzread)
 using namespace std;
 
 
-void ParseOptionsIndex(int argc, char **argv, ProgramOptions &opt) {
+void ParseOptionsIndex(int argc, char **argv, ProgramOptions& opt) {
   int verbose_flag = 0;
 
-  const char* opt_string = "i:k:f:";
-  static struct option long_options[] =
-  {
-		// long args
+  const char *opt_string = "i:k:f:";
+  static struct option long_options[] = {
+    // long args
     {"verbose", no_argument, &verbose_flag, 1},
-		// short args
-		{"index", required_argument, 0, 'i'},
+    // short args
+    {"index", required_argument, 0, 'i'},
     {"kmer-size", required_argument, 0, 'k'},
-		{"trans-fasta", required_argument, 0, 'f'},
+    {"trans-fasta", required_argument, 0, 'f'},
     {0,0,0,0}
   };
-	int c;
+  int c;
   int option_index = 0;
-	while (true) {
+  while (true) {
     c = getopt_long(argc,argv,opt_string, long_options, &option_index);
 
     if (c == -1) {
@@ -53,21 +52,18 @@ void ParseOptionsIndex(int argc, char **argv, ProgramOptions &opt) {
     switch (c) {
     case 0:
       break;
-		case 'i':
-		{
-			opt.index = optarg;
-			break;
-		}
-    case 'k':
-		{
+    case 'i': {
+      opt.index = optarg;
+      break;
+    }
+    case 'k': {
       stringstream(optarg) >> opt.k;
       break;
-		}
-		case 'f':
-		{
-			opt.transfasta = optarg;
-			break;
-		}
+    }
+    case 'f': {
+      opt.transfasta = optarg;
+      break;
+    }
     default: break;
     }
   }
@@ -78,25 +74,24 @@ void ParseOptionsIndex(int argc, char **argv, ProgramOptions &opt) {
 }
 
 
-void ParseOptionsEM(int argc, char **argv, ProgramOptions &opt) {
+void ParseOptionsEM(int argc, char **argv, ProgramOptions& opt) {
   int verbose_flag = 0;
 
-  const char* opt_string = "t:i:s:o:n:";
-  static struct option long_options[] =
-  {
-		// long args
+  const char *opt_string = "t:i:s:o:n:";
+  static struct option long_options[] = {
+    // long args
     {"verbose", no_argument, &verbose_flag, 1},
-		// short args
+    // short args
     {"threads", required_argument, 0, 't'},
-		{"index", required_argument, 0, 'i'},
+    {"index", required_argument, 0, 'i'},
     {"skip", required_argument, 0, 's'},
     {"output-dir", required_argument, 0, 'o'},
-		{"iterations", required_argument, 0, 'n'},
+    {"iterations", required_argument, 0, 'n'},
     {0,0,0,0}
   };
-	int c;
+  int c;
   int option_index = 0;
-	while (true) {
+  while (true) {
     c = getopt_long(argc,argv,opt_string, long_options, &option_index);
 
     if (c == -1) {
@@ -106,31 +101,26 @@ void ParseOptionsEM(int argc, char **argv, ProgramOptions &opt) {
     switch (c) {
     case 0:
       break;
-		case 't':
-		{
-			stringstream(optarg) >> opt.threads;
+    case 't': {
+      stringstream(optarg) >> opt.threads;
+      break;
+    }
+    case 'i': {
+      opt.index = optarg;
       break;
-		}
-		case 'i':
-		{
-			opt.index = optarg;
-			break;
-		}
-		case 's':
-		{
-			stringstream(optarg) >> opt.skip;
+    }
+    case 's': {
+      stringstream(optarg) >> opt.skip;
       break;
-		}
-		case 'o':
-		{
+    }
+    case 'o': {
       opt.output = optarg;
       break;
-		}
-		case 'n':
-		{
-			stringstream(optarg) >> opt.iterations;
-			break;
-		}
+    }
+    case 'n': {
+      stringstream(optarg) >> opt.iterations;
+      break;
+    }
     default: break;
     }
   }
@@ -145,24 +135,23 @@ void ParseOptionsEM(int argc, char **argv, ProgramOptions &opt) {
   }
 }
 
-void ParseOptionsEMOnly(int argc, char **argv, ProgramOptions &opt) {
+void ParseOptionsEMOnly(int argc, char **argv, ProgramOptions& opt) {
   int verbose_flag = 0;
 
-  const char* opt_string = "t:s:o:n:";
-  static struct option long_options[] =
-  {
-		// long args
+  const char *opt_string = "t:s:o:n:";
+  static struct option long_options[] = {
+    // long args
     {"verbose", no_argument, &verbose_flag, 1},
-		// short args
+    // short args
     {"threads", required_argument, 0, 't'},
     {"seed", required_argument, 0, 's'},
     {"output-dir", required_argument, 0, 'o'},
-		{"iterations", required_argument, 0, 'n'},
+    {"iterations", required_argument, 0, 'n'},
     {0,0,0,0}
   };
-	int c;
+  int c;
   int option_index = 0;
-	while (true) {
+  while (true) {
     c = getopt_long(argc,argv,opt_string, long_options, &option_index);
 
     if (c == -1) {
@@ -172,26 +161,22 @@ void ParseOptionsEMOnly(int argc, char **argv, ProgramOptions &opt) {
     switch (c) {
     case 0:
       break;
-		case 't':
-		{
-			stringstream(optarg) >> opt.threads;
+    case 't': {
+      stringstream(optarg) >> opt.threads;
       break;
-		}
-		case 's':
-		{
-			stringstream(optarg) >> opt.seed;
+    }
+    case 's': {
+      stringstream(optarg) >> opt.seed;
       break;
-		}
-		case 'o':
-		{
+    }
+    case 'o': {
       opt.output = optarg;
       break;
-		}
-		case 'n':
-		{
-			stringstream(optarg) >> opt.iterations;
-			break;
-		}
+    }
+    case 'n': {
+      stringstream(optarg) >> opt.iterations;
+      break;
+    }
     default: break;
     }
   }
@@ -204,138 +189,138 @@ void ParseOptionsEMOnly(int argc, char **argv, ProgramOptions &opt) {
 
 bool CheckOptionsIndex(ProgramOptions& opt) {
 
-	bool ret = true;
-
-	if (opt.k <= 0 || opt.k >= Kmer::MAX_K) {
-		cerr << "Error: invalid k-mer size " << opt.k << ", maximum is " << (Kmer::MAX_K -1) << endl;
-		ret = false;
-	}
-
-		// we want to generate the index, check k, index and transfasta
-	struct stat stFileInfo;
-	auto intStat = stat(opt.transfasta.c_str(), &stFileInfo);
-	if (intStat != 0) {
-		cerr << "Error: transcript fasta file not found " << opt.transfasta << endl;
-		ret = false;
-	}
-	
-	if (opt.index.empty()) {
-		cerr << "Error: need to specify index name" << endl;
-		ret = false;
-	}
-
-	return ret;
+  bool ret = true;
+
+  if (opt.k <= 0 || opt.k >= Kmer::MAX_K) {
+    cerr << "Error: invalid k-mer size " << opt.k << ", maximum is " << (Kmer::MAX_K -1) << endl;
+    ret = false;
+  }
+
+  // we want to generate the index, check k, index and transfasta
+  struct stat stFileInfo;
+  auto intStat = stat(opt.transfasta.c_str(), &stFileInfo);
+  if (intStat != 0) {
+    cerr << "Error: transcript fasta file not found " << opt.transfasta << endl;
+    ret = false;
+  }
+
+  if (opt.index.empty()) {
+    cerr << "Error: need to specify index name" << endl;
+    ret = false;
+  }
+
+  return ret;
 }
 
 bool CheckOptionsEM(ProgramOptions& opt, bool emonly = false) {
 
-	bool ret = true;
-
- 
-	// check for index
-	if (!emonly) {
-		if (opt.index.empty()) {
-			cerr << "Error: index file missing" << endl;
-			ret = false;
-		} else {
-			struct stat stFileInfo;
-			auto intStat = stat(opt.index.c_str(), &stFileInfo);
-			if (intStat != 0) {
-				cerr << "Error: index file not found " << opt.index << endl;
-				ret = false;
-			}
-		}
-	}
-
-	// check for read files
-	if (!emonly) {
-		if (opt.files.size() == 0) {
-			cerr << "Error: Missing read files" << endl;
-			ret = false;
-		} else {
-			struct stat stFileInfo;
-			for (auto &fn : opt.files) {
-				auto intStat = stat(fn.c_str(), &stFileInfo);
-				if (intStat != 0) {
-					cerr << "Error: file not found " << fn << endl;
-					ret = false;
-				}
-			}
-		}
-
-		if (!(opt.files.size() == 1 || opt.files.size() == 2)) {
-			cerr << "Error: Input files should be 1 or 2 files only" << endl;
-			ret = false;
-		}
-
-		if (opt.skip <= 0) {
-			cerr << "Error: skip has to be a positive integer" << endl;
-			ret = false;
-		}
-	}
-
-
-	if (opt.iterations <= 0) {
-		cerr << "Error: Invalid number of iterations " << opt.iterations << endl;
-		ret = false;
-	}
-
-	if (opt.output.empty()) {
-		cerr << "Error: need to specify output directory " << opt.output << endl;
-		ret = false;
-	} else {
-		struct stat stFileInfo;
-		auto intStat = stat(opt.output.c_str(), &stFileInfo);
-		if (intStat == 0) {
-			// file/dir exits
-			if (!S_ISDIR(stFileInfo.st_mode)) {
-				cerr << "Error: file " << opt.output << " exists and is not a directory" << endl;
-				ret = false;
-			} else if (emonly) {
-				// check for directory/counts.txt
-				struct stat stCountInfo;
-				auto intcountstat = stat((opt.output + "/counts.txt" ).c_str(), &stCountInfo);
-				if (intcountstat != 0) {
-					cerr << "Error: could not find file " << opt.output << "/counts.txt" << endl;
-					ret = false;
-				}
-
-				// check for directory/index.saved
-				struct stat stIndexInfo;
-				auto intindexstat = stat((opt.output + "/index.saved").c_str(), &stIndexInfo);
-				if (intindexstat != 0) {
-					cerr << "Error: could not find index " << opt.output << "/index.saved" << endl;
-					ret = false;
-				}
-				opt.index = (opt.output + "/index.saved");
-			}
-		} else {
-			if (emonly) {
-				cerr << "Error: output directory needs to exist, run em first" << endl;
-				ret = false;
-			} else {
-				// create directory
-				if (mkdir(opt.output.c_str(), 0777) == -1) {
-					cerr << "Error: could not create directory " << opt.output << endl;
-					ret = false;
-				}
-			}
-		}
-	}
-
-	if (opt.threads <= 0) {
-		cerr << "Error: invalid number of threads " << opt.threads << endl;
-		ret = false;
-	} else {
-		unsigned int n = std::thread::hardware_concurrency();
-		if (n != 0 && n < opt.threads) {
-			cerr << "Warning: you asked for " << opt.threads
-					 << ", but only " << n << " cores on the machine" << endl;
-		}
-	}
-	
-
-	return ret;
+  bool ret = true;
+
+
+  // check for index
+  if (!emonly) {
+    if (opt.index.empty()) {
+      cerr << "Error: index file missing" << endl;
+      ret = false;
+    } else {
+      struct stat stFileInfo;
+      auto intStat = stat(opt.index.c_str(), &stFileInfo);
+      if (intStat != 0) {
+        cerr << "Error: index file not found " << opt.index << endl;
+        ret = false;
+      }
+    }
+  }
+
+  // check for read files
+  if (!emonly) {
+    if (opt.files.size() == 0) {
+      cerr << "Error: Missing read files" << endl;
+      ret = false;
+    } else {
+      struct stat stFileInfo;
+      for (auto& fn : opt.files) {
+        auto intStat = stat(fn.c_str(), &stFileInfo);
+        if (intStat != 0) {
+          cerr << "Error: file not found " << fn << endl;
+          ret = false;
+        }
+      }
+    }
+
+    if (!(opt.files.size() == 1 || opt.files.size() == 2)) {
+      cerr << "Error: Input files should be 1 or 2 files only" << endl;
+      ret = false;
+    }
+
+    if (opt.skip <= 0) {
+      cerr << "Error: skip has to be a positive integer" << endl;
+      ret = false;
+    }
+  }
+
+
+  if (opt.iterations <= 0) {
+    cerr << "Error: Invalid number of iterations " << opt.iterations << endl;
+    ret = false;
+  }
+
+  if (opt.output.empty()) {
+    cerr << "Error: need to specify output directory " << opt.output << endl;
+    ret = false;
+  } else {
+    struct stat stFileInfo;
+    auto intStat = stat(opt.output.c_str(), &stFileInfo);
+    if (intStat == 0) {
+      // file/dir exists
+      if (!S_ISDIR(stFileInfo.st_mode)) {
+        cerr << "Error: file " << opt.output << " exists and is not a directory" << endl;
+        ret = false;
+      } else if (emonly) {
+        // check for directory/counts.txt
+        struct stat stCountInfo;
+        auto intcountstat = stat((opt.output + "/counts.txt" ).c_str(), &stCountInfo);
+        if (intcountstat != 0) {
+          cerr << "Error: could not find file " << opt.output << "/counts.txt" << endl;
+          ret = false;
+        }
+
+        // check for directory/index.saved
+        struct stat stIndexInfo;
+        auto intindexstat = stat((opt.output + "/index.saved").c_str(), &stIndexInfo);
+        if (intindexstat != 0) {
+          cerr << "Error: could not find index " << opt.output << "/index.saved" << endl;
+          ret = false;
+        }
+        opt.index = (opt.output + "/index.saved");
+      }
+    } else {
+      if (emonly) {
+        cerr << "Error: output directory needs to exist, run em first" << endl;
+        ret = false;
+      } else {
+        // create directory
+        if (mkdir(opt.output.c_str(), 0777) == -1) {
+          cerr << "Error: could not create directory " << opt.output << endl;
+          ret = false;
+        }
+      }
+    }
+  }
+
+  if (opt.threads <= 0) {
+    cerr << "Error: invalid number of threads " << opt.threads << endl;
+    ret = false;
+  } else {
+    unsigned int n = std::thread::hardware_concurrency();
+    if (n != 0 && n < opt.threads) {
+      cerr << "Warning: you asked for " << opt.threads
+           << ", but only " << n << " cores on the machine" << endl;
+    }
+  }
+
+
+  return ret;
 }
 
 
@@ -344,145 +329,139 @@ void PrintCite() {
   //  cerr << "When using this program in your research, please cite" << endl << endl;
 }
 
-void PrintVersion()
-{
+void PrintVersion() {
   cout << "Kallisto, version: " << 	KALLISTO_VERSION << endl;
 }
 
-void usage()
-{
-	cout << "Kallisto " << endl
-			 << "Does transcriptome stuff" << endl << endl
-			 << "Usage: Kallisto CMD [options] .." << endl << endl
-			 << "Where <CMD> can be one of:" << endl << endl
-			 << "    index         Builds the index "<< endl 
-			 << "    em            Runs the EM algorithm " << endl
-			 << "    em-only       Runs the EM algorithm without mapping" << endl
-			 << "    cite          Prints citation information " << endl
-			 << "    version       Prints version information"<< endl << endl;
+void usage() {
+  cout << "Kallisto " << endl
+       << "Does transcriptome stuff" << endl << endl
+       << "Usage: Kallisto CMD [options] .." << endl << endl
+       << "Where <CMD> can be one of:" << endl << endl
+       << "    index         Builds the index "<< endl
+       << "    em            Runs the EM algorithm " << endl
+       << "    em-only       Runs the EM algorithm without mapping" << endl
+       << "    cite          Prints citation information " << endl
+       << "    version       Prints version information"<< endl << endl;
 }
 
 
-void usageIndex()
-{
-	cout << "Kallisto " << endl
-			 << "Does transcriptome stuff" << endl << endl
-			 << "Usage: Kallisto index [options]" << endl << endl
-			 << "-k, --kmer-size=INT         Size of k-mers, default (21), max value is " << (Kmer::MAX_K-1) << endl
-			 << "-i, --index=STRING             Filename for index to be constructed " << endl
-			 << "-f, --trans-fasta=STRING       FASTA file containing reference transcriptome " << endl
-			 << "    --verbose               Print lots of messages during run" << endl;
+void usageIndex() {
+  cout << "Kallisto " << endl
+       << "Does transcriptome stuff" << endl << endl
+       << "Usage: Kallisto index [options]" << endl << endl
+       << "-k, --kmer-size=INT         Size of k-mers, default (21), max value is " << (Kmer::MAX_K-1) << endl
+       << "-i, --index=STRING             Filename for index to be constructed " << endl
+       << "-f, --trans-fasta=STRING       FASTA file containing reference transcriptome " << endl
+       << "    --verbose               Print lots of messages during run" << endl;
 }
 
-void usageEM()
-{
-	cout << "Kallisto " << endl
-			 << "Does transcriptome stuff" << endl << endl
-			 << "Usage: Kallisto em [options] FASTQ-files" << endl << endl
-			 << "-t, --threads=INT           Number of threads to use (default value 1)" << endl
-			 << "-i, --index=INT             Filename for index " << endl
-			 << "-s, --seed=INT              Seed value for randomness (default value 0, use time based randomness)" << endl
-			 << "-n, --iterations=INT        Number of iterations of EM algorithm (default value 500)" << endl
-			 << "-o, --output-dir=STRING        Directory to store output to" << endl
-			 << "    --verbose               Print lots of messages during run" << endl;
+void usageEM() {
+  cout << "Kallisto " << endl
+       << "Does transcriptome stuff" << endl << endl
+       << "Usage: Kallisto em [options] FASTQ-files" << endl << endl
+       << "-t, --threads=INT           Number of threads to use (default value 1)" << endl
+       << "-i, --index=INT             Filename for index " << endl
+       << "-s, --seed=INT              Seed value for randomness (default value 0, use time based randomness)" << endl
+       << "-n, --iterations=INT        Number of iterations of EM algorithm (default value 500)" << endl
+       << "-o, --output-dir=STRING        Directory to store output to" << endl
+       << "    --verbose               Print lots of messages during run" << endl;
 }
 
-void usageEMOnly()
-{
-	cout << "Kallisto " << endl
-			 << "Does transcriptome stuff" << endl << endl
-			 << "Usage: Kallisto em-only [options]" << endl << endl
-			 << "-t, --threads=INT           Number of threads to use (default value 1)" << endl
-			 << "-s, --seed=INT              Seed value for randomness (default value 0, use time based randomness)" << endl
-			 << "-n, --iterations=INT        Number of iterations of EM algorithm (default value 500)" << endl
-			 << "-o, --output-dir=STRING        Directory to store output to" << endl
-			 << "    --verbose               Print lots of messages during run" << endl;
+void usageEMOnly() {
+  cout << "Kallisto " << endl
+       << "Does transcriptome stuff" << endl << endl
+       << "Usage: Kallisto em-only [options]" << endl << endl
+       << "-t, --threads=INT           Number of threads to use (default value 1)" << endl
+       << "-s, --seed=INT              Seed value for randomness (default value 0, use time based randomness)" << endl
+       << "-n, --iterations=INT        Number of iterations of EM algorithm (default value 500)" << endl
+       << "-o, --output-dir=STRING        Directory to store output to" << endl
+       << "    --verbose               Print lots of messages during run" << endl;
 }
 
 
-int main(int argc, char *argv[])
-{
-	
-    if (argc < 2) {
-        usage();
+int main(int argc, char *argv[]) {
+
+  if (argc < 2) {
+    usage();
+    exit(1);
+  } else {
+    ProgramOptions opt;
+    string cmd(argv[1]);
+    if (cmd == "version") {
+      PrintVersion();
+    } else if (cmd == "cite") {
+      PrintCite();
+    } else if (cmd == "index") {
+      if (argc==2) {
+        usageIndex();
+        return 0;
+      }
+      ParseOptionsIndex(argc-1,argv+1,opt);
+      if (!CheckOptionsIndex(opt)) {
+        usageIndex();
+        exit(1);
+      } else {
+        // create an index
+        Kmer::set_k(opt.k);
+        KmerIndex index(opt);
+        std::cerr << "Building index from: " << opt.transfasta << std::endl;
+        index.BuildTranscripts(opt.transfasta);
+        index.write(opt.index);
+      }
+    } else if (cmd == "em") {
+      if (argc==2) {
+        usageEM();
+        return 0;
+      }
+      ParseOptionsEM(argc-1,argv+1,opt);
+      if (!CheckOptionsEM(opt)) {
+        usageEM();
         exit(1);
+      } else {
+        // run the em algorithm
+        KmerIndex index(opt);
+        index.load(opt);
+        auto collection = ProcessReads<KmerIndex, MinCollector<KmerIndex>>(index, opt);
+        // save modified index for future use
+        index.write((opt.output+"/index.saved"), false);
+        // compute mean frag length somewhere?
+        auto eff_lens = calc_eff_lens(index.trans_lens_, 30.0);
+        auto weights = calc_weights (collection.counts, index.ecmap, eff_lens);
+        EMAlgorithm<KmerIndex> em(opt, index, collection.counts, eff_lens, weights);
+        em.run();
+        em.compute_rho();
+        em.write(opt.output);
+      }
+    } else if (cmd == "em-only") {
+      if (argc==2) {
+        usageEMOnly();
+        return 0;
+      }
+      ParseOptionsEMOnly(argc-1,argv+1,opt);
+      if (!CheckOptionsEM(opt, true)) {
+        usageEMOnly();
+        exit(1);
+      } else {
+        // run the em algorithm
+        KmerIndex index(opt);
+        index.load(opt, false); // skip the k-mer map
+        MinCollector<KmerIndex> collection(index, opt);
+        collection.loadCounts(opt);
+        // compute mean frag length somewhere?
+        auto eff_lens = calc_eff_lens(index.trans_lens_, 30.0);
+        auto weights = calc_weights (collection.counts, index.ecmap, eff_lens);
+        EMAlgorithm<KmerIndex> em(opt, index, collection.counts, eff_lens, weights);
+        em.run();
+        em.compute_rho();
+        em.write(opt.output);
+      }
     } else {
-        ProgramOptions opt;
-        string cmd(argv[1]);
-        if (cmd == "version") {
-            PrintVersion();
-        } else if (cmd == "cite") {
-            PrintCite();
-        } else if (cmd == "index") {
-            if (argc==2) {
-                usageIndex();
-                return 0;
-            }
-            ParseOptionsIndex(argc-1,argv+1,opt);
-            if (!CheckOptionsIndex(opt)) {
-                usageIndex();
-                exit(1);
-            } else {
-                // create an index
-                Kmer::set_k(opt.k);
-                KmerIndex index(opt);
-                std::cerr << "Building index from: " << opt.transfasta << std::endl;
-                index.BuildTranscripts(opt.transfasta);
-                index.write(opt.index);
-            }
-        } else if (cmd == "em") {
-            if (argc==2) {
-                usageEM();
-                return 0;
-            }
-            ParseOptionsEM(argc-1,argv+1,opt);
-            if (!CheckOptionsEM(opt)) {
-                usageEM();
-                exit(1);
-            } else {
-                // run the em algorithm
-                KmerIndex index(opt);
-                index.load(opt);
-                auto collection = ProcessReads<KmerIndex, MinCollector<KmerIndex>>(index, opt);
-                // save modified index for future use
-                index.write((opt.output+"/index.saved"), false);
-                // compute mean frag length somewhere?
-                auto eff_lens = calc_eff_lens(index.trans_lens_, 30.0);
-                auto weights = calc_weights (collection.counts, index.ecmap, eff_lens);
-                EMAlgorithm<KmerIndex> em(opt, index, collection.counts, eff_lens, weights);
-                em.run();
-                em.compute_rho();
-                em.write(opt.output);
-            }
-        } else if (cmd == "em-only") {
-            if (argc==2) {
-                usageEMOnly();
-                return 0;
-            }
-            ParseOptionsEMOnly(argc-1,argv+1,opt);
-            if (!CheckOptionsEM(opt, true)) {
-                usageEMOnly();
-                exit(1);
-            } else {
-                // run the em algorithm
-                KmerIndex index(opt);
-                index.load(opt, false); // skip the k-mer map
-                MinCollector<KmerIndex> collection(index, opt);
-                collection.loadCounts(opt);
-                // compute mean frag length somewhere?
-                auto eff_lens = calc_eff_lens(index.trans_lens_, 30.0);
-                auto weights = calc_weights (collection.counts, index.ecmap, eff_lens);
-                EMAlgorithm<KmerIndex> em(opt, index, collection.counts, eff_lens, weights);
-                em.run();
-                em.compute_rho();
-                em.write(opt.output);
-            }
-        } else {
-            cerr << "Did not understand command " << cmd << endl;
-            usage();
-            exit(1);
-        }
-		
-	}
-	return 0;
+      cerr << "Did not understand command " << cmd << endl;
+      usage();
+      exit(1);
+    }
+
+  }
+  return 0;
 }
diff --git a/src/weights.cpp b/src/weights.cpp
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/weights.h b/src/weights.h
index 2fec4762..996b9028 100644
--- a/src/weights.h
+++ b/src/weights.h
@@ -9,49 +9,46 @@
 
 using WeightMap = std::unordered_map<int, std::vector<double>>;
 
-std::vector<double> calc_eff_lens(const std::vector<int>& lengths, double mean)
-{
-    // for now do the total naive thing and subtract mean frag length
-    std::vector<double> eff_lens;
-    eff_lens.reserve(lengths.size());
-
-    for (auto& cur_len: lengths)
-    {
-        eff_lens.push_back( static_cast<double>(cur_len) - mean + 1.0 );
-    }
+std::vector<double> calc_eff_lens(const std::vector<int>& lengths, double mean) {
+  // for now, use the naive estimate: transcript length - mean fragment length + 1
+  std::vector<double> eff_lens;
+  eff_lens.reserve(lengths.size());
+
+  for (auto& cur_len: lengths) {
+    eff_lens.push_back( static_cast<double>(cur_len) - mean + 1.0 );
+  }
 
-    return eff_lens;
+  return eff_lens;
 }
 
 
 WeightMap calc_weights(
-        const std::vector<int>& counts,
-        const EcMap& ecmap,
-        const std::vector<double>& eff_lens)
-{
+  const std::vector<int>& counts,
+  const EcMap& ecmap,
+  const std::vector<double>& eff_lens) {
 
-    // TODO: throw some assertions in here to make sure the length of counts
-    // and ec map are correct... as well as eff_lens size is reasonable
+  // TODO: throw some assertions in here to make sure the length of counts
+  // and ec map are correct... as well as eff_lens size is reasonable
 
-    // weights are stored _exactly_ in the same orientation as the ec map
-    WeightMap weights;
+  // weights are stored _exactly_ in the same orientation as the ec map
+  WeightMap weights;
 
-    for (auto& kv : ecmap) {
+  for (auto& kv : ecmap) {
 
-			//std::cout << kv.first;
-        std::vector<double> trans_weights;
-        trans_weights.reserve(kv.second.size());
+    //std::cout << kv.first;
+    std::vector<double> trans_weights;
+    trans_weights.reserve(kv.second.size());
 
-        for (auto& trans_id : kv.second) {
-            trans_weights.push_back( static_cast<double>(counts[kv.first]) /
-                    eff_lens[trans_id] );
-        }
-
-        weights.insert( {kv.first, trans_weights} );
+    for (auto& trans_id : kv.second) {
+      trans_weights.push_back( static_cast<double>(counts[kv.first]) /
+                               eff_lens[trans_id] );
     }
 
+    weights.insert( {kv.first, trans_weights} );
+  }
+
 
-    return weights;
+  return weights;
 }
 
 #endif // KALLISTO_WEIGHTS_H