Skip to content

Commit

Permalink
Merge pull request #28 from lilab-bcb/yiming
Browse files Browse the repository at this point in the history
Bug fix and user-friendly improvement
  • Loading branch information
yihming authored Feb 5, 2025
2 parents f249353 + cbd6f90 commit 0eef6d8
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 3 deletions.
34 changes: 31 additions & 3 deletions barcode_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,9 @@ inline void parse_one_line(const std::string& line, int& n_barcodes, int& barcod
pos = line.find_first_of(',');

if (pos != std::string::npos) { index_seq = line.substr(0, pos); trim(index_seq); index_name = line.substr(pos + 1); trim(index_name); }
else { index_seq = line; index_name = line; }
else { index_seq = line; index_name = line; trim(index_seq); trim(index_name); }

if (index_seq.empty() && index_name.empty()) return;

if (barcode_len == 0) barcode_len = index_seq.length();
else assert(barcode_len == index_seq.length());
Expand All @@ -215,6 +217,18 @@ inline void parse_one_line(const std::string& line, int& n_barcodes, int& barcod
++n_barcodes;
}

/**
 * Remove a leading Unicode byte-order mark (BOM) from `line`, in place.
 *
 * Recognized BOMs:
 *   - UTF-8:     EF BB BF
 *   - UTF-32 BE: 00 00 FE FF
 *   - UTF-32 LE: FF FE 00 00
 *   - UTF-16 BE: FE FF
 *   - UTF-16 LE: FF FE
 *
 * Only the BOM bytes are dropped; the remaining bytes are not transcoded.
 * If `line` does not start with one of the BOMs above it is left unchanged.
 *
 * @param line  Text line to strip (modified in place).
 */
inline void skip_bom(std::string& line) {
    // Compare with an explicit byte count: the UTF-32 BOMs contain NUL bytes,
    // so comparing against a plain C-string literal would stop at the first
    // '\0' and never match.
    const auto has_prefix = [&line](const char* bom, size_t len) {
        return line.length() >= len && line.compare(0, len, bom, len) == 0;
    };

    size_t start = 0;

    if (has_prefix("\xEF\xBB\xBF", 3)) // UTF-8
        start = 3;
    // The 4-byte UTF-32 BOMs must be tested before the 2-byte UTF-16 ones,
    // because the UTF-32 LE BOM (FF FE 00 00) begins with the UTF-16 LE BOM (FF FE).
    else if (has_prefix("\x00\x00\xFE\xFF", 4) || has_prefix("\xFF\xFE\x00\x00", 4)) // UTF-32
        start = 4;
    else if (has_prefix("\xFF\xFE", 2) || has_prefix("\xFE\xFF", 2)) // UTF-16
        start = 2;

    line.erase(0, start);
}

void parse_sample_sheet(const std::string& sample_sheet_file, int& n_barcodes, int& barcode_len, HashType& index_dict, std::vector<std::string>& index_names, int max_mismatch = 1, bool convert_cell_barcode = false) {
std::string line;
Expand All @@ -224,13 +238,27 @@ void parse_sample_sheet(const std::string& sample_sheet_file, int& n_barcodes, i
index_dict.clear();
index_names.clear();

bool is_first_line = true;

if (sample_sheet_file.length() > 3 && sample_sheet_file.substr(sample_sheet_file.length() - 3, 3) == ".gz") { // input sample sheet is gzipped
iGZipFile gin(sample_sheet_file);
while (gin.next(line)) parse_one_line(line, n_barcodes, barcode_len, index_dict, index_names, max_mismatch, convert_cell_barcode);
while (gin.next(line)) {
if (is_first_line) {
skip_bom(line);
is_first_line = false;
}
parse_one_line(line, n_barcodes, barcode_len, index_dict, index_names, max_mismatch, convert_cell_barcode);
}
}
else {
std::ifstream fin(sample_sheet_file);
while (std::getline(fin, line)) parse_one_line(line, n_barcodes, barcode_len, index_dict, index_names, max_mismatch, convert_cell_barcode);
while (std::getline(fin, line)) {
if (is_first_line) {
skip_bom(line);
is_first_line = false;
}
parse_one_line(line, n_barcodes, barcode_len, index_dict, index_names, max_mismatch, convert_cell_barcode);
}
fin.close();
}
printf("%s is parsed. n_barcodes = %d, and barcode_len = %d.\n", sample_sheet_file.c_str(), n_barcodes, barcode_len);
Expand Down
5 changes: 5 additions & 0 deletions generate_count_matrix_ADTs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ void parse_input_directory(char* input_dirs) {

input_dir = strtok(NULL, ",");
}

if (inputs.empty()) {
printf("No FASTQ file found in input folder(s): \"%s\"!\n", input_dirs);
exit(-1);
}
}


Expand Down

0 comments on commit 0eef6d8

Please sign in to comment.