Skip to content

Commit

Permalink
Changed VCF header of merge
Browse files Browse the repository at this point in the history
  • Loading branch information
fritzsedlazeck committed Mar 16, 2018
1 parent f109e24 commit 873be34
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 4 deletions.
14 changes: 13 additions & 1 deletion src/SURVIVOR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "analysis_sv/MUMmer_overlap.h"
#include "analysis_sv/Select_samples.h"
#include "convert/Convert_hapcut2.h"
#include "convert/Update_bam_pacbio.h"

Parameter* Parameter::m_pInstance = NULL;

Expand Down Expand Up @@ -223,6 +224,17 @@ void official_interface(int argc, char *argv[]) {
}
exit(0);
}
/*else if (strcmp(argv[1], "updateBamfile") == 0) {
if (argc == 5) {
process_sam_forpacbio(std::string(argv[2]), std::string(argv[3]), std::string(argv[4]));
} else {
std::cerr << "original SNP file" << std::endl;
std::cerr << "Hapcut2 final file" << std::endl;
std::cerr << "Output: vcf file" << std::endl;
}
exit(0);
}*/

}
std::cerr << "Program: SURVIVOR (Tools for Structural Variations in the VCF format)" << std::endl;
Expand Down Expand Up @@ -252,7 +264,7 @@ void official_interface(int argc, char *argv[]) {
std::cerr << "\tbedtovcf\tConverts a bed file to a VCF file " << std::endl;
std::cerr << "\tsmaptovcf\tConverts the smap file to a VCF file (beta version)" << std::endl;
std::cerr << "\tbedpetovcf\tConverts a bedpe file ot a VCF file (beta version)" << std::endl;
std::cerr << "\thapcuttovcf\tConverts the Hapcut2 final file to a VCF file using the original SNP file provided to Hapcut2" <<std::endl;
std::cerr << "\thapcuttovcf\tConverts the Hapcut2 final file to a VCF file using the original SNP file provided to Hapcut2" << std::endl;

exit(0);
}
Expand Down
106 changes: 106 additions & 0 deletions src/convert/Update_bam_pacbio.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
* Update_bam_pacbio.cpp
*
* Created on: Mar 15, 2018
* Author: sedlazec
*/

#include "Update_bam_pacbio.h"


/*
 * Placeholder for reading the header of a SAM file (judging by the name;
 * nothing is parsed yet).
 *
 * unmapped_sam: path handed in by the caller; currently not opened or read.
 * Returns: always an empty vector.
 */
std::vector<std::string> parse_header(std::string unmapped_sam) {
    (void) unmapped_sam; // not used until the parser is implemented
    return std::vector<std::string>();
}



/*
 * Placeholder for combining the headers of the two SAM files.
 *
 * At the moment it only fetches the (currently always empty) result of
 * parse_header() for unmapped_sam; mapped_sam is not read and nothing is
 * written to file2.
 */
void merge_header(std::string unmapped_sam, std::string mapped_sam, FILE *& file2) {
    const std::vector<std::string> unmapped_header(parse_header(unmapped_sam));

    // Silence unused warnings until the merge itself is written.
    (void) unmapped_header;
    (void) mapped_sam;
    (void) file2;
}

/*
 * Extends the lines stored in `entries` with the trailing columns of the
 * matching records found in `unmapped_sam`.
 *
 * The file is scanned once per call. Lines starting with '@' and blank lines
 * are ignored. For every other line the text before the first tab is used as
 * the key; if that key exists in `entries`, everything after the tenth tab of
 * the line is appended verbatim to the stored value. Lines with fewer than ten
 * tabs contribute nothing.
 *
 * NOTE(review): the appended text starts at the 11th column and is added
 * without a separating tab -- confirm this is the intended column/format.
 *
 * entries:      map from record id to output line; values are modified in place.
 * unmapped_sam: path of the tab-separated file to scan.
 *
 * Terminates the program with a failure status if the file cannot be opened.
 */
void update_entries(std::map<std::string, std::string> & entries, std::string unmapped_sam) {
    std::cout << "check unmapped" << std::endl;

    std::ifstream myfile;
    myfile.open(unmapped_sam.c_str(), std::ifstream::in);
    if (!myfile.good()) {
        std::cout << "Sam Parser: could not open file: " << unmapped_sam.c_str() << std::endl;
        exit(EXIT_FAILURE); // failing to open the input is an error, not a success
    }

    std::string buffer;
    // Looping on the result of getline (rather than on eof()) also processes a
    // final line that is not terminated by a newline.
    while (std::getline(myfile, buffer)) {
        if (buffer.empty() || buffer[0] == '@') {
            continue;
        }

        const size_t found = buffer.find_first_of('\t');
        // Look the id up once and reuse the iterator for the append below.
        std::map<std::string, std::string>::iterator hit = entries.find(buffer.substr(0, found));
        if (hit == entries.end()) {
            continue;
        }

        // Advance `tail` to the first character after the tenth tab.
        size_t tail = 0;
        for (int tabs = 0; tabs < 10 && tail != std::string::npos; ++tabs) {
            tail = buffer.find('\t', tail);
            if (tail != std::string::npos) {
                ++tail;
            }
        }
        if (tail != std::string::npos) {
            hit->second.append(buffer, tail, std::string::npos);
        }
    }
}

/*
 * Writes every stored line to `out`, each followed by a newline, in the
 * iteration order of the map (i.e. sorted by key).
 */
static void write_updated_entries(FILE * out, const std::map<std::string, std::string> & entries) {
    for (std::map<std::string, std::string>::const_iterator i = entries.begin(); i != entries.end(); ++i) {
        fprintf(out, "%s\n", i->second.c_str());
    }
}

/*
 * Copies the records of `mapped_sam` to `output_sam`, extending each one with
 * the trailing columns of the record with the same id in `unmapped_sam`.
 *
 * Lines of `mapped_sam` starting with '@' are not copied, and blank lines are
 * skipped. Records are collected in batches; whenever a batch holds more than
 * 1000 ids (and once more at the end) update_entries() is run against
 * `unmapped_sam` and the batch is written out.
 *
 * NOTE(review): records are keyed by the text before the first tab, so a later
 * line with the same id replaces an earlier one within a batch, and each batch
 * is written sorted by id rather than in input order -- confirm this is
 * acceptable for the intended input.
 *
 * unmapped_sam: path of the file providing the extra columns.
 * mapped_sam:   path of the file whose records are copied.
 * output_sam:   path of the file to create (truncated if it exists).
 *
 * Terminates the program with a failure status if the mapped file cannot be
 * read or the output file cannot be created.
 */
void process_sam_forpacbio(std::string unmapped_sam, std::string mapped_sam, std::string output_sam) {
    std::ifstream myfile;
    myfile.open(mapped_sam.c_str(), std::ifstream::in);
    if (!myfile.good()) {
        std::cout << "Sam Parser: could not open file: " << mapped_sam.c_str() << std::endl;
        exit(EXIT_FAILURE);
    }

    FILE *file2 = fopen(output_sam.c_str(), "w");
    if (file2 == NULL) {
        // Without this check the fprintf calls below would dereference NULL.
        std::cout << "Sam Parser: could not open file: " << output_sam.c_str() << std::endl;
        exit(EXIT_FAILURE);
    }

    merge_header(unmapped_sam, mapped_sam, file2);

    // Number of ids a batch may hold before it is updated and written out.
    const size_t batch_size = 1000;

    std::map<std::string, std::string> entries;
    std::string buffer;
    // Looping on the result of getline (rather than on eof()) also processes a
    // final record that is not terminated by a newline.
    while (std::getline(myfile, buffer)) {
        if (buffer.empty() || buffer[0] == '@') { // skip blank lines and the header
            continue;
        }

        const size_t found = buffer.find_first_of('\t');
        entries[buffer.substr(0, found)] = buffer;

        if (entries.size() > batch_size) {
            std::cout << "check entries" << std::endl;
            // Pull the extra columns from the original file, then flush the batch.
            update_entries(entries, unmapped_sam);
            write_updated_entries(file2, entries);
            entries.clear();
        }
    }
    myfile.close();

    // Handle whatever is left in the last, partially filled batch.
    update_entries(entries, unmapped_sam);
    write_updated_entries(file2, entries);
    fclose(file2);
}

20 changes: 20 additions & 0 deletions src/convert/Update_bam_pacbio.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Update_bam_pacbio.h
*
* Created on: Mar 15, 2018
* Author: sedlazec
*/

#ifndef CONVERT_UPDATE_BAM_PACBIO_H_
#define CONVERT_UPDATE_BAM_PACBIO_H_

#include "../vcfs/Merge_VCF.h"
#include "../structs.h"
#include "../simulator/Eval_vcf.h"
#include <math.h>
#include <iosfwd>

/*
 * Copies the non-header ('@') lines of mapped_sam to output_sam, appending to
 * each line the trailing columns of the line with the same leading id found in
 * unmapped_sam.
 *
 * unmapped_sam: path of the file the extra columns are taken from.
 * mapped_sam:   path of the file whose lines are copied.
 * output_sam:   path of the file that is written.
 *
 * The program exits if mapped_sam or unmapped_sam cannot be opened.
 */
void process_sam_forpacbio(std::string unmapped_sam, std::string mapped_sam, std::string output_sam);


#endif /* CONVERT_UPDATE_BAM_PACBIO_H_ */
10 changes: 7 additions & 3 deletions src/merge_vcf/combine_svs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,12 @@ void print_header(FILE *& file, std::vector<std::string> names,std::map<std::str
fprintf(file, "%s", "##INFO=<ID=RE,Number=1,Type=Integer,Description=\"read support\">\n");
fprintf(file, "%s", "##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description=\"Imprecise structural variation\">\n");
fprintf(file, "%s", "##INFO=<ID=PRECISE,Number=0,Type=Flag,Description=\"Precise structural variation\">\n");
fprintf(file, "%s", "##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"Length of the SV\">\n");
fprintf(file, "%s", "##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=\"Type of approach used to detect SV\">\n");
fprintf(file, "%s", "##INFO=<ID=AVGLEN,Number=1,Type=Integer,Description=\"Length of the SV\">\n");
fprintf(file, "%s", "##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=\"Vector of samples supporting the SV.\">\n");
fprintf(file, "%s", "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of the SV.\">\n");
fprintf(file, "%s", "##INFO=<ID=SUPP_VEC,Number=1,Type=Integer,Description=\"Number of samples supporting the variant.\">\n");
fprintf(file, "%s", "##INFO=<ID=SUPP,Number=1,Type=String,Description=\"Previous support vector\">\n");
fprintf(file, "%s", "##INFO=<ID=STRANDS,Number=1,Type=String,Description=\"Indicating the direction of the reads with respect to the type and breakpoint.\">\n");
fprintf(file, "%s", "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n");
fprintf(file, "%s", "##FORMAT=<ID=LN,Number=1,Type=Integer,Description=\"predicted length\">\n");
fprintf(file, "%s", "##FORMAT=<ID=DR,Number=1,Type=Integer,Description=\"# supporting reference,variant reads in that order\">\n");
Expand Down Expand Up @@ -228,7 +232,7 @@ void print_entry_overlap(FILE *& file, SVS_Node * entry, int id) {
}
pos++;
} else {
convert << "./.:0:0,0:--:NaN:NaN";
convert << "./.:NaN:0:0,0:--:NaN:NaN";
}

}
Expand Down

0 comments on commit 873be34

Please sign in to comment.