From 873be340e8adce476caac8f83c5f16eac7b77ff1 Mon Sep 17 00:00:00 2001 From: "fritz.sedlazeck" Date: Fri, 16 Mar 2018 10:46:17 -0500 Subject: [PATCH] Changed VCF header of merge --- src/SURVIVOR.cpp | 14 +++- src/convert/Update_bam_pacbio.cpp | 106 ++++++++++++++++++++++++++++++ src/convert/Update_bam_pacbio.h | 20 ++++++ src/merge_vcf/combine_svs.cpp | 10 ++- 4 files changed, 146 insertions(+), 4 deletions(-) create mode 100644 src/convert/Update_bam_pacbio.cpp create mode 100644 src/convert/Update_bam_pacbio.h diff --git a/src/SURVIVOR.cpp b/src/SURVIVOR.cpp index e8a8c67..97f8312 100644 --- a/src/SURVIVOR.cpp +++ b/src/SURVIVOR.cpp @@ -42,6 +42,7 @@ #include "analysis_sv/MUMmer_overlap.h" #include "analysis_sv/Select_samples.h" #include "convert/Convert_hapcut2.h" +#include "convert/Update_bam_pacbio.h" Parameter* Parameter::m_pInstance = NULL; @@ -223,6 +224,17 @@ void official_interface(int argc, char *argv[]) { } exit(0); } + /*else if (strcmp(argv[1], "updateBamfile") == 0) { + if (argc == 5) { + process_sam_forpacbio(std::string(argv[2]), std::string(argv[3]), std::string(argv[4])); + } else { + std::cerr << "original SNP file" << std::endl; + std::cerr << "Hapcut2 final file" << std::endl; + std::cerr << "Output: vcf file" << std::endl; + } + exit(0); + + }*/ } std::cerr << "Program: SURVIVOR (Tools for Structural Variations in the VCF format)" << std::endl; @@ -252,7 +264,7 @@ void official_interface(int argc, char *argv[]) { std::cerr << "\tbedtovcf\tConverts a bed file to a VCF file " << std::endl; std::cerr << "\tsmaptovcf\tConverts the smap file to a VCF file (beta version)" << std::endl; std::cerr << "\tbedpetovcf\tConverts a bedpe file ot a VCF file (beta version)" << std::endl; - std::cerr << "\thapcuttovcf\tConverts the Hapcut2 final file to a VCF file using the original SNP file provided to Hapcut2" < parse_header(std::string unmapped_sam){ + std::vector header; + + + return header; +} + + + +void merge_header(std::string unmapped_sam,std::string mapped_sam,FILE *& file2) { + std::vector header_un=parse_header(unmapped_sam); + + +} + +void update_entries(std::map & entries, std::string unmapped_sam) { + std::cout<<"check unmapped"< 9) { + entries[id] += buffer[i]; + } + if (buffer[i] == '\t') { + count++; + } + } + } + } + getline(myfile, buffer); + } + +} + +void process_sam_forpacbio(std::string unmapped_sam, std::string mapped_sam, std::string output_sam) { + + + std::string buffer; + std::ifstream myfile; + myfile.open(mapped_sam.c_str(), std::ifstream::in); + if (!myfile.good()) { + std::cout << "Sam Parser: could not open file: " << mapped_sam.c_str() << std::endl; + exit(0); + } + + FILE *file2; + file2 = fopen(output_sam.c_str(), "w"); + + merge_header(unmapped_sam,mapped_sam,file2); + + std::map entries; + getline(myfile, buffer); + while (!myfile.eof()) { //avoid header. + if (buffer[0] != '@') { + //parse part of the mapped entries into a map (e.g. step size =100000) + size_t found = buffer.find_first_of('\t'); + std::string id = buffer.substr(0, found); + entries[id] = buffer; + if (entries.size() > 1000) { + std::cout<<"check entries"<::iterator i = entries.begin(); i != entries.end(); i++) { + fprintf(file2, "%s", (*i).second.c_str()); + fprintf(file2, "%c", '\n'); + } + entries.clear(); + } + } + + getline(myfile, buffer); + } + myfile.close(); + //check orig file and update them + update_entries(entries, unmapped_sam); + for (std::map::iterator i = entries.begin(); i != entries.end(); i++) { + fprintf(file2, "%s", (*i).second.c_str()); + fprintf(file2, "%c", '\n'); + } + fclose(file2); + + +} + diff --git a/src/convert/Update_bam_pacbio.h b/src/convert/Update_bam_pacbio.h new file mode 100644 index 0000000..a59474f --- /dev/null +++ b/src/convert/Update_bam_pacbio.h @@ -0,0 +1,20 @@ +/* + * Update_bam_pacbio.h + * + * Created on: Mar 15, 2018 + * Author: sedlazec + */ + +#ifndef CONVERT_UPDATE_BAM_PACBIO_H_ +#define CONVERT_UPDATE_BAM_PACBIO_H_ + +#include "../vcfs/Merge_VCF.h" +#include "../structs.h" +#include "../simulator/Eval_vcf.h" +#include +#include + +void process_sam_forpacbio(std::string unmapped_sam, std::string mapped_sam, std::string output_sam); + + +#endif /* CONVERT_UPDATE_BAM_PACBIO_H_ */ diff --git a/src/merge_vcf/combine_svs.cpp b/src/merge_vcf/combine_svs.cpp index 53c1f5b..0889060 100644 --- a/src/merge_vcf/combine_svs.cpp +++ b/src/merge_vcf/combine_svs.cpp @@ -74,8 +74,12 @@ void print_header(FILE *& file, std::vector names,std::map\n"); fprintf(file, "%s", "##INFO=\n"); fprintf(file, "%s", "##INFO=\n"); - fprintf(file, "%s", "##INFO=\n"); - fprintf(file, "%s", "##INFO=\n"); + fprintf(file, "%s", "##INFO=\n"); + fprintf(file, "%s", "##INFO=\n"); + fprintf(file, "%s", "##INFO=\n"); + fprintf(file, "%s", "##INFO=\n"); + fprintf(file, "%s", "##INFO=\n"); + fprintf(file, "%s", "##INFO=\n"); fprintf(file, "%s", "##FORMAT=\n"); fprintf(file, "%s", "##FORMAT=\n"); fprintf(file, "%s", "##FORMAT=\n"); @@ -228,7 +232,7 @@ void print_entry_overlap(FILE *& file, SVS_Node * entry, int id) { } pos++; } else { - convert << "./.:0:0,0:--:NaN:NaN"; + convert << "./.:NaN:0:0,0:--:NaN:NaN"; } }