Updated examples for version v1.9.3

cmayer · Aug 17, 2022 · 65a5323 · 65a5323
1 parent 8331f68
commit 65a5323
Show file tree

Hide file tree

Showing 10 changed files with 94 additions and 99 deletions.
diff --git a/CSequences2.h b/CSequences2.h
@@ -1384,7 +1384,7 @@ class CSequences2
 	//	if (consensusWeightMinimum < minimum_total_coverage)
 	//	  consensusWeightMinimum = minimum_total_coverage;
 
-	if (1)
+	if (0)
 	{
 	  std::cerr << "DEBUG call conensus at postions: " << pos << '\n';
 	  std::cerr << "cA:     " << cA      << '\n';

diff --git a/MitoGeneExtractor.cpp b/MitoGeneExtractor.cpp
@@ -1250,14 +1250,20 @@ int main(int argc, char **argv)
       cout << setw(50) << "# skipped reads due to low rel. score:  " << skipped_relative_score << endl;
 
       cout << "Gap insertion sites and lengths suggested from reads for the reference:\n";
-      print_Mymap(cout, map_of_insertion_sites_suggested_in_reference);
+      if (map_of_insertion_sites_suggested_in_reference.size() > 0)
+	print_Mymap(cout, map_of_insertion_sites_suggested_in_reference);
+      else
+	cout << "-\n";
 
       cout << "Gap sites in query sequences:\n";
-      print_Mymap(cout, map_of_gap_sites_in_queries);
+      if (map_of_gap_sites_in_queries.size() > 0)
+	print_Mymap(cout, map_of_gap_sites_in_queries);
+      else
+	cout << "-\n";
       cout << '\n';
 
-      unsigned **coverage_profile = seqs_DNA_result.get_DNA_site_profile();
-      print_DNA_profile(cout, coverage_profile, seqs_DNA_result.GetPosNum());
+      //      unsigned **coverage_profile = seqs_DNA_result.get_DNA_site_profile();
+      //      print_DNA_profile(cout, coverage_profile, seqs_DNA_result.GetPosNum());
     }
 
     if (!global_consensus_sequence_output_filename.empty())

diff --git a/README.md b/README.md
diff --git a/example-analysis-for-MitoGeneExtractor/README.md b/example-analysis-for-MitoGeneExtractor/README.md
@@ -1,47 +1,47 @@
-# Readme for the example analysis of the MitoGeneExtractor program (without Snakemake workflow)
-
-## Prerequisites
-- This example requires that the exonerate program is installed. It either has to be in the system path, or you need 
-to modify the runner.sh file  by adding the
-option -e /path-to-exonerate-program/exonerate-program-name so that MitoGeneExtractor can find the exonerate program.
-
-- MitoGeneExtractor has to be compiled. Enter the source directory mitogeneextractor and run the command "./make".
-
-## The example data
-One of the sequencing libraries analysed in Brasseur et al. 2022 was:
-https://www.ncbi.nlm.nih.gov/sra/?term=SRR12554985 .
-This file has been downloaded and extracted as described in Brasseur et al. 2022 using the NCBI SRA-tools 
-version 1.10. Then it has been preprocessed and converted to the fasta format as described in Brasseur et al. 2022. 
-The resulting fasta file has been analysed with the first command of the runner.sh file.
-
-In order not to exceed total file size limits of Github and still be able to include the example file in the repository, the example input file SRR12554985_trimmed_reduced.fas.gz was created by extracting from the original file exactly those reads that match with the COI sequence.
-You can also recreate the full fasta file by downloading the SRR12554985.sra file and following Brasseur et al. 2022 to prepare the fasta file.
-
-## Running the analysis:
-```{r, eval=TRUE}
-./runner.sh
-```
-
-This runs the analysis twice, for two amino acid reference sequences for COI. (i) with the general vertebrate reference for COI and (ii) with the specific COI reference for Passeriformes. The two commands read as follows:
-
-```{r, eval=TRUE}
-../MitoGeneExtractor-v1.9.1 -d SRR12554985_trimmed_reduced.fas -p ../Amino-Acide-references-for-tanomic-groups/xxx-need-to-be-added.fasta -V vulgar-SRR12554985_PasseriformesReference.txt -o SRR12554985_align_PasseriformesReference.fas -n 0 -c SRR12554985_cons_PasseriformesReference.fas -t 0.5 -r 1 -C 2
-```
-
-```{r, eval=TRUE}
-../MitoGeneExtractor-v1.9.1 -d SRR12554985_trimmed_reduced.fas -p ../Amino-Acide-references-for-tanomic-groups/COI-vertebrata-protein-consensus-50_whole.fasta -V vulgar-SRR12554985_vertebrateReference.txt -o SRR12554985_align_vertebrateReference.fas -n 0 -c SRR12554985_cons_vertebrateReference.fas -t 0.5 -r 1 -C 2
-```
-
-Note that if you move the example folder or your data to a different location, the path to the MitoGeneExtractor-v1.9.1 program and/or the path to the reference sequences have to be changed accordingly.
-
-The options used in this example are described in detail in the [main README.md](https://github.com/cmayer/MitoGeneExtractor) or when calling the program with the option -h:
-```{r, eval=TRUE}
-../MitoGeneExtractor-v1.9.1 -h
-```
-
-The [README.md in the Amino-Acid-references-for-taxonomic-groups folder](https://github.com/cmayer/MitoGeneExtractor/blob/main/Amino-Acid-references-for-taxonomic-groups/Readme.md) describes the differences in the general vertebrate reference and the reference specific for Passeriformes (song birds).
-
-The two consensus sequence files generated by the example analysis are almost identical. They only differ in additional amino acids at the beginning and end of the sequence generated by the reference specific to Passeriformes. The main reason is that this reference sequence has additional amino acids at the beginning and at the end. The inner parts of the sequence, in particular the barcode region of COI are reconstructed identically.
+# Readme for the example analysis of the MitoGeneExtractor program (without Snakemake workflow)
+
+## Prerequisites
+- This example requires that the exonerate program is installed. It either has to be in the system path, or you need 
+to modify the runner.sh file  by adding the
+option -e /path-to-exonerate-program/exonerate-program-name so that MitoGeneExtractor can find the exonerate program.
+
+- MitoGeneExtractor has to be compiled. Enter the source directory mitogeneextractor and run the command "make".
+
+## The example data
+One of the sequencing libraries analysed in Brasseur et al. 2022 was:
+https://www.ncbi.nlm.nih.gov/sra/?term=SRR12554985 .
+This file has been downloaded and extracted as described in Brasseur et al. 2022 using the NCBI SRA-tools 
+version 1.10. Then it has been preprocessed and converted to the fasta format as described in Brasseur et al. 2022. 
+The resulting fasta file has been analysed with the first command of the runner.sh file.
+
+In order not to exceed the total file size limits of GitHub and still be able to include the example file in the repository, the example input file SRR12554985_trimmed_reduced.fas.gz was created by extracting from the original file exactly those reads that match the COI sequence.
+You can also recreate the full fasta file by downloading the SRR12554985.sra file and following Brasseur et al. 2022 to prepare the fasta file.
+
+## Running the analysis:
+```{r, eval=TRUE}
+./runner.sh
+```
+
+This runs the analysis twice, for two amino acid reference sequences for COI.  (i) with the specific COI reference for Passeriformes and (ii) with the general vertebrate reference for COI. The two commands read as follows:
+
+```{r, eval=TRUE}
+../MitoGeneExtractor-v1.9.3 -d SRR12554985_trimmed_reduced.fas -p ../Amino-Acid-references-for-taxonomic-groups/COI-references/COI-fulllength-Passeriformes-protein-reference.fasta -V vulgar-SRR12554985_PasseriformesReference.txt -o SRR12554985_align_PasseriformesReference.fas -n 0 -c SRR12554985_cons_PasseriformesReference.fas -t 0.5 -r 1 -C 2
+```
+
+```{r, eval=TRUE}
+../MitoGeneExtractor-v1.9.3 -d SRR12554985_trimmed_reduced.fas -p ../Amino-Acid-references-for-taxonomic-groups/COI-references/COI-fulllength-general-vertebrata-protein-reference_from-consensus-50.fasta -V vulgar-SRR12554985_vertebrateReference.txt -o SRR12554985_align_vertebrateReference.fas -n 0 -c SRR12554985_cons_vertebrateReference.fas -t 0.5 -r 1 -C 2
+```
+
+Note that if you move the example folder or your data to a different location, the path to the MitoGeneExtractor-v1.9.3 program and/or the path to the reference sequences have to be changed accordingly.
+
+The options used in this example are described in detail in the [main README.md](https://github.com/cmayer/MitoGeneExtractor) or when calling the program with the option -h:
+```{r, eval=TRUE}
+../MitoGeneExtractor-v1.9.3 -h
+```
+
+The [README.md in the Amino-Acid-references-for-taxonomic-groups folder](https://github.com/cmayer/MitoGeneExtractor/blob/main/Amino-Acid-references-for-taxonomic-groups/Readme.md) describes the differences in the general vertebrate reference and the reference specific for Passeriformes (song birds).
+
+The two consensus sequence files generated by the example analysis are almost identical. They only differ in additional amino acids at the beginning and end of the sequence generated by the reference specific to Passeriformes. The main reason is that this reference sequence has additional amino acids at the beginning and at the end. The inner parts of the sequence, in particular the barcode region of COI, are reconstructed identically.
 
 Interleaved alignment of whole COI sequences reconstructed using the Passeriformes reference (upper) and the general vertebrate reference (lower):
 
@@ -108,7 +108,7 @@ CGTAAAATATTACAACCAGAACTAACCAGCACTAACATTGAATGAATCCACGGCTGCCCACCGCCCTACCACACTTTTGA
 AGAACCAGCCTTTGTACAAGTTCAAGAAAGNNAG
 AGAACCA---------------------------
 
-```
+```
+
 
 
-
diff --git a/example-analysis-for-MitoGeneExtractor/Result_SRR12554985_cons-vertebrateReference.fas b/example-analysis-for-MitoGeneExtractor/Result_SRR12554985_cons-vertebrateReference.fas
@@ -1,2 +1,2 @@
->ConsensusSRR12554985_trimmed_newname.fas
+>ConsensusSRR12554985_trimmed_reduced.fas
 ---TTCATTAATCGATGACTATTCTCTACCAACCACAAAGACATTGGCACTCTTTATCTAATCTTTGGCGCATGAGCTGGAATGATTGGAACAGCCCTAAGCCTTCTAATCCGAGCCGAACTAGGACAACCTGGAACCCTACTAGGAGACGACCAAATTTATAACGTAATTGTTACCGCCCATGCATTCATCATAATTTTCTTCATAGTTATACCCATTATAATCGGCGGATTCGGTAACTGATTAGTCCCTCTAATAATCGGAGCCCCAGACATAGCATTCCCACGAATAAACAACATAAGCTTCTGACTTCTACCCCCCTCTTTCCTTCTCCTTTTAGCCTCCTCCACAGTAGAAGCAGGAGTCGGAACAGGATGAACAGTGTATCCCCCACTAGCCGGTAACCTCGCCCATGCAGGAGCTTCAGTAGACCTGGCCATCTTTTCCCTTCACCTAGCTGGTGTTTCCTCCATTTTAGGTGCAATCAACTTCATCACAACCGCAATTAACATAAAACCCCCAGCACTATCACAATATCAAACTCCCCTATTCGTTTGATCTGTCCTTATCACTGCCGTATTACTACTTCTATCTCTCCCAGTCCTTGCCGCTGGTATTACAATACTGCTAACAGACCGTAACCTAAACACAACCTTTTTCGACCCGGCCGGAGGAGGAGACCCAATCCTCTACCAACACCTATTCTGATTCTTTGGTCACCCAGAAGTATACATCCTCATCCTCCCAGGATTTGGAATCATTTCCCACGTAGTTGCATATTATGCTGGCAAAAAAGAGCCATTTGGCTACATGGGAATAGTATGGGCCATACTTTCAATTGGATTCCTAGGATTTATTGTTTGAGCCCACCACATATTCACAGTCGGCATAGACGTAGACACCCGCGCATACTTCACATCAGCCACAATAATTATTGCAATCCCAACAGGTATTAAAGTTTTTAGCTGATTGGCCACACTGCATGGAGGCACAATTAAATGAGATCCCCCGATACTTTGAGCCCTAGGCTTCATTTTCCTATTTACTATTGGAGGGTTAACAGGCATCGTTCTAGCTAACTCTTCATTAGATATCGCCCTACATGACACCTACTACGTAGTTGCACATTTCCACTATGTTTTATCTATAGGGGCAGTATTTGCAATCCTAGCAGGTTTCACTCACTGATTCCCACTACTTACCGGATTCACCCTCCACCCCACATGAGCCAAAGCCCACTTCGGAGTCATATTCGCAGGAGTAAACCTTACTTTCTTCCCACAGCACTTCCTAGGCCTAGCTGGTATGCCCCGACGATACTCCGACTATCCAGACGCCTACACTCTTTGAAACACCCTCTCCTCTATCGGTTCACTCATTTCCATGATTGCAGTAATCATACTAATATTTATCATTTGAGAAGCCTTTACATCCAAACGTAAAATATTACAACCAGAACTAACCAGCACTAACATTGAATGAATCCACGGCTGCCCACCGCCCTACCACACTTTTGAAGAACCA
diff --git a/example-analysis-for-MitoGeneExtractor/Result_SRR12554985_cons_PasseriformesReference.fas b/example-analysis-for-MitoGeneExtractor/Result_SRR12554985_cons_PasseriformesReference.fas
@@ -1,2 +1,2 @@
->ConsensusSRR12554985_trimmed_newname.fas
+>ConsensusSRR12554985_trimmed_reduced.fas
 GTGACTTTCATTAATCGATGACTATTCTCTACCAACCACAAAGACATTGGCACTCTTTATCTAATCTTTGGCGCATGAGCTGGAATGATTGGAACAGCCCTAAGCCTTCTAATCCGAGCCGAACTAGGACAACCTGGAACCCTACTAGGAGACGACCAAATTTATAACGTAATTGTTACCGCCCATGCATTCATCATAATTTTCTTCATAGTTATACCCATTATAATCGGCGGATTCGGTAACTGATTAGTCCCTCTAATAATCGGAGCCCCAGACATAGCATTCCCACGAATAAACAACATAAGCTTCTGACTTCTACCCCCCTCTTTCCTTCTCCTTTTAGCCTCCTCCACAGTAGAAGCAGGAGTCGGAACAGGATGAACAGTGTATCCCCCACTAGCCGGTAACCTCGCCCATGCAGGAGCTTCAGTAGACCTGGCCATCTTTTCCCTTCACCTAGCTGGTGTTTCCTCCATTTTAGGTGCAATCAACTTCATCACAACCGCAATTAACATAAAACCCCCAGCACTATCACAATATCAAACTCCCCTATTCGTTTGATCTGTCCTTATCACTGCCGTATTACTACTTCTATCTCTCCCAGTCCTTGCCGCTGGTATTACAATACTGCTAACAGACCGTAACCTAAACACAACCTTTTTCGACCCGGCCGGAGGAGGAGACCCAATCCTCTACCAACACCTATTCTGATTCTTTGGTCACCCAGAAGTATACATCCTCATCCTCCCAGGATTTGGAATCATTTCCCACGTAGTTGCATATTATGCTGGCAAAAAAGAGCCATTTGGCTACATGGGAATAGTATGGGCCATACTTTCAATTGGATTCCTAGGATTTATTGTTTGAGCCCACCACATATTCACAGTCGGCATAGACGTAGACACCCGCGCATACTTCACATCAGCCACAATAATTATTGCAATCCCAACAGGTATTAAAGTTTTTAGCTGATTGGCCACACTGCATGGAGGCACAATTAAATGAGATCCCCCGATACTTTGAGCCCTAGGCTTCATTTTCCTATTTACTATTGGAGGGTTAACAGGCATCGTTCTAGCTAACTCTTCATTAGATATCGCCCTACATGACACCTACTACGTAGTTGCACATTTCCACTATGTTTTATCTATAGGGGCAGTATTTGCAATCCTAGCAGGTTTCACTCACTGATTCCCACTACTTACCGGATTCACCCTCCACCCCACATGAGCCAAAGCCCACTTCGGAGTCATATTCGCAGGAGTAAACCTTACTTTCTTCCCACAGCACTTCCTAGGCCTAGCTGGTATGCCCCGACGATACTCCGACTATCCAGACGCCTACACTCTTTGAAACACCCTCTCCTCTATCGGTTCACTCATTTCCATGATTGCAGTAATCATACTAATATTTATCATTTGAGAAGCCTTTACATCCAAACGTAAAATATTACAACCAGAACTAACCAGCACTAACATTGAATGAATCCACGGCTGCCCACCGCCCTACCACACTTTTGAAGAACCAGCCTTTGTACAAGTTCAAGAAAGNNAG
diff --git a/example-analysis-for-MitoGeneExtractor/runner.sh b/example-analysis-for-MitoGeneExtractor/runner.sh
@@ -1,5 +1,5 @@
 ## Extraction from a reduced sequencing library file with fasta reads. Reference as in Brasseur et al. 2022:
-/Daten/Programmieren/C++/Programme/align-trim-DNA-against-Protein/MitoGeneExtractor-v1.9.1-dist/MitoGeneExtractor-v1.9.1 -d SRR12554985_trimmed_reduced.fas -p ../Amino-Acide-references-for-tanomic-groups/xxx-need-to-be-added.fasta -V vulgar-SRR12554985_PasseriformesReference.txt -o SRR12554985_align_PasseriformesReference.fas -n 0 -c SRR12554985_cons_PasseriformesReference.fas -t 0.5 -r 1 -C 2
+../MitoGeneExtractor-v1.9.3 -d SRR12554985_trimmed_reduced.fas -p ../Amino-Acid-references-for-taxonomic-groups/COI-references/COI-fulllength-Passeriformes-protein-reference.fasta -V vulgar-SRR12554985_PasseriformesReference.txt -o SRR12554985_align_PasseriformesReference.fas -n 0 -c SRR12554985_cons_PasseriformesReference.fas -t 0.5 -r 1 -C 2
 
-/Daten/Programmieren/C++/Programme/align-trim-DNA-against-Protein/MitoGeneExtractor-v1.9.1-dist/MitoGeneExtractor-v1.9.1 -d SRR12554985_trimmed_reduced.fas -p ../Amino-Acide-references-for-tanomic-groups/COI-vertebrata-protein-consensus-50_whole.fasta -V vulgar-SRR12554985_vertebrateReference.txt -o SRR12554985_align_vertebrateReference.fas -n 0 -c SRR12554985_cons_vertebrateReference.fas -t 0.5 -r 1 -C 2
+../MitoGeneExtractor-v1.9.3 -d SRR12554985_trimmed_reduced.fas -p ../Amino-Acid-references-for-taxonomic-groups/COI-references/COI-fulllength-general-vertebrata-protein-reference_from-consensus-50.fasta -V vulgar-SRR12554985_vertebrateReference.txt -o SRR12554985_align_vertebrateReference.fas -n 0 -c SRR12554985_cons_vertebrateReference.fas -t 0.5 -r 1 -C 2
 
diff --git a/example-analysis-with-Snakemake-workflow-using-SRA-files-as-input/Snakefile b/example-analysis-with-Snakemake-workflow-using-SRA-files-as-input/Snakefile
@@ -57,4 +57,4 @@ rule MitoGeneExtractor:
         c = "{sample}_vulgar.txt",
         d = "{sample}_vulgar.txt.log"
     shell:
-        "MitoGeneExtractor -d {input.DNA} -p {input.AA} -o {output.a} -c {output.b} -V {output.c} -n 0 -t 0.5 -r 1 -e /home/usr/bin/exonerate"
+        "MitoGeneExtractor-v1.9.3 -d {input.DNA} -p {input.AA} -o {output.a} -c {output.b} -V {output.c} -n 0 -t 0.5 -r 1 -e /home/usr/bin/exonerate"
diff --git a/example-analysis-with-Snakemake-workflow-using-compressed-fastq-files-as-input/Snakefile b/example-analysis-with-Snakemake-workflow-using-compressed-fastq-files-as-input/Snakefile
@@ -57,4 +57,4 @@ rule MitoGeneExtractor:
         c = "{sample}_vulgar.txt",
         d = "{sample}_vulgar.txt.log"
     shell:
-        "MitoGeneExtractor -d {input.DNA} -p {input.AA} -o {output.a} -c {output.b} -V {output.c} -n 0 -t 0.5 -r 1 
+        "MitoGeneExtractor-v1.9.3 -d {input.DNA} -p {input.AA} -o {output.a} -c {output.b} -V {output.c} -n 0 -t 0.5 -r 1 
diff --git a/makefile b/makefile
@@ -1,5 +1,5 @@
 # CFLAGS  = -g3 -ggdb -Wall
-CFLAGS = -O2 -Wall
+CFLAGS = -O2 # -Wall
 
 INCL =    -I . -I tclap
 #LIBS    = -lc -Wall
@@ -12,14 +12,14 @@ HEADER = CDnaString2.h CSequence_Mol2_1.h CSequences2.h CSplit2.h Ctriple.h \
          global-types-and-parameters_MitoGeneExtractor.h primefactors.h statistic_functions.h
 
 
-all:    MitoGeneExtractor
+all:    MitoGeneExtractor-v1.9.3
 
 
-MitoGeneExtractor: $(SRC) $(HEADER)
-	g++ $(CFLAGS) $(INCL) $(SRC) -o MitoGeneExtractor
+MitoGeneExtractor-v1.9.3: $(SRC) $(HEADER)
+	g++ $(CFLAGS) $(INCL) $(SRC) -o MitoGeneExtractor-v1.9.3
 
 
 clean:
-	rm -f MitoGeneExtractor
+	rm -f MitoGeneExtractor-v1.9.3