-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathscoreBinTree.cpp
123 lines (106 loc) · 4.78 KB
/
scoreBinTree.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
* scoreBinTree.cpp
*
* Created on: Mar 14, 2016
* Author: jahnka
*/
#include <stdbool.h>
#include <vector>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <float.h>
#include <iostream>
#include "limits.h"
#include "scoreBinTree.h"
#include "matrices.h"
#include "trees.h"
using namespace std;
/* computes the log likelihood for a single mutation for all subtrees of the binary tree, where the expected */
/* state of the mutation can be either absent or present in the whole subtree (passed as 'state' to the function) */
double* getBinSubtreeScore(bool state, int* bft, vector<vector<int> > &childLists, int mut, int nodeCount, int m, int** obsMutProfiles, double ** logScores){
double* score = init_doubleArray(nodeCount, 0.0);
for(int i=nodeCount-1; i>=0; i--){
int node = bft[i];
if(node < m){
score[node] = logScores[obsMutProfiles[node][mut]][state]; // for leafs the score is just P(Dij|Eij)
}
else{ // for inner nodes the score is the sum of the scores of the children
if(childLists.at(node).size()!=2){
cerr << "Error node " << node << " has " << childLists.at(node).size() << " children\n"; // tree should be binary, but isn't
}
score[node] = score[childLists.at(node).at(0)] + score[childLists.at(node).at(1)];
}
}
return score;
}
/* Computes the best log likelihood for placing a single mutation in a given sample tree */
/* Iterates through all nodes as possible placements of the mutation to find the best one */
/* All samples below the placement of the mutation should have it, mutation can also be placed at a leaf, i.e. uniquely to the sample */
double getBinTreeMutScore(int* bft, vector<vector<int> > &childLists, int mut, int nodeCount, int m, int** obsMutProfiles, double ** logScores){
double bestScore = -DBL_MAX;
double* absentScore = getBinSubtreeScore(0, bft, childLists, mut, nodeCount, m, obsMutProfiles, logScores);
double* presentScore = getBinSubtreeScore(1, bft, childLists, mut, nodeCount, m, obsMutProfiles, logScores);
for(int p=0; p<nodeCount; p++){
double score = absentScore[nodeCount-1] - absentScore[p] + presentScore[p];
bestScore = max(bestScore, score);
}
delete [] absentScore;
delete [] presentScore;
return bestScore;
}
/* Computes the maximum log likelihood of a binary tree for a given mutation matrix. */
/* Note: No extra root necessary for binary trees */
double getBinTreeScore(int** obsMutProfiles, int n, int m, double ** logScores, int* parent){
int nodeCount = (2*m)-1; // number of nodes in binary tree: m leafs, m-1 inner nodes (includes already the root)
double sumScore = 0; // sum of maximal scores of all samples
vector<vector<int> > childLists = getChildListFromParentVector(parent, nodeCount-1);
int* bft = getBreadthFirstTraversal(parent, nodeCount-1);
for(int mut=0; mut<n; mut++){ // sum over the optimal scores of each sample
double score = getBinTreeMutScore(bft, childLists, mut, nodeCount, m, obsMutProfiles, logScores);
sumScore += score;
}
delete [] bft;
for(int i=0; i<childLists.size(); i++){
childLists[i].clear();
}
childLists.clear();
//cout << "score: " << sumScore << "\n";
return sumScore;
}
//
///* Score contribution by a specific mutation when placed at a specific leaf */
///* This is the same for all trees and can be precomputed */
//double binTreeLeafScore(int** obsMutProfiles, int leaf, int mut, int m, double ** logScores){
//
// double score = logScores[obsMutProfiles[leaf][mut]][1]; // placing a mutation at a leaf only the sample at the leaf has the mutation
// for(int sample=0; sample<m; sample++){
// if(sample != leaf){
// score += logScores[obsMutProfiles[sample][mut]][0]; // all other samples do not have it
// }
// }
// return score;
//}
/* computes the best score for placing a mutation in a given binary tree */
//double binTreeMutScore(int** obsMutProfiles, int mut, int m, double ** logScores, bool** ancMatrix){
//
// int nodeCount = (2*m)-1;
// double bestPlacementScore = binTreeRootScore(obsMutProfiles, mut, m, logScores);
// //print_boolMatrix(bool** array, int n, int m);
// for(int p=0; p<nodeCount-1; p++){ // try all possible placements (nodes in the mutation tree)
//
// double score = 0.0; // score for placing mutation at a specific node
// for(int sample=0; sample<m; sample++){
// //cout << p << " " << sample << "\n";
// if(ancMatrix[p][sample] == 1){
// score += logScores[obsMutProfiles[sample][mut]][1]; // sample should have the mutation
// }
// else{
// score += logScores[obsMutProfiles[sample][mut]][0]; // sample should not have the mutation
// }
// }
// bestPlacementScore = max(bestPlacementScore, score);
// }
// return bestPlacementScore;
//}