-
Notifications
You must be signed in to change notification settings - Fork 9
/
expectation_step.d
107 lines (95 loc) · 3.44 KB
/
expectation_step.d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/* Copyright (c) 2012,2013 Genome Research Ltd.
*
* Author: Stephan Schiffels <[email protected]>
*
* This file is part of msmc.
* msmc is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation; either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
import std.typecons;
import std.stdio;
import std.string;
import std.algorithm;
import std.exception;
import std.conv;
import std.parallelism;
import std.math;
import core.memory;
import model.propagation_core;
import model.psmc_model;
import model.psmc_hmm;
import model.data;
import logger;
alias Tuple!(double[][], double[][2], double) ExpectationResult_t;
ExpectationResult_t getExpectation(in SegSite_t[][] inputData, PSMCmodel psmc, size_t hmmStrideWidth, size_t maxDistance)
{
PropagationCore propagationCore;
propagationCore = new PropagationCore(psmc, maxDistance);
auto transitions = new double[][](psmc.nrStates, psmc.nrStates);
double[][2] emissions = [new double[psmc.nrStates], new double[psmc.nrStates]];
foreach(a; 0 .. psmc.nrStates) {
transitions[a][] = 0.0;
emissions[0][] = 0.0;
emissions[1][] = 0.0;
}
auto logLikelihood = 0.0;
auto cnt = 0;
GC.disable();
foreach(i, data; taskPool.parallel(inputData, 1)) {
logInfo(format("\r * [%s/%s] Expectation Step", ++cnt, inputData.length));
auto result = singleChromosomeExpectation(data, hmmStrideWidth, propagationCore);
emissions[0][] += result[1][0][];
emissions[1][] += result[1][1][];
foreach(a; 0 .. psmc.nrStates)
transitions[a][] += result[0][a][];
logLikelihood += result[2];
if(i % 500 == 0) {
GC.enable();
GC.collect();
GC.minimize();
GC.disable();
}
}
GC.enable();
GC.collect();
GC.minimize();
logInfo(format(", log likelihood: %s", logLikelihood));
logInfo("\n");
return tuple(transitions, emissions, logLikelihood);
}
ExpectationResult_t singleChromosomeExpectation(in SegSite_t[] data, size_t hmmStrideWidth, in PropagationCore propagationCore)
{
auto psmc_hmm = new PSMC_hmm(propagationCore, data);
psmc_hmm.runForward();
auto exp = psmc_hmm.runBackward(hmmStrideWidth);
auto logL = psmc_hmm.logLikelihood();
return tuple(exp[0], exp[1], logL);
}
unittest {
writeln("test expectation step");
auto psmc = new PSMCmodel(0.01, 0.001, 4);
auto fileName = "model/hmm_testData.txt";
auto data = readSegSites(fileName, [0UL, 1], false);
auto hmmStrideWidth = 100UL;
auto allData = [data, data, data];
std.parallelism.defaultPoolThreads(1U);
auto resultSingleThreaded = getExpectation(allData, psmc, hmmStrideWidth, 100UL);
std.parallelism.defaultPoolThreads(2U);
auto resultMultiThreaded = getExpectation(allData, psmc, hmmStrideWidth, 100UL);
foreach(a; 0 .. psmc.nrStates) {
assert(all!"a>0.0"(resultSingleThreaded[0][a]));
assert(all!"a>0.0"(resultMultiThreaded[0][a]));
assert(resultSingleThreaded[1][0][a] > 0.0);
assert(resultMultiThreaded[1][1][a] > 0.0);
}
}