-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
1,186 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Build script for the jump-diffusion filter.
# Compiler used for both compiling and linking.
CC = g++ | ||
# Compile flags: warnings, -O3, OpenMP and include directories.
# NOTE(review): the include paths are absolute and specific to one machine; adjust for your GSL install.
CC_FLAGS = -Wall -O3 -I/Users/ci3/local/include/gsl -I/opt/local/include/ -DHAVE_INLINE -fopenmp | ||
# Link flags: GSL, CBLAS, libm and OpenMP (library path is likewise machine-specific).
LD_FLAGS = -L/Users/ci3/local/include/gsl/lib/ -lm -lgsl -lcblas -fopenmp | ||
# Base names (without extension) of the three translation units.
EMISSION = emission | ||
JUMPDIFF = jump-diffusion | ||
FILTER = jump-diffusion-filter | ||
OBJECTS = $(EMISSION).o $(JUMPDIFF).o $(FILTER).o | ||
# Default target: link the three objects into ./build/jump-diffusion-filter, then delete the objects.
# NOTE(review): nothing in this file creates ./build, so it must already exist.
all: $(OBJECTS) | ||
$(CC) $(CC_FLAGS) $(EMISSION).o $(JUMPDIFF).o $(FILTER).o -o ./build/$(FILTER) $(LD_FLAGS) | ||
rm -f ./*.o | ||
# Per-file compile rules.
$(FILTER).o: $(FILTER).cpp | ||
$(CC) $(CC_FLAGS) -c $(FILTER).cpp | ||
$(EMISSION).o: $(EMISSION).cpp $(EMISSION).h | ||
$(CC) $(CC_FLAGS) -c $(EMISSION).cpp | ||
$(JUMPDIFF).o: $(JUMPDIFF).cpp $(JUMPDIFF).h | ||
$(CC) $(CC_FLAGS) -c $(JUMPDIFF).cpp | ||
# Remove object files from the current directory.
clean: | ||
rm -f ./*.o | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
This code was used to generate results published in the paper, 'Rapid Identification of Genes Controlling Virulence and Immunity in Malaria Parasites' by Abkallo et al. Primary credit in developing the code goes to Andrej Fischer. These notes are written by Chris Illingworth. | ||
|
||
Two steps were performed in fitting the code to the sequence data. | ||
|
||
1. Application of a diffusion model with no jumps. Options were applied as follows | ||
--grid 250 : Sets the size of the grid upon which the discretisation of the model is performed | ||
--mode 2 : Specifies a beta-binomial model | ||
--nojump : Specifies that jumps were not allowed in the diffusion process | ||
|
||
This generated inferred values for the shrink parameter, which characterises the variance of the beta-binomial model, and for rnd, the proportion of errors (points excluded by the beta-binomial model). | ||
|
||
Data from this run were used to identify and exclude errors (as might occur from incorrect mapping of reads) from the data. | ||
|
||
2. Application of a jump-diffusion model. Options were applied as follows | ||
--grid 250 | ||
--mode 2 | ||
--shrink s : Where s was the inferred beta-binomial parameter for a dataset from step 1. | ||
|
||
This fitted a jump-diffusion model to the data. The inferred jump locations were used to parse the data for further analysis. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
//emission.cpp | ||
|
||
//own headers... | ||
#include "emission.h" | ||
|
||
// Default constructor.
// Only puts the object into a well-defined "nothing allocated yet" state.
// Every pointer member starts out as NULL and every size as zero, so that
// NULL checks on the pointers and loops over nSamples are meaningful before
// any storage has been allocated.
Emission::Emission(){
  // model parameters
  rnd_emit = 1.0e-10;
  shrink   = 1.0;
  mode     = 0;// 0: not set, 1: binomial, 2: beta-binomial
  // status flags
  EmitProb_set = 0;
  dist_set     = 0;
  // sizes
  nSamples   = 0;
  gridSize   = 0;
  total_loci = 0;
  dx         = 0.0;
  // storage (nothing allocated yet)
  EmitProb = NULL;
  reads    = NULL;
  depths   = NULL;
  loci     = NULL;
  dist     = NULL;
  xgrid    = NULL;
  nSites   = NULL;
}
|
||
|
||
// real constructor | ||
void Emission::set(int nsamples, vector<int>& nsites, int grid){ | ||
nSamples = nsamples; | ||
if (nSamples != (int) nsites.size()){ | ||
cout<<"ERROR-1 in Emission::Emission()\n"; | ||
exit(1); | ||
} | ||
nSites = new int [nSamples]; | ||
for (int s=0; s<nSamples; s++){ | ||
nSites[s] = nsites[s]; | ||
} | ||
// set xgrid... | ||
gridSize = grid; | ||
dx = 1.0 / double(gridSize); | ||
xgrid = new double[gridSize+1]; | ||
for (int i=0; i<= gridSize; i++){ | ||
xgrid[i] = double(i) * dx; | ||
} | ||
EmitProb = new gsl_matrix * [nSamples]; | ||
reads = new unsigned int * [nSamples]; | ||
depths = new unsigned int * [nSamples]; | ||
loci = new unsigned int * [nSamples]; | ||
for (int s=0; s<nSamples; s++){ | ||
EmitProb[s] = gsl_matrix_alloc( nSites[s], gridSize + 1); | ||
reads[s] = new unsigned int [nSites[s]]; | ||
depths[s] = new unsigned int [nSites[s]]; | ||
loci[s] = new unsigned int [nSites[s]]; | ||
} | ||
} | ||
|
||
void Emission::set_dist(){ | ||
if (loci == NULL){ | ||
cout<<"ERROR-1 in Emission::set_dist()\n"; | ||
exit(1); | ||
} | ||
dist = new double * [nSamples]; | ||
total_loci=0; | ||
for (int s=0; s<nSamples; s++){ | ||
total_loci += nSites[s]; | ||
dist[s] = new double [nSites[s]]; | ||
for (int l=1; l <nSites[s]; l++){ | ||
dist[s][l] = fabs(double(loci[s][l] - loci[s][l-1])); | ||
if (dist[s][l] == 0.0){ | ||
printf("ERROR: dist=0 in chr %i at locus %i\n", s+1, loci[s][l]); | ||
exit(1); | ||
} | ||
} | ||
dist[s][0] = 0.0; | ||
} | ||
dist_set = 1; | ||
//printf("Data in %i sample(s) and with %i sites.\n", nSamples, total_loci); | ||
} | ||
|
||
|
||
// Destructor: releases the storage allocated by set() and, if it was called,
// by set_dist().
// NOTE(review): reads, depths, loci and nSites are now freed here as well;
// this assumes no other code frees or aliases these arrays - confirm against
// the callers.
Emission::~Emission(){
  // per-sample storage allocated in set()
  for (int s=0; s<nSamples; s++){
    gsl_matrix_free(EmitProb[s]);
    delete [] reads[s];
    delete [] depths[s];
    delete [] loci[s];
  }
  delete [] EmitProb;
  delete [] reads;
  delete [] depths;
  delete [] loci;
  delete [] nSites;
  delete [] xgrid;
  // dist only exists once set_dist() has run, which is what dist_set records
  if (dist_set){
    for (int s=0; s<nSamples; s++){
      delete [] dist[s];
    }
    delete [] dist;
  }
}
|
||
//emission probability as a function of total freq x | ||
void Emission::set_EmitProb(){ | ||
if (mode == 0){ | ||
printf("ERROR-1 in Emission::set_EmitProb(): mode not set.\n"); | ||
exit(1); | ||
} | ||
int s; | ||
#pragma omp parallel for schedule( dynamic, 1) default(shared) | ||
for ( s=0; s<nSamples; s++){ | ||
double x,f, p0,p; | ||
int n,N; | ||
for (int i=0; i < nSites[s]; i++){ | ||
n = reads[s][i]; | ||
N = depths[s][i]; | ||
if (mode == 1){//binomial emission model | ||
for (int j=0; j<=gridSize; j++){ | ||
x = double(j)*dx; | ||
f = (1.0-rnd_emit) * gsl_ran_binomial_pdf(n, x, N) + rnd_emit / double(N+1); | ||
if (f<0.0 || f!= f){ | ||
printf("ERROR-2 in Emission::set_EmitProb(): %e\n", f); | ||
} | ||
gsl_matrix_set(EmitProb[s], i, j, f); | ||
} | ||
} | ||
else if (mode == 2){//beta-binomial emission model | ||
p0 = gsl_sf_lngamma(double(N+1)) - gsl_sf_lngamma(double(n+1)) - gsl_sf_lngamma(double(N-n+1)); | ||
for (int j=0; j<=gridSize; j++){ | ||
x = double(j)*dx; | ||
if (x==0.0){ | ||
p = (n==0) ? 1.0 : 0.0; | ||
} | ||
else if (x==1.0){ | ||
p = (n==N) ? 1.0 : 0.0; | ||
} | ||
else{ | ||
p = p0 + gsl_sf_lnbeta(double(n) + shrink*x, double(N-n) + shrink*(1.0-x)); | ||
p -= gsl_sf_lnbeta( shrink*x, shrink*(1.0-x)); | ||
if (p>0.0){ | ||
printf("ERROR in BetaBinomial::set_EmitProb(): p = %e\n", p); | ||
exit(1); | ||
} | ||
p = exp(p); | ||
} | ||
f = (1.0-rnd_emit) * p + rnd_emit / double(N+1); | ||
gsl_matrix_set(EmitProb[s], i, j, f); | ||
} | ||
} | ||
else{ | ||
printf("ERROR in Emission::set_EmitProb(): mode %i does not exist.\n", mode); | ||
exit(1); | ||
} | ||
} | ||
} | ||
EmitProb_set=1; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
//emission.h | ||
|
||
#include <stdio.h> | ||
#include <iostream> | ||
#include <fstream> | ||
#include <sstream> | ||
#include <time.h> | ||
#include <math.h> | ||
#include <ctype.h> | ||
#include <string> | ||
#include <map> | ||
#include <vector> | ||
|
||
// GSL headers... | ||
#include "gsl/gsl_vector.h" | ||
#include "gsl/gsl_matrix.h" | ||
#include "gsl/gsl_randist.h" | ||
#include "gsl/gsl_blas.h" | ||
#include "gsl/gsl_sf_gamma.h" | ||
|
||
using namespace std; | ||
|
||
|
||
class Emission{ | ||
public: | ||
Emission(); | ||
void set(int nsamples, vector<int>& nsites, int grid); | ||
~Emission(); | ||
void set_dist(); | ||
int dist_set; | ||
gsl_matrix ** EmitProb; | ||
int EmitProb_set; | ||
double rnd_emit, shrink; | ||
int mode; | ||
void set_EmitProb(); | ||
int nSamples; | ||
int gridSize; | ||
double dx; | ||
unsigned int ** reads; | ||
unsigned int ** depths; | ||
unsigned int ** loci; | ||
double ** dist; | ||
double * xgrid; | ||
int * nSites; | ||
int total_loci; | ||
}; |
Binary file not shown.
Oops, something went wrong.