Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
cjri authored May 30, 2017
1 parent 8b47110 commit f429395
Show file tree
Hide file tree
Showing 8 changed files with 1,186 additions and 0 deletions.
19 changes: 19 additions & 0 deletions jump-diff-v0.2/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Build rules for the jump-diffusion filter.
# NOTE(review): the GSL include/library paths below look specific to one
# machine; adjust them to the local GSL installation.
CC = g++
CC_FLAGS = -Wall -O3 -I/Users/ci3/local/include/gsl -I/opt/local/include/ -DHAVE_INLINE -fopenmp
LD_FLAGS = -L/Users/ci3/local/include/gsl/lib/ -lm -lgsl -lcblas -fopenmp
EMISSION = emission
JUMPDIFF = jump-diffusion
FILTER = jump-diffusion-filter
OBJECTS = $(EMISSION).o $(JUMPDIFF).o $(FILTER).o
BUILD_DIR = ./build

# 'all' and 'clean' name actions, not files; declaring them phony keeps them
# working even if a file with one of those names appears in this directory.
.PHONY: all clean

# Link the executable into $(BUILD_DIR), creating the directory first so a
# fresh checkout builds. Object files are removed after linking, so every
# invocation of 'make' recompiles all sources.
all: $(OBJECTS)
	mkdir -p $(BUILD_DIR)
	$(CC) $(CC_FLAGS) $(OBJECTS) -o $(BUILD_DIR)/$(FILTER) $(LD_FLAGS)
	rm -f ./*.o

$(FILTER).o: $(FILTER).cpp
	$(CC) $(CC_FLAGS) -c $(FILTER).cpp

$(EMISSION).o: $(EMISSION).cpp $(EMISSION).h
	$(CC) $(CC_FLAGS) -c $(EMISSION).cpp

$(JUMPDIFF).o: $(JUMPDIFF).cpp $(JUMPDIFF).h
	$(CC) $(CC_FLAGS) -c $(JUMPDIFF).cpp

# Remove intermediate object files.
clean:
	rm -f ./*.o

19 changes: 19 additions & 0 deletions jump-diff-v0.2/Notes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
This code was used to generate results published in the paper, 'Rapid Identification of Genes Controlling Virulence and Immunity in Malaria Parasites' by Abkallo et al. Primary credit in developing the code goes to Andrej Fischer. These notes are written by Chris Illingworth.

Two steps were performed in fitting the code to the sequence data.

1. Application of a diffusion model with no jumps. Options were applied as follows
--grid 250 : Sets the size of the grid upon which the discretisation of the model is performed
--mode 2 : Specifies a beta-binomial model
--nojump : Specifies that jumps were not allowed in the diffusion process

This generated inferred values for the shrink parameter, which characterises the variance of the beta-binomial model, and for rnd, the proportion of errors (points excluded by the beta-binomial model).

Data from this run were used to identify and exclude errors (as might occur from incorrect mapping of reads) from the data.

2. Application of a jump-diffusion model. Options were applied as follows
--grid 250
--mode 2
--shrink s : Where s was the inferred beta-binomial parameter for a dataset from step 1.

This fitted a jump-diffusion model to the data. The inferred jump locations were used to parse the data for further analysis.
136 changes: 136 additions & 0 deletions jump-diff-v0.2/emission.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
//emission.cpp

//own headers...
#include "emission.h"

// Default constructor.
// Puts every member into a well-defined "empty" state: model parameters get
// their defaults, both status flags are cleared, and all sizes and pointers
// are zeroed. Zeroing the pointers matters because set_dist() tests
// `loci == NULL`, and the destructor iterates over nSamples and deletes the
// arrays; both would otherwise read indeterminate values if set() was never
// called.
Emission::Emission(){
  // model parameters
  rnd_emit = 1.0e-10;
  shrink   = 1.0;
  mode     = 0;// 0: not set, 1: binomial, 2: beta-binomial

  // status flags
  EmitProb_set = 0;
  dist_set     = 0;

  // sizes (filled in by set() and set_dist())
  nSamples   = 0;
  gridSize   = 0;
  dx         = 0.0;
  total_loci = 0;

  // storage (allocated by set() and set_dist())
  EmitProb = NULL;
  reads    = NULL;
  depths   = NULL;
  loci     = NULL;
  dist     = NULL;
  xgrid    = NULL;
  nSites   = NULL;
}


// real constructor
void Emission::set(int nsamples, vector<int>& nsites, int grid){
nSamples = nsamples;
if (nSamples != (int) nsites.size()){
cout<<"ERROR-1 in Emission::Emission()\n";
exit(1);
}
nSites = new int [nSamples];
for (int s=0; s<nSamples; s++){
nSites[s] = nsites[s];
}
// set xgrid...
gridSize = grid;
dx = 1.0 / double(gridSize);
xgrid = new double[gridSize+1];
for (int i=0; i<= gridSize; i++){
xgrid[i] = double(i) * dx;
}
EmitProb = new gsl_matrix * [nSamples];
reads = new unsigned int * [nSamples];
depths = new unsigned int * [nSamples];
loci = new unsigned int * [nSamples];
for (int s=0; s<nSamples; s++){
EmitProb[s] = gsl_matrix_alloc( nSites[s], gridSize + 1);
reads[s] = new unsigned int [nSites[s]];
depths[s] = new unsigned int [nSites[s]];
loci[s] = new unsigned int [nSites[s]];
}
}

// Computes the distance between consecutive loci in every sample.
//
// For each sample s, dist[s][0] = 0 and, for l >= 1,
// dist[s][l] = |loci[s][l] - loci[s][l-1]|. Also accumulates total_loci as
// the sum of nSites[s] and sets dist_set to 1 when done.
// Prints a message and exits with status 1 if loci is NULL or if two
// consecutive loci coincide (distance zero).
void Emission::set_dist(){
  if (loci == NULL){
    cout<<"ERROR-1 in Emission::set_dist()\n";
    exit(1);
  }
  dist = new double * [nSamples];
  total_loci = 0;
  for (int s=0; s<nSamples; s++){
    total_loci += nSites[s];
    dist[s] = new double [nSites[s]];
    // A sample without sites has a zero-length array: nothing may be written.
    if (nSites[s] <= 0){
      continue;
    }
    dist[s][0] = 0.0;
    for (int l=1; l <nSites[s]; l++){
      // loci are unsigned: convert before subtracting so that a decreasing
      // position yields a negative difference (made positive by fabs)
      // instead of wrapping around to a value near 2^32.
      const double here = double(loci[s][l]);
      const double prev = double(loci[s][l-1]);
      dist[s][l] = fabs(here - prev);
      if (dist[s][l] == 0.0){
        printf("ERROR: dist=0 in chr %i at locus %u\n", s+1, loci[s][l]);
        exit(1);
      }
    }
  }
  dist_set = 1;
  //printf("Data in %i sample(s) and with %i sites.\n", nSamples, total_loci);
}


// Destructor: releases the storage allocated by set() and set_dist().
//
// Frees the per-sample emission matrices and the reads/depths/loci arrays
// together with nSites and xgrid (all allocated in set()), and the dist
// arrays only when set_dist() has actually allocated them (dist_set != 0).
// NOTE(review): reads/depths/loci are public members; this assumes no other
// code deletes them - confirm against the callers.
Emission::~Emission(){
  for (int s=0; s<nSamples; s++){
    gsl_matrix_free(EmitProb[s]);
    delete [] reads[s];
    delete [] depths[s];
    delete [] loci[s];
  }
  delete [] EmitProb;
  delete [] reads;
  delete [] depths;
  delete [] loci;
  delete [] nSites;
  // dist exists only after set_dist(); without this guard the pointer would
  // be deleted without ever having been allocated.
  if (dist_set){
    for (int s=0; s<nSamples; s++){
      delete [] dist[s];
    }
    delete [] dist;
  }
  delete [] xgrid;
}

// Fills EmitProb with the emission probability as a function of total freq x.
//
// For every sample s, site i and grid point j (x = j*dx), stores
//   f = (1 - rnd_emit) * P(n | N, x) + rnd_emit / (N + 1)
// in EmitProb[s](i, j), where n = reads[s][i], N = depths[s][i] and P is
//   mode 1: the binomial pdf,
//   mode 2: the beta-binomial pmf with beta parameters shrink*x and
//           shrink*(1-x), evaluated in log space.
// Samples are processed in parallel with OpenMP. Sets EmitProb_set to 1.
// Prints a message and exits with status 1 if mode is unset or unknown, or if
// the beta-binomial log-probability comes out positive.
void Emission::set_EmitProb(){
  if (mode == 0){
    printf("ERROR-1 in Emission::set_EmitProb(): mode not set.\n");
    exit(1);
  }
  int s;
#pragma omp parallel for schedule( dynamic, 1) default(shared)
  for ( s=0; s<nSamples; s++){
    double x,f, p0,p;
    int n,N;
    for (int i=0; i < nSites[s]; i++){
      n = reads[s][i];
      N = depths[s][i];
      if (mode == 1){//binomial emission model
        for (int j=0; j<=gridSize; j++){
          // gridSize*dx is not guaranteed to round to exactly 1.0, so the
          // upper end of the grid is pinned explicitly.
          x = (j == gridSize) ? 1.0 : double(j)*dx;
          f = (1.0-rnd_emit) * gsl_ran_binomial_pdf(n, x, N) + rnd_emit / double(N+1);
          if (f<0.0 || f!= f){// negative or NaN: reported, but not fatal
            printf("ERROR-2 in Emission::set_EmitProb(): %e\n", f);
          }
          gsl_matrix_set(EmitProb[s], i, j, f);
        }
      }
      else if (mode == 2){//beta-binomial emission model
        // log of the binomial coefficient C(N, n); independent of x
        p0 = gsl_sf_lngamma(double(N+1)) - gsl_sf_lngamma(double(n+1)) - gsl_sf_lngamma(double(N-n+1));
        for (int j=0; j<=gridSize; j++){
          // The two grid boundaries are identified by index rather than by
          // comparing the floating-point x with 0.0 and 1.0: the beta
          // function is not evaluated there, and a rounded x slightly off
          // 1.0 would otherwise fall into the interior branch.
          if (j == 0){
            p = (n==0) ? 1.0 : 0.0;
          }
          else if (j == gridSize){
            p = (n==N) ? 1.0 : 0.0;
          }
          else{
            x = double(j)*dx;
            p  = p0 + gsl_sf_lnbeta(double(n) + shrink*x, double(N-n) + shrink*(1.0-x));
            p -= gsl_sf_lnbeta( shrink*x, shrink*(1.0-x));
            if (p>0.0){// a log-probability must not be positive
              printf("ERROR in BetaBinomial::set_EmitProb(): p = %e\n", p);
              exit(1);
            }
            p = exp(p);
          }
          f = (1.0-rnd_emit) * p + rnd_emit / double(N+1);
          gsl_matrix_set(EmitProb[s], i, j, f);
        }
      }
      else{
        printf("ERROR in Emission::set_EmitProb(): mode %i does not exist.\n", mode);
        exit(1);
      }
    }
  }
  EmitProb_set=1;
}
46 changes: 46 additions & 0 deletions jump-diff-v0.2/emission.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//emission.h
//
// Declares Emission, which holds the per-sample read data and the matrices of
// emission probabilities evaluated on a regular frequency grid over [0,1].

// Include guard: lets several headers/sources include this file without
// redefining the class.
#ifndef EMISSION_H
#define EMISSION_H

#include <stdio.h>
#include <stdlib.h> // exit(), used by the implementation
#include <iostream>
#include <fstream>
#include <sstream>
#include <time.h>
#include <math.h>
#include <ctype.h>
#include <string>
#include <map>
#include <vector>

// GSL headers...
#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_randist.h"
#include "gsl/gsl_blas.h"
#include "gsl/gsl_sf_gamma.h"

// NOTE(review): a using-directive in a header affects every file that
// includes it; kept because code including this header may rely on it.
using namespace std;


class Emission{
public:
  // Sets default parameters (rnd_emit = 1e-10, shrink = 1, mode = 0) and
  // clears both status flags; storage is allocated later by set().
  Emission();
  // Allocates storage for `nsamples` samples with nsites[s] sites each and a
  // frequency grid of grid+1 points; nsamples must equal nsites.size().
  void set(int nsamples, vector<int>& nsites, int grid);
  ~Emission();
  // Computes dist from loci and total_loci from nSites; sets dist_set to 1.
  void set_dist();
  int dist_set;           // 1 once set_dist() has run, 0 before
  gsl_matrix ** EmitProb; // per sample: nSites[s] x (gridSize+1) matrix of emission probabilities
  int EmitProb_set;       // 1 once set_EmitProb() has run, 0 before
  // rnd_emit: weight of the uniform 1/(depth+1) term mixed into every
  //           emission probability.
  // shrink  : scale of the beta parameters (shrink*x, shrink*(1-x)) in the
  //           beta-binomial model.
  double rnd_emit, shrink;
  int mode;               // 0: not set, 1: binomial, 2: beta-binomial
  // Fills EmitProb from reads and depths according to mode; mode must be set.
  void set_EmitProb();
  int nSamples;           // number of samples
  int gridSize;           // number of grid intervals (the grid has gridSize+1 points)
  double dx;              // grid spacing, 1/gridSize
  // Per-sample arrays of length nSites[s], allocated by set(). They are not
  // filled anywhere in emission.cpp - presumably by the caller; confirm.
  unsigned int ** reads;  // count n entering the emission model at each site
  unsigned int ** depths; // total count N entering the emission model at each site
  unsigned int ** loci;   // position of each site, used to compute dist
  double ** dist;         // dist[s][l] = |loci[s][l] - loci[s][l-1]|, dist[s][0] = 0
  double * xgrid;         // grid points, xgrid[i] = i*dx for i = 0..gridSize
  int * nSites;           // number of sites in each sample
  int total_loci;         // sum of nSites over all samples (set by set_dist())
};

#endif // EMISSION_H
Binary file added jump-diff-v0.2/jump-diffusion-filter
Binary file not shown.
Loading

0 comments on commit f429395

Please sign in to comment.