Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
cjri authored May 30, 2017
1 parent f429395 commit cbf25ce
Show file tree
Hide file tree
Showing 21 changed files with 2,627 additions and 0 deletions.
42 changes: 42 additions & 0 deletions Models/Notes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
This code was used to generate results published in the paper, 'Rapid Identification of Genes Controlling Virulence and Immunity in Malaria Parasites' by Abkallo et al. These notes are written by Chris Illingworth.

A standard GSL optimisation routine was used to fit a variety of models to sequenced allele frequencies.

OneFreq: Fits a single allele frequency to the data

To compile use:

make one

Command line for this model is:

./run_onefreq <file> <random seed> <reps> <beta>

where <file> contains the input allele frequencies, <random seed> initialises the random number generator, <reps> specifies how many replicate calculations to run, and <beta> is the noise parameter characterising the beta-binomial model. Note that beta is equal to the shrink parameter from the jump-diffusion code.


SingleDriver: Fits a model of allele frequencies given the presence of a single allele under positive selection

make one compiles run_onetimesel

./run_onetimesel <file> <random seed> <reps> <beta>

This optimises a model for a given dataset, finding a set of optimised parameters for each replicate optimisation. The model assumes a single local rate of recombination.


SingleDriverRhoXStay: Fits a model of allele frequencies

Compiled using make one

./run_onetimeselrhoX_stay <file> <random seed> <reps> <beta> <locus>

This optimises a model for a given dataset, finding a set of optimised parameters for each replicate optimisation. The model assumes that there are two local rates of recombination, which change in a stepwise fashion at a given locus.

In order to obtain better results from the optimisation routine, this code specifies a locus at which selection might be found, and optimises the remaining parameters of the model. Code was run across a range of input loci.


SingleDriver2RhoXStay and SingleDriver3RhoXStay:

These work in the same way as the SingleDriverRhoXStay model, but with three and four local rates of recombination (that is, two and three recombination rate change points), respectively.


11 changes: 11 additions & 0 deletions Models/OneFreq/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Build rules for the OneFreq model. `make one` links onefreq.o and
# utilities.o into the executable run_onefreq.
#
# NOTE(review): the -I and -L paths below are hard-coded to one user's home
# directory; they will need to be edited to point at the local GSL install.
# NOTE(review): the -L path sits under .../include/gsl/lib/, which looks
# unusual for a library directory — confirm it is intended.
# NOTE(review): GSL normally ships its BLAS as -lgslcblas; -lcblas presumably
# relies on a separately installed CBLAS — confirm.
CC = g++
# Compiler flags: debug info, optimisation, all warnings, GSL header search path
CC_FLAGS = -g3 -O3 -Wall -I /Users/ci3/local/include/gsl/
# Linker flags: library search path plus libm, GSL and CBLAS
LD_FLAGS = -L/Users/ci3/local/include/gsl/lib/ -lm -lgsl -lcblas
# Object files that make up the run_onefreq executable
ONE = onefreq.o utilities.o

# Link step: produces run_onefreq from the objects listed in ONE
one : $(ONE)
$(CC) $(CC_FLAGS) $(ONE) -o run_onefreq $(LD_FLAGS)
# Compile the driver (main) translation unit
onefreq.o: onefreq.cpp
$(CC) $(CC_FLAGS) -c onefreq.cpp
# Compile the shared helper routines
utilities.o: utilities.cpp
$(CC) $(CC_FLAGS) -c utilities.cpp
140 changes: 140 additions & 0 deletions Models/OneFreq/onefreq.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#include <iostream>
#include <sstream>
#include <vector>
#include <list>
#include <deque>
#include <map>
using namespace std;
#include "shared.h"

int main(int argc, char *argv[]){

vector<rec> traj;
vector<rec> traj_inf;
double x_final=0;

string input(argv[1]);
int seed=atoi(argv[2]);

//Set up random number generator
srand((unsigned int)(seed));
gsl_rng *rgen = gsl_rng_alloc (gsl_rng_taus);
gsl_rng_set (rgen, seed);

ImportData(input,traj);

//return 0;

//Number of data points
int p=traj.size();
//cout << p << "\n";

//Parameters are l (locus), x (cross frequency), e (vertical movement), rho (recombination), xf (driver frequency)

opt_data od;
double best_fit=-1e10;
double last_fit=-1e10;

int reps=atoi(argv[3]);
int bparm=atoi(argv[4]);

for (int r=1; r<=reps; r++) {

//Set up initial parameters
od.x = 0.5+(0.5*gsl_rng_uniform(rgen));

//General setup for optimisation routine

//Optimisation processs
for (int it=0;it<1000000;it++){

//cout << "it " << it << "\n";
size_t iter=0; //GSL iteration number

//CheckLastLikelihood

//Vector x contains parameters to be optimised. Parameters are locus and two frequencies
int xx_size=1;
gsl_vector *xx = gsl_vector_calloc(xx_size);
SetXXVector(od,xx);
//cout << "Done set xx\n";

//Optimisation bit goes here

int p_size = 4*(traj.size())+25;

double *params;
params=(double *)calloc(p_size,sizeof(double));

SetParams(bparm,params,traj);
//cout << "Done set params\n";

//Define the optimisation function
gsl_multimin_function my_func;
my_func.n=xx_size;
my_func.f=&get_best_fit;
my_func.params=params;

gsl_multimin_fminimizer *s;
const gsl_multimin_fminimizer_type *T=gsl_multimin_fminimizer_nmsimplex;
s=gsl_multimin_fminimizer_alloc(T,xx_size);
//cout << "Done define\n";

//Starting change magnitudes
gsl_vector* ss=gsl_vector_alloc(xx_size);
gsl_vector_set(ss,0,0.1);
//cout << "Done set ss\n";

gsl_multimin_fminimizer_set (s,&my_func,xx,ss);
//cout << "Done set mmin\n";
int status;
double size;
do {
iter++;
status=gsl_multimin_fminimizer_iterate(s);
if (status) {break;}

size=gsl_multimin_fminimizer_size(s);
status = gsl_multimin_test_size(size, 1e-4);

if (status == GSL_SUCCESS) {
}

} while (status==GSL_CONTINUE && iter<1000);

last_fit = s->fval;
last_fit=-last_fit;
cout << "Last fit score = " << last_fit <<"\n";

if (last_fit>best_fit) {
best_fit=last_fit;
x_final=gsl_vector_get(s->x,0);

} else {
break;
}

gsl_vector_free(xx);
gsl_vector_free(ss);
gsl_multimin_fminimizer_free(s);


}
}


//Print values and frequencies
cout << "Optimised values\n";
cout << " x " << x_final << " Log L " << best_fit << "\n";
od.x = x_final;
int xx_size=1;
gsl_vector *xx = gsl_vector_calloc(xx_size);
SetXXVector(od,xx);
//cout << "Allele frequencies\n";
ofstream out_file;
out_file.open("both");
FindFrequenciesFinal(traj,traj_inf,xx,out_file);


return 0;
}
54 changes: 54 additions & 0 deletions Models/OneFreq/shared.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cstring>
#include <vector>
#include <string>
#include <algorithm>
#include <map>

#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_multifit_nlin.h>
#include <gsl/gsl_sf_hyperg.h>
#include <gsl/gsl_sf_gamma.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_statistics_double.h>
#include <gsl/gsl_permutation.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_multimin.h>


using namespace std;

// One record of input data; main holds these in the vectors traj and traj_inf.
// NOTE(review): the field meanings below are inferred from the names only —
// confirm against ImportData in utilities.cpp.
struct rec {
int chr;    // presumably chromosome identifier — TODO confirm
int index;  // presumably position of this record within the data set — TODO confirm
int pos;    // presumably genomic position of the locus — TODO confirm
int obs;    // presumably observed allele count — TODO confirm
double q;   // presumably allele frequency — TODO confirm
int N;      // presumably total read depth / number of trials — TODO confirm
};

// Parameters being optimised. This model has a single parameter, x, which is
// passed to SetXXVector to populate the GSL parameter vector.
struct opt_data {
double x;  // the one fitted value; presumably an allele frequency — TODO confirm
};


// Declarations of the shared helper routines. The definitions are not in this
// header; every description below that says "presumably" is inferred from the
// signature and should be confirmed against the implementation.
// NOTE(review): several functions take vector arguments by value, which copies
// them on each call; switching to const references would need the matching
// definitions to be changed at the same time.

// Reads the data set named by `input` and stores the records in `traj`.
void ImportData (string input, vector<rec>& traj);

// Presumably copies the parameters held in `od` into the GSL vector `xx`.
void SetXXVector(opt_data od, gsl_vector* xx);
// Presumably packs `betaparm` and the records of `traj` into the flat array
// `params`, which the caller must have allocated — TODO confirm required size.
void SetParams (int betaparm, double *params, vector<rec> traj);
// Presumably the inverse of SetParams: unpacks `p` into `betaparm` and `traj`.
void GetParams (double *p, int& betaparm, vector<rec>& traj);
// Presumably validates the parameter vector `xx`; meaning of the return value
// cannot be determined from here — TODO confirm.
int CheckParams(const gsl_vector *xx, double *p);
// Objective function with the signature GSL's multimin minimisers expect:
// `xx` is the trial parameter vector, `params` the opaque user data pointer.
// Presumably returns the negative log likelihood, so that minimising it
// maximises the fit — TODO confirm.
double get_best_fit(const gsl_vector *xx, void *params);
// Presumably computes model frequencies for the parameters in `xx`, writing
// the result to `traj_inf`.
void FindFrequencies (vector<rec> traj, vector<rec>& traj_inf, const gsl_vector *xx);
// As FindFrequencies, but additionally receives an output file stream;
// presumably the results are written to it — TODO confirm.
void FindFrequenciesFinal (vector<rec> traj, vector<rec>& traj_inf, const gsl_vector *xx, ofstream& out_file);
// Presumably returns the log likelihood of the observations in `traj` given
// the inferred values in `traj_inf`, using `fact_store` as a lookup table.
double GetLogLikelihood(int betaparm, vector<rec> traj, vector<rec> traj_inf, vector<double> fact_store);
// Presumably fills `fact_store` with log-factorial values up to N — TODO confirm.
void FindLogFact(vector<double>& fact_store,int N);
// Presumably evaluates a beta-binomial (log) probability of `r` out of `N`
// with frequency `p` — TODO confirm. NOTE(review): `p` is a float here while
// frequencies elsewhere in this header are double.
double BetaBinomCalc(int N, int r, float p, int betaparm, vector<double> fact_store);
Loading

0 comments on commit cbf25ce

Please sign in to comment.