-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
2,627 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
This code was used to generate results published in the paper, 'Rapid Identification of Genes Controlling Virulence and Immunity in Malaria Parasites' by Abkallo et al. These notes are written by Chris Illingworth. | ||
|
||
A standard GSL optimisation routine was used to fit a variety of models to sequenced allele frequencies | ||
|
||
OneFreq: Fits a single allele frequency to the data | ||
|
||
To compile use: | ||
|
||
make one | ||
|
||
Command line for this model is: | ||
|
||
./run_onefreq <file> <random seed> <reps> <beta> | ||
|
||
where <file> contains the input allele frequencies, <random seed> initialises the random number generator, <reps> specifies how many replicate calculations to run, and <beta> is the noise parameter characterising the beta-binomial model. Note that beta is equal to the shrink parameter from the jump-diffusion code. | ||
|
||
|
||
SingleDriver: Fits a model of allele frequencies given the presence of a single allele under positive selection | ||
|
||
make one compiles run_onetimesel | ||
|
||
./run_onetimesel <file> <random seed> <reps> <beta> | ||
|
||
This optimises a model for a given dataset, finding a set of optimised parameters for each replicate optimisation. The model assumes a single local rate of recombination. | ||
|
||
|
||
SingleDriverRhoXStay: Fits a model of allele frequencies | ||
|
||
Compiled using make one | ||
|
||
./run_onetimeselrhoX_stay <file> <random seed> <reps> <beta> <locus> | ||
|
||
This optimises a model for a given dataset, finding a set of optimised parameters for each replicate optimisation. The model assumes that there are two local rates of recombination, which change in a stepwise fashion at a given locus. | ||
|
||
In order to obtain better results from the optimisation routine, this code specifies a locus at which selection might be found, and optimises the remaining parameters of the model. Code was run across a range of input loci. | ||
|
||
|
||
SingleDriver2RhoXStay and SingleDriver3RhoXStay: | ||
|
||
These work in the same way as the SingleDriverRhoXStay model, but with three and four local rates of recombination respectively (that is, with two and three recombination rate change points). | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Build rules for run_onefreq (the OneFreq model). Build with `make one`.
#
# The GSL include/library locations below are the original author's paths.
# Variables given on the make command line take precedence, e.g.
#   make one CC_FLAGS="-O3 -Wall -I/opt/gsl/include" \
#            LD_FLAGS="-L/opt/gsl/lib -lm -lgsl -lcblas"
CC = g++
CC_FLAGS = -g3 -O3 -Wall -I /Users/ci3/local/include/gsl/
LD_FLAGS = -L/Users/ci3/local/include/gsl/lib/ -lm -lgsl -lcblas
ONE = onefreq.o utilities.o

# `one` and `clean` name actions, not files on disk.
.PHONY: one clean

# Link the object files into the run_onefreq executable.
one : $(ONE)
	$(CC) $(CC_FLAGS) $(ONE) -o run_onefreq $(LD_FLAGS)

# onefreq.cpp includes shared.h, so a header change must trigger a recompile.
onefreq.o: onefreq.cpp shared.h
	$(CC) $(CC_FLAGS) -c onefreq.cpp

# utilities.cpp is assumed to include shared.h as well (it is not shown here);
# the extra prerequisite only causes a harmless rebuild if it does not.
utilities.o: utilities.cpp shared.h
	$(CC) $(CC_FLAGS) -c utilities.cpp

# Remove build products.
clean:
	rm -f $(ONE) run_onefreq
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
#include <iostream> | ||
#include <sstream> | ||
#include <vector> | ||
#include <list> | ||
#include <deque> | ||
#include <map> | ||
using namespace std; | ||
#include "shared.h" | ||
|
||
int main(int argc, char *argv[]){ | ||
|
||
vector<rec> traj; | ||
vector<rec> traj_inf; | ||
double x_final=0; | ||
|
||
string input(argv[1]); | ||
int seed=atoi(argv[2]); | ||
|
||
//Set up random number generator | ||
srand((unsigned int)(seed)); | ||
gsl_rng *rgen = gsl_rng_alloc (gsl_rng_taus); | ||
gsl_rng_set (rgen, seed); | ||
|
||
ImportData(input,traj); | ||
|
||
//return 0; | ||
|
||
//Number of data points | ||
int p=traj.size(); | ||
//cout << p << "\n"; | ||
|
||
//Parameters are l (locus), x (cross frequency), e (vertical movement), rho (recombination), xf (driver frequency) | ||
|
||
opt_data od; | ||
double best_fit=-1e10; | ||
double last_fit=-1e10; | ||
|
||
int reps=atoi(argv[3]); | ||
int bparm=atoi(argv[4]); | ||
|
||
for (int r=1; r<=reps; r++) { | ||
|
||
//Set up initial parameters | ||
od.x = 0.5+(0.5*gsl_rng_uniform(rgen)); | ||
|
||
//General setup for optimisation routine | ||
|
||
//Optimisation processs | ||
for (int it=0;it<1000000;it++){ | ||
|
||
//cout << "it " << it << "\n"; | ||
size_t iter=0; //GSL iteration number | ||
|
||
//CheckLastLikelihood | ||
|
||
//Vector x contains parameters to be optimised. Parameters are locus and two frequencies | ||
int xx_size=1; | ||
gsl_vector *xx = gsl_vector_calloc(xx_size); | ||
SetXXVector(od,xx); | ||
//cout << "Done set xx\n"; | ||
|
||
//Optimisation bit goes here | ||
|
||
int p_size = 4*(traj.size())+25; | ||
|
||
double *params; | ||
params=(double *)calloc(p_size,sizeof(double)); | ||
|
||
SetParams(bparm,params,traj); | ||
//cout << "Done set params\n"; | ||
|
||
//Define the optimisation function | ||
gsl_multimin_function my_func; | ||
my_func.n=xx_size; | ||
my_func.f=&get_best_fit; | ||
my_func.params=params; | ||
|
||
gsl_multimin_fminimizer *s; | ||
const gsl_multimin_fminimizer_type *T=gsl_multimin_fminimizer_nmsimplex; | ||
s=gsl_multimin_fminimizer_alloc(T,xx_size); | ||
//cout << "Done define\n"; | ||
|
||
//Starting change magnitudes | ||
gsl_vector* ss=gsl_vector_alloc(xx_size); | ||
gsl_vector_set(ss,0,0.1); | ||
//cout << "Done set ss\n"; | ||
|
||
gsl_multimin_fminimizer_set (s,&my_func,xx,ss); | ||
//cout << "Done set mmin\n"; | ||
int status; | ||
double size; | ||
do { | ||
iter++; | ||
status=gsl_multimin_fminimizer_iterate(s); | ||
if (status) {break;} | ||
|
||
size=gsl_multimin_fminimizer_size(s); | ||
status = gsl_multimin_test_size(size, 1e-4); | ||
|
||
if (status == GSL_SUCCESS) { | ||
} | ||
|
||
} while (status==GSL_CONTINUE && iter<1000); | ||
|
||
last_fit = s->fval; | ||
last_fit=-last_fit; | ||
cout << "Last fit score = " << last_fit <<"\n"; | ||
|
||
if (last_fit>best_fit) { | ||
best_fit=last_fit; | ||
x_final=gsl_vector_get(s->x,0); | ||
|
||
} else { | ||
break; | ||
} | ||
|
||
gsl_vector_free(xx); | ||
gsl_vector_free(ss); | ||
gsl_multimin_fminimizer_free(s); | ||
|
||
|
||
} | ||
} | ||
|
||
|
||
//Print values and frequencies | ||
cout << "Optimised values\n"; | ||
cout << " x " << x_final << " Log L " << best_fit << "\n"; | ||
od.x = x_final; | ||
int xx_size=1; | ||
gsl_vector *xx = gsl_vector_calloc(xx_size); | ||
SetXXVector(od,xx); | ||
//cout << "Allele frequencies\n"; | ||
ofstream out_file; | ||
out_file.open("both"); | ||
FindFrequenciesFinal(traj,traj_inf,xx,out_file); | ||
|
||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#include <iostream> | ||
#include <iomanip> | ||
#include <fstream> | ||
#include <cstring> | ||
#include <vector> | ||
#include <string> | ||
#include <algorithm> | ||
#include <map> | ||
|
||
#include <gsl/gsl_rng.h> | ||
#include <gsl/gsl_randist.h> | ||
#include <gsl/gsl_vector.h> | ||
#include <gsl/gsl_matrix.h> | ||
#include <gsl/gsl_blas.h> | ||
#include <gsl/gsl_multifit_nlin.h> | ||
#include <gsl/gsl_sf_hyperg.h> | ||
#include <gsl/gsl_sf_gamma.h> | ||
#include <gsl/gsl_randist.h> | ||
#include <gsl/gsl_histogram.h> | ||
#include <gsl/gsl_statistics_double.h> | ||
#include <gsl/gsl_permutation.h> | ||
#include <gsl/gsl_math.h> | ||
#include <gsl/gsl_linalg.h> | ||
#include <gsl/gsl_multimin.h> | ||
|
||
|
||
using namespace std; | ||
|
||
// One record of the imported data; main holds these in vector<rec> traj (filled by ImportData) and traj_inf. | ||
// NOTE(review): the field meanings below are inferred from the names only - confirm against ImportData in utilities.cpp. | ||
//   chr   - presumably a chromosome identifier | ||
//   index - presumably the position of the record within the data set | ||
//   pos   - presumably the genomic position of the locus | ||
//   obs   - presumably the observed allele count | ||
//   q     - presumably an allele frequency | ||
//   N     - presumably the total read depth at the locus | ||
struct rec { | ||
int chr; | ||
int index; | ||
int pos; | ||
int obs; | ||
double q; | ||
int N; | ||
}; | ||
|
||
// Parameters being optimised. This model has a single parameter, x. | ||
// main sets x to a random starting value (0.5 + 0.5*uniform) before optimising and to the best value found afterwards; | ||
// SetXXVector is called with it to fill the GSL parameter vector. | ||
struct opt_data { | ||
double x; | ||
}; | ||
|
||
|
||
// Declarations of the routines shared between the programs; definitions are not in this header (presumably utilities.cpp). | ||
// NOTE(review): several routines take vector<rec> / vector<double> by value, which copies the data on every call; | ||
// changing them to const references would need matching changes in the definitions. | ||
// | ||
// ImportData: called by main with the input file name; traj is passed by reference to receive the data. | ||
void ImportData (string input, vector<rec>& traj); | ||
|
||
// SetXXVector: called by main with a freshly allocated one-element xx; presumably copies od.x into it. | ||
void SetXXVector(opt_data od, gsl_vector* xx); | ||
// SetParams: called by main with a zeroed buffer of 4*traj.size()+25 doubles; presumably packs betaparm and traj into it. | ||
void SetParams (int betaparm, double *params, vector<rec> traj); | ||
// GetParams: presumably the inverse of SetParams, unpacking p into betaparm and traj - confirm. | ||
void GetParams (double *p, int& betaparm, vector<rec>& traj); | ||
// CheckParams: not called from main; purpose and return convention to be confirmed. | ||
int CheckParams(const gsl_vector *xx, double *p); | ||
// get_best_fit: objective function installed by main as gsl_multimin_function::f, with the SetParams buffer as params. | ||
// main negates the minimised value before reporting it as the fit score. | ||
double get_best_fit(const gsl_vector *xx, void *params); | ||
// FindFrequencies / FindFrequenciesFinal: presumably fill traj_inf from traj and the parameters in xx; | ||
// main calls the Final variant once, passing an output stream opened on the file "both". | ||
void FindFrequencies (vector<rec> traj, vector<rec>& traj_inf, const gsl_vector *xx); | ||
void FindFrequenciesFinal (vector<rec> traj, vector<rec>& traj_inf, const gsl_vector *xx, ofstream& out_file); | ||
// GetLogLikelihood: presumably the log likelihood of traj given traj_inf under the beta-binomial model - confirm. | ||
double GetLogLikelihood(int betaparm, vector<rec> traj, vector<rec> traj_inf, vector<double> fact_store); | ||
// FindLogFact: presumably fills fact_store with log-factorials up to N - confirm. | ||
void FindLogFact(vector<double>& fact_store,int N); | ||
// BetaBinomCalc: presumably a beta-binomial (log) probability of r out of N at frequency p - confirm. | ||
// NOTE(review): p is a float here while frequencies elsewhere are double. | ||
double BetaBinomCalc(int N, int r, float p, int betaparm, vector<double> fact_store); |
Oops, something went wrong.