Skip to content

Commit 3ac620e

Browse files
authored
Merge pull request #898 from brieuclehmann/covariance
Add function for covariance matrix
2 parents 1c6ad9c + f250616 commit 3ac620e

File tree

6 files changed

+391
-6
lines changed

6 files changed

+391
-6
lines changed

c/tests/test_stats.c

+32
Original file line numberDiff line numberDiff line change
@@ -1333,6 +1333,36 @@ test_paper_ex_divergence(void)
13331333
tsk_treeseq_free(&ts);
13341334
}
13351335

1336+
static void
1337+
test_paper_ex_relatedness(void)
1338+
{
1339+
tsk_treeseq_t ts;
1340+
tsk_id_t samples[] = { 0, 1, 2, 3 };
1341+
tsk_size_t sample_set_sizes[] = { 2, 2 };
1342+
tsk_id_t set_indexes[] = { 0, 0 };
1343+
double result;
1344+
int ret;
1345+
1346+
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
1347+
paper_ex_mutations, paper_ex_individuals, NULL, 0);
1348+
1349+
ret = tsk_treeseq_relatedness(&ts, 2, sample_set_sizes, samples, 1, set_indexes, 0,
1350+
NULL, &result, TSK_STAT_SITE);
1351+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1352+
tsk_treeseq_free(&ts);
1353+
}
1354+
1355+
static void
1356+
test_paper_ex_relatedness_errors(void)
1357+
{
1358+
tsk_treeseq_t ts;
1359+
1360+
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
1361+
paper_ex_mutations, paper_ex_individuals, NULL, 0);
1362+
verify_two_way_stat_func_errors(&ts, tsk_treeseq_relatedness);
1363+
tsk_treeseq_free(&ts);
1364+
}
1365+
13361366
static void
13371367
test_paper_ex_Y2_errors(void)
13381368
{
@@ -1679,6 +1709,8 @@ main(int argc, char **argv)
16791709
{ "test_paper_ex_Y1", test_paper_ex_Y1 },
16801710
{ "test_paper_ex_divergence_errors", test_paper_ex_divergence_errors },
16811711
{ "test_paper_ex_divergence", test_paper_ex_divergence },
1712+
{ "test_paper_ex_relatedness_errors", test_paper_ex_relatedness_errors },
1713+
{ "test_paper_ex_relatedness", test_paper_ex_relatedness },
16821714
{ "test_paper_ex_Y2_errors", test_paper_ex_Y2_errors },
16831715
{ "test_paper_ex_Y2", test_paper_ex_Y2 },
16841716
{ "test_paper_ex_f2_errors", test_paper_ex_f2_errors },

c/tskit/trees.c

+42
Original file line numberDiff line numberDiff line change
@@ -2752,6 +2752,48 @@ tsk_treeseq_divergence(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
27522752
return ret;
27532753
}
27542754

2755+
static int
2756+
relatedness_summary_func(size_t state_dim, const double *state, size_t result_dim,
2757+
double *result, void *params)
2758+
{
2759+
sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;
2760+
const double *x = state;
2761+
tsk_id_t i, j;
2762+
size_t k;
2763+
double sumx = 0;
2764+
double meanx;
2765+
2766+
for (k = 0; k < state_dim; k++) {
2767+
sumx += x[k];
2768+
}
2769+
2770+
meanx = sumx / (double) state_dim;
2771+
for (k = 0; k < result_dim; k++) {
2772+
i = args.set_indexes[2 * k];
2773+
j = args.set_indexes[2 * k + 1];
2774+
result[k] = (x[i] - meanx) * (x[j] - meanx);
2775+
}
2776+
return 0;
2777+
}
2778+
2779+
int
2780+
tsk_treeseq_relatedness(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
2781+
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
2782+
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
2783+
const double *windows, double *result, tsk_flags_t options)
2784+
{
2785+
int ret = 0;
2786+
ret = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);
2787+
if (ret != 0) {
2788+
goto out;
2789+
}
2790+
ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,
2791+
sample_sets, num_index_tuples, index_tuples, relatedness_summary_func,
2792+
num_windows, windows, result, options);
2793+
out:
2794+
return ret;
2795+
}
2796+
27552797
static int
27562798
Y2_summary_func(size_t TSK_UNUSED(state_dim), const double *state, size_t result_dim,
27572799
double *result, void *params)

c/tskit/trees.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -338,8 +338,6 @@ int tsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self,
338338
const tsk_id_t *sample_sets, tsk_size_t num_windows, const double *windows,
339339
double *result, tsk_flags_t options);
340340

341-
/* Two way sample set stats */
342-
343341
typedef int general_sample_stat_method(const tsk_treeseq_t *self,
344342
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
345343
const tsk_id_t *sample_sets, tsk_size_t num_indexes, const tsk_id_t *indexes,
@@ -357,6 +355,10 @@ int tsk_treeseq_f2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
357355
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
358356
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
359357
const double *windows, double *result, tsk_flags_t options);
358+
int tsk_treeseq_relatedness(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
359+
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
360+
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
361+
const double *windows, double *result, tsk_flags_t options);
360362

361363
/* Three way sample set stats */
362364
int tsk_treeseq_Y3(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,

python/_tskitmodule.c

+18-4
Original file line numberDiff line numberDiff line change
@@ -6946,7 +6946,7 @@ TreeSequence_k_way_stat_method(TreeSequence *self, PyObject *args, PyObject *kwd
69466946
{
69476947
PyObject *ret = NULL;
69486948
static char *kwlist[] = { "sample_set_sizes", "sample_sets", "indexes", "windows",
6949-
"mode", "span_normalise", NULL };
6949+
"mode", "span_normalise", "polarised", NULL };
69506950
PyObject *sample_set_sizes = NULL;
69516951
PyObject *sample_sets = NULL;
69526952
PyObject *indexes = NULL;
@@ -6960,14 +6960,15 @@ TreeSequence_k_way_stat_method(TreeSequence *self, PyObject *args, PyObject *kwd
69606960
npy_intp *shape;
69616961
tsk_flags_t options = 0;
69626962
char *mode = NULL;
6963-
int span_normalise = 1;
6963+
int span_normalise = true;
6964+
int polarised = false;
69646965
int err;
69656966

69666967
if (TreeSequence_check_tree_sequence(self) != 0) {
69676968
goto out;
69686969
}
6969-
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOO|si", kwlist, &sample_set_sizes,
6970-
&sample_sets, &indexes, &windows, &mode, &span_normalise)) {
6970+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOO|sii", kwlist, &sample_set_sizes,
6971+
&sample_sets, &indexes, &windows, &mode, &span_normalise, &polarised)) {
69716972
goto out;
69726973
}
69736974
if (parse_stats_mode(mode, &options) != 0) {
@@ -6976,6 +6977,9 @@ TreeSequence_k_way_stat_method(TreeSequence *self, PyObject *args, PyObject *kwd
69766977
if (span_normalise) {
69776978
options |= TSK_STAT_SPAN_NORMALISE;
69786979
}
6980+
if (polarised) {
6981+
options |= TSK_STAT_POLARISED;
6982+
}
69796983
if (parse_sample_sets(sample_set_sizes, &sample_set_sizes_array, sample_sets,
69806984
&sample_sets_array, &num_sample_sets)
69816985
!= 0) {
@@ -7028,6 +7032,12 @@ TreeSequence_divergence(TreeSequence *self, PyObject *args, PyObject *kwds)
70287032
return TreeSequence_k_way_stat_method(self, args, kwds, 2, tsk_treeseq_divergence);
70297033
}
70307034

7035+
static PyObject *
7036+
TreeSequence_relatedness(TreeSequence *self, PyObject *args, PyObject *kwds)
7037+
{
7038+
return TreeSequence_k_way_stat_method(self, args, kwds, 2, tsk_treeseq_relatedness);
7039+
}
7040+
70317041
static PyObject *
70327042
TreeSequence_Y2(TreeSequence *self, PyObject *args, PyObject *kwds)
70337043
{
@@ -7345,6 +7355,10 @@ static PyMethodDef TreeSequence_methods[] = {
73457355
.ml_meth = (PyCFunction) TreeSequence_divergence,
73467356
.ml_flags = METH_VARARGS | METH_KEYWORDS,
73477357
.ml_doc = "Computes diveregence between sample sets." },
7358+
{ .ml_name = "relatedness",
7359+
.ml_meth = (PyCFunction) TreeSequence_relatedness,
7360+
.ml_flags = METH_VARARGS | METH_KEYWORDS,
7361+
.ml_doc = "Computes genetic relatedness between sample sets." },
73487362
{ .ml_name = "Y1",
73497363
.ml_meth = (PyCFunction) TreeSequence_Y1,
73507364
.ml_flags = METH_VARARGS | METH_KEYWORDS,

0 commit comments

Comments
 (0)