Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Low level pair coalescence counts #2932

Merged
merged 1 commit into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
use-mamba: true

- name: Cache Conda env
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ${{ env.CONDA }}/envs
key: conda-${{ runner.os }}--${{ runner.arch }}--${{ hashFiles(env.REQUIREMENTS) }}-${{ env.CACHE_NUMBER }}
Expand All @@ -65,4 +65,4 @@ jobs:
curl -X POST https://api.github.com/repos/tskit-dev/tskit-site/dispatches \
-H 'Accept: application/vnd.github.everest-preview+json' \
-u AdminBot-tskit:${{ secrets.ADMINBOT_TOKEN }} \
--data '{"event_type":"build-docs"}'
--data '{"event_type":"build-docs"}'
2 changes: 2 additions & 0 deletions c/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
build
.*.swp
.*.swo
103 changes: 103 additions & 0 deletions c/tests/test_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,94 @@ verify_divergence_matrix(tsk_treeseq_t *ts, tsk_flags_t options)
}
}

/* Check coalescence counts against naive implementation */
static void
verify_pair_coalescence_counts(tsk_treeseq_t *ts, tsk_flags_t options)
{
int ret;
const tsk_size_t n = tsk_treeseq_get_num_samples(ts);
const tsk_size_t N = tsk_treeseq_get_num_nodes(ts);
const tsk_size_t T = tsk_treeseq_get_num_trees(ts);
const tsk_id_t *samples = tsk_treeseq_get_samples(ts);
const double *breakpoints = tsk_treeseq_get_breakpoints(ts);
const tsk_size_t P = 2;
const tsk_size_t I = P * (P + 1) / 2;
tsk_id_t sample_sets[n];
tsk_size_t sample_set_sizes[P];
tsk_id_t index_tuples[2 * I];
tsk_id_t node_output_map[N];
tsk_size_t dim = T * N * I;
double C1[dim];
tsk_size_t i, j, k;

for (i = 0; i < n; i++) {
sample_sets[i] = samples[i];
}

for (i = 0; i < P; i++) {
sample_set_sizes[i] = 0;
}
for (j = 0; j < n; j++) {
i = j / (n / P);
sample_set_sizes[i]++;
}

for (j = 0, i = 0; j < P; j++) {
for (k = j; k < P; k++) {
index_tuples[i++] = (tsk_id_t) j;
index_tuples[i++] = (tsk_id_t) k;
}
}

for (i = 0; i < N; i++) {
node_output_map[i] = (tsk_id_t) i;
}

ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
index_tuples, T, breakpoints, N, node_output_map, options, C1);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* TODO: test against naive pairs per node per tree */

/* cover errors */
double bad_breakpoints[2] = { breakpoints[1], 0.0 };
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
index_tuples, 1, bad_breakpoints, N, node_output_map, options, C1);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

index_tuples[0] = (tsk_id_t) P;
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
index_tuples, 1, breakpoints, N, node_output_map, options, C1);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);
index_tuples[0] = 0;

tsk_size_t tmp = sample_set_sizes[0];
sample_set_sizes[0] = 0;
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
index_tuples, 1, breakpoints, N, node_output_map, options, C1);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EMPTY_SAMPLE_SET);
sample_set_sizes[0] = tmp;

sample_sets[1] = 0;
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
index_tuples, 1, breakpoints, N, node_output_map, options, C1);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
sample_sets[1] = 1;

ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
index_tuples, 1, breakpoints, N - 1, node_output_map, options, C1);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_OUTPUT_MAP_DIM);

ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
index_tuples, 1, breakpoints, 0, node_output_map, options, C1);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_OUTPUT_MAP_DIM);

node_output_map[0] = -2;
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
index_tuples, 1, breakpoints, N, node_output_map, options, C1);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_OUTPUT_MAP);
node_output_map[0] = 0;
}

typedef struct {
int call_count;
int error_on;
Expand Down Expand Up @@ -2786,6 +2874,19 @@ test_multiroot_divergence_matrix(void)
tsk_treeseq_free(&ts);
}

static void
test_pair_coalescence_counts(void)
{
tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,
nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);
verify_pair_coalescence_counts(&ts, TSK_STAT_NODE);
verify_pair_coalescence_counts(&ts, TSK_STAT_NODE | TSK_STAT_SPAN_NORMALISE);
verify_pair_coalescence_counts(&ts, 0);
verify_pair_coalescence_counts(&ts, TSK_STAT_SPAN_NORMALISE);
tsk_treeseq_free(&ts);
}

int
main(int argc, char **argv)
{
Expand Down Expand Up @@ -2884,6 +2985,8 @@ main(int argc, char **argv)
test_simplest_divergence_matrix_internal_sample },
{ "test_multiroot_divergence_matrix", test_multiroot_divergence_matrix },

{ "test_pair_coalescence_counts", test_pair_coalescence_counts },

{ NULL, NULL },
};
return test_main(tests, argc, argv);
Expand Down
8 changes: 8 additions & 0 deletions c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,14 @@ tsk_strerror_internal(int err)
ret = "Insufficient weights provided (at least 1 required). "
"(TSK_ERR_INSUFFICIENT_WEIGHTS)";
break;
case TSK_ERR_BAD_NODE_OUTPUT_MAP:
ret = "Node output map contains values less than TSK_NULL. "
"(TSK_ERR_BAD_NODE_OUTPUT_MAP)";
break;
case TSK_ERR_BAD_NODE_OUTPUT_MAP_DIM:
ret = "Maximum index in node output map does not match "
"output dimension. (TSK_ERR_BAD_NODE_OUTPUT_MAP_DIM)";
break;

/* Mutation mapping errors */
case TSK_ERR_GENOTYPES_ALL_MISSING:
Expand Down
8 changes: 8 additions & 0 deletions c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,14 @@ not support it.
Insufficient weights were provided.
*/
#define TSK_ERR_INSUFFICIENT_WEIGHTS -913
/**
The node output map contains a value less than TSK_NULL.
*/
#define TSK_ERR_BAD_NODE_OUTPUT_MAP -914
/**
Maximum index in node output map does not match output dimension.
*/
#define TSK_ERR_BAD_NODE_OUTPUT_MAP_DIM -915
/** @} */

/**
Expand Down
Loading
Loading