Skip to content

Commit

Permalink
Merge pull request #1819 from jeromekelleher/discrete_genome
Browse files Browse the repository at this point in the history
Implement TreeSequence.discrete_genome
  • Loading branch information
mergify[bot] authored Oct 20, 2021
2 parents e5dc0f6 + 90f62be commit 5ccccd5
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 0 deletions.
99 changes: 99 additions & 0 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,104 @@ verify_empty_tree_sequence(tsk_treeseq_t *ts, double sequence_length)
* Simplest test cases.
*======================================================*/

static void
test_simplest_discrete_genome(void)
{
const char *nodes = "1 0 0\n"
"1 0 0\n"
"0 1 0";
const char *edges = "0 1 2 0,1\n";
tsk_treeseq_t ts;
tsk_table_collection_t tables;
tsk_id_t ret_id;
int ret;

tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));

ret = tsk_table_collection_copy(ts.tables, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);

tables.sequence_length = 1.001;
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);
tables.sequence_length = 1;

ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);

tables.edges.right[0] = 0.999;
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);
tables.edges.right[0] = 1.0;

tables.edges.left[0] = 0.999;
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);
tables.edges.left[0] = 0;

ret_id = tsk_site_table_add_row(&tables.sites, 0, "A", 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);

tables.sites.position[0] = 0.001;
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);
tables.sites.position[0] = 0;

/* Need another population for a migration */
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 1);

ret_id
= tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 1, 1.0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);

tables.migrations.left[0] = 0.001;
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);
tables.migrations.left[0] = 0;

tables.migrations.right[0] = 0.999;
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);
tables.migrations.right[0] = 1;

/* An empty tree sequence is has a discrete genome. */
tsk_table_collection_clear(&tables, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);

tsk_table_collection_free(&tables);
}

static void
test_simplest_records(void)
{
Expand Down Expand Up @@ -6558,6 +6656,7 @@ main(int argc, char **argv)
{
CU_TestInfo tests[] = {
/* simplest example tests */
{ "test_simplest_discrete_genome", test_simplest_discrete_genome },
{ "test_simplest_records", test_simplest_records },
{ "test_simplest_nonbinary_records", test_simplest_nonbinary_records },
{ "test_simplest_unary_records", test_simplest_unary_records },
Expand Down
39 changes: 39 additions & 0 deletions c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@

#include <tskit/trees.h>

static inline bool
is_discrete(double x)
{
return trunc(x) == x;
}

/* ======================================================== *
* tree sequence
* ======================================================== */
Expand Down Expand Up @@ -127,6 +133,8 @@ tsk_treeseq_init_sites(tsk_treeseq_t *self)
const tsk_size_t num_mutations = self->tables->mutations.num_rows;
const tsk_size_t num_sites = self->tables->sites.num_rows;
const tsk_id_t *restrict mutation_site = self->tables->mutations.site;
const double *restrict site_position = self->tables->sites.position;
bool discrete_sites = true;

self->site_mutations_mem
= tsk_malloc(num_mutations * sizeof(*self->site_mutations_mem));
Expand All @@ -148,6 +156,7 @@ tsk_treeseq_init_sites(tsk_treeseq_t *self)
}
k = 0;
for (j = 0; j < (tsk_id_t) num_sites; j++) {
discrete_sites = discrete_sites && is_discrete(site_position[j]);
self->site_mutations[j] = self->site_mutations_mem + offset;
self->site_mutations_length[j] = 0;
/* Go through all mutations for this site */
Expand All @@ -161,6 +170,7 @@ tsk_treeseq_init_sites(tsk_treeseq_t *self)
goto out;
}
}
self->discrete_genome = self->discrete_genome && discrete_sites;
out:
return ret;
}
Expand Down Expand Up @@ -243,6 +253,7 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
const double *restrict edge_right = self->tables->edges.right;
const double *restrict edge_left = self->tables->edges.left;
tsk_size_t num_trees_alloc = self->num_trees + 1;
bool discrete_breakpoints = true;

self->tree_sites_length
= tsk_malloc(num_trees_alloc * sizeof(*self->tree_sites_length));
Expand All @@ -264,6 +275,7 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
j = 0;
k = 0;
while (j < num_edges || tree_left < sequence_length) {
discrete_breakpoints = discrete_breakpoints && is_discrete(tree_left);
self->breakpoints[tree_index] = tree_left;
while (k < num_edges && edge_right[O[k]] == tree_left) {
k++;
Expand All @@ -289,11 +301,29 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
tsk_bug_assert(site == num_sites);
tsk_bug_assert(tree_index == self->num_trees);
self->breakpoints[tree_index] = tree_right;
discrete_breakpoints = discrete_breakpoints && is_discrete(tree_right);
self->discrete_genome = self->discrete_genome && discrete_breakpoints;
ret = 0;
out:
return ret;
}

static void
tsk_treeseq_init_migrations(tsk_treeseq_t *self)
{
tsk_size_t j;
tsk_size_t num_migrations = self->tables->migrations.num_rows;
const double *restrict left = self->tables->migrations.left;
const double *restrict right = self->tables->migrations.right;
bool discrete_breakpoints = true;

for (j = 0; j < num_migrations; j++) {
discrete_breakpoints
= discrete_breakpoints && is_discrete(left[j]) && is_discrete(right[j]);
}
self->discrete_genome = self->discrete_genome && discrete_breakpoints;
}

static int
tsk_treeseq_init_nodes(tsk_treeseq_t *self)
{
Expand Down Expand Up @@ -400,6 +430,7 @@ tsk_treeseq_init(
if (ret != 0) {
goto out;
}
self->discrete_genome = true;
ret = tsk_treeseq_init_sites(self);
if (ret != 0) {
goto out;
Expand All @@ -412,6 +443,8 @@ tsk_treeseq_init(
if (ret != 0) {
goto out;
}
tsk_treeseq_init_migrations(self);

if (tsk_treeseq_get_time_units_length(self) == strlen(TSK_TIME_UNITS_UNCALIBRATED)
&& !strncmp(tsk_treeseq_get_time_units(self), TSK_TIME_UNITS_UNCALIBRATED,
strlen(TSK_TIME_UNITS_UNCALIBRATED))) {
Expand Down Expand Up @@ -630,6 +663,12 @@ tsk_treeseq_is_sample(const tsk_treeseq_t *self, tsk_id_t u)
return ret;
}

bool
tsk_treeseq_get_discrete_genome(const tsk_treeseq_t *self)
{
return self->discrete_genome;
}

/* Stats functions */

#define GET_2D_ROW(array, row_len, row) (array + (((size_t)(row_len)) * (size_t) row))
Expand Down
3 changes: 3 additions & 0 deletions c/tskit/trees.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ typedef struct {
tsk_id_t *samples;
/* Does this tree sequence have time_units == "uncalibrated" */
bool time_uncalibrated;
/* Are all genome coordinates discrete? */
bool discrete_genome;
/* Breakpoints along the sequence, including 0 and L. */
double *breakpoints;
/* If a node is a sample, map to its index in the samples list */
Expand Down Expand Up @@ -268,6 +270,7 @@ const double *tsk_treeseq_get_breakpoints(const tsk_treeseq_t *self);
const tsk_id_t *tsk_treeseq_get_samples(const tsk_treeseq_t *self);
const tsk_id_t *tsk_treeseq_get_sample_index_map(const tsk_treeseq_t *self);
bool tsk_treeseq_is_sample(const tsk_treeseq_t *self, tsk_id_t u);
bool tsk_treeseq_get_discrete_genome(const tsk_treeseq_t *self);

int tsk_treeseq_get_node(const tsk_treeseq_t *self, tsk_id_t index, tsk_node_t *node);
int tsk_treeseq_get_edge(const tsk_treeseq_t *self, tsk_id_t index, tsk_edge_t *edge);
Expand Down
3 changes: 3 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@
- Substantial performance improvement for ``Tree.total_branch_length``
(:user:`jeromekelleher`, :issue:`1794` :pr:`1799`)

- Add the ``discrete_genome`` property to the TreeSequence class which is true if
all coordinates are discrete (:user:`jeromekelleher`, :issue:`1144`, :pr:`1819`)

--------------------
[0.3.7] - 2021-07-08
--------------------
Expand Down
17 changes: 17 additions & 0 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -7812,6 +7812,19 @@ TreeSequence_get_sequence_length(TreeSequence *self)
return ret;
}

static PyObject *
TreeSequence_get_discrete_genome(TreeSequence *self)
{
PyObject *ret = NULL;

if (TreeSequence_check_state(self) != 0) {
goto out;
}
ret = Py_BuildValue("i", tsk_treeseq_get_discrete_genome(self->tree_sequence));
out:
return ret;
}

static PyObject *
TreeSequence_get_breakpoints(TreeSequence *self)
{
Expand Down Expand Up @@ -9017,6 +9030,10 @@ static PyMethodDef TreeSequence_methods[] = {
.ml_meth = (PyCFunction) TreeSequence_get_sequence_length,
.ml_flags = METH_NOARGS,
.ml_doc = "Returns the sequence length in bases." },
{ .ml_name = "get_discrete_genome",
.ml_meth = (PyCFunction) TreeSequence_get_discrete_genome,
.ml_flags = METH_NOARGS,
.ml_doc = "Returns True if this TreeSequence has discrete coordinates" },
{ .ml_name = "get_breakpoints",
.ml_meth = (PyCFunction) TreeSequence_get_breakpoints,
.ml_flags = METH_NOARGS,
Expand Down
16 changes: 16 additions & 0 deletions python/tests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1241,6 +1241,22 @@ class TestTreeSequence(HighLevelTestCase):
Tests for the tree sequence object.
"""

@pytest.mark.parametrize("ts", get_example_tree_sequences())
def test_discrete_genome(self, ts):
def is_discrete(a):
return np.all(np.floor(a) == a)

tables = ts.tables
discrete_genome = (
is_discrete([tables.sequence_length])
and is_discrete(tables.edges.left)
and is_discrete(tables.edges.right)
and is_discrete(tables.sites.position)
and is_discrete(tables.migrations.left)
and is_discrete(tables.migrations.right)
)
assert ts.discrete_genome == discrete_genome

def test_trees(self):
for ts in get_example_tree_sequences():
self.verify_trees(ts)
Expand Down
16 changes: 16 additions & 0 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -3842,6 +3842,22 @@ def get_sample_size(self):
def file_uuid(self):
return self._ll_tree_sequence.get_file_uuid()

@property
def discrete_genome(self):
"""
Returns True if all genome coordinates in this TreeSequence are
discrete integer values. This is true iff all the following are true:
- The sequence length is discrete
- All site positions are discrete
- All left and right edge coordinates are discrete
- All migration left and right coordinates are discrete
:return: True if this TreeSequence uses discrete genome coordinates.
:rtype: bool
"""
return bool(self._ll_tree_sequence.get_discrete_genome())

@property
def sequence_length(self):
"""
Expand Down

0 comments on commit 5ccccd5

Please sign in to comment.