Skip to content

Commit

Permalink
Implement decapitate function
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromekelleher committed May 18, 2022
1 parent 7239339 commit ab17582
Show file tree
Hide file tree
Showing 10 changed files with 853 additions and 1 deletion.
77 changes: 77 additions & 0 deletions c/tests/test_tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -10155,6 +10155,80 @@ test_table_collection_clear(void)
| TSK_CLEAR_TS_METADATA_AND_SCHEMA);
}

static void
test_table_collection_decapitate(void)
{
int ret;
tsk_treeseq_t ts;
tsk_table_collection_t t;

tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
ret = tsk_treeseq_copy_tables(&ts, &t, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);

/* Add some migrations */
tsk_population_table_add_row(&t.populations, NULL, 0);
tsk_population_table_add_row(&t.populations, NULL, 0);
tsk_migration_table_add_row(&t.migrations, 0, 10, 0, 0, 1, 0.05, NULL, 0);
tsk_migration_table_add_row(&t.migrations, 0, 10, 0, 1, 0, 0.09, NULL, 0);
tsk_migration_table_add_row(&t.migrations, 0, 10, 0, 0, 1, 0.10, NULL, 0);
CU_ASSERT_EQUAL(t.migrations.num_rows, 3);

/* NOTE: haven't worked out the exact IDs on the branches here, just
* for illustration.
0.09┊ 9 5 10 ┊ 9 5 ┊11 5 ┊
┊ ┃ ┏┻┓ ┃ ┊ ┃ ┏━┻┓ ┊ ┃ ┏━┻┓ ┊
0.07┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ 4 ┊ ┃ ┃ 4 ┊
┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┊ ┃ ┃ ┏┻┓ ┊
0.00┊ 0 1 3 2 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊
0.00 2.00 7.00 10.00
*/

ret = tsk_table_collection_decapitate(&t, 0.09, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_treeseq_init(&ts, &t, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 12);
/* Lost the mutation over 5 */
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 2);
/* We keep the migration at exactly 0.09. */
CU_ASSERT_EQUAL(tsk_treeseq_get_num_migrations(&ts), 2);

tsk_table_collection_free(&t);
tsk_treeseq_free(&ts);
}

static void
test_table_collection_decapitate_errors(void)
{
int ret;
tsk_treeseq_t ts;
tsk_table_collection_t t;

tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
ret = tsk_treeseq_copy_tables(&ts, &t, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);

/* This should be caught later when we try to index */
reverse_edges(&t);
ret = tsk_table_collection_decapitate(&t, 0.09, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGES_NOT_SORTED_CHILD);

/* This should be caught immediately on entry to the function */
t.sequence_length = -1;
ret = tsk_table_collection_decapitate(&t, 0.09, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SEQUENCE_LENGTH);

tsk_table_collection_free(&t);
}

static void
test_table_collection_takeset_indexes(void)
{
Expand Down Expand Up @@ -10317,6 +10391,9 @@ main(int argc, char **argv)
test_table_collection_union_middle_merge },
{ "test_table_collection_union_errors", test_table_collection_union_errors },
{ "test_table_collection_clear", test_table_collection_clear },
{ "test_table_collection_decapitate", test_table_collection_decapitate },
{ "test_table_collection_decapitate_errors",
test_table_collection_decapitate_errors },
{ "test_table_collection_takeset_indexes",
test_table_collection_takeset_indexes },
{ NULL, NULL },
Expand Down
127 changes: 127 additions & 0 deletions c/tskit/tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -12647,6 +12647,133 @@ tsk_table_collection_union(tsk_table_collection_t *self,
return ret;
}

int
tsk_table_collection_decapitate(
tsk_table_collection_t *self, double time, tsk_flags_t TSK_UNUSED(options))
{
int ret = 0;
tsk_edge_t edge;
tsk_mutation_t mutation;
tsk_migration_t migration;
tsk_edge_table_t edges;
tsk_mutation_table_t mutations;
tsk_migration_table_t migrations;
const double *restrict node_time = self->nodes.time;
tsk_id_t j, ret_id;
double mutation_time;

memset(&edges, 0, sizeof(edges));
memset(&mutations, 0, sizeof(mutations));
memset(&migrations, 0, sizeof(migrations));

/* Note: perhaps should be stricter about what we accept here in terms
* of sorting, but compute_mutation_parents below will check anyway and
* so we're safe while we're calling that.
*/
ret = (int) tsk_table_collection_check_integrity(self, 0);
if (ret != 0) {
goto out;
}

ret = tsk_edge_table_copy(&self->edges, &edges, 0);
if (ret != 0) {
goto out;
}
/* Note: we are assuming below that the tables are sorted, so we could save
* a bit of time and memory here by truncating part-way through the
* edges. */
ret = tsk_edge_table_clear(&self->edges);
if (ret != 0) {
goto out;
}
for (j = 0; j < (tsk_id_t) edges.num_rows; j++) {
tsk_edge_table_get_row_unsafe(&edges, j, &edge);
if (node_time[edge.child] < time) {
if (time < node_time[edge.parent]) {
ret_id = tsk_node_table_add_row(
&self->nodes, 0, time, TSK_NULL, TSK_NULL, NULL, 0);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
edge.parent = ret_id;
}
ret_id = tsk_edge_table_add_row(&self->edges, edge.left, edge.right,
edge.parent, edge.child, edge.metadata, edge.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
}
}
/* Calling x_table_free multiple times is safe, so get rid of the
* extra edge table memory as soon as we can. */
tsk_edge_table_free(&edges);

ret = tsk_mutation_table_copy(&self->mutations, &mutations, 0);
if (ret != 0) {
goto out;
}
ret = tsk_mutation_table_clear(&self->mutations);
if (ret != 0) {
goto out;
}
for (j = 0; j < (tsk_id_t) mutations.num_rows; j++) {
tsk_mutation_table_get_row_unsafe(&mutations, j, &mutation);
mutation_time = tsk_is_unknown_time(mutation.time) ? node_time[mutation.node]
: mutation.time;
if (mutation_time < time) {
/* Set the mutation parent to NULL, and recalculate below */
ret_id = tsk_mutation_table_add_row(&self->mutations, mutation.site,
mutation.node, TSK_NULL, mutation.time, mutation.derived_state,
mutation.derived_state_length, mutation.metadata,
mutation.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
}
}
tsk_mutation_table_free(&mutations);

ret = tsk_migration_table_copy(&self->migrations, &migrations, 0);
if (ret != 0) {
goto out;
}
ret = tsk_migration_table_clear(&self->migrations);
if (ret != 0) {
goto out;
}
for (j = 0; j < (tsk_id_t) migrations.num_rows; j++) {
tsk_migration_table_get_row_unsafe(&migrations, j, &migration);
if (migration.time <= time) {
ret_id = tsk_migration_table_add_row(&self->migrations, migration.left,
migration.right, migration.node, migration.source, migration.dest,
migration.time, migration.metadata, migration.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
}
}
tsk_migration_table_free(&migrations);

ret = tsk_table_collection_build_index(self, 0);
if (ret != 0) {
goto out;
}
ret = tsk_table_collection_compute_mutation_parents(self, 0);
if (ret != 0) {
goto out;
}

out:
tsk_edge_table_free(&edges);
tsk_mutation_table_free(&mutations);
tsk_migration_table_free(&migrations);
return ret;
}

static int
cmp_edge_cl(const void *a, const void *b)
{
Expand Down
5 changes: 5 additions & 0 deletions c/tskit/tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -4262,6 +4262,11 @@ int tsk_table_collection_compute_mutation_parents(
int tsk_table_collection_compute_mutation_times(
tsk_table_collection_t *self, double *random, tsk_flags_t TSK_UNUSED(options));

/* Not documenting this because we may want to pass through default values
* for the new nodes (in particular the metadata) in the future */
int tsk_table_collection_decapitate(
tsk_table_collection_t *self, double time, tsk_flags_t options);

int tsk_reference_sequence_init(tsk_reference_sequence_t *self, tsk_flags_t options);
int tsk_reference_sequence_free(tsk_reference_sequence_t *self);
bool tsk_reference_sequence_is_null(const tsk_reference_sequence_t *self);
Expand Down
1 change: 1 addition & 0 deletions docs/python-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ which perform the same actions but modify the {class}`TableCollection` in place.
TreeSequence.delete_intervals
TreeSequence.delete_sites
TreeSequence.trim
TreeSequence.decapitate
```

(sec_python_api_tree_sequences_ibd)=
Expand Down
7 changes: 6 additions & 1 deletion python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@

**Changes**

- ``VcfWriter.write`` now prints the site ID of variants in the ID field of the output VCF files.
- Add ``TableCollection.decapitate`` and ``TreeSequence.decapitate`` operations
to remove information from that data model that is older than a specific time.
(:user:`jeromekelleher`, :issue:`2236`, :pr:`2240`)

- ``VcfWriter.write`` now prints the site ID of variants in the ID field of the
output VCF files.
(:user:`roohy`, :issue:`2103`, :pr:`2107`)

- Make dumping of tables and tree sequences to disk a zero-copy operation.
Expand Down
27 changes: 27 additions & 0 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -7022,6 +7022,29 @@ TableCollection_compute_mutation_parents(TableCollection *self)
return ret;
}

static PyObject *
TableCollection_decapitate(TableCollection *self, PyObject *args)
{
PyObject *ret = NULL;
int err;
double time;

if (TableCollection_check_state(self) != 0) {
goto out;
}
if (!PyArg_ParseTuple(args, "d", &time)) {
goto out;
}
err = tsk_table_collection_decapitate(self->tables, time, 0);
if (err != 0) {
handle_library_error(err);
goto out;
}
ret = Py_BuildValue("");
out:
return ret;
}

static PyObject *
TableCollection_compute_mutation_times(TableCollection *self)
{
Expand Down Expand Up @@ -7507,6 +7530,10 @@ static PyMethodDef TableCollection_methods[] = {
.ml_flags = METH_VARARGS | METH_KEYWORDS,
.ml_doc
= "Returns True if the parameter table collection is equal to this one." },
{ .ml_name = "decapitate",
.ml_meth = (PyCFunction) TableCollection_decapitate,
.ml_flags = METH_VARARGS,
.ml_doc = "Removes information older than the specified time." },
{ .ml_name = "compute_mutation_parents",
.ml_meth = (PyCFunction) TableCollection_compute_mutation_parents,
.ml_flags = METH_NOARGS,
Expand Down
13 changes: 13 additions & 0 deletions python/tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,19 @@ def test_union_bad_args(self):
with pytest.raises(ValueError):
tc.union(tc2, np.array([[1], [2]], dtype="int32"))

def test_decapitate_bad_args(self):
tc = _tskit.TableCollection(1)
self.get_example_tree_sequence().dump_tables(tc)
with pytest.raises(TypeError):
tc.decapitate()
with pytest.raises(TypeError):
tc.decapitate("1234")

def test_decapitate_error(self):
tc = _tskit.TableCollection(-1)
with pytest.raises(_tskit.LibraryError, match="Sequence length"):
tc.decapitate(0)

def test_equals_bad_args(self):
ts = msprime.simulate(10, random_seed=1242)
tc = ts.tables._ll_tables
Expand Down
Loading

0 comments on commit ab17582

Please sign in to comment.