Skip to content

Commit

Permalink
Don't sort individuals by default
Browse files Browse the repository at this point in the history
  • Loading branch information
benjeffery committed Oct 20, 2021
1 parent 6142f7f commit c5802b4
Show file tree
Hide file tree
Showing 14 changed files with 128 additions and 53 deletions.
7 changes: 7 additions & 0 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,15 @@

- FIXME breaking changes for tree API and virtual root

- Individuals are no longer guaranteed or required to be topologically sorted in a tree sequence.
``tsk_table_collection_sort`` no longer sorts individuals.
(:user:`benjeffery`, :issue:`1774`, :pr:`1789`)

**Features**

- Add ``tsk_table_collection_individual_topological_sort`` to sort the individuals as this is no longer done by the
default sort. (:user:`benjeffery`, :issue:`1774`, :pr:`1789`)

- The default behaviour for table size growth is now to double the current size of the table,
up to a threshold. To keep the previous behaviour, use (e.g.)
``tsk_edge_table_set_max_rows_increment(tables->edges, 1024)``, which results in adding
Expand Down
30 changes: 23 additions & 7 deletions c/tests/test_tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -6280,10 +6280,10 @@ test_sort_tables_offsets(void)
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));

/* Check that sorting would have had an effect */
/* Check that sorting would have had no effect as individuals not in default sort*/
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_equals(&tables, &copy, 0));
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));

tsk_memset(&bookmark, 0, sizeof(bookmark));
bookmark.individuals = tables.individuals.num_rows - 1;
Expand Down Expand Up @@ -6802,17 +6802,28 @@ test_sort_tables_individuals(void)
const char *individuals_cycle = "1 0.2 2 0\n"
"2 0.5 0 1\n"
"3 0.3 1 2\n";
const tsk_id_t bad_parents[] = { 200 };
tsk_id_t ret_id;

ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1.0;
parse_individuals(individuals, &tables.individuals);

ret = tsk_table_collection_copy(&tables, &copy, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

/* Table sort doesn't touch individuals by default*/
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));

/* Not calling with TSK_CHECK_TREES so casting is safe */
ret = (int) tsk_table_collection_check_integrity(
&tables, TSK_CHECK_INDIVIDUAL_ORDERING);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);

ret = tsk_table_collection_sort(&tables, NULL, 0);
ret = tsk_table_collection_individual_topological_sort(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = (int) tsk_table_collection_check_integrity(
&tables, TSK_CHECK_INDIVIDUAL_ORDERING);
Expand All @@ -6821,16 +6832,21 @@ test_sort_tables_individuals(void)
/* Check that the sort is stable */
ret = tsk_table_collection_copy(&tables, &copy, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_individual_topological_sort(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));

ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT(tsk_table_collection_equals(&tables, &copy, 0));
/* Errors on bad table */
ret_id = tsk_individual_table_add_row(
&tables.individuals, 0, NULL, 0, bad_parents, 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 6);
ret = tsk_table_collection_individual_topological_sort(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);

/* Errors on cycle */
tsk_individual_table_clear(&tables.individuals);
parse_individuals(individuals_cycle, &tables.individuals);
ret = tsk_table_collection_sort(&tables, NULL, 0);
ret = tsk_table_collection_individual_topological_sort(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_PARENT_CYCLE);

tsk_table_collection_free(&tables);
Expand Down
4 changes: 2 additions & 2 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -2077,10 +2077,10 @@ test_simplest_bad_individuals(void)
tsk_treeseq_free(&ts);
tables.individuals.parents[0] = TSK_NULL;

/* Unsorted individuals */
/* Unsorted individuals are OK*/
tables.individuals.parents[0] = 1;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);
CU_ASSERT_EQUAL(ret, 0);
tsk_treeseq_free(&ts);
tables.individuals.parents[0] = TSK_NULL;

Expand Down
25 changes: 16 additions & 9 deletions c/tskit/tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -6143,15 +6143,16 @@ tsk_individual_table_topological_sort(
return ret;
}

static int
tsk_table_sorter_sort_individuals(tsk_table_sorter_t *self)
int
tsk_table_collection_individual_topological_sort(
tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))
{
int ret = 0;
tsk_id_t i, ret_id;
tsk_individual_table_t copy;
tsk_individual_t individual;
tsk_individual_table_t *individuals = &self->tables->individuals;
tsk_node_table_t *nodes = &self->tables->nodes;
tsk_individual_table_t *individuals = &self->individuals;
tsk_node_table_t *nodes = &self->nodes;
tsk_size_t num_individuals = individuals->num_rows;
tsk_id_t *traversal_order = tsk_malloc(num_individuals * sizeof(*traversal_order));
tsk_id_t *new_id_map = tsk_malloc(num_individuals * sizeof(*new_id_map));
Expand All @@ -6167,6 +6168,12 @@ tsk_table_sorter_sort_individuals(tsk_table_sorter_t *self)
goto out;
}

ret_id = tsk_table_collection_check_integrity(self, 0);
if (ret_id != 0) {
ret = (int) ret_id;
goto out;
}

ret = tsk_individual_table_clear(individuals);
if (ret != 0) {
goto out;
Expand Down Expand Up @@ -6379,7 +6386,7 @@ tsk_table_sorter_run(tsk_table_sorter_t *self, const tsk_bookmark_t *start)
goto out;
}
}
if (!skip_individuals) {
if (!skip_individuals && self->sort_individuals != NULL) {
ret = self->sort_individuals(self);
if (ret != 0) {
goto out;
Expand Down Expand Up @@ -6415,7 +6422,8 @@ tsk_table_sorter_init(
/* Set the sort_edges and sort_mutations methods to the default. */
self->sort_edges = tsk_table_sorter_sort_edges;
self->sort_mutations = tsk_table_sorter_sort_mutations;
self->sort_individuals = tsk_table_sorter_sort_individuals;
/* Default sort doesn't touch individuals */
self->sort_individuals = NULL;
out:
return ret;
}
Expand Down Expand Up @@ -9722,11 +9730,10 @@ tsk_table_collection_check_integrity(
tsk_id_t ret = 0;

if (options & TSK_CHECK_TREES) {
/* Checking the trees implies all the other checks */
/* Checking the trees implies these checks */
options |= TSK_CHECK_EDGE_ORDERING | TSK_CHECK_SITE_ORDERING
| TSK_CHECK_SITE_DUPLICATES | TSK_CHECK_MUTATION_ORDERING
| TSK_CHECK_INDIVIDUAL_ORDERING | TSK_CHECK_MIGRATION_ORDERING
| TSK_CHECK_INDEXES;
| TSK_CHECK_MIGRATION_ORDERING | TSK_CHECK_INDEXES;
}

if (self->sequence_length <= 0) {
Expand Down
22 changes: 19 additions & 3 deletions c/tskit/tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -3672,17 +3672,33 @@ TSK_NO_CHECK_INTEGRITY
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param self A pointer to a tsk_table_collection_t object.
@param start The position to begin sorting in each table; all rows less than this
position must fulfill the tree sequence sortedness requirements. If this is
NULL, sort all rows.
@param options Sort options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@param options Sort options.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_sort(
tsk_table_collection_t *self, const tsk_bookmark_t *start, tsk_flags_t options);

/**
@brief Sorts the individual table in this collection.
@rst
Sorts the individual table in place, so that parents come before children,
and the parent column is remapped as required. Node references to individuals
are also updated.
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param options Sort options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_individual_topological_sort(
tsk_table_collection_t *self, tsk_flags_t options);

/**
@brief Puts the tables into canonical form.
Expand Down
9 changes: 2 additions & 7 deletions docs/data-model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -566,13 +566,8 @@ Individual requirements
-----------------------

Individuals are a basic type in a tree sequence and are not defined with
respect to any other tables. Individuals can have a reference to their parent
individuals, if present these references must be valid or null (-1).

Individuals must be sorted such that parents are before children.
Sorting a set of tables using :meth:`TableCollection.sort` will ensure that
this requirement is fulfilled.

respect to any other tables. Individuals can have a reference to any number of
their parent individuals, if present these references must be valid or null (-1).

.. _sec_node_requirements:

Expand Down
6 changes: 6 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,15 @@
are multiple roots. Previously orders were defined locally for each root, but
are now globally across all roots. (:user:`jeromekelleher`, :pr:`1704`).

- Individuals are no longer guaranteed or required to be topologically sorted in a tree sequence.
``TableCollection.sort`` no longer sorts individuals.
(:user:`benjeffery`, :issue:`1774`, :pr:`1789`)

**Features**

- Add ``TableCollection.sort_individuals`` to sort the individuals as this is no longer done by the
default sort. (:user:`benjeffery`, :issue:`1774`, :pr:`1789`)

- Add ``__setitem__`` to all tables allowing single rows to be updated. For example
``tables.nodes[0] = tables.nodes[0].replace(flags=tskit.NODE_IS_SAMPLE)``
(:user:`jeromekelleher`, :user:`benjeffery`, :issue:`1545`, :pr:`1600`).
Expand Down
24 changes: 24 additions & 0 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -6632,6 +6632,26 @@ TableCollection_sort(TableCollection *self, PyObject *args, PyObject *kwds)
return ret;
}

static PyObject *
TableCollection_sort_individuals(TableCollection *self, PyObject *args, PyObject *kwds)
{
int err;
PyObject *ret = NULL;

if (TableCollection_check_state(self) != 0) {
goto out;
}

err = tsk_table_collection_individual_topological_sort(self->tables, 0);
if (err != 0) {
handle_library_error(err);
goto out;
}
ret = Py_BuildValue("");
out:
return ret;
}

static PyObject *
TableCollection_canonicalise(TableCollection *self, PyObject *args, PyObject *kwds)
{
Expand Down Expand Up @@ -7115,6 +7135,10 @@ static PyMethodDef TableCollection_methods[] = {
.ml_meth = (PyCFunction) TableCollection_sort,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
.ml_doc = "Sorts the tables to satisfy tree sequence requirements." },
{ .ml_name = "sort_individuals",
.ml_meth = (PyCFunction) TableCollection_sort_individuals,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
.ml_doc = "Sorts the individual table topologically" },
{ .ml_name = "canonicalise",
.ml_meth = (PyCFunction) TableCollection_canonicalise,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
Expand Down
20 changes: 10 additions & 10 deletions python/tests/data/svg/ts_multiroot.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions python/tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ def test_fromdict_bad_args(self):
with pytest.raises(TypeError):
tc.fromdict(bad_type)

def test_sort_individuals(self):
tc = _tskit.TableCollection(1)
tc.sort_individuals()


class TestIbd:
def test_uninitialised(self):
Expand Down
10 changes: 4 additions & 6 deletions python/tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3204,12 +3204,10 @@ def test_shuffled_individual_parent_mapping(self, wf_sim_with_individual_metadat
shuffle_migrations=False,
)
# Check we have a mixed up order
with pytest.raises(
tskit.LibraryError,
match="Individuals must be provided in an order where"
" children are after their parent individuals",
):
tables.tree_sequence()
tables2 = tables.copy()
tables2.sort_individuals()
with pytest.raises(AssertionError, match="IndividualTable row 0 differs"):
tables.assert_equals(tables2)

tables.simplify()
metadata = [
Expand Down
Loading

0 comments on commit c5802b4

Please sign in to comment.