Skip to content

Commit

Permalink
Don't sort individuals by default
Browse files Browse the repository at this point in the history
  • Loading branch information
benjeffery committed Oct 18, 2021
1 parent a0d8424 commit 7a2e265
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 121 deletions.
28 changes: 4 additions & 24 deletions c/tests/test_tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -6028,10 +6028,10 @@ test_sort_tables_offsets(void)
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));

/* Check that sorting would have had an effect */
/* Check that sorting would have had no effect as individuals not in default sort*/
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_equals(&tables, &copy, 0));
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));

tsk_memset(&bookmark, 0, sizeof(bookmark));
bookmark.individuals = tables.individuals.num_rows - 1;
Expand Down Expand Up @@ -6547,39 +6547,19 @@ test_sort_tables_individuals(void)
"4 0.3 -1 3\n"
"5 0.3 3 4\n"
"6 0.3 4 5\n";
const char *individuals_cycle = "1 0.2 2 0\n"
"2 0.5 0 1\n"
"3 0.3 1 2\n";

ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1.0;
parse_individuals(individuals, &tables.individuals);
/* Not calling with TSK_CHECK_TREES so casting is safe */
ret = (int) tsk_table_collection_check_integrity(
&tables, TSK_CHECK_INDIVIDUAL_ORDERING);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);

ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = (int) tsk_table_collection_check_integrity(
&tables, TSK_CHECK_INDIVIDUAL_ORDERING);
CU_ASSERT_EQUAL_FATAL(ret, 0);

/* Check that the sort is stable */
ret = tsk_table_collection_copy(&tables, &copy, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));

/* Table sort doesn't touch individuals */
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT(tsk_table_collection_equals(&tables, &copy, 0));

/* Errors on cycle */
tsk_individual_table_clear(&tables.individuals);
parse_individuals(individuals_cycle, &tables.individuals);
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_PARENT_CYCLE);
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));

tsk_table_collection_free(&tables);
tsk_table_collection_free(&copy);
Expand Down
4 changes: 2 additions & 2 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -2066,10 +2066,10 @@ test_simplest_bad_individuals(void)
tsk_treeseq_free(&ts);
tables.individuals.parents[0] = TSK_NULL;

/* Unsorted individuals */
/* Unsorted individuals are OK*/
tables.individuals.parents[0] = 1;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);
CU_ASSERT_EQUAL(ret, 0);
tsk_treeseq_free(&ts);
tables.individuals.parents[0] = TSK_NULL;

Expand Down
146 changes: 74 additions & 72 deletions c/tskit/tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -6143,73 +6143,75 @@ tsk_individual_table_topological_sort(
return ret;
}

static int
tsk_table_sorter_sort_individuals(tsk_table_sorter_t *self)
{
int ret = 0;
tsk_id_t i, ret_id;
tsk_individual_table_t copy;
tsk_individual_t individual;
tsk_individual_table_t *individuals = &self->tables->individuals;
tsk_node_table_t *nodes = &self->tables->nodes;
tsk_size_t num_individuals = individuals->num_rows;
tsk_id_t *traversal_order = tsk_malloc(num_individuals * sizeof(*traversal_order));
tsk_id_t *new_id_map = tsk_malloc(num_individuals * sizeof(*new_id_map));

if (new_id_map == NULL || traversal_order == NULL) {
ret = TSK_ERR_NO_MEMORY;
goto out;
}
tsk_memset(new_id_map, 0xff, num_individuals * sizeof(*new_id_map));

ret = tsk_individual_table_copy(individuals, &copy, 0);
if (ret != 0) {
goto out;
}

ret = tsk_individual_table_clear(individuals);
if (ret != 0) {
goto out;
}

ret = tsk_individual_table_topological_sort(&copy, traversal_order, NULL);
if (ret != 0) {
goto out;
}

/* The sorted individuals are in reverse order */
for (i = (tsk_id_t) num_individuals - 1; i >= 0; i--) {
tsk_individual_table_get_row_unsafe(&copy, traversal_order[i], &individual);
ret_id = tsk_individual_table_add_row(individuals, individual.flags,
individual.location, individual.location_length, individual.parents,
individual.parents_length, individual.metadata, individual.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
new_id_map[traversal_order[i]] = ret_id;
}

/* Rewrite the parent ids */
for (i = 0; i < (tsk_id_t) individuals->parents_length; i++) {
if (individuals->parents[i] != TSK_NULL) {
individuals->parents[i] = new_id_map[individuals->parents[i]];
}
}
/* Rewrite the node individual ids */
for (i = 0; i < (tsk_id_t) nodes->num_rows; i++) {
if (nodes->individual[i] != TSK_NULL) {
nodes->individual[i] = new_id_map[nodes->individual[i]];
}
}

ret = 0;
out:
tsk_safe_free(traversal_order);
tsk_safe_free(new_id_map);
tsk_individual_table_free(&copy);
return ret;
}
// static int
// tsk_table_sorter_sort_individuals(tsk_table_sorter_t *self)
// {
// int ret = 0;
// tsk_id_t i, ret_id;
// tsk_individual_table_t copy;
// tsk_individual_t individual;
// tsk_individual_table_t *individuals = &self->tables->individuals;
// tsk_node_table_t *nodes = &self->tables->nodes;
// tsk_size_t num_individuals = individuals->num_rows;
// tsk_id_t *traversal_order = tsk_malloc(num_individuals *
// sizeof(*traversal_order)); tsk_id_t *new_id_map = tsk_malloc(num_individuals *
// sizeof(*new_id_map));

// if (new_id_map == NULL || traversal_order == NULL) {
// ret = TSK_ERR_NO_MEMORY;
// goto out;
// }
// tsk_memset(new_id_map, 0xff, num_individuals * sizeof(*new_id_map));

// ret = tsk_individual_table_copy(individuals, &copy, 0);
// if (ret != 0) {
// goto out;
// }

// ret = tsk_individual_table_clear(individuals);
// if (ret != 0) {
// goto out;
// }

// ret = tsk_individual_table_topological_sort(&copy, traversal_order, NULL);
// if (ret != 0) {
// goto out;
// }

// /* The sorted individuals are in reverse order */
// for (i = (tsk_id_t) num_individuals - 1; i >= 0; i--) {
// tsk_individual_table_get_row_unsafe(&copy, traversal_order[i], &individual);
// ret_id = tsk_individual_table_add_row(individuals, individual.flags,
// individual.location, individual.location_length, individual.parents,
// individual.parents_length, individual.metadata,
// individual.metadata_length);
// if (ret_id < 0) {
// ret = (int) ret_id;
// goto out;
// }
// new_id_map[traversal_order[i]] = ret_id;
// }

// /* Rewrite the parent ids */
// for (i = 0; i < (tsk_id_t) individuals->parents_length; i++) {
// if (individuals->parents[i] != TSK_NULL) {
// individuals->parents[i] = new_id_map[individuals->parents[i]];
// }
// }
// /* Rewrite the node individual ids */
// for (i = 0; i < (tsk_id_t) nodes->num_rows; i++) {
// if (nodes->individual[i] != TSK_NULL) {
// nodes->individual[i] = new_id_map[nodes->individual[i]];
// }
// }

// ret = 0;
// out:
// tsk_safe_free(traversal_order);
// tsk_safe_free(new_id_map);
// tsk_individual_table_free(&copy);
// return ret;
// }

static int
tsk_table_sorter_sort_individuals_canonical(tsk_table_sorter_t *self)
Expand Down Expand Up @@ -6379,7 +6381,7 @@ tsk_table_sorter_run(tsk_table_sorter_t *self, const tsk_bookmark_t *start)
goto out;
}
}
if (!skip_individuals) {
if (!skip_individuals && self->sort_individuals) {
ret = self->sort_individuals(self);
if (ret != 0) {
goto out;
Expand Down Expand Up @@ -6415,7 +6417,8 @@ tsk_table_sorter_init(
/* Set the sort_edges and sort_mutations methods to the default. */
self->sort_edges = tsk_table_sorter_sort_edges;
self->sort_mutations = tsk_table_sorter_sort_mutations;
self->sort_individuals = tsk_table_sorter_sort_individuals;
/* Default sort doesn't touch individuals */
self->sort_individuals = NULL;
out:
return ret;
}
Expand Down Expand Up @@ -9636,11 +9639,10 @@ tsk_table_collection_check_integrity(
tsk_id_t ret = 0;

if (options & TSK_CHECK_TREES) {
/* Checking the trees implies all the other checks */
/* Checking the trees implies these checks */
options |= TSK_CHECK_EDGE_ORDERING | TSK_CHECK_SITE_ORDERING
| TSK_CHECK_SITE_DUPLICATES | TSK_CHECK_MUTATION_ORDERING
| TSK_CHECK_INDIVIDUAL_ORDERING | TSK_CHECK_MIGRATION_ORDERING
| TSK_CHECK_INDEXES;
| TSK_CHECK_MIGRATION_ORDERING | TSK_CHECK_INDEXES;
}

if (self->sequence_length <= 0) {
Expand Down
20 changes: 10 additions & 10 deletions python/tests/data/svg/ts_multiroot.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 7 additions & 6 deletions python/tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3204,12 +3204,13 @@ def test_shuffled_individual_parent_mapping(self, wf_sim_with_individual_metadat
shuffle_migrations=False,
)
# Check we have a mixed up order
with pytest.raises(
tskit.LibraryError,
match="Individuals must be provided in an order where"
" children are after their parent individuals",
):
tables.tree_sequence()
# TODO replace with check by sorting and checking different
# with pytest.raises(
# tskit.LibraryError,
# match="Individuals must be provided in an order where"
# " children are after their parent individuals",
# ):
# tables.tree_sequence()

tables.simplify()
metadata = [
Expand Down
6 changes: 1 addition & 5 deletions python/tests/test_text_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def test_single_family_map_parent_ids(self):
FamEntry(iid="3", pat="1", mat="2"),
]
tb = self.get_parsed_fam(entries=entries)
assert np.array_equal(tb[2].parents, [1, 0])
assert np.array_equal(tb[2].parents, [0, 1])

def test_missing_parent_id(self):
# KeyError raised if at least one parent (PAT) does not exist in dataset
Expand Down Expand Up @@ -243,7 +243,3 @@ def test_children_before_parents(self, tmp_path):
for row in tb:
tc.individuals.append(row)
tc.tree_sequence() # creating tree sequence should succeed

for idx in range(2):
assert np.array_equal(tb[idx].parents, [-1, -1])
assert np.array_equal(tb[2].parents, [1, 0])
2 changes: 0 additions & 2 deletions python/tests/tsutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -1132,8 +1132,6 @@ def py_sort(tables, canonical=False):
)
)
tables.nodes.individual = [ind_id_map[i] for i in tables.nodes.individual]
else:
sort_individual_table(tables)


def algorithm_T(ts):
Expand Down

0 comments on commit 7a2e265

Please sign in to comment.