diff --git a/python/tskit/trees.py b/python/tskit/trees.py index dce1bed199..7d4bcd52e3 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -6047,8 +6047,12 @@ def diversity( self, sample_sets=None, windows=None, mode="site", span_normalise=True ): """ - Computes mean genetic diversity (also knowns as "Tajima's pi") in each of the - sets of nodes from ``sample_sets``. + Computes mean genetic diversity (also known as "pi") in each of the + sets of nodes from ``sample_sets``. The statistic is also known as + "sample heterozygosity"; common citations for the definition are Nei + and Li (1979), Nei and Tajima (1981), or Tajima (1983), so it is also + called "Nei's pi" and/or "Tajima's pi". + Please see the :ref:`one-way statistics <sec_stats_sample_sets_one_way>` section for details on how the ``sample_sets`` argument is interpreted and how it interacts with the dimensions of the output array. @@ -6101,6 +6105,12 @@ def divergence( """ Computes mean genetic divergence between (and within) pairs of sets of nodes from ``sample_sets``. + This is the "average number of differences", usually referred to as "dxy"; + a common citation for this definition is Nei and Li (1979), who called it + :math:`\pi_{XY}`. Note that computing the divergence of a population to itself + gives the mean pairwise nucleotide diversity within that population, + which is :meth:`diversity <.TreeSequence.diversity>`. + Operates on ``k = 2`` sample sets at a time; please see the :ref:`multi-way statistics <sec_stats_sample_sets_multi_way>` section for details on how the ``sample_sets`` and ``indexes`` arguments are @@ -7019,6 +7029,14 @@ def f3( """ Computes Patterson's f3 statistic between three groups of nodes from ``sample_sets``. + Note that the order of the arguments of f3 differs across the literature: + here, ``f3([A, B, C])`` for sample sets ``A``, ``B``, and ``C`` + will estimate (compare to the definition in Reich et al. (2009)): + :math:`f_3(A; B, C) = \mathbb{E}[(p_A - p_B) (p_A - p_C)]`, + where :math:`p_A` is the allele frequency in ``A``. 
+ When used as a test for admixture, the putatively admixed population + is usually placed as population ``A`` (see Peter (2016) for more discussion). + Operates on ``k = 3`` sample sets at a time; please see the :ref:`multi-way statistics <sec_stats_sample_sets_multi_way>` section for details on how the ``sample_sets`` and ``indexes`` arguments are