
Add probability module #551

Merged: 28 commits, Sep 15, 2019
Changes from 25 commits

Commits (28)
d947820
add probability module.
lukasheinrich Sep 6, 2019
bcf0d3e
pyflakes
lukasheinrich Sep 6, 2019
4e740f7
add Identical
lukasheinrich Sep 7, 2019
aa04249
data projection
lukasheinrich Sep 7, 2019
dc7aaab
use Independent on MainModel
lukasheinrich Sep 7, 2019
66fede9
showcase delay
lukasheinrich Sep 7, 2019
7c950f6
Revert "showcase delay"
lukasheinrich Sep 7, 2019
65088b4
_dataprojection as in subconstraints
lukasheinrich Sep 7, 2019
9f69f03
rudimentary joint pdf
lukasheinrich Sep 7, 2019
a5a1785
add probability module.
lukasheinrich Sep 6, 2019
784dae7
pyflakes
lukasheinrich Sep 6, 2019
6e9fcac
add Identical
lukasheinrich Sep 7, 2019
1b87ac0
data projection
lukasheinrich Sep 7, 2019
39b331c
use Independent on MainModel
lukasheinrich Sep 7, 2019
6136fa8
showcase delay
lukasheinrich Sep 7, 2019
f0f43bd
Revert "showcase delay"
lukasheinrich Sep 7, 2019
6887846
_dataprojection as in subconstraints
lukasheinrich Sep 7, 2019
ca8c196
rudimentary joint pdf
lukasheinrich Sep 7, 2019
9d132cd
Add pyhf.probability to public API docs
matthewfeickert Sep 9, 2019
26e36bb
Use more explicit variable names
matthewfeickert Sep 9, 2019
cee2080
Simplify return of joint_logpdf
matthewfeickert Sep 9, 2019
81246b8
Update src/pyhf/probability.py
lukasheinrich Sep 10, 2019
e3bfb65
merge
lukasheinrich Sep 10, 2019
405ba71
merge
lukasheinrich Sep 10, 2019
da6a7a9
remove comment
lukasheinrich Sep 10, 2019
4621764
fix
lukasheinrich Sep 15, 2019
a9d8134
fix
lukasheinrich Sep 15, 2019
359e7ed
fix
lukasheinrich Sep 15, 2019
19 changes: 17 additions & 2 deletions docs/api.rst
@@ -16,8 +16,23 @@ Top-Level
get_backend
set_backend

Making Probability Distribution Functions (PDFs)
------------------------------------------------
Probability Distribution Functions (PDFs)
-----------------------------------------

.. currentmodule:: pyhf.probability

.. autosummary::
   :toctree: _generated/
   :nosignatures:
   :template: modifierclass.rst

   Normal
   Poisson
   Independent
   joint_logpdf

Making Models from PDFs
-----------------------

.. currentmodule:: pyhf.pdf

48 changes: 30 additions & 18 deletions src/pyhf/constraints.py
@@ -1,5 +1,6 @@
from . import get_backend, default_backend
from . import events
from . import probability as prob
from .parameters import ParamViewer


@@ -48,7 +49,7 @@ def __init__(self, pdfconfig, batch_size=None):
normal_constraint_sigmas.append([1.0] * len(thisauxdata))

self._normal_data = None
self._batched_sigmas = None
self._sigmas = None
self._access_field = None
# if this constraint term is at all used (non-zero idx selection
# start preparing constant tensors
@@ -58,10 +59,11 @@
)

_normal_sigmas = default_backend.concatenate(normal_constraint_sigmas)
sigmas = default_backend.reshape(_normal_sigmas, (1, -1)) # (1, normals)
self._batched_sigmas = default_backend.tile(
sigmas, (self.batch_size or 1, 1)
)
if self.batch_size:
sigmas = default_backend.reshape(_normal_sigmas, (1, -1))
self._sigmas = default_backend.tile(sigmas, (self.batch_size or 1, 1))
else:
self._sigmas = _normal_sigmas

access_field = default_backend.concatenate(
self.param_viewer.index_selection, axis=1
@@ -75,10 +77,16 @@ def _precompute(self):
if not self.param_viewer.index_selection:
return
tensorlib, _ = get_backend()
self.batched_sigmas = tensorlib.astensor(self._batched_sigmas)
self.sigmas = tensorlib.astensor(self._sigmas)
self.normal_data = tensorlib.astensor(self._normal_data, dtype='int')
self.access_field = tensorlib.astensor(self._access_field, dtype='int')

def _dataprojection(self, auxdata):
tensorlib, _ = get_backend()
auxdata = tensorlib.astensor(auxdata)
normal_data = tensorlib.gather(auxdata, self.normal_data)
return normal_data

def logpdf(self, auxdata, pars):
tensorlib, _ = get_backend()
if not self.param_viewer.index_selection:
@@ -103,12 +111,12 @@ def logpdf(self, auxdata, pars):
normal_means = tensorlib.gather(flat_pars, self.access_field)

# pdf pars are done, now get data and compute
auxdata = tensorlib.astensor(auxdata)
normal_data = tensorlib.gather(auxdata, self.normal_data)
normal = tensorlib.normal_logpdf(normal_data, normal_means, self.batched_sigmas)
result = tensorlib.sum(normal, axis=1)
if self.batch_size is None:
return result[0]
normal_means = normal_means[0]

result = prob.Independent(
prob.Normal(normal_means, self.sigmas), batch_size=self.batch_size
).log_prob(self._dataprojection(auxdata))
return result


@@ -188,6 +196,12 @@ def _precompute(self):
self.access_field = tensorlib.astensor(self._access_field, dtype='int')
self.batched_factors = tensorlib.astensor(self._batched_factors)

def _dataprojection(self, auxdata):
tensorlib, _ = get_backend()
auxdata = tensorlib.astensor(auxdata)
poisson_data = tensorlib.gather(auxdata, self.poisson_data)
return poisson_data

def logpdf(self, auxdata, pars):
tensorlib, _ = get_backend()
if not self.param_viewer.index_selection:
@@ -214,12 +228,10 @@ def logpdf(self, auxdata, pars):
pois_rates = tensorlib.product(
tensorlib.stack([nuispars, self.batched_factors]), axis=0
)

# pdf pars are done, now get data and compute
auxdata = tensorlib.astensor(auxdata)
poisson_data = tensorlib.gather(auxdata, self.poisson_data)
result = tensorlib.poisson_logpdf(poisson_data, pois_rates)
result = tensorlib.sum(result, axis=1)
if self.batch_size is None:
return result[0]
pois_rates = pois_rates[0]
# pdf pars are done, now get data and compute
result = prob.Independent(
prob.Poisson(pois_rates), batch_size=self.batch_size
).log_prob(self._dataprojection(auxdata))
return result
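
An illustrative sketch of the equivalence the refactored constraint terms above rely on (not part of the diff; the toy numbers and the default NumPy backend are assumptions): summing per-parameter `normal_logpdf` values by hand, as the old Gaussian term did, matches wrapping a `Normal` in `Independent` and asking for one joint log-probability.

```python
import pyhf
from pyhf import probability

tensorlib, _ = pyhf.get_backend()  # default NumPy backend assumed

# hypothetical constraint inputs: three constrained parameters with unit widths
means = tensorlib.astensor([1.0, 0.0, 0.5])
sigmas = tensorlib.astensor([1.0, 1.0, 1.0])
auxdata = tensorlib.astensor([1.2, -0.3, 0.4])

# old style: explicit per-term logpdf followed by an explicit sum
old = tensorlib.sum(tensorlib.normal_logpdf(auxdata, means, sigmas), axis=0)

# new style: Independent performs the reduction over the trailing axis
new = probability.Independent(probability.Normal(means, sigmas)).log_prob(auxdata)

assert tensorlib.tolist(old) == tensorlib.tolist(new)
```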
41 changes: 26 additions & 15 deletions src/pyhf/pdf.py
@@ -7,6 +7,7 @@
from . import modifiers
from . import utils
from . import events
from . import probability as prob
from .constraints import gaussian_constraint_combined, poisson_constraint_combined
from .parameters import reduce_paramsets_requirements, ParamViewer

@@ -211,18 +212,21 @@ def expected_data(self, pars):
return auxdata[0]
return auxdata

def _dataprojection(self, data):
tensorlib, _ = get_backend()
cut = tensorlib.shape(data)[0] - len(self.config.auxdata)
return data[cut:]

def logpdf(self, auxdata, pars):
tensorlib, _ = get_backend()
normal = self.constraints_gaussian.logpdf(auxdata, pars)
poisson = self.constraints_poisson.logpdf(auxdata, pars)
if self.batch_size is None:
return normal + poisson
terms = tensorlib.stack([normal, poisson])
return tensorlib.sum(terms, axis=0)
return prob.joint_logpdf([normal, poisson])


class _MainModel(object):
def __init__(self, config, mega_mods, nominal_rates, batch_size):
self.config = config
self._factor_mods = [
modtype
for modtype, mod in modifiers.uncombined.items()
@@ -260,10 +264,13 @@ def _precompute(self):
def logpdf(self, maindata, pars):
tensorlib, _ = get_backend()
lambdas_data = self.expected_data(pars)
summands = tensorlib.poisson_logpdf(maindata, lambdas_data)
if self.batch_size is None:
return tensorlib.sum(summands, axis=0)
return tensorlib.sum(summands, axis=1)
result = prob.Independent(prob.Poisson(lambdas_data)).log_prob(maindata)
return result

def _dataprojection(self, data):
tensorlib, _ = get_backend()
cut = tensorlib.shape(data)[0] - len(self.config.auxdata)
return data[:cut]

def _modifications(self, pars):
deltas = list(
@@ -531,16 +538,20 @@ def logpdf(self, pars, data):
try:
tensorlib, _ = get_backend()
pars, data = tensorlib.astensor(pars), tensorlib.astensor(data)
cut = tensorlib.shape(data)[0] - len(self.config.auxdata)
actual_data, aux_data = data[:cut], data[cut:]

mainpdf = self.mainlogpdf(actual_data, pars)
constraint = self.constraint_logpdf(aux_data, pars)
actual_data = self.main_model._dataprojection(data)
aux_data = self.constraint_model._dataprojection(data)

mainpdf = self.main_model.logpdf(actual_data, pars)
constraint = self.constraint_model.logpdf(aux_data, pars)

result = prob.joint_logpdf([mainpdf, constraint])

result = tensorlib.sum(tensorlib.stack([mainpdf, constraint]), axis=0)
if not self.batch_size:
if (
not self.batch_size
): # force to be not scalar, should be changed with #522
return tensorlib.reshape(result, (1,))
return tensorlib.astensor(result)
return result
except:
log.error(
'eval failed for data {} pars: {}'.format(
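
A rough sketch of the new control flow in `Model.logpdf` (illustrative only, not part of the diff; the toy data, rates, and `n_aux` stand-in are assumptions): the data vector is split the way the two `_dataprojection` helpers split it, each sub-model reduces its slice to one log-probability, and `joint_logpdf` adds the terms.

```python
from pyhf import probability

data = [53.0, 49.0, 0.2]   # two main-measurement bins followed by one auxiliary datum
n_aux = 1                  # stands in for len(self.config.auxdata)
cut = len(data) - n_aux
main_data, aux_data = data[:cut], data[cut:]   # what the two _dataprojection helpers return

# each sub-model reduces its slice to a single joint log-probability ...
main_term = probability.Independent(probability.Poisson([52.0, 48.0])).log_prob(main_data)
constraint_term = probability.Independent(probability.Normal([0.0], [1.0])).log_prob(aux_data)

# ... and joint_logpdf stacks and sums the already-reduced terms
total = probability.joint_logpdf([main_term, constraint_term])
print(total)
```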
46 changes: 46 additions & 0 deletions src/pyhf/probability.py
@@ -0,0 +1,46 @@
from . import get_backend


class Poisson(object):
    def __init__(self, rate):
        tensorlib, _ = get_backend()
        self.lam = tensorlib.astensor(rate)

    def log_prob(self, value):
        tensorlib, _ = get_backend()
        n = tensorlib.astensor(value)
        return tensorlib.poisson_logpdf(n, self.lam)


class Normal(object):
    def __init__(self, loc, scale):
        tensorlib, _ = get_backend()
        self.mu = tensorlib.astensor(loc)
        self.sigma = tensorlib.astensor(scale)

    def log_prob(self, value):
        tensorlib, _ = get_backend()
        return tensorlib.normal_logpdf(value, self.mu, self.sigma)


class Independent(object):
    '''
    A probability density corresponding to the joint
    distribution of a batch of identically distributed random
    variables.
    '''

    def __init__(self, batched_pdf, batch_size=None):
        self.batch_size = batch_size
        self._pdf = batched_pdf

    def log_prob(self, value):
        tensorlib, _ = get_backend()
        _log_prob = self._pdf.log_prob(value)
        return tensorlib.sum(_log_prob, axis=-1)


def joint_logpdf(terms):
    tensorlib, _ = get_backend()
    terms = tensorlib.stack(terms)
    return tensorlib.sum(terms, axis=0)
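
A short sketch of the batching behaviour (illustrative, not part of the diff; the rates and data are made up): `Independent` always reduces over the trailing axis, so a leading batch dimension survives, which is what the batched constraint terms in constraints.py rely on; the `batch_size` argument itself is only stored.

```python
from pyhf import probability

# unbatched: one joint log-probability over two bins
single = probability.Independent(probability.Poisson([5.0, 10.0]))
print(single.log_prob([4.0, 9.0]))      # scalar

# batched: one joint log-probability per batch entry
rates = [[5.0, 10.0], [6.0, 11.0]]
data = [[4.0, 9.0], [4.0, 9.0]]
batched = probability.Independent(probability.Poisson(rates), batch_size=2)
print(batched.log_prob(data))           # shape (2,)
```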
52 changes: 52 additions & 0 deletions tests/test_probability.py
@@ -0,0 +1,52 @@
from pyhf import probability
from pyhf import get_backend


def test_poisson(backend):
    result = probability.Poisson([10.0]).log_prob(2.0)
    assert result.shape == (1,)

    result = probability.Poisson([10.0, 10.0]).log_prob(2.0)
    assert result.shape == (2,)

    result = probability.Poisson([10.0, 10.0]).log_prob([2.0, 3.0])
    assert result.shape == (2,)

    result = probability.Poisson([10.0, 10.0]).log_prob([[2.0, 3.0]])
    assert result.shape == (1, 2)


def test_normal(backend):
    result = probability.Normal([10.0], [1]).log_prob(2.0)
    assert result.shape == (1,)

    result = probability.Normal([10.0, 10.0], [1, 1]).log_prob(2.0)
    assert result.shape == (2,)

    result = probability.Normal([10.0, 10.0], [10.0, 10.0]).log_prob([2.0, 3.0])
    assert result.shape == (2,)

    result = probability.Normal([10.0, 10.0], [10.0, 10.0]).log_prob([[2.0, 3.0]])
    assert result.shape == (1, 2)


def test_joint(backend):
    tensorlib, _ = backend
    p1 = probability.Poisson([10.0]).log_prob(2.0)
    p2 = probability.Poisson([10.0]).log_prob(3.0)
    assert tensorlib.tolist(probability.joint_logpdf([p1, p2])) == tensorlib.tolist(
        p1 + p2
    )


def test_independent(backend):
    tensorlib, _ = backend
    result = probability.Independent(probability.Poisson([10.0, 10])).log_prob(
        [2.0, 3.0]
    )

    p1 = probability.Poisson([10.0]).log_prob(2.0)
    p2 = probability.Poisson([10.0]).log_prob(3.0)
    assert tensorlib.tolist(probability.joint_logpdf([p1, p2]))[0] == tensorlib.tolist(
        result
    )