
Add probability module #551

Merged: 28 commits, Sep 15, 2019
Changes from 25 commits

Commits (28)
d947820
add probability module.
lukasheinrich Sep 6, 2019
bcf0d3e
pyflakes
lukasheinrich Sep 6, 2019
4e740f7
add Identical
lukasheinrich Sep 7, 2019
aa04249
data projection
lukasheinrich Sep 7, 2019
dc7aaab
use Independent on MainModel
lukasheinrich Sep 7, 2019
66fede9
showcase delay
lukasheinrich Sep 7, 2019
7c950f6
Revert "showcase delay"
lukasheinrich Sep 7, 2019
65088b4
_dataprojection as in subconstraints
lukasheinrich Sep 7, 2019
9f69f03
rudimentary joint pdf
lukasheinrich Sep 7, 2019
a5a1785
add probability module.
lukasheinrich Sep 6, 2019
784dae7
pyflakes
lukasheinrich Sep 6, 2019
6e9fcac
add Identical
lukasheinrich Sep 7, 2019
1b87ac0
data projection
lukasheinrich Sep 7, 2019
39b331c
use Independent on MainModel
lukasheinrich Sep 7, 2019
6136fa8
showcase delay
lukasheinrich Sep 7, 2019
f0f43bd
Revert "showcase delay"
lukasheinrich Sep 7, 2019
6887846
_dataprojection as in subconstraints
lukasheinrich Sep 7, 2019
ca8c196
rudimentary joint pdf
lukasheinrich Sep 7, 2019
9d132cd
Add pyhf.probability to public API docs
matthewfeickert Sep 9, 2019
26e36bb
Use more explicit variable names
matthewfeickert Sep 9, 2019
cee2080
Simplify return of joint_logpdf
matthewfeickert Sep 9, 2019
81246b8
Update src/pyhf/probability.py
lukasheinrich Sep 10, 2019
e3bfb65
merge
lukasheinrich Sep 10, 2019
405ba71
merge
lukasheinrich Sep 10, 2019
da6a7a9
remove comment
lukasheinrich Sep 10, 2019
4621764
fix
lukasheinrich Sep 15, 2019
a9d8134
fix
lukasheinrich Sep 15, 2019
359e7ed
fix
lukasheinrich Sep 15, 2019
19 changes: 17 additions & 2 deletions docs/api.rst
@@ -16,8 +16,23 @@ Top-Level
get_backend
set_backend

Making Probability Distribution Functions (PDFs)
------------------------------------------------
Probability Distribution Functions (PDFs)
-----------------------------------------

.. currentmodule:: pyhf.probability

.. autosummary::
   :toctree: _generated/
   :nosignatures:
   :template: modifierclass.rst

   Normal
   Poisson
   Independent
   joint_logpdf

Making Models from PDFs
-----------------------

.. currentmodule:: pyhf.pdf

48 changes: 30 additions & 18 deletions src/pyhf/constraints.py
@@ -1,5 +1,6 @@
from . import get_backend, default_backend
from . import events
from . import probability as prob
from .parameters import ParamViewer


@@ -48,7 +49,7 @@ def __init__(self, pdfconfig, batch_size=None):
normal_constraint_sigmas.append([1.0] * len(thisauxdata))

self._normal_data = None
self._batched_sigmas = None
self._sigmas = None
self._access_field = None
# if this constraint term is at all used (non-zero idx selection
# start preparing constant tensors
@@ -58,10 +59,11 @@
)

_normal_sigmas = default_backend.concatenate(normal_constraint_sigmas)
sigmas = default_backend.reshape(_normal_sigmas, (1, -1)) # (1, normals)
self._batched_sigmas = default_backend.tile(
sigmas, (self.batch_size or 1, 1)
)
if self.batch_size:
sigmas = default_backend.reshape(_normal_sigmas, (1, -1))
self._sigmas = default_backend.tile(sigmas, (self.batch_size or 1, 1))
else:
self._sigmas = _normal_sigmas

access_field = default_backend.concatenate(
self.param_viewer.index_selection, axis=1
@@ -75,10 +77,16 @@ def _precompute(self):
if not self.param_viewer.index_selection:
return
tensorlib, _ = get_backend()
self.batched_sigmas = tensorlib.astensor(self._batched_sigmas)
self.sigmas = tensorlib.astensor(self._sigmas)
self.normal_data = tensorlib.astensor(self._normal_data, dtype='int')
self.access_field = tensorlib.astensor(self._access_field, dtype='int')

def _dataprojection(self, auxdata):
tensorlib, _ = get_backend()
auxdata = tensorlib.astensor(auxdata)
normal_data = tensorlib.gather(auxdata, self.normal_data)
return normal_data

def logpdf(self, auxdata, pars):
tensorlib, _ = get_backend()
if not self.param_viewer.index_selection:
@@ -103,12 +111,12 @@ def logpdf(self, auxdata, pars):
normal_means = tensorlib.gather(flat_pars, self.access_field)

# pdf pars are done, now get data and compute
auxdata = tensorlib.astensor(auxdata)
normal_data = tensorlib.gather(auxdata, self.normal_data)
normal = tensorlib.normal_logpdf(normal_data, normal_means, self.batched_sigmas)
result = tensorlib.sum(normal, axis=1)
if self.batch_size is None:
return result[0]
normal_means = normal_means[0]

result = prob.Independent(
prob.Normal(normal_means, self.sigmas), batch_size=self.batch_size
).log_prob(self._dataprojection(auxdata))
return result


@@ -188,6 +196,12 @@ def _precompute(self):
self.access_field = tensorlib.astensor(self._access_field, dtype='int')
self.batched_factors = tensorlib.astensor(self._batched_factors)

def _dataprojection(self, auxdata):
tensorlib, _ = get_backend()
auxdata = tensorlib.astensor(auxdata)
poisson_data = tensorlib.gather(auxdata, self.poisson_data)
return poisson_data

def logpdf(self, auxdata, pars):
tensorlib, _ = get_backend()
if not self.param_viewer.index_selection:
@@ -214,12 +228,10 @@ def logpdf(self, auxdata, pars):
pois_rates = tensorlib.product(
tensorlib.stack([nuispars, self.batched_factors]), axis=0
)

# pdf pars are done, now get data and compute
auxdata = tensorlib.astensor(auxdata)
poisson_data = tensorlib.gather(auxdata, self.poisson_data)
result = tensorlib.poisson_logpdf(poisson_data, pois_rates)
result = tensorlib.sum(result, axis=1)
if self.batch_size is None:
return result[0]
pois_rates = pois_rates[0]
# pdf pars are done, now get data and compute
result = prob.Independent(
prob.Poisson(pois_rates), batch_size=self.batch_size
).log_prob(self._dataprojection(auxdata))
return result
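
An illustrative sketch of the equivalence the refactored constraint terms above rely on (not part of the diff; the toy numbers and the default NumPy backend are assumptions): summing per-parameter `normal_logpdf` values by hand, as the old Gaussian term did, matches wrapping a `Normal` in `Independent` and asking for one joint log-probability.

```python
import pyhf
from pyhf import probability

tensorlib, _ = pyhf.get_backend()  # default NumPy backend assumed

# hypothetical constraint inputs: three constrained parameters with unit widths
means = tensorlib.astensor([1.0, 0.0, 0.5])
sigmas = tensorlib.astensor([1.0, 1.0, 1.0])
auxdata = tensorlib.astensor([1.2, -0.3, 0.4])

# old style: explicit per-term logpdf followed by an explicit sum
old = tensorlib.sum(tensorlib.normal_logpdf(auxdata, means, sigmas), axis=0)

# new style: Independent performs the reduction over the trailing axis
new = probability.Independent(probability.Normal(means, sigmas)).log_prob(auxdata)

assert tensorlib.tolist(old) == tensorlib.tolist(new)
```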
41 changes: 26 additions & 15 deletions src/pyhf/pdf.py
@@ -7,6 +7,7 @@
from . import modifiers
from . import utils
from . import events
from . import probability as prob
from .constraints import gaussian_constraint_combined, poisson_constraint_combined
from .parameters import reduce_paramsets_requirements, ParamViewer

@@ -211,18 +212,21 @@ def expected_data(self, pars):
return auxdata[0]
return auxdata

def _dataprojection(self, data):
tensorlib, _ = get_backend()
cut = tensorlib.shape(data)[0] - len(self.config.auxdata)
return data[cut:]

def logpdf(self, auxdata, pars):
tensorlib, _ = get_backend()
normal = self.constraints_gaussian.logpdf(auxdata, pars)
poisson = self.constraints_poisson.logpdf(auxdata, pars)
if self.batch_size is None:
return normal + poisson
terms = tensorlib.stack([normal, poisson])
return tensorlib.sum(terms, axis=0)
return prob.joint_logpdf([normal, poisson])


class _MainModel(object):
def __init__(self, config, mega_mods, nominal_rates, batch_size):
self.config = config
self._factor_mods = [
modtype
for modtype, mod in modifiers.uncombined.items()
@@ -260,10 +264,13 @@ def _precompute(self):
def logpdf(self, maindata, pars):
tensorlib, _ = get_backend()
lambdas_data = self.expected_data(pars)
summands = tensorlib.poisson_logpdf(maindata, lambdas_data)
if self.batch_size is None:
return tensorlib.sum(summands, axis=0)
return tensorlib.sum(summands, axis=1)
result = prob.Independent(prob.Poisson(lambdas_data)).log_prob(maindata)
return result

def _dataprojection(self, data):
tensorlib, _ = get_backend()
cut = tensorlib.shape(data)[0] - len(self.config.auxdata)
return data[:cut]

def _modifications(self, pars):
deltas = list(
@@ -531,16 +538,20 @@ def logpdf(self, pars, data):
try:
tensorlib, _ = get_backend()
pars, data = tensorlib.astensor(pars), tensorlib.astensor(data)
cut = tensorlib.shape(data)[0] - len(self.config.auxdata)
actual_data, aux_data = data[:cut], data[cut:]

mainpdf = self.mainlogpdf(actual_data, pars)
constraint = self.constraint_logpdf(aux_data, pars)
actual_data = self.main_model._dataprojection(data)
aux_data = self.constraint_model._dataprojection(data)

mainpdf = self.main_model.logpdf(actual_data, pars)
constraint = self.constraint_model.logpdf(aux_data, pars)

result = prob.joint_logpdf([mainpdf, constraint])

result = tensorlib.sum(tensorlib.stack([mainpdf, constraint]), axis=0)
if not self.batch_size:
if (
not self.batch_size
): # force to be not scalar, should be changed with #522
return tensorlib.reshape(result, (1,))
return tensorlib.astensor(result)
return result
except:
log.error(
'eval failed for data {} pars: {}'.format(
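
A rough sketch of the new control flow in `Model.logpdf` (illustrative only, not part of the diff; the toy data, rates, and `n_aux` stand-in are assumptions): the data vector is split the way the two `_dataprojection` helpers split it, each sub-model reduces its slice to one log-probability, and `joint_logpdf` adds the terms.

```python
from pyhf import probability

data = [53.0, 49.0, 0.2]   # two main-measurement bins followed by one auxiliary datum
n_aux = 1                  # stands in for len(self.config.auxdata)
cut = len(data) - n_aux
main_data, aux_data = data[:cut], data[cut:]   # what the two _dataprojection helpers return

# each sub-model reduces its slice to a single joint log-probability ...
main_term = probability.Independent(probability.Poisson([52.0, 48.0])).log_prob(main_data)
constraint_term = probability.Independent(probability.Normal([0.0], [1.0])).log_prob(aux_data)

# ... and joint_logpdf stacks and sums the already-reduced terms
total = probability.joint_logpdf([main_term, constraint_term])
print(total)
```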
46 changes: 46 additions & 0 deletions src/pyhf/probability.py
@@ -0,0 +1,46 @@
from . import get_backend


class Poisson(object):
    def __init__(self, rate):
        tensorlib, _ = get_backend()
        self.lam = tensorlib.astensor(rate)

    def log_prob(self, value):
        tensorlib, _ = get_backend()
        n = tensorlib.astensor(value)
        return tensorlib.poisson_logpdf(n, self.lam)


class Normal(object):
    def __init__(self, loc, scale):
        tensorlib, _ = get_backend()
        self.mu = tensorlib.astensor(loc)
        self.sigma = tensorlib.astensor(scale)

    def log_prob(self, value):
        tensorlib, _ = get_backend()
        return tensorlib.normal_logpdf(value, self.mu, self.sigma)


class Independent(object):
    '''
    A probability density corresponding to the joint
    distribution of a batch of identically distributed random
    variables.
    '''

    def __init__(self, batched_pdf, batch_size=None):
        self.batch_size = batch_size
        self._pdf = batched_pdf

    def log_prob(self, value):
        tensorlib, _ = get_backend()
        _log_prob = self._pdf.log_prob(value)
        return tensorlib.sum(_log_prob, axis=-1)


def joint_logpdf(terms):
    tensorlib, _ = get_backend()
    terms = tensorlib.stack(terms)
    return tensorlib.sum(terms, axis=0)
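
A short sketch of the batching behaviour (illustrative, not part of the diff; the rates and data are made up): `Independent` always reduces over the trailing axis, so a leading batch dimension survives, which is what the batched constraint terms in constraints.py rely on; the `batch_size` argument itself is only stored.

```python
from pyhf import probability

# unbatched: one joint log-probability over two bins
single = probability.Independent(probability.Poisson([5.0, 10.0]))
print(single.log_prob([4.0, 9.0]))      # scalar

# batched: one joint log-probability per batch entry
rates = [[5.0, 10.0], [6.0, 11.0]]
data = [[4.0, 9.0], [4.0, 9.0]]
batched = probability.Independent(probability.Poisson(rates), batch_size=2)
print(batched.log_prob(data))           # shape (2,)
```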
52 changes: 52 additions & 0 deletions tests/test_probability.py
@@ -0,0 +1,52 @@
from pyhf import probability
from pyhf import get_backend


def test_poisson(backend):
    result = probability.Poisson([10.0]).log_prob(2.0)
    assert result.shape == (1,)

    result = probability.Poisson([10.0, 10.0]).log_prob(2.0)
    assert result.shape == (2,)

    result = probability.Poisson([10.0, 10.0]).log_prob([2.0, 3.0])
    assert result.shape == (2,)

    result = probability.Poisson([10.0, 10.0]).log_prob([[2.0, 3.0]])
    assert result.shape == (1, 2)


def test_normal(backend):
    result = probability.Normal([10.0], [1]).log_prob(2.0)
    assert result.shape == (1,)

    result = probability.Normal([10.0, 10.0], [1, 1]).log_prob(2.0)
    assert result.shape == (2,)

    result = probability.Normal([10.0, 10.0], [10.0, 10.0]).log_prob([2.0, 3.0])
    assert result.shape == (2,)

    result = probability.Normal([10.0, 10.0], [10.0, 10.0]).log_prob([[2.0, 3.0]])
    assert result.shape == (1, 2)


def test_joint(backend):
    tensorlib, _ = backend
    p1 = probability.Poisson([10.0]).log_prob(2.0)
    p2 = probability.Poisson([10.0]).log_prob(3.0)
    assert tensorlib.tolist(probability.joint_logpdf([p1, p2])) == tensorlib.tolist(
        p1 + p2
    )


def test_independent(backend):
    tensorlib, _ = backend
    result = probability.Independent(probability.Poisson([10.0, 10])).log_prob(
        [2.0, 3.0]
    )

    p1 = probability.Poisson([10.0]).log_prob(2.0)
    p2 = probability.Poisson([10.0]).log_prob(3.0)
    assert tensorlib.tolist(probability.joint_logpdf([p1, p2]))[0] == tensorlib.tolist(
        result
    )