From bdacdf92fd1f389977c8569c3856569a5dd0db9a Mon Sep 17 00:00:00 2001 From: "Documenter.jl" Date: Sun, 1 Sep 2024 07:59:45 +0000 Subject: [PATCH] build based on adc9ab2 --- dev/.documenter-siteinfo.json | 2 +- dev/API/architectures/index.html | 27 ++++++------ dev/API/core/index.html | 60 +++++++++----------------- dev/API/index.html | 2 +- dev/API/loss/index.html | 6 +-- dev/API/simulation/index.html | 12 +++--- dev/API/utility/index.html | 28 ++++++------ dev/framework/index.html | 2 +- dev/index.html | 2 +- dev/objects.inv | Bin 1807 -> 1820 bytes dev/search_index.js | 2 +- dev/workflow/advancedusage/index.html | 45 ++++++++++--------- dev/workflow/examples/index.html | 2 +- dev/workflow/overview/index.html | 2 +- 14 files changed, 87 insertions(+), 105 deletions(-) diff --git a/dev/.documenter-siteinfo.json b/dev/.documenter-siteinfo.json index a6215b24..84c6f4ea 100644 --- a/dev/.documenter-siteinfo.json +++ b/dev/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.9.4","generation_timestamp":"2024-08-28T08:07:02","documenter_version":"1.6.0"}} \ No newline at end of file +{"documenter":{"julia_version":"1.9.4","generation_timestamp":"2024-09-01T07:59:42","documenter_version":"1.6.0"}} \ No newline at end of file diff --git a/dev/API/architectures/index.html b/dev/API/architectures/index.html index 71a1aa4f..4bcd6c80 100644 --- a/dev/API/architectures/index.html +++ b/dev/API/architectures/index.html @@ -1,5 +1,5 @@ -Architectures · NeuralEstimators.jl

Architectures

Modules

The following high-level modules are often used when constructing a neural-network architecture. In particular, the DeepSet is the building block for most classes of Estimators in the package.

NeuralEstimators.DeepSetType
DeepSet(ψ, ϕ, a = mean; S = nothing)

The DeepSets representation (Zaheer et al., 2017),

\[θ̂(𝐙) = ϕ(𝐓(𝐙)),    𝐓(𝐙) = 𝐚(\{ψ(𝐙ᵢ) : i = 1, …, m\}),\]

where 𝐙 ≡ (𝐙₁', …, 𝐙ₘ')' are independent replicates from the statistical model, ψ and ϕ are neural networks, and a is a permutation-invariant aggregation function. Expert summary statistics can be incorporated as,

\[θ̂(𝐙) = ϕ((𝐓(𝐙)', 𝐒(𝐙)')'),\]

where S is a function that returns a vector of user-defined summary statistics. These user-defined summary statistics are provided either as a Function that returns a Vector, or as a vector of functions. In the case that ψ is set to nothing, only expert summary statistics will be used.

The aggregation function a can be any function that acts on an array and has a keyword argument dims that allows aggregation over a specific dimension of the array (e.g., sum, mean, maximum, minimum, logsumexp).

DeepSet objects act on data of type Vector{A}, where each element of the vector is associated with one data set (i.e., one set of independent replicates from the statistical model), and where the type A depends on the form of the data and the chosen architecture for ψ. As a rule of thumb, when A is an array, the replicates are stored in the final dimension. For example, with gridded spatial data and ψ a CNN, A should be a 4-dimensional array, with the replicates stored in the 4ᵗʰ dimension. Note that in Flux, the final dimension is usually the "batch" dimension, but batching with DeepSet objects is done at the data set level (i.e., sets of replicates are batched together).

Data stored as Vector{Arrays} are first concatenated along the replicates dimension before being passed into the summary network ψ. This means that ψ is applied to a single large array rather than many small arrays, which can substantially improve computational efficiency.

Set-level information, $𝐱$, that is not a function of the data can be passed directly into the inference network ϕ in the following manner,

\[θ̂(𝐙) = ϕ((𝐓(𝐙)', 𝐱')'),    \]

or, in the case that expert summary statistics are also used,

\[θ̂(𝐙) = ϕ((𝐓(𝐙)', 𝐒(𝐙)', 𝐱')').  \]

This is done by calling the DeepSet object on a Tuple{Vector{A}, Vector{Vector}}, where the first element of the tuple contains a vector of data sets and the second element contains a vector of set-level information (i.e., one vector for each data set).

Examples

using NeuralEstimators, Flux
+Architectures · NeuralEstimators.jl

Architectures

Modules

The following high-level modules are often used when constructing a neural-network architecture. In particular, the DeepSet is the building block for most classes of Estimators in the package.

NeuralEstimators.DeepSetType
DeepSet(ψ, ϕ, a = mean; S = nothing)

The DeepSets representation (Zaheer et al., 2017),

\[θ̂(𝐙) = ϕ(𝐓(𝐙)),    𝐓(𝐙) = 𝐚(\{ψ(𝐙ᵢ) : i = 1, …, m\}),\]

where 𝐙 ≡ (𝐙₁', …, 𝐙ₘ')' are independent replicates from the statistical model, ψ and ϕ are neural networks, and a is a permutation-invariant aggregation function. Expert summary statistics can be incorporated as,

\[θ̂(𝐙) = ϕ((𝐓(𝐙)', 𝐒(𝐙)')'),\]

where S is a function that returns a vector of user-defined summary statistics. These user-defined summary statistics are provided either as a Function that returns a Vector, or as a vector of functions. In the case that ψ is set to nothing, only expert summary statistics will be used.

The aggregation function a can be any function that acts on an array and has a keyword argument dims that allows aggregation over a specific dimension of the array (e.g., sum, mean, maximum, minimum, logsumexp).

DeepSet objects act on data of type Vector{A}, where each element of the vector is associated with one data set (i.e., one set of independent replicates from the statistical model), and where the type A depends on the form of the data and the chosen architecture for ψ. As a rule of thumb, when A is an array, the replicates are stored in the final dimension. For example, with gridded spatial data and ψ a CNN, A should be a 4-dimensional array, with the replicates stored in the 4ᵗʰ dimension. Note that in Flux, the final dimension is usually the "batch" dimension, but batching with DeepSet objects is done at the data set level (i.e., sets of replicates are batched together).

Data stored as Vector{Arrays} are first concatenated along the replicates dimension before being passed into the summary network ψ. This means that ψ is applied to a single large array rather than many small arrays, which can substantially improve computational efficiency.
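
As a concrete illustration of this storage convention, here is a minimal sketch for gridded data with a CNN summary network (the 16×16 grid, layer widths, and dummy data below are arbitrary illustrative choices, not taken from the original docstring):

using NeuralEstimators, Flux

# Two hypothetical data sets: 3 and 4 replicates of a univariate process on a 16×16 grid,
# with the replicates stored in the 4ᵗʰ dimension
Z = [rand32(16, 16, 1, m) for m ∈ (3, 4)]

p = 2   # number of parameters in the statistical model
w = 32  # width of each hidden layer

# Summary network ψ (a CNN) and inference network ϕ
ψ = Chain(Conv((3, 3), 1 => 16, relu), Flux.flatten, Dense(16 * 14 * 14, w, relu))
ϕ = Chain(Dense(w, w, relu), Dense(w, p))
θ̂ = DeepSet(ψ, ϕ)

# Apply the (untrained) estimator: one column of estimates per data set
θ̂(Z)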

Set-level information, $𝐱$, that is not a function of the data can be passed directly into the inference network ϕ in the following manner,

\[θ̂(𝐙) = ϕ((𝐓(𝐙)', 𝐱')'),    \]

or, in the case that expert summary statistics are also used,

\[θ̂(𝐙) = ϕ((𝐓(𝐙)', 𝐒(𝐙)', 𝐱')').  \]

This is done by calling the DeepSet object on a Tuple{Vector{A}, Vector{Vector}}, where the first element of the tuple contains a vector of data sets and the second element contains a vector of set-level information (i.e., one vector for each data set).

Examples

using NeuralEstimators, Flux
 
 # Two dummy data sets containing 3 and 4 replicates
 p = 5  # number of parameters in the statistical model
@@ -23,7 +23,7 @@
 ϕ = Chain(Dense(qₜ + qₛ + qₓ, w, relu), Dense(w, p))
 θ̂ = DeepSet(ψ, ϕ; S = S)
 x = [rand32(qₓ) for _ ∈ eachindex(Z)]
-θ̂((Z, x))
source
NeuralEstimators.GNNSummaryType
GNNSummary(propagation, readout; globalfeatures = nothing)

A graph neural network (GNN) module designed to serve as the summary network ψ in the DeepSet representation when the data are graphical (e.g., irregularly observed spatial data).

The propagation module transforms graphical input data into a set of hidden-feature graphs. The readout module aggregates these feature graphs into a single hidden feature vector of fixed length (i.e., a vector of summary statistics). The summary network is then defined as the composition of the propagation and readout modules.

Optionally, one may also include a module that extracts features directly from the graph, through the keyword argument globalfeatures. This module, when applied to a GNNGraph, should return a matrix of features, where the columns of the matrix correspond to the independent replicates (e.g., a 5x10 matrix is expected for 5 hidden features for each of 10 independent replicates stored in the graph).

The data should be stored as a GNNGraph or Vector{GNNGraph}, where each graph is associated with a single parameter vector. The graphs may contain subgraphs corresponding to independent replicates.

Examples

using NeuralEstimators, Flux, GraphNeuralNetworks
+θ̂((Z, x))
source
NeuralEstimators.GNNSummaryType
GNNSummary(propagation, readout; globalfeatures = nothing)

A graph neural network (GNN) module designed to serve as the summary network ψ in the DeepSet representation when the data are graphical (e.g., irregularly observed spatial data).

The propagation module transforms graphical input data into a set of hidden-feature graphs. The readout module aggregates these feature graphs into a single hidden feature vector of fixed length (i.e., a vector of summary statistics). The summary network is then defined as the composition of the propagation and readout modules.

Optionally, one may also include a module that extracts features directly from the graph, through the keyword argument globalfeatures. This module, when applied to a GNNGraph, should return a matrix of features, where the columns of the matrix correspond to the independent replicates (e.g., a 5x10 matrix is expected for 5 hidden features for each of 10 independent replicates stored in the graph).

The data should be stored as a GNNGraph or Vector{GNNGraph}, where each graph is associated with a single parameter vector. The graphs may contain subgraphs corresponding to independent replicates.

Examples

using NeuralEstimators, Flux, GraphNeuralNetworks
 using Flux: batch
 using Statistics: mean
 
@@ -55,11 +55,11 @@
 g₃ = batch([g₁, g₂])
 θ̂(g₁)
 θ̂(g₃)
-θ̂([g₁, g₂, g₃])
source

User-defined summary statistics

    The following functions correspond to summary statistics that are often useful as user-defined summary statistics in DeepSet objects.

    NeuralEstimators.samplesizeFunction
    samplesize(Z::AbstractArray)

    Computes the sample size of a set of independent realisations Z.

    Note that this function is a wrapper around numberreplicates, but it returns the number of replicates as the eltype of Z rather than as an integer.

    source
    NeuralEstimators.samplecorrelationFunction
    samplecorrelation(Z::AbstractArray)

    Computes the sample correlation matrix, R̂, and returns the vectorised strict lower triangle of R̂.

    Examples

    # 5 independent replicates of a 3-dimensional vector
    +θ̂([g₁, g₂, g₃])
    source

    User-defined summary statistics

      The following functions correspond to summary statistics that are often useful as user-defined summary statistics in DeepSet objects.

      NeuralEstimators.samplesizeFunction
      samplesize(Z::AbstractArray)

      Computes the sample size of a set of independent realisations Z.

      Note that this function is a wrapper around numberreplicates, but it returns the number of replicates as the eltype of Z rather than as an integer.
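
      A minimal usage sketch (not taken from the original docstring); with Z stored as a matrix of replicates, the returned value should equal the number of columns, promoted to the eltype of Z:

using NeuralEstimators
# 5 independent replicates of a 3-dimensional vector
z = rand(3, 5)
samplesize(z)   # expected to return 5.0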

      source
      NeuralEstimators.samplecorrelationFunction
      samplecorrelation(Z::AbstractArray)

      Computes the sample correlation matrix, R̂, and returns the vectorised strict lower triangle of R̂.

      Examples

      # 5 independent replicates of a 3-dimensional vector
       z = rand(3, 5)
      -samplecorrelation(z)
      source
      NeuralEstimators.NeighbourhoodVariogramType
      NeighbourhoodVariogram(h_max, n_bins) 
       (l::NeighbourhoodVariogram)(g::GNNGraph)

      Computes the empirical variogram,

      \[\hat{\gamma}(h \pm \delta) = \frac{1}{2|N(h \pm \delta)|} \sum_{(i,j) \in N(h \pm \delta)} (Z_i - Z_j)^2\]

      where $N(h \pm \delta) \equiv \left\{(i,j) : \|\boldsymbol{s}_i - \boldsymbol{s}_j\| \in (h-\delta, h+\delta)\right\}$ is the set of pairs of locations separated by a distance within $(h-\delta, h+\delta)$, and $|\cdot|$ denotes set cardinality.

      The distance bins are constructed to have constant width $2\delta$, chosen based on the maximum distance h_max to be considered, and the specified number of bins n_bins.

      The input type is a GNNGraph, and the empirical variogram is computed based on the corresponding graph structure. Specifically, only locations that are considered neighbours will be used when computing the empirical variogram.

      Examples

      using NeuralEstimators, Distances, LinearAlgebra
         
       # Simulate Gaussian spatial data with exponential covariance function 
      @@ -80,12 +80,12 @@
       nv = NeighbourhoodVariogram(r, 10) 
       
       # Compute the empirical variogram 
      -nv(g)
      source

      Layers

      In addition to the built-in layers provided by Flux, the following layers may be used when constructing a neural-network architecture.

      Layers

      In addition to the built-in layers provided by Flux, the following layers may be used when constructing a neural-network architecture.

      NeuralEstimators.DensePositiveType
      DensePositive(layer::Dense, g::Function)
       DensePositive(layer::Dense; g::Function = Flux.relu)

      Wrapper around the standard Dense layer that ensures positive weights (biases are left unconstrained).

      This layer can be useful for constructing (partially) monotonic neural networks (see, e.g., QuantileEstimatorContinuous).

      Examples

      using NeuralEstimators, Flux
       
       layer = DensePositive(Dense(5 => 2))
       x = rand32(5, 64)
      -layer(x)
      source
      NeuralEstimators.PowerDifferenceType
      PowerDifference(a, b)

      Function $f(x, y) = |ax - (1-a)y|^b$ for trainable parameters a ∈ [0, 1] and b > 0.

      Examples

      using NeuralEstimators, Flux
      +layer(x)
      source
      NeuralEstimators.PowerDifferenceType
      PowerDifference(a, b)

      Function $f(x, y) = |ax - (1-a)y|^b$ for trainable parameters a ∈ [0, 1] and b > 0.

      Examples

      using NeuralEstimators, Flux
       
       # Generate some data
       d = 5
      @@ -114,7 +114,10 @@
       
       # Estimates of a and b
       f.a
      -f.b
      source
      NeuralEstimators.SpatialGraphConvType
      SpatialGraphConv(in => out, g=relu; args...)

      Implements a spatial graph convolution for isotropic processes,

      \[ \boldsymbol{h}^{(l)}_{j} =
+f.b

      source
      NeuralEstimators.ResidualBlockType
      ResidualBlock(filter, in => out; stride = 1)

      Basic residual block (see here), consisting of two sequential convolutional layers and a skip (shortcut) connection that connects the input of the block directly to the output, facilitating the training of deep networks.

      Examples

      using NeuralEstimators
      +z = rand(16, 16, 1, 1)
      +b = ResidualBlock((3, 3), 1 => 32)
      +b(z)
      source
      NeuralEstimators.SpatialGraphConvType
      SpatialGraphConv(in => out, g=relu; args...)

      Implements a spatial graph convolution for isotropic processes,

      \[ \boldsymbol{h}^{(l)}_{j} = g\Big( \boldsymbol{\Gamma}_{\!1}^{(l)} \boldsymbol{h}^{(l-1)}_{j} +
@@ -135,7 +138,7 @@
 # Construct and apply spatial graph convolution layer
 l = SpatialGraphConv(1 => 10)
-l(g)

      source

      Output activation functions

        In addition to the standard activation functions provided by Flux, the following structs can be used at the end of an architecture to act as output activation functions that ensure valid estimates for certain models. NB: Although we refer to the following objects as "activation functions", they should be treated as layers that are included in the final stage of a Flux Chain().

        NeuralEstimators.CompressType
        Compress(a, b, k = 1)

        Layer that compresses its input to be within the range a and b, where each element of a is less than the corresponding element of b.

        The layer uses a logistic function,

        \[l(θ) = a + \frac{b - a}{1 + e^{-kθ}},\]

        where the arguments a and b together combine to shift and scale the logistic function to the range (a, b), and the growth rate k controls the steepness of the curve.

        The logistic function given here contains an additional parameter, θ₀, which is the input value corresponding to the function's midpoint. In Compress, we fix θ₀ = 0, since the output of a randomly initialised neural network is typically around zero.

        Examples

        using NeuralEstimators, Flux
        +l(g)
        source

        Output activation functions

          In addition to the standard activation functions provided by Flux, the following structs can be used at the end of an architecture to act as output activation functions that ensure valid estimates for certain models. NB: Although we refer to the following objects as "activation functions", they should be treated as layers that are included in the final stage of a Flux Chain().

          NeuralEstimators.CompressType
          Compress(a, b, k = 1)

          Layer that compresses its input to be within the range a and b, where each element of a is less than the corresponding element of b.

          The layer uses a logistic function,

          \[l(θ) = a + \frac{b - a}{1 + e^{-kθ}},\]

          where the arguments a and b together combine to shift and scale the logistic function to the range (a, b), and the growth rate k controls the steepness of the curve.

          The logistic function given here contains an additional parameter, θ₀, which is the input value corresponding to the function's midpoint. In Compress, we fix θ₀ = 0, since the output of a randomly initialised neural network is typically around zero.

          Examples

          using NeuralEstimators, Flux
           
           a = [25, 0.5, -pi/2]
           b = [500, 2.5, 0]
          @@ -148,7 +151,7 @@
           n = 20
           θ̂ = Chain(Dense(n, p), l)
           Z = randn(n, K)
          -θ̂(Z)
          source
          NeuralEstimators.CorrelationMatrixType
          CorrelationMatrix(d)
           (object::CorrelationMatrix)(x::Matrix, cholesky::Bool = false)

          Transforms a vector 𝐯 ∈ ℝᵈ to the parameters of an unconstrained d×d correlation matrix or, if cholesky = true, the lower Cholesky factor of an unconstrained d×d correlation matrix.

          The expected input is a Matrix with T(d-1) = (d-1)d÷2 rows, where T(d-1) is the (d-1)th triangular number (the number of free parameters in an unconstrained d×d correlation matrix), and the output is a Matrix of the same dimension. The columns of the input and output matrices correspond to independent parameter configurations (i.e., different correlation matrices).

          Internally, the layer constructs a valid Cholesky factor 𝐋 for a correlation matrix, and then extracts the strict lower triangle from the correlation matrix 𝐑 = 𝐋𝐋'. The lower triangle is extracted and vectorised in line with Julia's column-major ordering: for example, when modelling the correlation matrix

          \[\begin{bmatrix} 1 & R₁₂ & R₁₃ \\ R₂₁ & 1 & R₂₃ \\ R₃₁ & R₃₂ & 1 \end{bmatrix},\]
@@ -184,7 +187,7 @@
 L[diagind(L)] .= sqrt.(1 .- rowwisenorm(L).^2)
 L
 end
-L[1] * L[1]'
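
          A minimal usage sketch of this layer (the dimension d = 4, the number of parameter configurations, and the random inputs are arbitrary illustrative choices, not taken from the original docstring):

using NeuralEstimators
d = 4
l = CorrelationMatrix(d)
θ = randn(Float32, div(d * (d - 1), 2), 5)   # 5 parameter configurations, each with (d-1)d÷2 free parameters
l(θ)          # vectorised strict lower triangles of valid correlation matrices
l(θ, true)    # corresponding lower Cholesky factors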

          source
          NeuralEstimators.CovarianceMatrixType
          CovarianceMatrix(d)
           (object::CovarianceMatrix)(x::Matrix, cholesky::Bool = false)

          Transforms a vector 𝐯 ∈ ℝᵈ to the parameters of an unconstrained d×d covariance matrix or, if cholesky = true, the lower Cholesky factor of an unconstrained d×d covariance matrix.

          The expected input is a Matrix with T(d) = d(d+1)÷2 rows, where T(d) is the dth triangular number (the number of free parameters in an unconstrained d×d covariance matrix), and the output is a Matrix of the same dimension. The columns of the input and output matrices correspond to independent parameter configurations (i.e., different covariance matrices).

          Internally, the layer constructs a valid Cholesky factor 𝐋 and then extracts the lower triangle from the positive-definite covariance matrix 𝚺 = 𝐋𝐋'. The lower triangle is extracted and vectorised in line with Julia's column-major ordering: for example, when modelling the covariance matrix

          \[\begin{bmatrix} Σ₁₁ & Σ₁₂ & Σ₁₃ \\ Σ₂₁ & Σ₂₂ & Σ₂₃ \\ Σ₃₁ & Σ₃₂ & Σ₃₃ \end{bmatrix},\]
@@ -212,4 +215,4 @@
 # Obtain the Cholesky factor directly
 L = l(θ, true)
 L = [LowerTriangular(cpu(vectotril(x))) for x ∈ eachcol(L)]
-L[1] * L[1]'
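
          A minimal usage sketch of this layer (d = 3, the number of parameter configurations, and the random inputs are arbitrary illustrative choices, not taken from the original docstring):

using NeuralEstimators
d = 3
l = CovarianceMatrix(d)
θ = randn(Float32, div(d * (d + 1), 2), 5)   # 5 parameter configurations, each with d(d+1)÷2 free parameters
l(θ)          # vectorised lower triangles of valid covariance matrices
l(θ, true)    # corresponding lower Cholesky factors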

          source
          +L[1] * L[1]'source diff --git a/dev/API/core/index.html b/dev/API/core/index.html index 8a7a0188..dbecbec3 100644 --- a/dev/API/core/index.html +++ b/dev/API/core/index.html @@ -2,7 +2,7 @@ Core · NeuralEstimators.jl

          Core

          This page documents the classes and functions that are central to the workflow of NeuralEstimators. Its organisation reflects the order in which these classes and functions appear in a standard implementation; that is, from sampling parameters from the prior distribution, to using a neural Bayes estimator to make inference with observed data sets.

          Sampling parameters

          Parameters sampled from the prior distribution are stored as a $p \times K$ matrix, where $p$ is the number of parameters in the statistical model and $K$ is the number of parameter vectors sampled from the prior distribution.

          It can sometimes be helpful to wrap the parameter matrix in a user-defined type that also stores expensive intermediate objects needed for data simulation (e.g., Cholesky factors). In this case, the user-defined type should be a subtype of the abstract type ParameterConfigurations, whose only requirement is a field θ that stores the matrix of parameters. See Storing expensive intermediate objects for data simulation for further discussion.
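
          For instance, one might cache Cholesky factors alongside the sampled parameters. The following sketch illustrates the idea; the Gaussian-process setup, the 4×4 grid of locations, and the type and field names are illustrative assumptions, not part of the package API:

using NeuralEstimators, LinearAlgebra

struct GPParameters <: ParameterConfigurations
	θ   # 1 × K matrix of parameters
	L   # Cholesky factors, reused whenever data are simulated
end

function GPParameters(K::Integer)
	ρ = rand(K)                              # range parameters, ρ ~ U(0, 1)
	θ = permutedims(ρ)                       # 1 × K parameter matrix
	S = expandgrid(1:4, 1:4)                 # spatial locations on a small grid
	D = [norm(sᵢ - sⱼ) for sᵢ ∈ eachrow(S), sⱼ ∈ eachrow(S)]
	L = [cholesky(Symmetric(exp.(-D ./ ρₖ))).L for ρₖ ∈ ρ]
	GPParameters(θ, L)
end

parameters = GPParameters(10)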

          NeuralEstimators.ParameterConfigurationsType
          ParameterConfigurations

          An abstract supertype for user-defined types that store parameters and any intermediate objects needed for data simulation.

          The user-defined type must have a field θ that stores the $p$ × $K$ matrix of parameters, where $p$ is the number of parameters in the model and $K$ is the number of parameter vectors sampled from the prior distribution. There are no other restrictions.

          See subsetparameters for the generic function for subsetting these objects.

          Examples

          struct P <: ParameterConfigurations
           	θ
           	# other expensive intermediate objects...
          -end
          source

          Simulating data

          NeuralEstimators facilitates neural estimation for arbitrary statistical models by having the user implicitly define their model via simulated data, either as fixed instances or via a function that simulates data from the statistical model.

          The data are always stored as a Vector{A}, where each element of the vector corresponds to a data set of $m$ independent replicates associated with one parameter vector (note that $m$ is arbitrary), and where the type A depends on the multivariate structure of the data:

          • For univariate and unstructured multivariate data, A is a $d \times m$ matrix where $d$ is the dimension of each replicate (e.g., $d=1$ for univariate data).
          • For data collected over a regular grid, A is a ($N + 2$)-dimensional array, where $N$ is the dimension of the grid (e.g., $N = 1$ for time series, $N = 2$ for two-dimensional spatial grids, etc.). The first $N$ dimensions of the array correspond to the dimensions of the grid; the penultimate dimension stores the so-called "channels" (this dimension has size one for univariate processes, two for bivariate processes, and so on); and the final dimension stores the independent replicates. For example, to store 50 independent replicates of a bivariate spatial process measured over a 10x15 grid, one would construct an array of dimension 10x15x2x50.
          • For spatial data collected over irregular spatial locations, A is a GNNGraph with independent replicates (possibly with differing spatial locations) stored as subgraphs using the function batch.

          Estimators

          Several classes of neural estimators are available in the package.

          The simplest class is PointEstimator, used for constructing arbitrary mappings from the sample space to the parameter space. When constructing a generic point estimator, the user defines the loss function and therefore the Bayes estimator that will be targeted.

          Several classes cater for the estimation of marginal posterior quantiles, based on the quantile loss function (see quantileloss()); in particular, see IntervalEstimator and QuantileEstimatorDiscrete for estimating marginal posterior quantiles for a fixed set of probability levels, and QuantileEstimatorContinuous for estimating marginal posterior quantiles with the probability level as an input to the neural network.

          In addition to point estimation, the package also provides the class RatioEstimator for approximating the so-called likelihood-to-evidence ratio. The binary classification problem at the heart of this approach proceeds based on the binary cross-entropy loss.

          Users are free to choose the neural-network architecture of these estimators as they see fit (subject to some class-specific requirements), but the package also provides the convenience constructor initialise_estimator().

          NeuralEstimators.PointEstimatorType
          PointEstimator(deepset::DeepSet)

          A neural point estimator, a mapping from the sample space to the parameter space.

          The estimator leverages the DeepSet architecture. The only requirement is that the number of output neurons in the final layer of the inference network (i.e., the outer network) is equal to the number of parameters in the statistical model.

          source

          Simulating data

          NeuralEstimators facilitates neural estimation for arbitrary statistical models by having the user implicitly define their model via simulated data, either as fixed instances or via a function that simulates data from the statistical model.

          The data are always stored as a Vector{A}, where each element of the vector corresponds to a data set of $m$ independent replicates associated with one parameter vector (note that $m$ is arbitrary), and where the type A depends on the multivariate structure of the data:

          • For univariate and unstructured multivariate data, A is a $d \times m$ matrix where $d$ is the dimension of each replicate (e.g., $d=1$ for univariate data).
          • For data collected over a regular grid, A is a ($N + 2$)-dimensional array, where $N$ is the dimension of the grid (e.g., $N = 1$ for time series, $N = 2$ for two-dimensional spatial grids, etc.). The first $N$ dimensions of the array correspond to the dimensions of the grid; the penultimate dimension stores the so-called "channels" (this dimension has size one for univariate processes, two for bivariate processes, and so on); and the final dimension stores the independent replicates. For example, to store 50 independent replicates of a bivariate spatial process measured over a 10x15 grid, one would construct an array of dimension 10x15x2x50.
          • For spatial data collected over irregular spatial locations, A is a GNNGraph with independent replicates (possibly with differing spatial locations) stored as subgraphs using the function batch.
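
          To make these three storage formats concrete, here is a brief sketch (the dimensions, grid size, and randomly generated graph are arbitrary illustrative choices; the graph is built with rand_graph from GraphNeuralNetworks purely for demonstration):

using NeuralEstimators, GraphNeuralNetworks
using Flux: batch

K = 2   # number of parameter vectors
m = 5   # number of independent replicates per data set

# Unstructured multivariate data: d × m matrices
d = 3
Z_unstructured = [rand(d, m) for _ ∈ 1:K]

# Univariate data on a 16×16 grid: 16 × 16 × 1 × m arrays
Z_gridded = [rand(16, 16, 1, m) for _ ∈ 1:K]

# Irregular spatial data: replicates stored as subgraphs of a single GNNGraph
n = 100                                      # number of spatial locations
g = rand_graph(n, 400, ndata = rand(1, n))   # one replicate with univariate measurements
Z_graphical = [batch([g for _ ∈ 1:m]) for _ ∈ 1:K]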

          Estimators

          Several classes of neural estimators are available in the package.

          The simplest class is PointEstimator, used for constructing arbitrary mappings from the sample space to the parameter space. When constructing a generic point estimator, the user defines the loss function and therefore the Bayes estimator that will be targeted.

          Several classes cater for the estimation of marginal posterior quantiles, based on the quantile loss function (see quantileloss()); in particular, see IntervalEstimator and QuantileEstimatorDiscrete for estimating marginal posterior quantiles for a fixed set of probability levels, and QuantileEstimatorContinuous for estimating marginal posterior quantiles with the probability level as an input to the neural network.

          In addition to point estimation, the package also provides the class RatioEstimator for approximating the so-called likelihood-to-evidence ratio. The binary classification problem at the heart of this approach proceeds based on the binary cross-entropy loss.

          Users are free to choose the neural-network architecture of these estimators as they see fit (subject to some class-specific requirements), but the package also provides the convenience constructor initialise_estimator().

          NeuralEstimators.PointEstimatorType
          PointEstimator(deepset::DeepSet)

          A neural point estimator, a mapping from the sample space to the parameter space.

          The estimator leverages the DeepSet architecture. The only requirement is that the number of output neurons in the final layer of the inference network (i.e., the outer network) is equal to the number of parameters in the statistical model.

          source
          NeuralEstimators.IntervalEstimatorType
          IntervalEstimator(u, v = u; probs = [0.025, 0.975], g::Function = exp)
           IntervalEstimator(u, c::Union{Function,Compress}; probs = [0.025, 0.975], g::Function = exp)
           IntervalEstimator(u, v, c::Union{Function,Compress}; probs = [0.025, 0.975], g::Function = exp)

          A neural interval estimator which, given data $Z$, jointly estimates marginal posterior credible intervals based on the probability levels probs.

          The estimator employs a representation that prevents quantile crossing, namely, it constructs marginal posterior credible intervals for each parameter $\theta_i$, $i = 1, \dots, p,$ of the form,

          \[[c_i(u_i(\boldsymbol{Z})), \;\; c_i(u_i(\boldsymbol{Z})) + g(v_i(\boldsymbol{Z})))],\]

          where $\boldsymbol{u}(⋅) \equiv (u_1(\cdot), \dots, u_p(\cdot))'$ and $\boldsymbol{v}(⋅) \equiv (v_1(\cdot), \dots, v_p(\cdot))'$ are neural networks that transform data into $p$-dimensional vectors; $g(\cdot)$ is a monotonically increasing function (e.g., exponential or softplus); and each $c_i(⋅)$ is a monotonically increasing function that maps its input to the prior support of $\theta_i$.

          The functions $c_i(⋅)$ may be defined by a $p$-dimensional object of type Compress. If these functions are unspecified, they will be set to the identity function so that the range of the intervals will be unrestricted.

          If only a single neural-network architecture is provided, it will be used for both $\boldsymbol{u}(⋅)$ and $\boldsymbol{v}(⋅)$.

          The return value when applied to data is a matrix with $2p$ rows, where the first and second $p$ rows correspond to the lower and upper bounds, respectively.

          See also QuantileEstimatorDiscrete and QuantileEstimatorContinuous.

          Examples

          using NeuralEstimators, Flux
           
          @@ -28,7 +28,7 @@
           
           # Apply the (untrained) interval estimator
           estimator(Z)
          -interval(estimator, Z)
          source
          NeuralEstimators.QuantileEstimatorDiscreteType
          QuantileEstimatorDiscrete(v::DeepSet; probs = [0.05, 0.25, 0.5, 0.75, 0.95], g = Flux.softplus, i = nothing)
           (estimator::QuantileEstimatorDiscrete)(Z)
           (estimator::QuantileEstimatorDiscrete)(Z, θ₋ᵢ)

          A neural estimator that jointly estimates a fixed set of marginal posterior quantiles with probability levels $\{\tau_1, \dots, \tau_T\}$, controlled by the keyword argument probs.

          By default, the estimator approximates the marginal quantiles for all parameters in the model, that is, the quantiles of

          \[\theta_i \mid \boldsymbol{Z}\]

          for parameters $\boldsymbol{\theta} \equiv (\theta_1, \dots, \theta_p)'$. Alternatively, if initialised with i set to a positive integer, the estimator approximates the quantiles of the full conditional distribution

          \[\theta_i \mid \boldsymbol{Z}, \boldsymbol{\theta}_{-i},\]

          where $\boldsymbol{\theta}_{-i}$ denotes the parameter vector with its $i$th element removed. For ease of exposition, when targeting marginal posteriors of the form $\theta_i \mid \boldsymbol{Z}$ (i.e., the default behaviour), we define $\text{dim}(\boldsymbol{\theta}_{-i}) ≡ 0$.

          The estimator leverages the DeepSet architecture, subject to two requirements. First, the number of input neurons in the first layer of the inference network (i.e., the outer network) must be equal to the number of neurons in the final layer of the summary network plus $\text{dim}(\boldsymbol{\theta}_{-i})$. Second, the number of output neurons in the final layer of the inference network must be equal to $p - \text{dim}(\boldsymbol{\theta}_{-i})$. The estimator employs a representation that prevents quantile crossing, namely,

          \[\begin{aligned} \boldsymbol{q}^{(\tau_1)}(\boldsymbol{Z}) &= \boldsymbol{v}^{(\tau_1)}(\boldsymbol{Z}),\\
@@ -106,7 +106,7 @@
 q₁(Z, θ₋ᵢ)
 # Estimate quantiles of μ∣Z,σ with σ = 0.5 for only a single data set
-q₁(Z[1], θ₋ᵢ)

          source
          NeuralEstimators.QuantileEstimatorContinuousType
          QuantileEstimatorContinuous(deepset::DeepSet; i = nothing, num_training_probs::Integer = 1)
           (estimator::QuantileEstimatorContinuous)(Z, τ)
           (estimator::QuantileEstimatorContinuous)(Z, θ₋ᵢ, τ)

          A neural estimator targeting posterior quantiles.

          Given as input data $\boldsymbol{Z}$ and the desired probability level $\tau ∈ (0, 1)$, by default the estimator approximates the $\tau$-quantile of

          \[\theta_i \mid \boldsymbol{Z}\]

          for parameters $\boldsymbol{\theta} \equiv (\theta_1, \dots, \theta_p)'$. Alternatively, if initialised with i set to a positive integer, the estimator approximates the $\tau$-quantile of the full conditional distribution

          \[\theta_i \mid \boldsymbol{Z}, \boldsymbol{\theta}_{-i},\]

          where $\boldsymbol{\theta}_{-i}$ denotes the parameter vector with its $i$th element removed. For ease of exposition, when targeting marginal posteriors of the form $\theta_i \mid \boldsymbol{Z}$ (i.e., the default behaviour), we define $\text{dim}(\boldsymbol{\theta}_{-i}) ≡ 0$.

          The estimator leverages the DeepSet architecture, subject to two requirements. First, the number of input neurons in the first layer of the inference network (i.e., the outer network) must be equal to the number of neurons in the final layer of the summary network plus $1 + \text{dim}(\boldsymbol{\theta}_{-i})$. Second, the number of output neurons in the final layer of the inference network must be equal to $p - \text{dim}(\boldsymbol{\theta}_{-i})$.

          Although not a requirement, one may employ a (partially) monotonic neural network to prevent quantile crossing (i.e., to ensure that the $\tau_1$-quantile does not exceed the $\tau_2$-quantile for any $\tau_2 > \tau_1$). There are several ways to construct such a neural network: one simple yet effective approach is to ensure that all weights associated with $\tau$ are strictly positive (see, e.g., Cannon, 2018), and this can be done using the DensePositive layer as illustrated in the examples below.

          The return value is a matrix with $p - \text{dim}(\boldsymbol{\theta}_{-i})$ rows, corresponding to the estimated quantile for each parameter not in $\boldsymbol{\theta}_{-i}$.

          See also QuantileEstimatorDiscrete.

          Examples

          using NeuralEstimators, Flux, Distributions , InvertedIndices, Statistics
           using AlgebraOfGraphics, CairoMakie
          @@ -211,7 +211,7 @@
           q̂(Z, θ₋ᵢ, τ)
           
           # Estimate quantiles for a single data set
          -q̂(Z[1], θ₋ᵢ, τ)
          source
          NeuralEstimators.RatioEstimatorType
          RatioEstimator(deepset::DeepSet)

          A neural estimator that estimates the likelihood-to-evidence ratio,

          \[r(\boldsymbol{Z}, \boldsymbol{\theta}) \equiv p(\boldsymbol{Z} \mid \boldsymbol{\theta})/p(\boldsymbol{Z}),\]

          where $p(\boldsymbol{Z} \mid \boldsymbol{\theta})$ is the likelihood and $p(\boldsymbol{Z})$ is the marginal likelihood, also known as the model evidence.

          The estimator leverages the DeepSet architecture, subject to two requirements. First, the number of input neurons in the first layer of the inference network (i.e., the outer network) must equal the number of output neurons in the final layer of the summary network plus the number of parameters in the statistical model. Second, the number of output neurons in the final layer of the inference network must be equal to one.

          The ratio estimator is trained by solving a relatively straightforward binary classification problem. Specifically, consider the problem of distinguishing dependent parameter–data pairs ${(\boldsymbol{\theta}', \boldsymbol{Z}')' \sim p(\boldsymbol{Z}, \boldsymbol{\theta})}$ with class labels $Y=1$ from independent parameter–data pairs ${(\tilde{\boldsymbol{\theta}}', \tilde{\boldsymbol{Z}}')' \sim p(\boldsymbol{\theta})p(\boldsymbol{Z})}$ with class labels $Y=0$, and where the classes are balanced. Then the Bayes classifier under binary cross-entropy loss is given by

          \[c(\boldsymbol{Z}, \boldsymbol{\theta}) = \frac{p(\boldsymbol{Z}, \boldsymbol{\theta})}{p(\boldsymbol{Z}, \boldsymbol{\theta}) + p(\boldsymbol{\theta})p(\boldsymbol{Z})},\]

          and hence,

          \[r(\boldsymbol{Z}, \boldsymbol{\theta}) = \frac{c(\boldsymbol{Z}, \boldsymbol{\theta})}{1 - c(\boldsymbol{Z}, \boldsymbol{\theta})}.\]

          For numerical stability, training is done on the log-scale using $\log r(\boldsymbol{Z}, \boldsymbol{\theta}) = \text{logit}(c(\boldsymbol{Z}, \boldsymbol{\theta}))$.

          When applying the estimator to data, by default the likelihood-to-evidence ratio $r(\boldsymbol{Z}, \boldsymbol{\theta})$ is returned (setting the keyword argument classifier = true will yield class probability estimates). The estimated ratio can then be used in various downstream Bayesian (e.g., Hermans et al., 2020) or Frequentist (e.g., Walchessen et al., 2023) inferential algorithms.

          See also mlestimate and mapestimate for obtaining approximate maximum-likelihood and maximum-a-posteriori estimates, and sampleposterior for obtaining approximate posterior samples.

          Examples

          using NeuralEstimators, Flux, Statistics
          +q̂(Z[1], θ₋ᵢ, τ)
          source
          NeuralEstimators.RatioEstimatorType
          RatioEstimator(deepset::DeepSet)

          A neural estimator that estimates the likelihood-to-evidence ratio,

          \[r(\boldsymbol{Z}, \boldsymbol{\theta}) \equiv p(\boldsymbol{Z} \mid \boldsymbol{\theta})/p(\boldsymbol{Z}),\]

          where $p(\boldsymbol{Z} \mid \boldsymbol{\theta})$ is the likelihood and $p(\boldsymbol{Z})$ is the marginal likelihood, also known as the model evidence.

          The estimator leverages the DeepSet architecture, subject to two requirements. First, the number of input neurons in the first layer of the inference network (i.e., the outer network) must equal the number of output neurons in the final layer of the summary network plus the number of parameters in the statistical model. Second, the number of output neurons in the final layer of the inference network must be equal to one.

          The ratio estimator is trained by solving a relatively straightforward binary classification problem. Specifically, consider the problem of distinguishing dependent parameter–data pairs ${(\boldsymbol{\theta}', \boldsymbol{Z}')' \sim p(\boldsymbol{Z}, \boldsymbol{\theta})}$ with class labels $Y=1$ from independent parameter–data pairs ${(\tilde{\boldsymbol{\theta}}', \tilde{\boldsymbol{Z}}')' \sim p(\boldsymbol{\theta})p(\boldsymbol{Z})}$ with class labels $Y=0$, and where the classes are balanced. Then the Bayes classifier under binary cross-entropy loss is given by

          \[c(\boldsymbol{Z}, \boldsymbol{\theta}) = \frac{p(\boldsymbol{Z}, \boldsymbol{\theta})}{p(\boldsymbol{Z}, \boldsymbol{\theta}) + p(\boldsymbol{\theta})p(\boldsymbol{Z})},\]

          and hence,

          \[r(\boldsymbol{Z}, \boldsymbol{\theta}) = \frac{c(\boldsymbol{Z}, \boldsymbol{\theta})}{1 - c(\boldsymbol{Z}, \boldsymbol{\theta})}.\]

          For numerical stability, training is done on the log-scale using $\log r(\boldsymbol{Z}, \boldsymbol{\theta}) = \text{logit}(c(\boldsymbol{Z}, \boldsymbol{\theta}))$.

          When applying the estimator to data, by default the likelihood-to-evidence ratio $r(\boldsymbol{Z}, \boldsymbol{\theta})$ is returned (setting the keyword argument classifier = true will yield class probability estimates). The estimated ratio can then be used in various downstream Bayesian (e.g., Hermans et al., 2020) or Frequentist (e.g., Walchessen et al., 2023) inferential algorithms.

          See also mlestimate and mapestimate for obtaining approximate maximum-likelihood and maximum-a-posteriori estimates, and sampleposterior for obtaining approximate posterior samples.

          Examples

          using NeuralEstimators, Flux, Statistics
           
           # Generate data from Z|μ,σ ~ N(μ, σ²) with μ, σ ~ U(0, 1)
           p = 2     # number of unknown parameters in the statistical model
          @@ -251,7 +251,7 @@
           θ_grid = expandgrid(0:0.01:1, 0:0.01:1)'  # fine gridding of the parameter space
           θ_grid = Float32.(θ_grid)
           r̂(z, θ_grid)                              # likelihood-to-evidence ratios over grid
          -sampleposterior(r̂, z; θ_grid = θ_grid)    # posterior samples
          source
          NeuralEstimators.PiecewiseEstimatorType
          PiecewiseEstimator(estimators, changepoints)

          Creates a piecewise estimator (Sainsbury-Dale et al., 2024, sec. 2.2.2) from a collection of estimators and sample-size changepoints.

          Specifically, with $l$ estimators and sample-size changepoints $m_1 < m_2 < \dots < m_{l-1}$, the piecewise estimator takes the form,

          \[\hat{\boldsymbol{\theta}}(\boldsymbol{Z})
+sampleposterior(r̂, z; θ_grid = θ_grid)    # posterior samples

          source
          NeuralEstimators.PiecewiseEstimatorType
          PiecewiseEstimator(estimators, changepoints)

          Creates a piecewise estimator (Sainsbury-Dale et al., 2024, sec. 2.2.2) from a collection of estimators and sample-size changepoints.

          Specifically, with $l$ estimators and sample-size changepoints $m_1 < m_2 < \dots < m_{l-1}$, the piecewise estimator takes the form,

          \[\hat{\boldsymbol{\theta}}(\boldsymbol{Z})
= \begin{cases}
\hat{\boldsymbol{\theta}}_1(\boldsymbol{Z}) & m \leq m_1,\\
\hat{\boldsymbol{\theta}}_2(\boldsymbol{Z}) & m_1 < m \leq m_2,\\
\quad\vdots \\
\hat{\boldsymbol{\theta}}_l(\boldsymbol{Z}) & m > m_{l-1},
\end{cases}\]
@@ -279,9 +279,9 @@
 # Apply the (untrained) piecewise estimator to data
 Z = [rand(d, 1, m) for m ∈ (10, 50)]
-θ̂(Z)

          source
          NeuralEstimators.EnsembleType
          Ensemble(estimators)
           Ensemble(architecture::Function, J::Integer)
          -(ensemble::Ensemble)(Z; aggr = median)

          Defines an ensemble based on a collection of estimators which, when applied to data Z, returns the median (or another summary defined by aggr) of the estimates.

          The ensemble can be initialised with a collection of trained estimators and then applied immediately to observed data. Alternatively, the ensemble can be initialised with a collection of untrained estimators (or a function defining the architecture of each estimator, and the number of estimators in the ensemble), trained with train(), and then applied to observed data. In the latter case, where the ensemble is trained directly, if savepath is specified both the ensemble and component estimators will be saved.

          Note that the training of ensemble components can be done in parallel; however, this currently needs to be done manually by the user, since train() trains the ensemble components sequentially.

          The ensemble components can be accessed by indexing the ensemble directly; the number of component estimators can be obtained using length().

          Examples

          using NeuralEstimators, Flux
          +(ensemble::Ensemble)(Z; aggr = median)

          Defines an ensemble based on a collection of estimators which, when applied to data Z, returns the median (or another summary defined by aggr) of the estimates.

          The ensemble can be initialised with a collection of trained estimators and then applied immediately to observed data. Alternatively, the ensemble can be initialised with a collection of untrained estimators (or a function defining the architecture of each estimator, and the number of estimators in the ensemble), trained with train(), and then applied to observed data. In the latter case, where the ensemble is trained directly, if savepath is specified both the ensemble and component estimators will be saved.

          Note that train() currently acts sequentially on the component estimators.

          The ensemble components can be accessed by indexing the ensemble directly; the number of component estimators can be obtained using length().

          Examples

          using NeuralEstimators, Flux
           
           # Define the model, Z|θ ~ N(θ, 1), θ ~ N(0, 1)
           d = 1   # dimension of each replicate
          @@ -299,12 +299,12 @@
           end
           
           # Ensemble size
          -J = 5 
          +J = 3
           
           # Initialise ensemble
           ensemble = Ensemble(architecture, J)
          -ensemble[1]      # access component estimators by indexing 
          -length(ensemble) # number of component estimators 
          +ensemble[1]      # access component estimators by indexing
          +length(ensemble) # number of component estimators
           
           # Training
           ensemble = train(ensemble, sampler, simulator, m = m, epochs = 5)
          @@ -316,29 +316,9 @@
           rmse(assessment)
           
           # Apply to data
          -ensemble(Z)
          -
          -# Testing
          -J = 5 # ensemble size
          -ensemble = Ensemble(architecture, J)
          -train(ensemble, sampler, simulator, m = m, epochs = 5, savepath="testing-path")
          -ensemble = Ensemble(architecture, J)
          -ensemble(Z)
          -loadpath = joinpath(pwd(), "testing-path", "ensemble.bson")
          -Flux.loadparams!(ensemble, load(loadpath, @__MODULE__)[:weights])
          -ensemble(Z)
          -
          -# Testing
          -J = 5 # ensemble size
          -ensemble = Ensemble(architecture, J)
          -trainx(ensemble, sampler, simulator, [30, 50], epochs = 5, savepath="testing-path")
          -ensemble = Ensemble(architecture, J)
          -ensemble(Z)
          -loadpath = joinpath(pwd(), "testing-path_m50", "ensemble.bson")
          -Flux.loadparams!(ensemble, load(loadpath, @__MODULE__)[:weights])
          -ensemble(Z)
          source

          Training

          The function train is used to train a single neural estimator, while the wrapper function trainx is useful for training multiple neural estimators over a range of sample sizes, making use of the technique known as pre-training.

          Training

          The function train is used to train a single neural estimator, while the wrapper function trainx is useful for training multiple neural estimators over a range of sample sizes, making use of the technique known as pre-training.

          NeuralEstimators.trainFunction
          train(θ̂, sampler::Function, simulator::Function; ...)
           train(θ̂, θ_train::P, θ_val::P, simulator::Function; ...) where {P <: Union{AbstractMatrix, ParameterConfigurations}}
          -train(θ̂, θ_train::P, θ_val::P, Z_train::T, Z_val::T; ...) where {T, P <: Union{AbstractMatrix, ParameterConfigurations}}

          Train a neural estimator θ̂.

          The methods cater for different variants of "on-the-fly" simulation. Specifically, a sampler can be provided to continuously sample new parameter vectors from the prior, and a simulator can be provided to continuously simulate new data conditional on the parameters. If provided with specific sets of parameters (θ_train and θ_val) and/or data (Z_train and Z_val), they will be held fixed during training.

          In all methods, the validation parameters and data are held fixed to reduce noise when evaluating the validation risk.

          Keyword arguments common to all methods:

          • loss = mae
          • epochs::Integer = 100
          • batchsize::Integer = 32
          • optimiser = ADAM()
          • savepath::String = "": path to save the neural-network weights during training (as bson files) and other information, such as the risk vs epoch (the risk function evaluated over the training and validation sets are saved in the first and second columns of loss_per_epoch.csv). If savepath is an empty string (default), nothing is saved.
          • stopping_epochs::Integer = 5: cease training if the risk doesn't improve in this number of epochs.
          • use_gpu::Bool = true
          • verbose::Bool = true

          Keyword arguments common to train(θ̂, sampler, simulator) and train(θ̂, θ_train, θ_val, simulator):

          • m: sample sizes (either an Integer or a collection of Integers). The simulator is called as simulator(θ, m).
          • epochs_per_Z_refresh::Integer = 1: how often to refresh the training data.
          • simulate_just_in_time::Bool = false: flag indicating whether we should simulate just-in-time, in the sense that only a batchsize number of parameter vectors and corresponding data are in memory at a given time.

          Keyword arguments unique to train(θ̂, sampler, simulator):

          • K::Integer = 10000: number of parameter vectors in the training set; the size of the validation set is K ÷ 5.
          • ξ = nothing: an arbitrary collection of objects that are fixed (e.g., distance matrices). If provided, the parameter sampler is called as sampler(K, ξ); otherwise, the parameter sampler will be called as sampler(K). Can also be provided as xi.
          • epochs_per_θ_refresh::Integer = 1: how often to refresh the training parameters. Must be a multiple of epochs_per_Z_refresh. Can also be provided as epochs_per_theta_refresh.

          Examples

          using NeuralEstimators, Flux
          +train(θ̂, θ_train::P, θ_val::P, Z_train::T, Z_val::T; ...) where {T, P <: Union{AbstractMatrix, ParameterConfigurations}}

          Train a neural estimator θ̂.

          The methods cater for different variants of "on-the-fly" simulation. Specifically, a sampler can be provided to continuously sample new parameter vectors from the prior, and a simulator can be provided to continuously simulate new data conditional on the parameters. If provided with specific sets of parameters (θ_train and θ_val) and/or data (Z_train and Z_val), they will be held fixed during training.

          In all methods, the validation parameters and data are held fixed to reduce noise when evaluating the validation risk.

          Keyword arguments common to all methods:

          • loss = mae
          • epochs = 100
          • batchsize = 32
          • optimiser = ADAM()
          • savepath::String = "": path to save the trained estimator and other information; if an empty string (default), nothing is saved. Otherwise, the neural-network parameters (i.e., the weights and biases) will be saved during training as bson files; the risk function evaluated over the training and validation sets will also be saved, in the first and second columns of loss_per_epoch.csv, respectively; the best parameters (as measured by validation risk) will be saved as best_network.bson.
          • stopping_epochs = 5: cease training if the risk doesn't improve in this number of epochs.
          • use_gpu = true
          • verbose = true

          Keyword arguments common to train(θ̂, sampler, simulator) and train(θ̂, θ_train, θ_val, simulator):

          • m: sample sizes (either an Integer or a collection of Integers). The simulator is called as simulator(θ, m).
          • epochs_per_Z_refresh = 1: the number of passes to make through the training set before the training data are refreshed.
          • simulate_just_in_time = false: flag indicating whether we should simulate just-in-time, in the sense that only a batchsize number of parameter vectors and corresponding data are in memory at a given time.

          Keyword arguments unique to train(θ̂, sampler, simulator):

          • K = 10000: number of parameter vectors in the training set; the size of the validation set is K ÷ 5.
          • ξ = nothing: an arbitrary collection of objects that, if provided, will be passed to the parameter sampler as sampler(K, ξ); otherwise, the parameter sampler will be called as sampler(K). Can also be provided as xi.
          • epochs_per_θ_refresh = 1: the number of passes to make through the training set before the training parameters are refreshed. Must be a multiple of epochs_per_Z_refresh. Can also be provided as epochs_per_theta_refresh.

          Examples

          using NeuralEstimators, Flux
           
           function sampler(K)
           	μ = randn(K) # Gaussian prior
          @@ -373,10 +353,10 @@
           # training: fixed parameters and fixed data
           Z_train = simulator(θ_train, m)
           Z_val   = simulator(θ_val, m)
          -θ̂       = train(θ̂, θ_train, θ_val, Z_train, Z_val, epochs = 5)
          source
          NeuralEstimators.trainxFunction
          trainx(θ̂, sampler::Function, simulator::Function, m::Vector{Integer}; ...)
          +θ̂       = train(θ̂, θ_train, θ_val, Z_train, Z_val, epochs = 5)
          source
          NeuralEstimators.trainxFunction
          trainx(θ̂, sampler::Function, simulator::Function, m::Vector{Integer}; ...)
           trainx(θ̂, θ_train, θ_val, simulator::Function, m::Vector{Integer}; ...)
           trainx(θ̂, θ_train, θ_val, Z_train, Z_val, m::Vector{Integer}; ...)
trainx(θ̂, θ_train, θ_val, Z_train::V, Z_val::V; ...) where {V <: AbstractVector{AbstractVector{Any}}}

          A wrapper around train() to construct neural estimators for different sample sizes.

          The positional argument m specifies the desired sample sizes. Each estimator is pre-trained with the estimator for the previous sample size. For example, if m = [m₁, m₂], the estimator for sample size m₂ is pre-trained with the estimator for sample size m₁.

          The method for Z_train and Z_val subsets the data using subsetdata(Z, 1:mᵢ) for each mᵢ ∈ m. The method for Z_train::V and Z_val::V trains an estimator for each element of Z_train::V and Z_val::V and, hence, it does not need to invoke subsetdata(), which can be slow or difficult to define in some cases (e.g., for graphical data). Note that, in this case, m is inferred from the data.

          The keyword arguments inherit from train(). The keyword arguments epochs, batchsize, stopping_epochs, and optimiser can each be given as vectors. For example, if training two estimators, one may use a different number of epochs for each estimator by providing epochs = [epoch₁, epoch₂].

          source
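
As a concrete illustration of the sampler/simulator method, a minimal sketch follows. The toy one-parameter model, the network sizes, and the name estimators for the returned collection are illustrative assumptions, not part of the docstring above.

using NeuralEstimators, Flux

d = 1                                    # dimension of each replicate
p = 1                                    # number of parameters
sampler(K) = rand(p, K)                  # prior sampler, returning a p × K matrix
simulator(θ, m) = [ϑ .+ randn(d, m) for ϑ ∈ eachcol(θ)]  # m replicates per parameter vector

ψ = Chain(Dense(d, 32, relu), Dense(32, 32, relu))  # summary network
ϕ = Chain(Dense(32, 32, relu), Dense(32, p))        # inference network
θ̂ = DeepSet(ψ, ϕ)

# One estimator per sample size; each is pre-trained with the previous one
estimators = trainx(θ̂, sampler, simulator, [30, 50]; epochs = 5)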

          Assessment/calibration

          NeuralEstimators.assessFunction
          assess(estimator, θ, Z)

          Using an estimator (or a collection of estimators), computes estimates from data Z simulated based on true parameter vectors stored in θ.

          The data Z should be a Vector, with each element corresponding to a single simulated data set. If Z contains more data sets than parameter vectors, the parameter matrix θ will be recycled by horizontal concatenation via the call θ = repeat(θ, outer = (1, J)) where J = length(Z) ÷ K is the number of simulated data sets and K = size(θ, 2) is the number of parameter vectors.

          The output is of type Assessment; see ?Assessment for details.

          Keyword arguments

          • estimator_names::Vector{String}: names of the estimators (sensible defaults provided).
          • parameter_names::Vector{String}: names of the parameters (sensible defaults provided). If ξ is provided with a field parameter_names, those names will be used.
          • ξ = nothing: an arbitrary collection of objects that are fixed (e.g., distance matrices). Can also be provided as xi.
          • use_ξ = false: a Bool or a collection of Bool objects with length equal to the number of estimators. Specifies whether or not the estimator uses ξ: if it does, the estimator will be applied as estimator(Z, ξ). This argument is useful when multiple estimators are provided, only some of which need ξ; hence, if only one estimator is provided and ξ is not nothing, use_ξ is automatically set to true. Can also be provided as use_xi.
          • use_gpu = true: a Bool or a collection of Bool objects with length equal to the number of estimators.
          • probs = range(0.01, stop=0.99, length=100): (relevant only for estimator::QuantileEstimatorContinuous) a collection of probability levels in (0, 1)

          Examples

          using NeuralEstimators, Flux
           
           n = 10 # number of observations in each realisation
           p = 4  # number of parameters in the statistical model
 # ...
           θ̂ = DeepSet(ψ, ϕ)
           x = [rand(qₓ) for _ ∈ eachindex(Z)]
           assessment = assess(θ̂, θ, (Z, x));
risk(assessment)

          source
          NeuralEstimators.AssessmentType
          Assessment(df::DataFrame, runtime::DataFrame)

          A type for storing the output of assess(). The field runtime contains the total time taken for each estimator. The field df is a long-form DataFrame with columns:

          • estimator: the name of the estimator
          • parameter: the name of the parameter
          • truth: the true value of the parameter
          • estimate: the estimated value of the parameter
          • m: the sample size (number of iid replicates) for the given data set
          • k: the index of the parameter vector
          • j: the index of the data set (in the case that multiple data sets are associated with each parameter vector)

          If estimator is an IntervalEstimator, the column estimate will be replaced by the columns lower and upper, containing the lower and upper bounds of the interval, respectively.

          If estimator is a QuantileEstimator, the df will also contain a column prob indicating the probability level of the corresponding quantile estimate.

          Multiple Assessment objects can be combined with merge() (used for combining assessments from multiple point estimators) or join() (used for combining assessments from a point estimator and an interval estimator).

          source
          NeuralEstimators.riskFunction
          risk(assessment::Assessment; ...)

          Computes a Monte Carlo approximation of an estimator's Bayes risk,

\[r(\hat{\boldsymbol{\theta}}(\cdot)) \approx \frac{1}{K} \sum_{k=1}^K L(\boldsymbol{\theta}^{(k)}, \hat{\boldsymbol{\theta}}(\boldsymbol{Z}^{(k)})),\]

          where $\{\boldsymbol{\theta}^{(k)} : k = 1, \dots, K\}$ denotes a set of $K$ parameter vectors sampled from the prior and, for each $k$, data $\boldsymbol{Z}^{(k)}$ are simulated from the statistical model conditional on $\boldsymbol{\theta}^{(k)}$.

          Keyword arguments

          • loss = (x, y) -> abs(x - y): a binary operator defining the loss function (default absolute-error loss).
          • average_over_parameters::Bool = false: if true, the loss is averaged over all parameters; otherwise (default), the loss is averaged over each parameter separately.
          • average_over_sample_sizes::Bool = true: if true (default), the loss is averaged over all sample sizes $m$; otherwise, the loss is averaged over each sample size separately.
          source
          NeuralEstimators.biasFunction
          bias(assessment::Assessment; ...)

          Computes a Monte Carlo approximation of an estimator's bias,

\[{\rm{bias}}(\hat{\boldsymbol{\theta}}(\cdot)) \approx \frac{1}{K} \sum_{k=1}^K \hat{\boldsymbol{\theta}}(\boldsymbol{Z}^{(k)}) - \boldsymbol{\theta}^{(k)},\]

          where $\{\boldsymbol{\theta}^{(k)} : k = 1, \dots, K\}$ denotes a set of $K$ parameter vectors sampled from the prior and, for each $k$, data $\boldsymbol{Z}^{(k)}$ are simulated from the statistical model conditional on $\boldsymbol{\theta}^{(k)}$.

          This function inherits the keyword arguments of risk (excluding the argument loss).

          source
          NeuralEstimators.rmseFunction
          rmse(assessment::Assessment; ...)

          Computes a Monte Carlo approximation of an estimator's root-mean-squared error,

\[{\rm{rmse}}(\hat{\boldsymbol{\theta}}(\cdot)) \approx \sqrt{\frac{1}{K} \sum_{k=1}^K (\hat{\boldsymbol{\theta}}(\boldsymbol{Z}^{(k)}) - \boldsymbol{\theta}^{(k)})^2},\]

          where $\{\boldsymbol{\theta}^{(k)} : k = 1, \dots, K\}$ denotes a set of $K$ parameter vectors sampled from the prior and, for each $k$, data $\boldsymbol{Z}^{(k)}$ are simulated from the statistical model conditional on $\boldsymbol{\theta}^{(k)}$.

          This function inherits the keyword arguments of risk (excluding the argument loss).

          source
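
Once an Assessment object is in hand (e.g., from the assess() example above), these summaries can be computed directly; the alternative loss below is shown only to illustrate the keyword argument.

# assuming `assessment` is an Assessment object returned by assess()
risk(assessment)                              # Bayes risk under absolute-error loss (default)
risk(assessment; loss = (x, y) -> (x - y)^2)  # Bayes risk under squared-error loss
bias(assessment)                              # Monte Carlo approximation of the bias
rmse(assessment)                              # Monte Carlo approximation of the RMSE
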
          NeuralEstimators.coverageFunction
          coverage(assessment::Assessment; ...)

          Computes a Monte Carlo approximation of an interval estimator's expected coverage, as defined in Hermans et al. (2022, Definition 2.1), and the proportion of parameters below and above the lower and upper bounds, respectively.

          Keyword arguments

          • average_over_parameters::Bool = false: if true, the coverage is averaged over all parameters; otherwise (default), it is computed over each parameter separately.
          • average_over_sample_sizes::Bool = true: if true (default), the coverage is averaged over all sample sizes $m$; otherwise, it is computed over each sample size separately.
          source

          Inference with observed data

          Inference using point estimators

          Inference with a neural Bayes (point) estimator proceeds simply by applying the estimator θ̂ to the observed data Z (possibly containing multiple data sets) in a call of the form θ̂(Z). To leverage a GPU, simply move the estimator and the data to the GPU using gpu(); see also estimateinbatches() to apply the estimator over batches of data, which can alleviate memory issues when working with a large number of data sets.

          Uncertainty quantification often proceeds through the bootstrap distribution, which is essentially available "for free" when bootstrap data sets can be quickly generated; this is facilitated by bootstrap() and interval(). Alternatively, one may approximate a set of low and high marginal posterior quantiles using a specially constructed neural Bayes estimator, which can then be used to construct credible intervals: see IntervalEstimator, QuantileEstimatorDiscrete, and QuantileEstimatorContinuous.

          NeuralEstimators.bootstrapFunction
          bootstrap(θ̂, parameters::P, Z) where P <: Union{AbstractMatrix, ParameterConfigurations}
           bootstrap(θ̂, parameters::P, simulator, m::Integer; B = 400) where P <: Union{AbstractMatrix, ParameterConfigurations}
bootstrap(θ̂, Z; B = 400, blocks = nothing)

          Generates B bootstrap estimates from an estimator θ̂.

          Parametric bootstrapping is facilitated by passing a single parameter configuration, parameters, and corresponding simulated data, Z, whose length implicitly defines B. Alternatively, one may provide a simulator and the desired sample size, in which case the data will be simulated using simulator(parameters, m).

          Non-parametric bootstrapping is facilitated by passing a single data set, Z. The argument blocks caters for block bootstrapping, and it should be a vector of integers specifying the block for each replicate. For example, with 5 replicates, the first two corresponding to block 1 and the remaining three corresponding to block 2, blocks should be [1, 1, 2, 2, 2]. The resampling algorithm aims to produce resampled data sets that are of a similar size to Z, but this can only be achieved exactly if all blocks are equal in length.

          The keyword argument use_gpu is a flag determining whether to use the GPU, if it is available (default true).

          The return type is a p × B matrix, where p is the number of parameters in the model.

          source
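
A minimal sketch of non-parametric bootstrap uncertainty quantification, assuming a trained point estimator θ̂ and a single observed data set Z with its independent replicates stored in the final dimension (both assumptions for illustration):

θ̃ = bootstrap(θ̂, Z; B = 400)  # p × B matrix of bootstrap estimates
interval(θ̃)                    # p × 2 matrix of 90% bootstrap intervals (probs = [0.05, 0.95] by default)
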
          NeuralEstimators.intervalFunction
          interval(θ::Matrix; probs = [0.05, 0.95], parameter_names = nothing)
           interval(estimator::IntervalEstimator, Z; parameter_names = nothing, use_gpu = true)

          Compute a confidence interval based either on a $p$ × $B$ matrix θ of parameters (typically containing bootstrap estimates or posterior draws) with $p$ the number of parameters in the model, or from an IntervalEstimator and data Z.

When given θ, the intervals are constructed by computing quantiles, with probability levels controlled by the keyword argument probs.

          The return type is a $p$ × 2 matrix, whose first and second columns respectively contain the lower and upper bounds of the interval. The rows of this matrix can be named by passing a vector of strings to the keyword argument parameter_names.

          Examples

          using NeuralEstimators
           p = 3
           B = 50
           θ = rand(p, B)
interval(θ)
source

          Inference using likelihood and likelihood-to-evidence-ratio estimators

          NeuralEstimators.mlestimateFunction
          mlestimate(estimator::RatioEstimator, Z; θ₀ = nothing, θ_grid = nothing, penalty::Function = θ -> 1, use_gpu = true)

          Computes the (approximate) maximum likelihood estimate given data $\boldsymbol{Z}$,

          \[\argmax_{\boldsymbol{\theta}} \ell(\boldsymbol{\theta} ; \boldsymbol{Z})\]

          where $\ell(\cdot ; \cdot)$ denotes the approximate log-likelihood function derived from estimator.

          If a vector θ₀ of initial parameter estimates is given, the approximate likelihood is maximised by gradient descent. Otherwise, if a matrix of parameters θ_grid is given, the approximate likelihood is maximised by grid search.

          A maximum penalised likelihood estimate,

          \[\argmax_{\boldsymbol{\theta}} \ell(\boldsymbol{\theta} ; \boldsymbol{Z}) + \log p(\boldsymbol{\theta}),\]

          can be obtained by specifying the keyword argument penalty that defines the penalty term $p(\boldsymbol{\theta})$.

          See also mapestimate() for computing (approximate) maximum a posteriori estimates.

          source
          NeuralEstimators.mapestimateFunction
          mapestimate(estimator::RatioEstimator, Z; θ₀ = nothing, θ_grid = nothing, prior::Function = θ -> 1, use_gpu = true)

          Computes the (approximate) maximum a posteriori estimate given data $\boldsymbol{Z}$,

          \[\argmax_{\boldsymbol{\theta}} \ell(\boldsymbol{\theta} ; \boldsymbol{Z}) + \log p(\boldsymbol{\theta})\]

          where $\ell(\cdot ; \cdot)$ denotes the approximate log-likelihood function derived from estimator, and $p(\boldsymbol{\theta})$ denotes the prior density function controlled through the keyword argument prior (by default, a uniform prior is used).

          If a vector θ₀ of initial parameter estimates is given, the approximate posterior density is maximised by gradient descent. Otherwise, if a matrix of parameters θ_grid is given, the approximate posterior density is maximised by grid search.

          See also mlestimate() for computing (approximate) maximum likelihood estimates.

          source
          NeuralEstimators.sampleposteriorFunction
          sampleposterior(estimator::RatioEstimator, Z, N::Integer = 1000; θ_grid, prior::Function = θ -> 1f0)

          Samples from the approximate posterior distribution $p(\boldsymbol{\theta} \mid \boldsymbol{Z})$ implied by estimator.

          The positional argument N controls the size of the posterior sample.

          Currently, the sampling algorithm is based on a fine-gridding of the parameter space, specified through the keyword argument θ_grid (or theta_grid). The approximate posterior density is evaluated over this grid, which is then used to draw samples. This is very effective when making inference with a small number of parameters. For models with a large number of parameters, other sampling algorithms may be needed (please feel free to contact the package maintainer for discussion).

          The prior distribution $p(\boldsymbol{\theta})$ is controlled through the keyword argument prior (by default, a uniform prior is used).

          source
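
To make the grid-based workflow concrete, the three functions above might be combined as in the following sketch, where r̂ is assumed to be a trained RatioEstimator, Z the observed data, and θ_grid a p × N matrix of candidate parameter vectors covering the support of the prior (all of these names are assumptions, not objects defined above):

θ_ml   = mlestimate(r̂, Z; θ_grid = θ_grid)             # approximate MLE by grid search
θ_map  = mapestimate(r̂, Z; θ_grid = θ_grid)            # approximate MAP estimate
θ_post = sampleposterior(r̂, Z, 1000; θ_grid = θ_grid)  # 1000 approximate posterior draws
# credible intervals can then be formed from the posterior draws (e.g., with interval())
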
Index · NeuralEstimators.jl

Index

Loss functions · NeuralEstimators.jl

          Loss functions

          In addition to the standard loss functions provided by Flux (e.g., mae, mse, etc.), NeuralEstimators provides the following loss functions.

          NeuralEstimators.tanhlossFunction
          tanhloss(θ̂, θ, k; agg = mean, joint = true)

          For k > 0, computes the loss function,

          \[L(θ̂, θ) = tanh(|θ̂ - θ|/k),\]

          which approximates the 0-1 loss as k → 0. Compared with the kpowerloss, which may also be used as a continuous surrogate for the 0-1 loss, the gradient of the tanh loss is bounded as |θ̂ - θ| → 0, which can improve numerical stability during training.

          If joint = true, the L₁ norm is computed over each parameter vector, so that, with k close to zero, the resulting Bayes estimator is the mode of the joint posterior distribution; otherwise, if joint = false, the Bayes estimator is the vector containing the modes of the marginal posterior distributions.

          See also kpowerloss.

          source
          NeuralEstimators.kpowerlossFunction
          kpowerloss(θ̂, θ, k; agg = mean, joint = true, safeorigin = true, ϵ = 0.1)

          For k > 0, the k-th power absolute-distance loss function,

          \[L(θ̂, θ) = |θ̂ - θ|ᵏ,\]

          contains the squared-error, absolute-error, and 0-1 loss functions as special cases (the latter obtained in the limit as k → 0). It is Lipschitz continuous iff k = 1, convex iff k ≥ 1, and strictly convex iff k > 1: it is quasiconvex for all k > 0.

          If joint = true, the L₁ norm is computed over each parameter vector, so that, with k close to zero, the resulting Bayes estimator is the mode of the joint posterior distribution; otherwise, if joint = false, the Bayes estimator is the vector containing the modes of the marginal posterior distributions.

          If safeorigin = true, the loss function is modified to avoid pathologies around the origin, so that the resulting loss function behaves similarly to the absolute-error loss in the ϵ-interval surrounding the origin.

          See also tanhloss.

          source
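
Both losses operate on matrices of estimates and true parameters, in the same format as the quantileloss example below; the values of k used here are arbitrary choices for illustration.

using NeuralEstimators

p = 2   # number of parameters
K = 10  # number of parameter vectors
θ = rand(p, K)
θ̂ = rand(p, K)
tanhloss(θ̂, θ, 0.1)
kpowerloss(θ̂, θ, 0.5)
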
          NeuralEstimators.quantilelossFunction
          quantileloss(θ̂, θ, τ; agg = mean)
           quantileloss(θ̂, θ, τ::Vector; agg = mean)

          The asymmetric quantile loss function,

          \[ L(θ̂, θ; τ) = (θ̂ - θ)(𝕀(θ̂ - θ > 0) - τ),\]

          where τ ∈ (0, 1) is a probability level and 𝕀(⋅) is the indicator function.

The method that takes τ as a vector is useful for jointly approximating several quantiles of the posterior distribution. In this case, the number of rows in θ̂ is assumed to be $pr$, where $p$ is the number of parameters and $r$ is the number of probability levels in τ (i.e., the length of τ).

          Examples

          p = 1
           K = 10
           θ = rand(p, K)
 # ...
           quantileloss(θ̂, θ, 0.1)
           
           θ̂ = rand(3p, K)
quantileloss(θ̂, θ, [0.1, 0.5, 0.9])
          source
          NeuralEstimators.intervalscoreFunction
          intervalscore(l, u, θ, α; agg = mean)
           intervalscore(θ̂, θ, α; agg = mean)
intervalscore(assessment::Assessment; average_over_parameters::Bool = false, average_over_sample_sizes::Bool = true)

          Given an interval [l, u] with nominal coverage 100×(1-α)% and true value θ, the interval score is defined by

          \[S(l, u, θ; α) = (u - l) + 2α⁻¹(l - θ)𝕀(θ < l) + 2α⁻¹(θ - u)𝕀(θ > u),\]

          where α ∈ (0, 1) and 𝕀(⋅) is the indicator function.

          The method that takes a single value θ̂ assumes that θ̂ is a matrix with $2p$ rows, where $p$ is the number of parameters in the statistical model. Then, the first and second set of $p$ rows will be used as l and u, respectively.

          For further discussion, see Section 6 of Gneiting, T. and Raftery, A. E. (2007), "Strictly proper scoring rules, prediction, and estimation", Journal of the American statistical Association, 102, 359–378.

          source
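
A minimal sketch of both methods with nominal 90% intervals (α = 0.1); the bounds below are arbitrary shifts of the true values, used purely for illustration.

using NeuralEstimators

p = 2   # number of parameters
K = 10  # number of parameter vectors
θ = rand(p, K)
l = θ .- 0.1                 # lower bounds
u = θ .+ 0.2                 # upper bounds
intervalscore(l, u, θ, 0.1)

θ̂ = vcat(l, u)               # 2p × K matrix: first p rows are l, last p rows are u
intervalscore(θ̂, θ, 0.1)
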
# (tail of the simulategaussian() example from the simulation page of the API documentation)
D = pairwise(Euclidean(), S, dims = 1)
Σ = Symmetric(matern.(D, ρ, ν))
L = cholesky(Σ).L
simulategaussian(L)
          source
          NeuralEstimators.simulatepottsFunction
          simulatepotts(grid::Matrix{Int}, β)
           simulatepotts(grid::Matrix{Union{Int, Nothing}}, β)
           simulatepotts(nrows::Int, ncols::Int, num_states::Int, β)

          Chequerboard Gibbs sampling from 2D Potts model with parameter β>0.

          Approximately independent simulations can be obtained by setting nsims > 1 or num_iterations > burn. The degree to which the resulting simulations can be considered independent depends on the thinning factor (thin) and the burn-in (burn).

          Keyword arguments

          • nsims = 1: number of approximately independent replicates.
          • num_iterations = 2000: number of MCMC iterations.
          • burn = num_iterations: burn-in.
          • thin = 10: thinning factor.

          Examples

          using NeuralEstimators 
           
 # ...
           using Plots 
           grids = [simulatepotts(100, 100, 2, β) for β ∈ 0.3:0.1:1.2]
           heatmaps = heatmap.(grids, legend = false, aspect_ratio=1)
Plots.plot(heatmaps...)
          source
          NeuralEstimators.simulateschlatherFunction
          simulateschlather(L::Matrix, m = 1; C = 3.5, Gumbel::Bool = false)

          Simulates m independent and identically distributed (i.i.d.) realisations from Schlather's max-stable model using the algorithm for approximate simulation given by Schlather (2002).

          Requires the lower Cholesky factor L associated with the covariance matrix of the underlying Gaussian process.

If m is not specified, the simulated data are returned as a vector with length equal to the number of spatial locations, $n$; otherwise, the data are returned as an $n$ × $m$ matrix.

          Keyword arguments

          • C = 3.5: a tuning parameter that controls the accuracy of the algorithm: small C favours computational efficiency, while large C favours accuracy. Schlather (2002) recommends the use of C = 3.
• Gumbel::Bool = false: flag indicating whether the data should be log-transformed from the unit Fréchet scale to the Gumbel scale.

          Examples

          using NeuralEstimators, Distances, LinearAlgebra
           
           n = 500
           ρ = 0.6
 # ...
           D = pairwise(Euclidean(), S, dims = 1)
           Σ = Symmetric(matern.(D, ρ, ν))
           L = cholesky(Σ).L
simulateschlather(L)
          source

          Spatial point processes

          NeuralEstimators.maternclusterprocessFunction
          maternclusterprocess(; λ=10, μ=10, r=0.1, xmin=0, xmax=1, ymin=0, ymax=1, unit_bounding_box=false)

          Simulates a Matérn cluster process with density of parent Poisson point process λ, mean number of daughter points μ, and radius of cluster disk r, over the simulation window defined by xmin and xmax, ymin and ymax.

          If unit_bounding_box is true, then the simulated points will be scaled so that the longest side of their bounding box is equal to one (this may change the simulation window).

          See also the R package spatstat, which provides functions for simulating from a range of point processes and which can be interfaced from Julia using RCall.

          Examples

          using NeuralEstimators
           
           # Simulate a realisation from a Matérn cluster process
           S = maternclusterprocess()
 # ...
           plots = map(eachindex(λ)) do i
           	S = maternclusterprocess(λ = λ[i], μ = μ[i])
           	scatterplot(S[:, 1], S[:, 2])
end

          source

          Covariance functions

          These covariance functions may be of use for various models.

          NeuralEstimators.maternFunction
          matern(h, ρ, ν, σ² = 1)

          Given distance $\|\boldsymbol{h}\|$ (h), computes the Matérn covariance function,

          \[C(\|\boldsymbol{h}\|) = \sigma^2 \frac{2^{1 - \nu}}{\Gamma(\nu)} \left(\frac{\|\boldsymbol{h}\|}{\rho}\right)^\nu K_\nu \left(\frac{\|\boldsymbol{h}\|}{\rho}\right),\]

          where ρ is a range parameter, ν is a smoothness parameter, σ² is the marginal variance, $\Gamma(\cdot)$ is the gamma function, and $K_\nu(\cdot)$ is the modified Bessel function of the second kind of order $\nu$.

          source
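
The function is typically broadcast over a distance matrix to build a covariance matrix, as in the simulation examples elsewhere in this documentation; the locations and parameter values below are arbitrary.

using NeuralEstimators, Distances, LinearAlgebra

n = 200
S = rand(n, 2)                          # spatial locations
D = pairwise(Euclidean(), S, dims = 1)  # distance matrix
ρ, ν = 0.3, 1.0
Σ = Symmetric(matern.(D, ρ, ν))         # Matérn covariance matrix
L = cholesky(Σ).L                       # lower Cholesky factor, e.g., for simulategaussian()
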
          NeuralEstimators.paciorekFunction
          paciorek(s, r, ω₁, ω₂, ρ, β)

          Given spatial locations s and r, computes the nonstationary covariance function,

          \[C(\boldsymbol{s}, \boldsymbol{r}) = |\boldsymbol{\Sigma}(\boldsymbol{s})|^{1/4} |\boldsymbol{\Sigma}(\boldsymbol{r})|^{1/4} \left|\frac{\boldsymbol{\Sigma}(\boldsymbol{s}) + \boldsymbol{\Sigma}(\boldsymbol{r})}{2}\right|^{-1/2} C^0\big(\sqrt{Q(\boldsymbol{s}, \boldsymbol{r})}\big), \]

          where $C^0(h) = \exp\{-(h/\rho)^{3/2}\}$ for range parameter $\rho > 0$, the matrix $\boldsymbol{\Sigma}(\boldsymbol{s}) = \exp(\beta\|\boldsymbol{s} - \boldsymbol{\omega}\|)\boldsymbol{I}$ is a kernel matrix (Paciorek and Schervish, 2006) with scale parameter $\beta > 0$ and $\boldsymbol{\omega} \equiv (\omega_1, \omega_2)' \in \mathcal{D}$, and

\[Q(\boldsymbol{s}, \boldsymbol{r}) = (\boldsymbol{s} - \boldsymbol{r})' \left(\frac{\boldsymbol{\Sigma}(\boldsymbol{s}) + \boldsymbol{\Sigma}(\boldsymbol{r})}{2}\right)^{-1} (\boldsymbol{s} - \boldsymbol{r})\]

          is the squared Mahalanobis distance between $\boldsymbol{s}$ and $\boldsymbol{r}$.

          source
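
For illustration only, the covariance between two arbitrary locations might be evaluated as follows (all parameter values are placeholders):

using NeuralEstimators

s = rand(2)                         # first spatial location
r = rand(2)                         # second spatial location
paciorek(s, r, 0.5, 0.5, 0.2, 0.3)  # ω₁ = 0.5, ω₂ = 0.5, ρ = 0.2, β = 0.3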

          Density functions

Density functions are not needed in the workflow of NeuralEstimators. However, as part of a series of comparison studies between neural estimators and likelihood-based estimators given in various papers, we have developed the following functions for evaluating the density function for several popular distributions. We include these in NeuralEstimators to cater for the possibility that they may be of use in future comparison studies.

          NeuralEstimators.gaussiandensityFunction
          gaussiandensity(y::V, L::LT) where {V <: AbstractVector, LT <: LowerTriangular}
           gaussiandensity(y::A, L::LT) where {A <: AbstractArray, LT <: LowerTriangular}
gaussiandensity(y::A, Σ::M) where {A <: AbstractArray, M <: AbstractMatrix}

Efficiently computes the density function for y ~ 𝑁(0, Σ), where Σ is the covariance matrix and L is the lower Cholesky factor of Σ.

          The method gaussiandensity(y::A, L::LT) assumes that the last dimension of y contains independent and identically distributed (iid) replicates.

          The log-density is returned if the keyword argument logdensity is true (default).

          source
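
A minimal sketch, reusing the Matérn covariance construction from the simulation examples; the last dimension of y holds the iid replicates, and the log-density is returned by default.

using NeuralEstimators, Distances, LinearAlgebra

n = 250   # number of spatial locations
m = 5     # number of iid replicates
S = rand(n, 2)
D = pairwise(Euclidean(), S, dims = 1)
Σ = Symmetric(matern.(D, 0.2, 1.0))
L = cholesky(Σ).L
y = L * randn(n, m)      # m mean-zero Gaussian realisations with covariance Σ
gaussiandensity(y, L)    # log-density (logdensity = true by default)
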
          NeuralEstimators.schlatherbivariatedensityFunction
          schlatherbivariatedensity(z₁, z₂, ψ; logdensity = true)

          The bivariate density function for Schlather's max-stable model.

          source
Miscellaneous · NeuralEstimators.jl

          Miscellaneous

          Core

          These functions can appear during the core workflow, and may need to be overloaded in some applications.

          NeuralEstimators.numberreplicatesFunction
          numberreplicates(Z)

          Generic function that returns the number of replicates in a given object. Default implementations are provided for commonly used data formats, namely, data stored as an Array or as a GNNGraph.

          source
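
For example, with a single gridded data set stored as a four-dimensional array (replicates in the final dimension), the default Array method is assumed here to count that final dimension:

using NeuralEstimators

Z = rand(16, 16, 1, 30)  # one data set with 30 replicates stored in the final dimension
numberreplicates(Z)      # 30
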
          NeuralEstimators.subsetdataFunction
          subsetdata(Z::V, i) where {V <: AbstractArray{A}} where {A <: Any}
           subsetdata(Z::A, i) where {A <: AbstractArray{T, N}} where {T, N}
           subsetdata(Z::G, i) where {G <: AbstractGraph}

          Return replicate(s) i from each data set in Z.

          If the user is working with data that are not covered by the default methods, simply overload the function with the appropriate type for Z.

For graphical data, calls getgraph(), where the replicates are assumed to be stored as batched graphs. Since this can be slow, one should consider using a method of train() that does not require the data to be subsetted when working with graphical data (use numberreplicates() to check that the training and validation data sets are equally replicated, which prevents subsetting).

          Examples

          using NeuralEstimators
           using GraphNeuralNetworks
 # ...
           e = 8 # number of edges
           Z = [batch([rand_graph(n, e, ndata = rand(d, n)) for _ ∈ 1:m]) for k ∈ 1:K]
           subsetdata(Z, 2)   # extract second replicate from each data set
subsetdata(Z, 1:3) # extract first 3 replicates from each data set

          source
          NeuralEstimators.subsetparametersFunction
          subsetparameters(parameters::M, indices) where {M <: AbstractMatrix}
          +subsetparameters(parameters::P, indices) where {P <: ParameterConfigurations}

          Subset parameters using a collection of indices.

          Arrays in parameters::P with last dimension equal in size to the number of parameter configurations, K, are also subsetted (over their last dimension) using indices. All other fields are left unchanged. To modify this default behaviour, overload subsetparameters.

          source

          Downstream-inference algorithms

          NeuralEstimators.EMType
          EM(simulateconditional::Function, MAP::Union{Function, NeuralEstimator}, θ₀ = nothing)

          Implements the (Bayesian) Monte Carlo expectation-maximisation (EM) algorithm, with $l$th iteration

          \[\boldsymbol{\theta}^{(l)} = \argmax_{\boldsymbol{\theta}} \sum_{h = 1}^H \ell(\boldsymbol{\theta}; \boldsymbol{Z}_1, \boldsymbol{Z}_2^{(lh)}) + H\log \pi(\boldsymbol{\theta})\]

          where $\ell(\cdot)$ is the complete-data log-likelihood function, $\boldsymbol{Z} \equiv (\boldsymbol{Z}_1', \boldsymbol{Z}_2')'$ denotes the complete data with $\boldsymbol{Z}_1$ and $\boldsymbol{Z}_2$ the observed and missing components, respectively, $\boldsymbol{Z}_2^{(lh)}$, $h = 1, \dots, H$, is simulated from the distribution of $\boldsymbol{Z}_2 \mid \boldsymbol{Z}_1, \boldsymbol{\theta}^{(l-1)}$, and $\pi(\boldsymbol{\theta})$ denotes the prior density.

          Fields

          The function simulateconditional should have a signature of the form,

          simulateconditional(Z::A, θ; nsims = 1) where {A <: AbstractArray{Union{Missing, T}}} where T

          The output of simulateconditional should be the completed-data Z, and it should be returned in whatever form is appropriate to be passed to the MAP estimator as MAP(Z). For example, if the data are gridded and the MAP is a neural MAP estimator based on a CNN architecture, then Z should be returned as a four-dimensional array.

          The field MAP can be a function (to facilitate the conventional Monte Carlo EM algorithm) or a NeuralEstimator (to facilitate the so-called neural EM algorithm).

          The starting values θ₀ may be provided during initialisation (as a vector), or when applying the EM object to data (see below). The starting values given in a function call take precedence over those stored in the object.

          Methods

Once constructed, objects of type EM can be applied to data via the methods,

          (em::EM)(Z::A, θ₀::Union{Nothing, Vector} = nothing; ...) where {A <: AbstractArray{Union{Missing, T}, N}} where {T, N}
(em::EM)(Z::V, θ₀::Union{Nothing, Vector, Matrix} = nothing; ...) where {V <: AbstractVector{A}} where {A <: AbstractArray{Union{Missing, T}, N}} where {T, N}

          where Z is the complete data containing the observed data and Missing values. Note that the second method caters for the case that one has multiple data sets. The keyword arguments are:

          • niterations::Integer = 50: the maximum number of iterations.
          • ϵ = 0.01: tolerance used to assess convergence; the algorithm halts if the relative change in parameter values in successive iterations is less than ϵ.
          • return_iterates::Bool: if true, the estimate at each iteration of the algorithm is returned; otherwise, only the final estimate is returned.
          • nsims::Integer = 1: the number $H$ of conditional simulations in each iteration.
          • ξ = nothing: model information needed for conditional simulation (e.g., distance matrices) or in the MAP estimator.
          • use_ξ_in_simulateconditional::Bool = false: if set to true, the conditional simulator is called as simulateconditional(Z, θ, ξ; nsims = nsims).
          • use_ξ_in_MAP::Bool = false: if set to true, the MAP estimator is called as MAP(Z, ξ).
          • use_gpu::Bool = true
          • verbose::Bool = false

          Examples

          # See the "Missing data" section in "Advanced usage"
          source
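
Schematically, and assuming that simulateconditional and a (neural) MAP estimator θ̂_MAP with the interfaces described above have already been defined by the user, the algorithm might be invoked as follows (all names and values are placeholders):

θ₀ = [0.5, 1.0]                           # starting values for the parameters
em = EM(simulateconditional, θ̂_MAP, θ₀)
θ̃  = em(Z; niterations = 30, nsims = 10)  # Z contains the observed data and Missing entries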

          Utility functions

          NeuralEstimators.adjacencymatrixFunction
          adjacencymatrix(S::Matrix, k::Integer; maxmin = false, combined = false)
           adjacencymatrix(S::Matrix, r::AbstractFloat)
           adjacencymatrix(S::Matrix, r::AbstractFloat, k::Integer; random = true)
           adjacencymatrix(M::Matrix; k, r, kwargs...)

          Computes a spatially weighted adjacency matrix from spatial locations S based on either the k-nearest neighbours of each location; all nodes within a disc of fixed radius r; or, if both r and k are provided, a subset of k neighbours within a disc of fixed radius r.

Several subsampling strategies are possible when choosing a subset of k neighbours within a disc of fixed radius r. If random=true (default), the neighbours are randomly selected from within the disc (note that this also approximately preserves the distribution of distances within the neighbourhood set). If random=false, a deterministic algorithm is used that aims to preserve the distribution of distances within the neighbourhood set, by choosing those nodes with distances to the central node corresponding to the $\{0, \frac{1}{k}, \frac{2}{k}, \dots, \frac{k-1}{k}, 1\}$ quantiles of the empirical distribution function of distances within the disc. (This algorithm in fact yields $k+1$ neighbours, since both the closest and furthest nodes are always included.)

          If maxmin=false (default) the k-nearest neighbours are chosen based on all points in the graph. If maxmin=true, a so-called maxmin ordering is applied, whereby an initial point is selected, and each subsequent point is selected to maximise the minimum distance to those points that have already been selected. Then, the neighbours of each point are defined as the k-nearest neighbours amongst the points that have already appeared in the ordering. If combined=true, the neighbours are defined to be the union of the k-nearest neighbours and the k-nearest neighbours subject to a maxmin ordering.

If S is a square matrix, it is treated as a distance matrix; otherwise, it should be an $n$ × $d$ matrix, where $n$ is the number of spatial locations and $d$ is the spatial dimension (typically $d$ = 2). In the latter case, the distance metric is taken to be the Euclidean distance. Note that use of a maxmin ordering currently requires a matrix of spatial locations (not a distance matrix).

          By convention with the functionality in GraphNeuralNetworks.jl which is based on directed graphs, the neighbours of location i are stored in the column A[:, i] where A is the returned adjacency matrix. Therefore, the number of neighbours for each location is given by collect(mapslices(nnz, A; dims = 1)), and the number of times each node is a neighbour of another node is given by collect(mapslices(nnz, A; dims = 2)).

          By convention, we do not consider a location to neighbour itself (i.e., the diagonal elements of the adjacency matrix are zero).

          Examples

          using NeuralEstimators, Distances, SparseArrays
          @@ -47,31 +47,31 @@
           adjacencymatrix(D, k)
           adjacencymatrix(D, r)
           adjacencymatrix(D, r, k)
          -adjacencymatrix(D, r, k; random = false)
          source
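To illustrate the column convention described above, the following is a minimal sketch that reuses the expressions given in the docstring; the locations S and neighbour count k are illustrative values, not part of the original example.

using NeuralEstimators, Distances, SparseArrays
S = rand(100, 2)                       # 100 locations in the unit square (illustrative)
k = 10
A = adjacencymatrix(S, k)
collect(mapslices(nnz, A; dims = 1))   # number of neighbours of each location
collect(mapslices(nnz, A; dims = 2))   # number of times each location is a neighbour of another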
          NeuralEstimators.containertypeFunction
          containertype(A::Type)
           containertype(::Type{A}) where A <: SubArray
           containertype(a::A) where A

          Returns the container type of its argument.

          If given a SubArray, returns the container type of the parent array.

          Examples

          a = rand(3, 4)
           containertype(a)
           containertype(typeof(a))
          -[containertype(x) for x ∈ eachcol(a)]
          source
          NeuralEstimators.encodedataFunction
          encodedata(Z::A; c::T = zero(T)) where {A <: AbstractArray{Union{Missing, T}, N}} where T, N

          For data Z with missing entries, returns an encoded data set (U, W) where W encodes the missingness pattern as an indicator vector and U is the original data Z with missing entries replaced by a fixed constant c.

          The indicator vector W is stored in the second-to-last dimension of Z, which should be singleton. If the second-to-last dimension is not singleton, then two singleton dimensions will be added to the array, and W will be stored in the new second-to-last dimension.

          Examples

          using NeuralEstimators
          +[containertype(x) for x ∈ eachcol(a)]
          source
          NeuralEstimators.encodedataFunction
          encodedata(Z::A; c::T = zero(T)) where {A <: AbstractArray{Union{Missing, T}, N}} where T, N

          For data Z with missing entries, returns an encoded data set (U, W) where W encodes the missingness pattern as an indicator vector and U is the original data Z with missing entries replaced by a fixed constant c.

          The indicator vector W is stored in the second-to-last dimension of Z, which should be singleton. If the second-to-last dimension is not singleton, then two singleton dimensions will be added to the array, and W will be stored in the new second-to-last dimension.

          Examples

          using NeuralEstimators
           
           # Generate some missing data
           Z = rand(16, 16, 1, 1)
           Z = removedata(Z, 0.25)	 # remove 25% of the data
           
           # Encode the data
          -UW = encodedata(Z)
          source
          NeuralEstimators.estimateinbatchesFunction
          estimateinbatches(θ̂, z, t = nothing; batchsize::Integer = 32, use_gpu::Bool = true, kwargs...)

          Apply the estimator θ̂ on minibatches of z (and optionally other set-level information t) of size batchsize.

          This can prevent memory issues that can occur with large data sets, particularly on the GPU.

          Minibatching will only be done if there are multiple data sets in z; this will be inferred by z being a vector, or a tuple whose first element is a vector.

          source
          NeuralEstimators.estimateinbatchesFunction
          estimateinbatches(θ̂, z, t = nothing; batchsize::Integer = 32, use_gpu::Bool = true, kwargs...)

          Apply the estimator θ̂ on minibatches of z (and optionally other set-level information t) of size batchsize.

          This can prevent memory issues that can occur with large data sets, particularly on the GPU.

          Minibatching will only be done if there are multiple data sets in z; this will be inferred by z being a vector, or a tuple whose first element is a vector.

          source
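The docstring above does not include an example, so here is a minimal sketch of batched estimation; the architecture and data are illustrative assumptions only, using the DeepSet and PointEstimator types documented elsewhere in this manual.

using NeuralEstimators, Flux

# Toy DeepSets point estimator for univariate data with two parameters
ψ = Chain(Dense(1, 32, relu), Dense(32, 32, relu))   # summary network
ϕ = Chain(Dense(32, 32, relu), Dense(32, 2))         # inference network
θ̂ = PointEstimator(DeepSet(ψ, ϕ))

# A large collection of data sets, each with 50 independent replicates
Z = [rand(1, 50) for _ in 1:10_000]

# Apply the estimator in minibatches of 128 data sets to limit memory usage
estimates = estimateinbatches(θ̂, Z; batchsize = 128)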
          NeuralEstimators.IndicatorWeightsType
          IndicatorWeights(h_max, n_bins::Integer)
           (w::IndicatorWeights)(h::Matrix)

          For spatial locations $\boldsymbol{s}$ and $\boldsymbol{u}$, creates a spatial weight function defined as

          \[\boldsymbol{w}(\boldsymbol{s}, \boldsymbol{u}) \equiv (\mathbb{I}(h \in B_k) : k = 1, \dots, K)',\]

where $\mathbb{I}(\cdot)$ denotes the indicator function, $h \equiv \|\boldsymbol{s} - \boldsymbol{u}\|$ is the spatial distance between $\boldsymbol{s}$ and $\boldsymbol{u}$, and $\{B_k : k = 1, \dots, K\}$ is a set of $K$ = n_bins equally-sized distance bins covering the spatial distances between 0 and h_max.

          Examples

          using NeuralEstimators 
           
           h_max = 1
           n_bins = 10
           w = IndicatorWeights(h_max, n_bins)
           h = rand(1, 30) # distances between 30 pairs of spatial locations 
          -w(h)
          source
          NeuralEstimators.initialise_estimatorFunction
          initialise_estimator(p::Integer; ...)

          Initialise a neural estimator for a statistical model with p unknown parameters.

          The estimator is couched in the DeepSets framework (see DeepSet) so that it can be applied to data sets containing an arbitrary number of independent replicates (including the special case of a single replicate).

          Note also that the user is free to initialise their neural estimator however they see fit using arbitrary Flux code; see here for Flux's API reference.

Finally, the method with positional argument data_type is a wrapper that allows one to specify the type of their data (either "unstructured", "gridded", or "irregular_spatial").

          Keyword arguments

          • architecture::String: for unstructured multivariate data, one may use a fully-connected multilayer perceptron ("MLP"); for data collected over a grid, a convolutional neural network ("CNN"); and for graphical or irregular spatial data, a graphical neural network ("GNN").
          • d::Integer = 1: for unstructured multivariate data (i.e., when architecture = "MLP"), the dimension of the data (e.g., d = 3 for trivariate data); otherwise, if architecture ∈ ["CNN", "GNN"], the argument d controls the number of input channels (e.g., d = 1 for univariate spatial processes).
          • estimator_type::String = "point": the type of estimator; either "point" or "interval".
          • depth = 3: the number of hidden layers; either a single integer or an integer vector of length two specifying the depth of the inner (summary) and outer (inference) network of the DeepSets framework.
          • width = 32: a single integer or an integer vector of length sum(depth) specifying the width (or number of convolutional filters/channels) in each hidden layer.
          • activation::Function = relu: the (non-linear) activation function of each hidden layer.
          • activation_output::Function = identity: the activation function of the output layer.
          • variance_stabiliser::Union{Nothing, Function} = nothing: a function that will be applied directly to the input, usually to stabilise the variance.
          • kernel_size = nothing: (applicable only to CNNs) a vector of length depth[1] containing integer tuples of length D, where D is the dimension of the convolution (e.g., D = 2 for two-dimensional convolution).
          • weight_by_distance::Bool = true: (applicable only to GNNs) flag indicating whether the estimator will weight by spatial distance; if true, a SpatialGraphConv layer is used in the propagation module; otherwise, a regular GraphConv layer is used.
          • probs = [0.025, 0.975]: (applicable only if estimator_type = "interval") probability levels defining the lower and upper endpoints of the posterior credible interval.

          Examples

          ## MLP, GNN, 1D CNN, and 2D CNN for a statistical model with two parameters:
          +w(h)
          source
          NeuralEstimators.initialise_estimatorFunction
          initialise_estimator(p::Integer; ...)

          Initialise a neural estimator for a statistical model with p unknown parameters.

          The estimator is couched in the DeepSets framework (see DeepSet) so that it can be applied to data sets containing an arbitrary number of independent replicates (including the special case of a single replicate).

          Note also that the user is free to initialise their neural estimator however they see fit using arbitrary Flux code; see here for Flux's API reference.

Finally, the method with positional argument data_type is a wrapper that allows one to specify the type of their data (either "unstructured", "gridded", or "irregular_spatial").

          Keyword arguments

          • architecture::String: for unstructured multivariate data, one may use a fully-connected multilayer perceptron ("MLP"); for data collected over a grid, a convolutional neural network ("CNN"); and for graphical or irregular spatial data, a graphical neural network ("GNN").
          • d::Integer = 1: for unstructured multivariate data (i.e., when architecture = "MLP"), the dimension of the data (e.g., d = 3 for trivariate data); otherwise, if architecture ∈ ["CNN", "GNN"], the argument d controls the number of input channels (e.g., d = 1 for univariate spatial processes).
          • estimator_type::String = "point": the type of estimator; either "point" or "interval".
          • depth = 3: the number of hidden layers; either a single integer or an integer vector of length two specifying the depth of the inner (summary) and outer (inference) network of the DeepSets framework.
          • width = 32: a single integer or an integer vector of length sum(depth) specifying the width (or number of convolutional filters/channels) in each hidden layer.
          • activation::Function = relu: the (non-linear) activation function of each hidden layer.
          • activation_output::Function = identity: the activation function of the output layer.
          • variance_stabiliser::Union{Nothing, Function} = nothing: a function that will be applied directly to the input, usually to stabilise the variance.
          • kernel_size = nothing: (applicable only to CNNs) a vector of length depth[1] containing integer tuples of length D, where D is the dimension of the convolution (e.g., D = 2 for two-dimensional convolution).
          • weight_by_distance::Bool = true: (applicable only to GNNs) flag indicating whether the estimator will weight by spatial distance; if true, a SpatialGraphConv layer is used in the propagation module; otherwise, a regular GraphConv layer is used.
          • probs = [0.025, 0.975]: (applicable only if estimator_type = "interval") probability levels defining the lower and upper endpoints of the posterior credible interval.

          Examples

          ## MLP, GNN, 1D CNN, and 2D CNN for a statistical model with two parameters:
           p = 2
           initialise_estimator(p, architecture = "MLP")
           initialise_estimator(p, architecture = "GNN")
           initialise_estimator(p, architecture = "CNN", kernel_size = [10, 5, 3])
          -initialise_estimator(p, architecture = "CNN", kernel_size = [(10, 10), (5, 5), (3, 3)])
          source
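The keyword arguments documented above can be combined; as a further sketch (with illustrative argument values only), one might initialise a credible-interval estimator with differing summary- and inference-network depths:

p = 2
initialise_estimator(p, architecture = "MLP", estimator_type = "interval",
                     depth = [2, 3], width = 64, probs = [0.05, 0.95])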
          NeuralEstimators.materncholsFunction
          maternchols(D, ρ, ν, σ² = 1; stack = true)

          Given a matrix D of distances, constructs the Cholesky factor of the covariance matrix under the Matérn covariance function with range parameter ρ, smoothness parameter ν, and marginal variance σ².

Providing vectors of parameters will yield a three-dimensional array of Cholesky factors (note that the vectors must be of the same length, but a mix of vectors and scalars is allowed). A vector of distance matrices D may also be provided.

          If stack = true, the Cholesky factors will be "stacked" into a three-dimensional array (this is only possible if all distance matrices in D are the same size).

          Examples

          using NeuralEstimators
          +initialise_estimator(p, architecture = "CNN", kernel_size = [(10, 10), (5, 5), (3, 3)])
          source
          NeuralEstimators.materncholsFunction
          maternchols(D, ρ, ν, σ² = 1; stack = true)

          Given a matrix D of distances, constructs the Cholesky factor of the covariance matrix under the Matérn covariance function with range parameter ρ, smoothness parameter ν, and marginal variance σ².

Providing vectors of parameters will yield a three-dimensional array of Cholesky factors (note that the vectors must be of the same length, but a mix of vectors and scalars is allowed). A vector of distance matrices D may also be provided.

          If stack = true, the Cholesky factors will be "stacked" into a three-dimensional array (this is only possible if all distance matrices in D are the same size).

          Examples

          using NeuralEstimators
           using LinearAlgebra: norm
           n  = 10
           S  = rand(n, 2)
          @@ -90,7 +90,7 @@
           
           S̃  = rand(2n, 2)
           D̃  = [norm(sᵢ - sⱼ) for sᵢ ∈ eachrow(S̃), sⱼ ∈ eachrow(S̃)]
          -maternchols([D, D̃], ρ, ν, σ²; stack = false)
          source
          NeuralEstimators.removedataFunction
          removedata(Z::Array, Iᵤ::Vector{Integer})
           removedata(Z::Array, p::Union{Float, Vector{Float}}; prevent_complete_missing = true)
           removedata(Z::Array, n::Integer; fixed_pattern = false, contiguous_pattern = false, variable_proportion = false)

          Replaces elements of Z with missing.

          The simplest method accepts a vector of integers Iᵤ that give the specific indices of the data to be removed.

Alternatively, there are two methods available to generate data that are missing completely at random (MCAR).

          First, a vector p may be given that specifies the proportion of missingness for each element in the response vector. Hence, p should have length equal to the dimension of the response vector. If a single proportion is given, it will be replicated accordingly. If prevent_complete_missing = true, no replicates will contain 100% missingness (note that this can slightly alter the effective values of p).

          Second, if an integer n is provided, all replicates will contain n observations after the data are removed. If fixed_pattern = true, the missingness pattern is fixed for all replicates. If contiguous_pattern = true, the data will be removed in a contiguous block. If variable_proportion = true, the proportion of missingness will vary across replicates, with each replicate containing between 1 and n observations after data removal, sampled uniformly (note that variable_proportion overrides fixed_pattern).

          The return type is Array{Union{T, Missing}}.

          Examples

          d = 5           # dimension of each replicate
           m = 2000        # number of replicates
          @@ -102,7 +102,7 @@
           
           # Passing a desired final sample size
           n = 3  # number of observed elements of each replicate: must have n <= d
          -removedata(Z, n)
          source
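For completeness, here is a small sketch of the proportion-based method described above (values are illustrative; the single-proportion call also appears in the encodedata example).

using NeuralEstimators
d = 5
m = 2000
Z = rand(d, m)
removedata(Z, 0.25)                       # remove 25% of the entries at random
removedata(Z, [0.1, 0.2, 0.3, 0.4, 0.5])  # element-wise missingness proportions (length d)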
          NeuralEstimators.spatialgraphFunction
          spatialgraph(S)
           spatialgraph(S, Z)
           spatialgraph(g::GNNGraph, Z)

          Given spatial data Z measured at spatial locations S, constructs a GNNGraph ready for use in a graph neural network that employs SpatialGraphConv layers.

          When $m$ independent replicates are collected over the same set of $n$ spatial locations,

          \[\{\boldsymbol{s}_1, \dots, \boldsymbol{s}_n\} \subset \mathcal{D},\]

          where $\mathcal{D} \subset \mathbb{R}^d$ denotes the spatial domain of interest, Z should be given as an $n \times m$ matrix and S should be given as an $n \times d$ matrix. Otherwise, when $m$ independent replicates are collected over differing sets of spatial locations,

\[\{\boldsymbol{s}_{i1}, \dots, \boldsymbol{s}_{in_i}\} \subset \mathcal{D}, \quad i = 1, \dots, m,\]

          Z should be given as an $m$-vector of $n_i$-vectors, and S should be given as an $m$-vector of $n_i \times d$ matrices.

          The spatial information between neighbours is stored as an edge feature, with the specific information controlled by the keyword arguments stationary and isotropic. Specifically, the edge feature between node $j$ and node $j'$ stores the spatial distance $\|\boldsymbol{s}_{j'} - \boldsymbol{s}_j\|$ (if isotropic), the spatial displacement $\boldsymbol{s}_{j'} - \boldsymbol{s}_j$ (if stationary), or the matrix of locations $(\boldsymbol{s}_{j'}, \boldsymbol{s}_j)$ (if !stationary).

          Additional keyword arguments inherit from adjacencymatrix() to determine the neighbourhood of each node, with the default being a randomly selected set of k=30 neighbours within a disc of radius r=0.15 units.

          Examples

          using NeuralEstimators
           
          @@ -120,14 +120,14 @@
           n = rand(50:100, m)
           S = rand.(n, d)
           Z = rand.(n)
          -g = spatialgraph(S, Z)
          source
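To complement the varying-locations example shown above, the following is a minimal sketch of the fixed-locations case described in the docstring (all values illustrative).

using NeuralEstimators

n = 250          # number of spatial locations
m = 5            # number of independent replicates
d = 2            # spatial dimension
S = rand(n, d)   # n x d matrix of locations
Z = rand(n, m)   # n x m matrix of data
g = spatialgraph(S, Z)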
          NeuralEstimators.stackarraysFunction
          stackarrays(v::V; merge = true) where {V <: AbstractVector{A}} where {A <: AbstractArray{T, N}} where {T, N}

          Stack a vector of arrays v along the last dimension of each array, optionally merging the final dimension of the stacked array.

          The arrays must be of the same size for the first N-1 dimensions. However, if merge = true, the size of the final dimension can vary.

          Examples

          # Vector containing arrays of the same size:
          +g = spatialgraph(S, Z)
          source
          NeuralEstimators.stackarraysFunction
          stackarrays(v::V; merge = true) where {V <: AbstractVector{A}} where {A <: AbstractArray{T, N}} where {T, N}

          Stack a vector of arrays v along the last dimension of each array, optionally merging the final dimension of the stacked array.

          The arrays must be of the same size for the first N-1 dimensions. However, if merge = true, the size of the final dimension can vary.

          Examples

          # Vector containing arrays of the same size:
           Z = [rand(2, 3, m) for m ∈ (1, 1)];
           stackarrays(Z)
           stackarrays(Z, merge = false)
           
           # Vector containing arrays with differing final dimension size:
           Z = [rand(2, 3, m) for m ∈ (1, 2)];
          -stackarrays(Z)
          source
          NeuralEstimators.vectotrilFunction
          vectotril(v; strict = false)
           vectotriu(v; strict = false)

          Converts a vector v of length $d(d+1)÷2$ (a triangular number) into a $d × d$ lower or upper triangular matrix.

          If strict = true, the matrix will be strictly lower or upper triangular, that is, a $(d+1) × (d+1)$ triangular matrix with zero diagonal.

Note that the triangular matrix is constructed on the CPU, but the returned matrix will be a GPU array if v is a GPU array. Note also that the return type is not a Triangular matrix type (i.e., the zeros are materialised), since Triangular matrices are not always compatible with other GPU operations.

          Examples

          using NeuralEstimators
           
           d = 4
          @@ -136,4 +136,4 @@
           vectotril(v)
           vectotriu(v)
           vectotril(v; strict = true)
          -vectotriu(v; strict = true)
          source
          +vectotriu(v; strict = true)source diff --git a/dev/framework/index.html b/dev/framework/index.html index 6f6df5c6..56ca4b23 100644 --- a/dev/framework/index.html +++ b/dev/framework/index.html @@ -2,4 +2,4 @@ Framework · NeuralEstimators.jl

          Framework

          In this section, we provide an overview of point estimation using neural Bayes estimators. For a more detailed discussion on the framework and its implementation, see the paper Likelihood-Free Parameter Estimation with Neural Bayes Estimators. For an accessible introduction to amortised neural inferential methods more broadly, see the review paper Neural Methods for Amortised Inference.

          Neural Bayes estimators

          A parametric statistical model is a set of probability distributions on a sample space $\mathcal{Z} \subseteq \mathbb{R}^n$, where the probability distributions are parameterised via some parameter vector $\boldsymbol{\theta}$ on a parameter space $\Theta \subseteq \mathbb{R}^p$. Suppose that we have data from one such distribution, which we denote as $\boldsymbol{Z}$. Then, the goal of parameter point estimation is to come up with an estimate of the unknown $\boldsymbol{\theta}$ from $\boldsymbol{Z}$ using an estimator,

          \[ \hat{\boldsymbol{\theta}} : \mathcal{Z} \to \Theta,\]

          which is a mapping from the sample space to the parameter space.

          Estimators can be constructed within a decision-theoretic framework. Consider a nonnegative loss function, $L(\boldsymbol{\theta}, \hat{\boldsymbol{\theta}}(\boldsymbol{Z}))$, which assesses an estimator $\hat{\boldsymbol{\theta}}(\cdot)$ for a given $\boldsymbol{\theta}$ and data set $\boldsymbol{Z} \sim f(\boldsymbol{z} \mid \boldsymbol{\theta})$, where $f(\boldsymbol{z} \mid \boldsymbol{\theta})$ is the probability density function of the data conditional on $\boldsymbol{\theta}$. An estimator's Bayes risk is its loss averaged over all possible parameter values and data realisations,

          \[\int_\Theta \int_{\mathcal{Z}} L(\boldsymbol{\theta}, \hat{\boldsymbol{\theta}}(\boldsymbol{z}))f(\boldsymbol{z} \mid \boldsymbol{\theta}) \rm{d} \boldsymbol{z} \rm{d} \Pi(\boldsymbol{\theta}), \]

          where $\Pi(\cdot)$ is a prior measure for $\boldsymbol{\theta}$. Any minimiser of the Bayes risk is said to be a Bayes estimator with respect to $L(\cdot, \cdot)$ and $\Pi(\cdot)$.

Bayes estimators are theoretically attractive: for example, unique Bayes estimators are admissible and, under suitable regularity conditions and the squared-error loss, are consistent and asymptotically efficient. Further, for a large class of prior distributions, every set of conditions that implies consistency of the maximum likelihood (ML) estimator also implies consistency of Bayes estimators. Importantly, Bayes estimators are not motivated purely by asymptotics: by construction, they are Bayes irrespective of the sample size and model class. Unfortunately, however, Bayes estimators are typically unavailable in closed form for the complex models often encountered in practice. A way forward is to assume a flexible parametric model for $\hat{\boldsymbol{\theta}}(\cdot)$, and to optimise the parameters within that model in order to approximate the Bayes estimator. Neural networks are ideal candidates, since they are universal function approximators, and because they are also fast to evaluate, usually involving only simple matrix-vector operations.

          Let $\hat{\boldsymbol{\theta}}(\boldsymbol{Z}; \boldsymbol{\gamma})$ denote a neural network that returns a point estimate from data $\boldsymbol{Z}$, where $\boldsymbol{\gamma}$ contains the neural-network parameters. Bayes estimators may be approximated with $\hat{\boldsymbol{\theta}}(\cdot; \boldsymbol{\gamma}^*)$ by solving the optimisation problem,

\[\boldsymbol{\gamma}^* \equiv \underset{\boldsymbol{\gamma}}{\mathrm{arg\,min}} \; \frac{1}{K} \sum_{k = 1}^K L(\boldsymbol{\theta}^{(k)}, \hat{\boldsymbol{\theta}}(\boldsymbol{Z}^{(k)}; \boldsymbol{\gamma})),\]

whose objective function is a Monte Carlo approximation of the Bayes risk made using a set $\{\boldsymbol{\theta}^{(k)} : k = 1, \dots, K\}$ of parameter vectors sampled from the prior $\Pi(\cdot)$ and, for each $k$, data $\boldsymbol{Z}^{(k)}$ simulated from $f(\boldsymbol{z} \mid \boldsymbol{\theta}^{(k)})$. Note that this Monte Carlo approximation does not involve evaluation, or knowledge, of the likelihood function.

          The Monte Carlo approximation of the Bayes risk can be straightforwardly minimised with respect to $\boldsymbol{\gamma}$ using back-propagation and stochastic gradient descent. For sufficiently flexible architectures, the point estimator targets a Bayes estimator with respect to $L(\cdot, \cdot)$ and $\Pi(\cdot)$. We therefore call the fitted neural point estimator a neural Bayes estimator. Like Bayes estimators, neural Bayes estimators target a specific point summary of the posterior distribution. For instance, the absolute-error and squared-error loss functions lead to neural Bayes estimators that approximate the posterior median and mean, respectively.

          Construction of neural Bayes estimators

          The neural Bayes estimator is conceptually simple and can be used in a wide range of problems where other approaches, such as maximum-likelihood estimation, are computationally infeasible. The estimator also has marked practical appeal, as the general workflow for its construction is only loosely connected to the statistical or physical model being considered. The workflow is as follows:

          1. Define the prior, $\Pi(\cdot)$.
          2. Choose a loss function, $L(\cdot, \cdot)$, typically the mean-absolute-error or mean-squared-error loss.
          3. Design a suitable neural-network architecture for the neural point estimator $\hat{\boldsymbol{\theta}}(\cdot; \boldsymbol{\gamma})$.
          4. Sample parameters from $\Pi(\cdot)$ to form training/validation/test parameter sets.
          5. Given the above parameter sets, simulate data from the model, to form training/validation/test data sets.
          6. Train the neural network (i.e., estimate $\boldsymbol{\gamma}$) by minimising the loss function averaged over the training sets. During training, monitor performance and convergence using the validation sets.
          7. Assess the fitted neural Bayes estimator, $\hat{\boldsymbol{\theta}}(\cdot; \boldsymbol{\gamma}^*)$, using the test set.
+\frac{1}{K} \sum_{k = 1}^K L(\boldsymbol{\theta}^{(k)}, \hat{\boldsymbol{\theta}}(\boldsymbol{Z}^{(k)}; \boldsymbol{\gamma})),\]

whose objective function is a Monte Carlo approximation of the Bayes risk made using a set $\{\boldsymbol{\theta}^{(k)} : k = 1, \dots, K\}$ of parameter vectors sampled from the prior $\Pi(\cdot)$ and, for each $k$, data $\boldsymbol{Z}^{(k)}$ simulated from $f(\boldsymbol{z} \mid \boldsymbol{\theta}^{(k)})$. Note that this Monte Carlo approximation does not involve evaluation, or knowledge, of the likelihood function.

          The Monte Carlo approximation of the Bayes risk can be straightforwardly minimised with respect to $\boldsymbol{\gamma}$ using back-propagation and stochastic gradient descent. For sufficiently flexible architectures, the point estimator targets a Bayes estimator with respect to $L(\cdot, \cdot)$ and $\Pi(\cdot)$. We therefore call the fitted neural point estimator a neural Bayes estimator. Like Bayes estimators, neural Bayes estimators target a specific point summary of the posterior distribution. For instance, the absolute-error and squared-error loss functions lead to neural Bayes estimators that approximate the posterior median and mean, respectively.

          Construction of neural Bayes estimators

          The neural Bayes estimator is conceptually simple and can be used in a wide range of problems where other approaches, such as maximum-likelihood estimation, are computationally infeasible. The estimator also has marked practical appeal, as the general workflow for its construction is only loosely connected to the statistical or physical model being considered. The workflow is as follows:

          1. Define the prior, $\Pi(\cdot)$.
          2. Choose a loss function, $L(\cdot, \cdot)$, typically the mean-absolute-error or mean-squared-error loss.
          3. Design a suitable neural-network architecture for the neural point estimator $\hat{\boldsymbol{\theta}}(\cdot; \boldsymbol{\gamma})$.
          4. Sample parameters from $\Pi(\cdot)$ to form training/validation/test parameter sets.
          5. Given the above parameter sets, simulate data from the model, to form training/validation/test data sets.
          6. Train the neural network (i.e., estimate $\boldsymbol{\gamma}$) by minimising the loss function averaged over the training sets. During training, monitor performance and convergence using the validation sets.
          7. Assess the fitted neural Bayes estimator, $\hat{\boldsymbol{\theta}}(\cdot; \boldsymbol{\gamma}^*)$, using the test set.
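The seven steps above map directly onto functions exported by NeuralEstimators. The following sketch illustrates the correspondence for a toy model with a single parameter; the prior, model, and architecture are illustrative assumptions, and the functions used (DeepSet, PointEstimator, train, assess) are documented elsewhere in this manual.

using NeuralEstimators, Flux

# Steps 1-2: prior θ ~ U(0, 1); the default loss in train() is the absolute-error loss
sample(K) = rand(1, K)

# Step 5 (implicit model): each data set contains m iid N(θ, 1) replicates
simulate(θ, m) = [ϑ[1] .+ randn(1, m) for ϑ ∈ eachcol(θ)]

# Step 3: DeepSets architecture with MLP summary and inference networks
ψ = Chain(Dense(1, 32, relu), Dense(32, 32, relu))
ϕ = Chain(Dense(32, 32, relu), Dense(32, 1))
θ̂ = PointEstimator(DeepSet(ψ, ϕ))

# Steps 4-6: sample parameters, simulate data, and train
θ̂ = train(θ̂, sample, simulate, m = 30)

# Step 7: assess the trained estimator using a test set
θ_test = sample(1000)
Z_test = simulate(θ_test, 30)
assessment = assess(θ̂, θ_test, Z_test)
bias(assessment)
rmse(assessment)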
          diff --git a/dev/index.html b/dev/index.html index 57a959b0..d3fd45ef 100644 --- a/dev/index.html +++ b/dev/index.html @@ -8,4 +8,4 @@ pages = {1--14}, doi = {10.1080/00031305.2023.2249522}, url = {https://doi.org/10.1080/00031305.2023.2249522} -}

          Papers using NeuralEstimators

          Several other software packages have been developed to facilitate neural likelihood-free inference. These include:

          1. BayesFlow (TensorFlow)
          2. LAMPE (PyTorch)
          3. sbi (PyTorch)
          4. swyft (PyTorch)

          A summary of the functionality in these packages is given in Zammit-Mangion et al. (2024, Section 6.1). Note that this list of related packages was created in July 2024; if you have software to add to this list, please contact the package maintainer.

          +}

          Papers using NeuralEstimators

          Several other software packages have been developed to facilitate neural likelihood-free inference. These include:

          1. BayesFlow (TensorFlow)
          2. LAMPE (PyTorch)
          3. sbi (PyTorch)
          4. swyft (PyTorch)

          A summary of the functionality in these packages is given in Zammit-Mangion et al. (2024, Section 6.1). Note that this list of related packages was created in July 2024; if you have software to add to this list, please contact the package maintainer.

          diff --git a/dev/objects.inv b/dev/objects.inv index a5d7cab35c67726d37956e3a38295ef0d7618f75..6294ebb273b0c61c3457c9446e7d94e27437ee6a 100644 GIT binary patch delta 1702 zcmV;X23h%!4xA2Wn4DwphEsUa1 zBo^ex|0MQza`W=A$b>wBHuo0f7C)^BmD!GI$aF1FjT}706~dnu&4n6(%)JHqVxDa3 zGSf_yPFS&`=G6S|L%Jsm@+flPsj47j80oY> z2Xi-hp%qA#)_;nX+q9o07Q^WpxIz8)J8{y)N)L1D+YZu#s=ZM~t4a!*?M_vki5qH0 zEb&;4cLTr9i0yO1;P^XKUG|_=t{9`j5Xphw>>#d;=0NGtwi|yHut?}oqIbb76oP8r@~1*E;Aosi9)Hk)># zHqvQJ41Ymfin*l`RM#VtZ<4aogvipUs;}qQCN0UdRT+l0T zyG|>fII2xQA#bS^&QQupMmq$t2+|>vd(d|ON`F(S(J_6(^v{WZ9u%Z&5tU%2K5N2w zQMz_vk)bJHVT=2dOhuIzQgW%ZtZlyKDY{5Ao3|RKa$l%`gbG}&9;9@ZVFyOj(C-uV zwm{;6PT%)3-oR6fUKnI>WD3qY{=C0hZ0PzsaWp;vS|BYo0j7K_B+@`}xyOxRNr$yl zK!54-b@|vSjTr1|{+xyGNSChLxCKsxAsD z-%Y(}hBDa{1`IV2uYq(LlIL)e9{Qx6rER+7Fj(`6#Jdb9?aMiqwr%9nU~+XLOtFlS zq|j3p&Gs0>p@xcm(R7XYMs)|G&swf z7lJil6Ne@pItW{z7CvCUVsT4J-xYb~9~((BQqcqK-!&~!0NPu3$PW?^7DkhPuYg=* zr4M+Nm*95&NVQ~dJ?IEk4s@)A(0|w;Xf?8d3rl-{!{yr`;=KTh|2!%Oy=<~po6>mjR;ZrpOTF}ZT@Yr@Q*xXLqGhCdB zQsXp+BY+W)uRRyrIC~`&IBCu*_J;ISw!^O-j8*zo)_XC{dxo!Mp<*LrO6HDjno%!4 zDex$A3cBK-daXdua_v{+vVYk{nstqEX019sl~9;cY?Q($G{Z{hQBu5`6N6a*4{e&2 zwK*tEcEv-Pv&>xn)3Nj9&{6wxGH%-ipT`)PIq8|#S#jCiymv+4Y}mGTFHa-Hpk7lo%U`_-ysgmIJa*l%cJYhZ7|yj9^ciQ8h>mLI`c}fJ1NZ` zW;HE$@iT%HN7KJbA&dCETgnUmK$zRal>Nsp%I+l&3x#=QJe@6@#^pj0f zWs3323QJZt1=YX%knYKXJnzkUCP%I z$z*g_p-C-@XcX}%ySe)lrfcyX*O>^sRH?%B-4sWYPH&A=DZ_iLgtS+*6SBF}X3{Ry zMmlYYCVz-aF*h`V>Uu=-O;A?pa2X%%sbF+npqVDRXzhO>CzqX5WgX{L5vz(ARmshO z9$KvDrP}mQ_HXp0~QK{`b80LtuNDt{^zI;KyU?m2PKgMzdzqUNkpXH6I{ zO4}|BGBo8YY;k{r$*9soO0J|5jmft>MHeY%@>aoA?h6%=P=SrrqY&0I?7(Om`hB9_ z7Dyb>>ib^B8+dBbD}xLUOu|{mpZ8ab4PAf7584MnOQa9A%B zsDEs}E}uH3F@u1GljT#VH@W35YM3i2-}ogWN+Q%i_{Og?D3R~jJ<=RDtit?~O<7WL z)b*km%4Aa*Fw{Vt2GVIrUcyOw=#zGqHtCMTU{g#a-eovxU(T_#Z6lWkldBV9ie-!> zg`TQu_KqtNiM=mapiUa#R$P&^mH~gesG1;CD6WFqH z3~Sp8j{O(HF-p1>NV@_FS0HW$f-8_bm>TqW`O>JmQ7O%IgEPE& zAy@-8v1rnvgRu2!;S=U77Pk=eD9Ky#*-MgIRf2u=pd(Z{(6Q!RVSj(1^~eS;EbaUaTiW2fN2?L^Y3`2bcm}rJt$8Cf z(dhY?Z?SRB*aMOiR=vrC*eGBt&?SX~!ar?(M=No24_aUEqQPrM$CBrQ@Bej~V_v}w zk^cBJg47kg#(f^N=T&^p?lBG=6Rn6wzSsN!BHrRhUG@9m*7WMI2Pu7y8h=@GN?PHJ z{vH<@{eA*oxx}2>_v?7dinqM1@u{2vEoto%cx<~DY;Gsz87|IvrEr?U7OutPYtP0u z&Rzi}PKvXN-IJEgcKEe}u}Z(nTCckK&hV8eWo%@0$=tDxAJyWM0*?}>pez1q))Le# z*M8M5n_Z+?*T`nps?}3-iGL}@Mk#zkGpo58CB>^bG3e!Z=u)g|^q@c46^~WUGJW}X z$Ig>ON6pK{xNVokJjTfMNlm|wipysI-V}YaVcW*O{EYAq9;7g`FhcRa?b9WSf&?7$*+J4Td#gQ+o)>LJ?8JoES js8`T;-;}j!{zfw#DiV%mTdGkj8&({Hb+P*&H2X90LVi)F diff --git a/dev/search_index.js b/dev/search_index.js index e087b7b8..daa0f24f 100644 --- a/dev/search_index.js +++ b/dev/search_index.js @@ -1,3 +1,3 @@ var documenterSearchIndex = {"docs": -[{"location":"API/simulation/#Model-specific-functions","page":"Model-specific functions","title":"Model-specific functions","text":"","category":"section"},{"location":"API/simulation/#Data-simulators","page":"Model-specific functions","title":"Data simulators","text":"","category":"section"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"The philosophy of NeuralEstimators is to cater for arbitrary statistical models by having the user define their statistical model implicitly through simulated data. 
However, the following functions have been included as they may be helpful to others, and their source code illustrates how a user could formulate code for their own model.","category":"page"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"See also Distributions.jl for a large range of distributions implemented in Julia, and the package RCall for calling R functions within Julia. ","category":"page"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"simulategaussian\n\nsimulatepotts\n\nsimulateschlather","category":"page"},{"location":"API/simulation/#NeuralEstimators.simulategaussian","page":"Model-specific functions","title":"NeuralEstimators.simulategaussian","text":"simulategaussian(L::AbstractMatrix, m = 1)\n\nSimulates m independent and identically distributed (i.i.d.) realisations from a mean-zero multivariate Gaussian random variable with associated lower Cholesky factor L. \n\nIf m is not specified, the simulated data are returned as a vector with length equal to the number of spatial locations, n; otherwise, the data are returned as an nxm matrix.\n\nExamples\n\nusing NeuralEstimators, Distances, LinearAlgebra\n\nn = 500\nρ = 0.6\nν = 1.0\nS = rand(n, 2)\nD = pairwise(Euclidean(), S, dims = 1)\nΣ = Symmetric(matern.(D, ρ, ν))\nL = cholesky(Σ).L\nsimulategaussian(L)\n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#NeuralEstimators.simulatepotts","page":"Model-specific functions","title":"NeuralEstimators.simulatepotts","text":"simulatepotts(grid::Matrix{Int}, β)\nsimulatepotts(grid::Matrix{Union{Int, Nothing}}, β)\nsimulatepotts(nrows::Int, ncols::Int, num_states::Int, β)\n\nChequerboard Gibbs sampling from 2D Potts model with parameter β>0.\n\nApproximately independent simulations can be obtained by setting nsims > 1 or num_iterations > burn. The degree to which the resulting simulations can be considered independent depends on the thinning factor (thin) and the burn-in (burn).\n\nKeyword arguments\n\nnsims = 1: number of approximately independent replicates. \nnum_iterations = 2000: number of MCMC iterations.\nburn = num_iterations: burn-in.\nthin = 10: thinning factor.\n\nExamples\n\nusing NeuralEstimators \n\n## Marginal simulation \nβ = 0.8\nsimulatepotts(10, 10, 5, β)\n\n## Marginal simulation: approximately independent samples \nsimulatepotts(10, 10, 5, β; nsims = 100, thin = 10)\n\n## Conditional simulation \nβ = 0.8\ncomplete_grid = simulatepotts(50, 50, 2, β) # simulate marginally from the Ising model \nincomplete_grid = removedata(complete_grid, 0.1) # remove 10% of the pixels at random \nimputed_grid = simulatepotts(incomplete_grid, β) # conditionally simulate over missing pixels\n\n## Multiple conditional simulations \nimputed_grids = simulatepotts(incomplete_grid, β; num_iterations = 2000, burn = 1000, thin = 10)\n\n## Recreate Fig. 8.8 of Marin & Robert (2007) “Bayesian Core”\nusing Plots \ngrids = [simulatepotts(100, 100, 2, β) for β ∈ 0.3:0.1:1.2]\nheatmaps = heatmap.(grids, legend = false, aspect_ratio=1)\nPlots.plot(heatmaps...)\n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#NeuralEstimators.simulateschlather","page":"Model-specific functions","title":"NeuralEstimators.simulateschlather","text":"simulateschlather(L::Matrix, m = 1; C = 3.5, Gumbel::Bool = false)\n\nSimulates m independent and identically distributed (i.i.d.) 
realisations from Schlather's max-stable model using the algorithm for approximate simulation given by Schlather (2002).\n\nRequires the lower Cholesky factor L associated with the covariance matrix of the underlying Gaussian process. \n\nIf m is not specified, the simulated data are returned as a vector with length equal to the number of spatial locations, n; otherwise, the data are returned as an nxm matrix.\n\nKeyword arguments\n\nC = 3.5: a tuning parameter that controls the accuracy of the algorithm: small C favours computational efficiency, while large C favours accuracy. Schlather (2002) recommends the use of C = 3.\nGumbel = true: flag indicating whether the data should be log-transformed from the unit Fréchet scale to the Gumbel scale.\n\nExamples\n\nusing NeuralEstimators, Distances, LinearAlgebra\n\nn = 500\nρ = 0.6\nν = 1.0\nS = rand(n, 2)\nD = pairwise(Euclidean(), S, dims = 1)\nΣ = Symmetric(matern.(D, ρ, ν))\nL = cholesky(Σ).L\nsimulateschlather(L)\n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#Spatial-point-processes","page":"Model-specific functions","title":"Spatial point processes","text":"","category":"section"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"maternclusterprocess","category":"page"},{"location":"API/simulation/#NeuralEstimators.maternclusterprocess","page":"Model-specific functions","title":"NeuralEstimators.maternclusterprocess","text":"maternclusterprocess(; λ=10, μ=10, r=0.1, xmin=0, xmax=1, ymin=0, ymax=1, unit_bounding_box=false)\n\nSimulates a Matérn cluster process with density of parent Poisson point process λ, mean number of daughter points μ, and radius of cluster disk r, over the simulation window defined by xmin and xmax, ymin and ymax.\n\nIf unit_bounding_box is true, then the simulated points will be scaled so that the longest side of their bounding box is equal to one (this may change the simulation window). 
\n\nSee also the R package spatstat, which provides functions for simulating from a range of point processes and which can be interfaced from Julia using RCall.\n\nExamples\n\nusing NeuralEstimators\n\n# Simulate a realisation from a Matérn cluster process\nS = maternclusterprocess()\n\n# Visualise realisation (requires UnicodePlots)\nusing UnicodePlots\nscatterplot(S[:, 1], S[:, 2])\n\n# Visualise realisations from the cluster process with varying parameters\nn = 250\nλ = [10, 25, 50, 90]\nμ = n ./ λ\nplots = map(eachindex(λ)) do i\n\tS = maternclusterprocess(λ = λ[i], μ = μ[i])\n\tscatterplot(S[:, 1], S[:, 2])\nend\n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#Covariance-functions","page":"Model-specific functions","title":"Covariance functions","text":"","category":"section"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"These covariance functions may be of use for various models.","category":"page"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"matern\n\npaciorek","category":"page"},{"location":"API/simulation/#NeuralEstimators.matern","page":"Model-specific functions","title":"NeuralEstimators.matern","text":"matern(h, ρ, ν, σ² = 1)\n\nGiven distance boldsymbolh (h), computes the Matérn covariance function,\n\nC(boldsymbolh) = sigma^2 frac2^1 - nuGamma(nu) left(fracboldsymbolhrhoright)^nu K_nu left(fracboldsymbolhrhoright)\n\nwhere ρ is a range parameter, ν is a smoothness parameter, σ² is the marginal variance, Gamma(cdot) is the gamma function, and K_nu(cdot) is the modified Bessel function of the second kind of order nu.\n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#NeuralEstimators.paciorek","page":"Model-specific functions","title":"NeuralEstimators.paciorek","text":"paciorek(s, r, ω₁, ω₂, ρ, β)\n\nGiven spatial locations s and r, computes the nonstationary covariance function, \n\nC(boldsymbols boldsymbolr) = \nboldsymbolSigma(boldsymbols)^14\nboldsymbolSigma(boldsymbolr)^14\nleftfracboldsymbolSigma(boldsymbols) + boldsymbolSigma(boldsymbolr)2right^-12\nC^0big(sqrtQ(boldsymbols boldsymbolr)big) \n\nwhere C^0(h) = exp-(hrho)^32 for range parameter rho 0, the matrix boldsymbolSigma(boldsymbols) = exp(betaboldsymbols - boldsymbolomega)boldsymbolI is a kernel matrix (Paciorek and Schervish, 2006) with scale parameter beta 0 and boldsymbolomega equiv (omega_1 omega_2) in mathcalD, and \n\nQ(boldsymbols boldsymbolr) = \n(boldsymbols - boldsymbolr)\nleft(fracboldsymbolSigma(boldsymbols) + boldsymbolSigma(boldsymbolr)2right)^-1\n(boldsymbols - boldsymbolr)\n\nis the squared Mahalanobis distance between boldsymbols and boldsymbolr. \n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#Density-functions","page":"Model-specific functions","title":"Density functions","text":"","category":"section"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"Density functions are not needed in the workflow of NeuralEstimators. However, as part of a series of comparison studies between neural estimators and likelihood-based estimators given in various paper, we have developed the following functions for evaluating the density function for several popular distributions. 
We include these in NeuralEstimators to cater for the possibility that they may be of use in future comparison studies.","category":"page"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"gaussiandensity\n\nschlatherbivariatedensity","category":"page"},{"location":"API/simulation/#NeuralEstimators.gaussiandensity","page":"Model-specific functions","title":"NeuralEstimators.gaussiandensity","text":"gaussiandensity(y::V, L::LT) where {V <: AbstractVector, LT <: LowerTriangular}\ngaussiandensity(y::A, L::LT) where {A <: AbstractArray, LT <: LowerTriangular}\ngaussiandensity(y::A, Σ::M) where {A <: AbstractArray, M <: AbstractMatrix}\n\nEfficiently computes the density function for y ~ 𝑁(0, Σ) for covariance matrix Σ, and where L is lower Cholesky factor of Σ.\n\nThe method gaussiandensity(y::A, L::LT) assumes that the last dimension of y contains independent and identically distributed (iid) replicates.\n\nThe log-density is returned if the keyword argument logdensity is true (default).\n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#NeuralEstimators.schlatherbivariatedensity","page":"Model-specific functions","title":"NeuralEstimators.schlatherbivariatedensity","text":"schlatherbivariatedensity(z₁, z₂, ψ; logdensity = true)\n\nThe bivariate density function for Schlather's max-stable model.\n\n\n\n\n\n","category":"function"},{"location":"workflow/examples/#Examples","page":"Examples","title":"Examples","text":"","category":"section"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Before proceeding, we first load the required packages. The following packages are used throughout these examples:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"using NeuralEstimators\nusing Flux # Julia's deep-learning library\nusing Distributions # sampling from probability distributions\nusing AlgebraOfGraphics # visualisation\nusing CairoMakie # visualisation","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"The following packages will be used in the examples with Gridded data and Irregular spatial data: ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"using Distances # computing distance matrices \nusing Folds # parallel simulation (start Julia with --threads=auto)\nusing LinearAlgebra # Cholesky factorisation","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"The following packages are used only in the example with Irregular spatial data: ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"using GraphNeuralNetworks # GNN architecture\nusing Statistics # mean()","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Finally, various GPU backends can be used (see the Flux documentation for details). 
For instance, if one wishes to employ an NVIDIA GPU when running the following examples, simply the load the following packages: ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"using CUDA\nusing cuDNN","category":"page"},{"location":"workflow/examples/#Univariate-data","page":"Examples","title":"Univariate data","text":"","category":"section"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Here we develop a neural Bayes estimator for boldsymboltheta equiv (mu sigma) from data Z_1 dots Z_m that are independent and identically distributed realisations from the distribution N(mu sigma^2). ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"First, we define a function to sample parameters from the prior distribution. Here, we assume that the parameters are independent a priori and we adopt the marginal priors mu sim N(0 1) and sigma sim IG(3 1). The sampled parameters are stored as p times K matrices, with p the number of parameters in the model and K the number of sampled parameter vectors:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"function sample(K)\n\tμ = rand(Normal(0, 1), 1, K)\n\tσ = rand(InverseGamma(3, 1), 1, K)\n\tθ = vcat(μ, σ)\n\treturn θ\nend","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next, we implicitly define the statistical model through data simulation. In this package, the data are always stored as a Vector{A}, where each element of the vector is associated with one parameter vector, and where the type A depends on the multivariate structure of the data. Since in this example each replicate Z_1 dots Z_m is univariate, A should be a Matrix with d=1 row and m columns. Below, we define our simulator given a single parameter vector, and given a matrix of parameter vectors (which simply applies the simulator to each column):","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"simulate(θ, m) = [ϑ[1] .+ ϑ[2] .* randn(1, m) for ϑ ∈ eachcol(θ)]","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"We now design our neural-network architecture. The workhorse of the package is the DeepSet architecture, which provides an elegant framework for making inference with an arbitrary number of independent replicates and for incorporating both neural and user-defined statistics. The DeepSets framework consists of two neural networks, a summary network and an inference network. The inference network (also known as the outer network) is always a multilayer perceptron (MLP). However, the architecture of the summary network (also known as the inner network) depends on the multivariate structure of the data. 
With unstructured data (i.e., when there is no spatial or temporal correlation within a replicate), we use an MLP with input dimension equal to the dimension of each replicate of the statistical model (i.e., one for univariate data): ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"p = 2 # number of parameters \nψ = Chain(Dense(1, 64, relu), Dense(64, 64, relu)) # summary network\nϕ = Chain(Dense(64, 64, relu), Dense(64, p)) # inference network\narchitecture = DeepSet(ψ, ϕ)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"In this example, we wish to construct a point estimator for the unknown parameter vector, and we therefore initialise a PointEstimator object based on our chosen architecture (see Estimators for a list of other estimators available in the package): ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ̂ = PointEstimator(architecture)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next, we train the estimator using train(), here using the default absolute-error loss. We'll train the estimator using 50 independent replicates per parameter configuration. Below, we pass our user-defined functions for sampling parameters and simulating data, but one may also pass parameter or data instances, which will be held fixed during training:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"m = 50\nθ̂ = train(θ̂, sample, simulate, m = m)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"To fully exploit the amortised nature of neural estimators, one may wish to save a trained estimator and load it in later sessions: see Saving and loading neural estimators for details on how this can be done. ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"The function assess() can be used to assess the trained estimator. Parametric and non-parametric bootstrap-based uncertainty quantification are facilitated by bootstrap() and interval(), and this can also be included in the assessment stage through the keyword argument boot:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ_test = sample(1000)\nZ_test = simulate(θ_test, m)\nassessment = assess(θ̂, θ_test, Z_test, boot = true)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"The resulting Assessment object contains the sampled parameters, the corresponding point estimates, and the corresponding lower and upper bounds of the bootstrap intervals. This object can be used to compute various diagnostics:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"bias(assessment) # μ = 0.002, σ = 0.017\nrmse(assessment) # μ = 0.086, σ = 0.078\nrisk(assessment) # μ = 0.055, σ = 0.056\nplot(assessment)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"(Image: Univariate Gaussian example: Estimates vs. 
truth)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"As an alternative form of uncertainty quantification, one may approximate a set of marginal posterior quantiles by training a second estimator under the quantile loss function, which allows one to generate approximate marginal posterior credible intervals. This is facilitated with IntervalEstimator which, by default, targets 95% central credible intervals:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"q̂ = IntervalEstimator(architecture)\nq̂ = train(q̂, sample, simulate, m = m)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"The resulting posterior credible-interval estimator can also be assessed with empirical simulation-based methods using assess(), as we did above for the point estimator. Often, these intervals have better coverage than bootstrap-based intervals.","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Once an estimator is deemed to be satisfactorily calibrated, it may be applied to observed data (below, we use simulated data as a substitute for observed data):","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ = sample(1) # true parameters\nZ = simulate(θ, m) # \"observed\" data\nθ̂(Z) # point estimates\ninterval(bootstrap(θ̂, Z)) # 95% non-parametric bootstrap intervals\ninterval(q̂, Z) # 95% marginal posterior credible intervals","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"To utilise a GPU for improved computational efficiency, one may simply move the estimator and the data to the GPU through the calls θ̂ = gpu(θ̂) and Z = gpu(Z) before applying the estimator. Note that GPUs often have limited memory relative to CPUs, and this can sometimes lead to memory issues when working with very large data sets: in these cases, the function estimateinbatches() can be used to apply the estimator over batches of data to circumvent any memory concerns. ","category":"page"},{"location":"workflow/examples/#Unstructured-multivariate-data","page":"Examples","title":"Unstructured multivariate data","text":"","category":"section"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Suppose now that each data set now consists of m replicates boldsymbolZ_1 dots boldsymbolZ_m of a d-dimensional multivariate distribution. Everything remains as given in the univariate example above, except that we now store each data set as a d times m matrix (previously they were stored as 1times m matrices), and the summary network of the DeepSets representation takes a d-dimensional input (previously it took a 1-dimensional input).","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Note that, when estimating a full covariance matrix, one may wish to constrain the neural estimator to only produce parameters that imply a valid (i.e., positive definite) covariance matrix. This can be achieved by appending a CovarianceMatrix layer to the end of the outer network of the DeepSets representation. 
However, the estimator will often learn to provide valid estimates, even if not constrained to do so.","category":"page"},{"location":"workflow/examples/#Gridded-data","page":"Examples","title":"Gridded data","text":"","category":"section"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"For data collected over a regular grid, neural estimators are typically based on a convolutional neural network (CNN; see, e.g., Dumoulin and Visin, 2016). ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"When using CNNs with NeuralEstimators, each data set must be stored as a multi-dimensional array. The penultimate dimension stores the so-called \"channels\" (this dimension is singleton for univariate processes, two for bivariate processes, etc.), while the final dimension stores independent replicates. For example, to store 50 independent replicates of a bivariate spatial process measured over a 10times15 grid, one would construct an array of dimension 10times15times2times50.","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"For illustration, here we develop a neural Bayes estimator for the spatial Gaussian process model with exponential covariance function and unknown range parameter theta. The spatial domain is taken to be the unit square, and we adopt the prior theta sim U(005 05). ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Simulation from Gaussian processes typically involves the computation of an expensive intermediate object, namely, the Cholesky factor of a covariance matrix. Storing intermediate objects can enable the fast simulation of new data sets when the parameters are held fixed. Hence, in this example, we define a custom type Parameters subtyping ParameterConfigurations for storing the matrix of parameters and the corresponding Cholesky factors: ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"struct Parameters{T} <: ParameterConfigurations\n\tθ::Matrix{T}\n\tL\nend","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Further, we define two constructors for our custom type: one that accepts an integer K, and another that accepts a ptimes K matrix of parameters. 
The former constructor will be useful during the training stage for sampling from the prior distribution, while the latter constructor will be useful for parametric bootstrap (since this involves repeated simulation from the fitted model):","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"function sample(K::Integer)\n\n\t# Sample parameters from the prior \n\tθ = rand(Uniform(0.05, 0.5), 1, K)\n\n\t# Pass to matrix constructor\n\tParameters(θ)\nend\n\nfunction Parameters(θ::Matrix)\n\n\t# Spatial locations, a 16x16 grid over the unit square\n\tpts = range(0, 1, length = 16)\n\tS = expandgrid(pts, pts)\n\n\t# Distance matrix, covariance matrices, and Cholesky factors\n\tD = pairwise(Euclidean(), S, dims = 1)\n\tK = size(θ, 2)\n\tL = Folds.map(1:K) do k\n\t\tΣ = exp.(-D ./ θ[k])\n\t\tcholesky(Symmetric(Σ)).L\n\tend\n\n\tParameters(θ, L)\nend","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next, we define the model simulator: ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"function simulate(parameters::Parameters, m = 1) \n\tZ = Folds.map(parameters.L) do L\n\t\tn = size(L, 1)\n\t\tz = L * randn(n, m)\n\t\tz = reshape(z, 16, 16, 1, m) # reshape to 16x16 images\n\t\tz\n\tend\n\tZ\nend","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"A possible architecture is as follows:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"# Summary network\nψ = Chain(\n\tConv((3, 3), 1 => 32, relu),\n\tMaxPool((2, 2)),\n\tConv((3, 3), 32 => 64, relu),\n\tMaxPool((2, 2)),\n\tFlux.flatten\n\t)\n\n# Inference network\nϕ = Chain(Dense(256, 64, relu), Dense(64, 1))\n\n# DeepSet\narchitecture = DeepSet(ψ, ϕ)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next, we initialise a point estimator and a posterior credible-interval estimator:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ̂ = PointEstimator(architecture)\nq̂ = IntervalEstimator(architecture)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Now we train the estimators, here using fixed parameter instances to avoid repeated Cholesky factorisations (see Storing expensive intermediate objects for data simulation and On-the-fly and just-in-time simulation for further discussion):","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"K = 10000 # number of training parameter vectors\nm = 1 # number of independent replicates in each data set\nθ_train = sample(K)\nθ_val = sample(K ÷ 10)\nθ̂ = train(θ̂, θ_train, θ_val, simulate, m = m)\nq̂ = train(q̂, θ_train, θ_val, simulate, m = m)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Once the estimators have been trained, we assess them using empirical simulation-based methods:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ_test = sample(1000)\nZ_test = simulate(θ_test)\nassessment = assess([θ̂, q̂], θ_test, Z_test)\n\nbias(assessment) # 0.005\nrmse(assessment) # 0.032\ncoverage(assessment) # 0.953\nplot(assessment) ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"(Image: Gridded spatial Gaussian process example: Estimates vs. 
truth)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Finally, we can apply our estimators to observed data. Note that when we have a single replicate only (which is often the case in spatial statistics), non-parametric bootstrap is not possible, and we instead use parametric bootstrap:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ = sample(1) # true parameter\nZ = simulate(θ) # \"observed\" data\nθ̂(Z) # point estimates\ninterval(q̂, Z) # 95% marginal posterior credible intervals\nbs = bootstrap(θ̂, θ̂(Z), simulate, m) # parametric bootstrap intervals\ninterval(bs) # 95% parametric bootstrap intervals","category":"page"},{"location":"workflow/examples/#Irregular-spatial-data","page":"Examples","title":"Irregular spatial data","text":"","category":"section"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"To cater for spatial data collected over arbitrary spatial locations, one may construct a neural estimator with a graph neural network (GNN) architecture (see Sainsbury-Dale, Zammit-Mangion, Richards, and Huser, 2023). The overall workflow remains as given in previous examples, with some key additional steps:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Sampling spatial configurations during the training phase, typically using an appropriately chosen spatial point process: see, for example, maternclusterprocess.\nStoring the spatial data as a graph: see spatialgraph.\nConstructing an appropriate architecture: see GNNSummary and SpatialGraphConv.","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"For illustration, we again consider the spatial Gaussian process model with exponential covariance function, and we define a struct for storing expensive intermediate objects needed for data simulation. 
In this case, these objects include Cholesky factors and spatial graphs (which store the adjacency matrices needed to perform graph convolution): ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"struct Parameters{T} <: ParameterConfigurations\n\tθ::Matrix{T} # true parameters \n\tL # Cholesky factors\n\tg # spatial graphs\n\tS # spatial locations \nend","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Again, we define two constructors, which will be convenient for sampling parameters from the prior during training and assessment, and for performing parametric bootstrap sampling when making inferences from observed data:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"function sample(K::Integer)\n\n\t# Sample parameters from the prior \n\tθ = rand(Uniform(0.05, 0.5), 1, K)\n\n\t# Simulate spatial configurations over the unit square\n\tn = rand(200:300, K)\n\tλ = rand(Uniform(10, 50), K)\n\tS = [maternclusterprocess(λ = λ[k], μ = n[k]/λ[k]) for k ∈ 1:K]\n\n\t# Pass to constructor\n\tParameters(θ, S)\nend\n\nfunction Parameters(θ::Matrix, S)\n\n\t# Number of parameter vectors\n\tK = size(θ, 2)\n\n\t# Distance matrices, covariance matrices, and Cholesky factors\n\tD = pairwise.(Ref(Euclidean()), S, dims = 1)\n\tL = Folds.map(1:K) do k\n\t\tΣ = exp.(-D[k] ./ θ[k])\n\t\tcholesky(Symmetric(Σ)).L\n\tend\n\n\t# Construct spatial graphs\n\tg = spatialgraph.(S)\n\n\tParameters(θ, L, g, S)\nend","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next, we define a function for simulating from the model given an object of type Parameters. The code below enables simulation of an arbitrary number of independent replicates m, and one may provide a single integer for m, or any object that can be sampled using rand(m, K) (e.g., an integer range or some distribution over the possible sample sizes):","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"function simulate(parameters::Parameters, m)\n\tK = size(parameters, 2)\n\tm = rand(m, K)\n\tmap(1:K) do k\n\t\tL = parameters.L[k]\n\t\tg = parameters.g[k]\n\t\tn = size(L, 1)\n\t\tZ = L * randn(n, m[k]) \n\t\tspatialgraph(g, Z) \n\tend\nend\nsimulate(parameters::Parameters, m::Integer = 1) = simulate(parameters, range(m, m))","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next we construct an appropriate GNN architecture, as illustrated below. 
Here, our goal is to construct a point estimator, however any other kind of estimator (see Estimators) can be constructed by simply substituting the appropriate estimator class in the final line below:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"# Spatial weight function constructed using 0-1 basis functions \nh_max = 0.15 # maximum distance to consider \nq = 10 # output dimension of the spatial weights\nw = IndicatorWeights(h_max, q)\n\n# Propagation module\npropagation = GNNChain(\n\tSpatialGraphConv(1 => q, relu, w = w, w_out = q),\n\tSpatialGraphConv(q => q, relu, w = w, w_out = q)\n)\n\n# Readout module\nreadout = GlobalPool(mean)\n\n# Global features \nglobalfeatures = SpatialGraphConv(1 => q, relu, w = w, w_out = q, glob = true)\n\n# Summary network\nψ = GNNSummary(propagation, readout, globalfeatures)\n\n# Mapping module\nϕ = Chain(\n\tDense(2q => 128, relu), \n\tDense(128 => 128, relu), \n\tDense(128 => 1, identity)\n)\n\n# DeepSet object\ndeepset = DeepSet(ψ, ϕ)\n\n# Point estimator\nθ̂ = PointEstimator(deepset)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next, we train the estimator:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"m = 1\nK = 3000\nθ_train = sample(K)\nθ_val = sample(K÷5)\nθ̂ = train(θ̂, θ_train, θ_val, simulate, m = m, epochs = 5)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Then, we assess our trained estimator as before: ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ_test = sample(1000)\nZ_test = simulate(θ_test, m)\nassessment = assess(θ̂, θ_test, Z_test)\nbias(assessment) # 0.001\nrmse(assessment) # 0.037\nrisk(assessment) # 0.029\nplot(assessment) ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"(Image: Estimates from a graph neural network (GNN) based neural Bayes estimator)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Finally, once the estimator has been assessed and is deemed to be performant, it may be applied to observed data, with bootstrap-based uncertainty quantification facilitated by bootstrap and interval. Below, we use simulated data as a substitute for observed data:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"parameters = sample(1) # sample a single parameter vector\nZ = simulate(parameters) # simulate data \nθ = parameters.θ # true parameters used to generate data\nS = parameters.S # observed locations\nθ̂(Z) # point estimates\nθ̃ = Parameters(θ̂(Z), S) # construct Parameters object from the point estimates\nbs = bootstrap(θ̂, θ̃, simulate, m) # bootstrap estimates\ninterval(bs) # parametric bootstrap confidence interval ","category":"page"},{"location":"framework/#Framework","page":"Framework","title":"Framework","text":"","category":"section"},{"location":"framework/","page":"Framework","title":"Framework","text":"In this section, we provide an overview of point estimation using neural Bayes estimators. For a more detailed discussion on the framework and its implementation, see the paper Likelihood-Free Parameter Estimation with Neural Bayes Estimators. 
For an accessible introduction to amortised neural inferential methods more broadly, see the review paper Neural Methods for Amortised Inference.","category":"page"},{"location":"framework/#Neural-Bayes-estimators","page":"Framework","title":"Neural Bayes estimators","text":"","category":"section"},{"location":"framework/","page":"Framework","title":"Framework","text":"A parametric statistical model is a set of probability distributions on a sample space mathcalZ subseteq mathbbR^n, where the probability distributions are parameterised via some parameter vector boldsymboltheta on a parameter space Theta subseteq mathbbR^p. Suppose that we have data from one such distribution, which we denote as boldsymbolZ. Then, the goal of parameter point estimation is to come up with an estimate of the unknown boldsymboltheta from boldsymbolZ using an estimator,","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":" hatboldsymboltheta mathcalZ to Theta","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"which is a mapping from the sample space to the parameter space.","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"Estimators can be constructed within a decision-theoretic framework. Consider a nonnegative loss function, L(boldsymboltheta hatboldsymboltheta(boldsymbolZ)), which assesses an estimator hatboldsymboltheta(cdot) for a given boldsymboltheta and data set boldsymbolZ sim f(boldsymbolz mid boldsymboltheta), where f(boldsymbolz mid boldsymboltheta) is the probability density function of the data conditional on boldsymboltheta. An estimator's Bayes risk is its loss averaged over all possible parameter values and data realisations,","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"int_Theta int_mathcalZ L(boldsymboltheta hatboldsymboltheta(boldsymbolz))f(boldsymbolz mid boldsymboltheta) rmd boldsymbolz rmd Pi(boldsymboltheta) ","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"where Pi(cdot) is a prior measure for boldsymboltheta. Any minimiser of the Bayes risk is said to be a Bayes estimator with respect to L(cdot cdot) and Pi(cdot).","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"Bayes estimators are theoretically attractive: for example, unique Bayes estimators are admissible and, under suitable regularity conditions and the squared-error loss, are consistent and asymptotically efficient. Further, for a large class of prior distributions, every set of conditions that imply consistency of the maximum likelihood (ML) estimator also imply consistency of Bayes estimators. Importantly, Bayes estimators are not motivated purely by asymptotics: by construction, they are Bayes irrespective of the sample size and model class. Unfortunately, however, Bayes estimators are typically unavailable in closed form for the complex models often encountered in practice. A way forward is to assume a flexible parametric model for hatboldsymboltheta(cdot), and to optimise the parameters within that model in order to approximate the Bayes estimator. 
Neural networks are ideal candidates, since they are universal function approximators, and because they are also fast to evaluate, usually involving only simple matrix-vector operations.","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"Let hatboldsymboltheta(boldsymbolZ boldsymbolgamma) denote a neural network that returns a point estimate from data boldsymbolZ, where boldsymbolgamma contains the neural-network parameters. Bayes estimators may be approximated with hatboldsymboltheta(cdot boldsymbolgamma^*) by solving the optimisation problem, ","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"boldsymbolgamma^*\nequiv\nundersetboldsymbolgammamathrmargmin \nfrac1K sum_k = 1^K L(boldsymboltheta hatboldsymboltheta(boldsymbolz boldsymbolgamma))","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"whose objective function is a Monte Carlo approximation of the Bayes risk made using a set boldsymboltheta^(k) k = 1 dots K of parameter vectors sampled from the prior Pi(cdot) and, for each k, data boldsymbolZ^(k) simulated from f(boldsymbolz mid boldsymboltheta). Note that this Monte Carlo approximation does not involve evaluation, or knowledge, of the likelihood function.","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"The Monte Carlo approximation of the Bayes risk can be straightforwardly minimised with respect to boldsymbolgamma using back-propagation and stochastic gradient descent. For sufficiently flexible architectures, the point estimator targets a Bayes estimator with respect to L(cdot cdot) and Pi(cdot). We therefore call the fitted neural point estimator a neural Bayes estimator. Like Bayes estimators, neural Bayes estimators target a specific point summary of the posterior distribution. For instance, the absolute-error and squared-error loss functions lead to neural Bayes estimators that approximate the posterior median and mean, respectively.","category":"page"},{"location":"framework/#Construction-of-neural-Bayes-estimators","page":"Framework","title":"Construction of neural Bayes estimators","text":"","category":"section"},{"location":"framework/","page":"Framework","title":"Framework","text":"The neural Bayes estimator is conceptually simple and can be used in a wide range of problems where other approaches, such as maximum-likelihood estimation, are computationally infeasible. The estimator also has marked practical appeal, as the general workflow for its construction is only loosely connected to the statistical or physical model being considered. The workflow is as follows:","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"Define the prior, Pi(cdot).\nChoose a loss function, L(cdot cdot), typically the mean-absolute-error or mean-squared-error loss.\nDesign a suitable neural-network architecture for the neural point estimator hatboldsymboltheta(cdot boldsymbolgamma).\nSample parameters from Pi(cdot) to form training/validation/test parameter sets.\nGiven the above parameter sets, simulate data from the model, to form training/validation/test data sets.\nTrain the neural network (i.e., estimate boldsymbolgamma) by minimising the loss function averaged over the training sets. 
During training, monitor performance and convergence using the validation sets.\nAssess the fitted neural Bayes estimator, hatboldsymboltheta(cdot boldsymbolgamma^*), using the test set.","category":"page"},{"location":"API/#Index","page":"Index","title":"Index","text":"","category":"section"},{"location":"API/","page":"Index","title":"Index","text":"","category":"page"},{"location":"workflow/advancedusage/#Advanced-usage","page":"Advanced usage","title":"Advanced usage","text":"","category":"section"},{"location":"workflow/advancedusage/#Saving-and-loading-neural-estimators","page":"Advanced usage","title":"Saving and loading neural estimators","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"As training is by far the most computationally demanding part of the workflow, one often trains an estimator and then saves it for later use. As discussed in the Flux documentation, there are a number of ways to do this. Perhaps the simplest approach is to save the parameters (i.e., weights and biases) of the neural network in a BSON file:","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"using Flux\nusing BSON: @save, @load\nmodel_state = Flux.state(θ̂)\n@save \"estimator.bson\" model_state","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Then, in a later session, one may initialise a neural network with the same architecture used previously, and load the saved parameters:","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"@load \"estimator.bson\" model_state\nFlux.loadmodel!(θ̂, model_state)","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Note that the estimator θ̂ must be already defined (i.e., only the network parameters are saved, not the architecture). ","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"For convenience, the function train() allows for the automatic saving of the neural-network parameters during the training stage, via the argument savepath. Specifically, if savepath is specified, neural estimator's parameters will be saved in the folder savepath and, to load the optimal parameters post training, one may use the following code, or similar:","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"using NeuralEstimators\nFlux.loadparams!(θ̂, loadbestweights(savepath))","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Above, the function loadparams!() loads the parameters of the best (as determined by loadbestweights()) neural estimator saved in savepath.","category":"page"},{"location":"workflow/advancedusage/#Storing-expensive-intermediate-objects-for-data-simulation","page":"Advanced usage","title":"Storing expensive intermediate objects for data simulation","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Parameters sampled from the prior distribution may be stored in two ways. Most simply, they can be stored as a p times K matrix, where p is the number of parameters in the model and K is the number of parameter vectors sampled from the prior distribution. 
Alternatively, they can be stored in a user-defined struct subtyping ParameterConfigurations, whose only requirement is a field θ that stores the p times K matrix of parameters. With this approach, one may store computationally expensive intermediate objects, such as Cholesky factors, for later use when conducting \"on-the-fly\" simulation, which is discussed below.","category":"page"},{"location":"workflow/advancedusage/#On-the-fly-and-just-in-time-simulation","page":"Advanced usage","title":"On-the-fly and just-in-time simulation","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"When data simulation is (relatively) computationally inexpensive, the training data set, mathcalZ_texttrain, can be simulated continuously during training, a technique coined \"simulation-on-the-fly\". Regularly refreshing mathcalZ_texttrain leads to lower out-of-sample error and to a reduction in overfitting. This strategy therefore facilitates the use of larger, more representationally-powerful networks that are prone to overfitting when mathcalZ_texttrain is fixed. Further, this technique allows for data be simulated \"just-in-time\", in the sense that they can be simulated in small batches, used to train the neural estimator, and then removed from memory. This can substantially reduce pressure on memory resources, particularly when working with large data sets. ","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"One may also regularly refresh the set vartheta_texttrain of parameter vectors used during training, and doing so leads to similar benefits. However, fixing vartheta_texttrain allows computationally expensive terms, such as Cholesky factors when working with Gaussian process models, to be reused throughout training, which can substantially reduce the training time for some models. Hybrid approaches are also possible, whereby the parameters (and possibly the data) are held fixed for several epochs (i.e., several passes through the training set when performing stochastic gradient descent) before being refreshed. ","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"The above strategies are facilitated with various methods of train().","category":"page"},{"location":"workflow/advancedusage/#Regularisation","page":"Advanced usage","title":"Regularisation","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"The term regularisation refers to a variety of techniques aimed to reduce overfitting when training a neural network, primarily by discouraging complex models.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"One common regularisation technique is known as dropout (Srivastava et al., 2014), implemented in Flux's Dropout layer. Dropout involves temporarily dropping (\"turning off\") a randomly selected set of neurons (along with their connections) at each iteration of the training stage, and this results in a computationally-efficient form of model (neural-network) averaging.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Another class of regularisation techniques involve modifying the loss function. 
For instance, L₁ regularisation (sometimes called lasso regression) adds to the loss a penalty based on the absolute value of the neural-network parameters. Similarly, L₂ regularisation (sometimes called ridge regression) adds to the loss a penalty based on the square of the neural-network parameters. Note that these penalty terms are not functions of the data or of the statistical-model parameters that we are trying to infer, and therefore do not modify the Bayes risk or the associated Bayes estimator. These regularisation techniques can be implemented straightforwardly by providing a custom optimiser to train that includes a SignDecay object for L₁ regularisation, or a WeightDecay object for L₂ regularisation. See the Flux documentation for further details.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"For example, the following code constructs a neural Bayes estimator using dropout and L₁ regularisation with penalty coefficient lambda = 10^-4:","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"using NeuralEstimators\nusing Flux\n\n# Generate data from the model Z ~ N(θ, 1) and θ ~ N(0, 1)\np = 1 # number of unknown parameters in the statistical model\nm = 5 # number of independent replicates\nd = 1 # dimension of each independent replicate\nK = 3000 # number of training samples\nθ_train = randn(1, K)\nθ_val = randn(1, K)\nZ_train = [μ .+ randn(1, m) for μ ∈ eachcol(θ_train)]\nZ_val = [μ .+ randn(1, m) for μ ∈ eachcol(θ_val)]\n\n# Architecture with dropout layers\nψ = Chain(\n\tDense(1, 32, relu),\n\tDropout(0.1),\n\tDense(32, 32, relu),\n\tDropout(0.5)\n\t) \nϕ = Chain(\n\tDense(32, 32, relu),\n\tDropout(0.5),\n\tDense(32, 1)\n\t) \nθ̂ = DeepSet(ψ, ϕ)\n\n# Optimiser with L₁ regularisation\noptimiser = Flux.setup(OptimiserChain(SignDecay(1e-4), Adam()), θ̂)\n\n# Train the estimator\ntrain(θ̂, θ_train, θ_val, Z_train, Z_val; optimiser = optimiser)","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Note that when the training data and/or parameters are held fixed during training, L₂ regularisation with penalty coefficient lambda = 10^-4 is applied by default.","category":"page"},{"location":"workflow/advancedusage/#Expert-summary-statistics","page":"Advanced usage","title":"Expert summary statistics","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Implicitly, neural estimators involve the learning of summary statistics. However, some summary statistics are available in closed form, simple to compute, and highly informative (e.g., sample quantiles, the empirical variogram, etc.). Often, explicitly incorporating these expert summary statistics in a neural estimator can simplify the optimisation problem, and lead to a better estimator. ","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"The fusion of learned and expert summary statistics is facilitated by our implementation of the DeepSet framework. Note that this implementation also allows the user to construct a neural estimator using only expert summary statistics, following, for example, Gerber and Nychka (2021) and Rai et al. (2024). 
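As a minimal sketch of this fusion, expert summary statistics may be supplied to DeepSet via its keyword argument S, with the input dimension of the inference network increased accordingly. The expert statistic used here (simply the number of replicates, returned as a one-element vector) is purely illustrative and user-defined; any function that maps a data set to a vector could be substituted:

```julia
using NeuralEstimators, Flux

d = 1   # dimension of each replicate
p = 1   # number of parameters in the statistical model
w = 32  # dimension of the learned summary statistic T(Z)

# Illustrative user-defined expert summary statistic: the sample size m,
# returned as a one-element vector so that it can be concatenated with T(Z)
S = Z -> [Float32(size(Z, ndims(Z)))]

ψ = Chain(Dense(d, w, relu), Dense(w, w, relu))   # learned summary statistics
ϕ = Chain(Dense(w + 1, w, relu), Dense(w, p))     # input dimension = w + dim(S(Z))
deepset = DeepSet(ψ, ϕ; S = S)
θ̂ = PointEstimator(deepset)

# The estimator is then trained and assessed in the usual way, e.g.,
# θ̂ = train(θ̂, sample, simulate, m = 30)
```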
Note also that the user may specify arbitrary expert summary statistics, however, for convenience several standard User-defined summary statistics are provided with the package, including a fast approximate version of the empirical variogram. ","category":"page"},{"location":"workflow/advancedusage/#Variable-sample-sizes","page":"Advanced usage","title":"Variable sample sizes","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"A neural estimator in the Deep Set representation can be applied to data sets of arbitrary size. However, even when the neural Bayes estimator approximates the true Bayes estimator arbitrarily well, it is conditional on the number of replicates, m, and is not necessarily a Bayes estimator for m^* ne m. Denote a data set comprising m replicates as boldsymbolZ^(m) equiv (boldsymbolZ_1 dots boldsymbolZ_m). There are at least two (non-mutually exclusive) approaches one could adopt if data sets with varying m are envisaged, which we describe below.","category":"page"},{"location":"workflow/advancedusage/#Piecewise-estimators","page":"Advanced usage","title":"Piecewise estimators","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"If data sets with varying m are envisaged, one could train l neural Bayes estimators for different sample sizes, or groups thereof (e.g., a small-sample estimator and a large-sample estimator). Specifically, for sample-size changepoints m_1, m_2, dots, m_l-1, one could construct a piecewise neural Bayes estimator,","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"hatboldsymboltheta(boldsymbolZ^(m) boldsymbolgamma^*)\n=\nbegincases\nhatboldsymboltheta(boldsymbolZ^(m) boldsymbolgamma^*_tildem_1) m leq m_1\nhatboldsymboltheta(boldsymbolZ^(m) boldsymbolgamma^*_tildem_2) m_1 m leq m_2\nquad vdots \nhatboldsymboltheta(boldsymbolZ^(m) boldsymbolgamma^*_tildem_l) m m_l-1\nendcases","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"where, here, boldsymbolgamma^* equiv (boldsymbolgamma^*_tildem_1 dots boldsymbolgamma^*_tildem_l-1), and where boldsymbolgamma^*_tildem are the neural-network parameters optimised for sample size tildem chosen so that hatboldsymboltheta(cdot boldsymbolgamma^*_tildem) is near-optimal over the range of sample sizes in which it is applied. This approach works well in practice, and it is less computationally burdensome than it first appears when used in conjunction with pre-training.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Piecewise neural estimators are implemented with the struct, PiecewiseEstimator, and their construction is facilitated with trainx(). 
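The following rough sketch illustrates how such an estimator might be assembled; the exact signatures of trainx() and PiecewiseEstimator should be checked against their docstrings, as here we assume that trainx() accepts a vector of sample sizes and returns a corresponding vector of trained estimators, and that PiecewiseEstimator() accepts these estimators together with the sample-size changepoints:

```julia
using NeuralEstimators, Flux

# Toy model Z ~ N(θ, 1) with prior θ ~ N(0, 1)
p = 1
sample(K) = randn32(p, K)
simulate(θ, m) = [μ .+ randn32(1, m) for μ ∈ eachcol(θ)]

# Architecture shared across the sample-size regimes
ψ = Chain(Dense(1, 32, relu), Dense(32, 32, relu))
ϕ = Chain(Dense(32, 32, relu), Dense(32, p))
θ̂ = PointEstimator(DeepSet(ψ, ϕ))

# Assumed usage: train versions of θ̂ for sample sizes m = 1, 10, and 30
# (leaning on pre-training between consecutive sample sizes), and combine
# them with sample-size changepoints m₁ = 1 and m₂ = 10
estimators = trainx(θ̂, sample, simulate, [1, 10, 30])
θ̂_piecewise = PiecewiseEstimator(estimators, [1, 10])
```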
","category":"page"},{"location":"workflow/advancedusage/#Training-with-variable-sample-sizes","page":"Advanced usage","title":"Training with variable sample sizes","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Alternatively, one could treat the sample size as a random variable, M, with support over a set of positive integers, mathcalM, in which case, for the neural Bayes estimator, the risk function becomes","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"sum_m in mathcalM\nP(M=m)left(\nint_Theta int_mathcalZ^m L(boldsymboltheta hatboldsymboltheta(boldsymbolz^(m)))f(boldsymbolz^(m) mid boldsymboltheta) rmd boldsymbolz^(m) rmd Pi(boldsymboltheta)\nright)","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"This approach does not materially alter the workflow, except that one must also sample the number of replicates before simulating the data during the training phase. ","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"The following pseudocode illustrates how one may modify a general data simulator to train under a range of sample sizes, with the distribution of M defined by passing any object that can be sampled using rand(m, K) (e.g., an integer range like 1:30, an integer-valued distribution from Distributions.jl, etc.):","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"function simulate(parameters, m) \n\n\t## Number of parameter vectors stored in parameters\n\tK = size(parameters, 2)\n\n\t## Generate K sample sizes from the prior distribution for M\n\tm̃ = rand(m, K)\n\n\t## Pseudocode for data simulation\n\tZ = [ for k ∈ 1:K]\n\n\treturn Z\nend\n\n## Method that allows an integer to be passed for m\nsimulate(parameters, m::Integer) = simulate(parameters, range(m, m))","category":"page"},{"location":"workflow/advancedusage/#Missing-data","page":"Advanced usage","title":"Missing data","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Neural networks do not naturally handle missing data, and this property can preclude their use in a broad range of applications. Here, we describe two techniques that alleviate this challenge in the context of parameter point estimation: The masking approach and The neural EM algorithm.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"As a running example, we consider a Gaussian process model where the data are collected over a regular grid, but where some elements of the grid are unobserved. This situation often arises in, for example, remote-sensing applications, where the presence of cloud cover prevents measurement in some places. Below, we load the packages needed in this example, and define some aspects of the model that will remain constant throughout (e.g., the prior, the spatial domain, etc.). We also define structs and functions for sampling from the prior distribution and for simulating marginally from the data model. 
","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"using Distances \nusing Distributions \nusing Flux\nusing LinearAlgebra\nusing NeuralEstimators\nusing Statistics: mean\n\n# Set the prior and define the number of parameters in the statistical model\nΠ = (\n\tτ = Uniform(0, 1.0), \n\tρ = Uniform(0, 0.4)\n)\np = length(Π) \n\n# Define the (gridded) spatial domain and compute the distance matrix \npoints = range(0, 1, 16)\nS = expandgrid(points, points)\nD = pairwise(Euclidean(), S, dims = 1)\n\n# Store model information for later use\nξ = (\n\tΠ = Π,\n\tS = S,\n\tD = D\n)\n\n# Struct for storing parameters+Cholesky factors \nstruct Parameters <: ParameterConfigurations\n\tθ\n\tL\nend\n\n# Constructor for above struct\nfunction Parameters(K::Integer, ξ)\n\n\t# Sample parameters from the prior\n\tΠ = ξ.Π\n\tτ = rand(Π.τ, K)\n\tρ = rand(Π.ρ, K)\n\tν = 1 # fixed smoothness\n\n\t# Compute Cholesky factors \n\tL = maternchols(ξ.D, ρ, ν)\n\n\t# Concatenate into matrix\n\tθ = permutedims(hcat(τ, ρ))\n\n\tParameters(θ, L)\nend\n\n# Marginal simulation from the data model\nfunction simulate(parameters::Parameters, m::Integer)\n\n\tK = size(parameters, 2)\n\tτ = parameters.θ[1, :]\n\tL = parameters.L\n\tn = isqrt(size(L, 1))\n\n\tZ = map(1:K) do k\n\t\tz = simulategaussian(L[:, :, k], m)\n\t\tz = z + τ[k] * randn(size(z)...)\n\t\tz = Float32.(z)\n\t\tz = reshape(z, n, n, 1, :)\n\t\tz\n\tend\n\n\treturn Z\nend","category":"page"},{"location":"workflow/advancedusage/#The-masking-approach","page":"Advanced usage","title":"The masking approach","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"The first missing-data technique that we consider is the so-called masking approach of Wang et al. (2024). The strategy involves completing the data by replacing missing values with zeros, and using auxiliary variables to encode the missingness pattern, which are also passed into the network.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Let boldsymbolZ denote the complete-data vector. Then, the masking approach considers inference based on boldsymbolW, a vector of indicator variables that encode the missingness pattern (with elements equal to one or zero if the corresponding element of boldsymbolZ is observed or missing, respectively), and","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"boldsymbolU equiv boldsymbolZ odot boldsymbolW","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"where odot denotes elementwise multiplication and the product of a missing element and zero is defined to be zero. Irrespective of the missingness pattern, boldsymbolU and boldsymbolW have the same fixed dimensions and hence may be processed easily using a single neural network. A neural point estimator is then trained on realisations of boldsymbolU boldsymbolW which, by construction, do not contain any missing elements.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Since the missingness pattern boldsymbolW is now an input to the neural network, it must be incorporated during the training phase. 
When interest lies only in making inference from a single already-observed data set, boldsymbolW is fixed and known, and the Bayes risk remains unchanged. However, amortised inference, whereby one trains a single neural network that will be used to make inference with many data sets, requires a joint model for the data boldsymbolZ and the missingness pattern boldsymbolW: ","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"# Marginal simulation from the data model and a MCAR missingness model\nfunction simulatemissing(parameters::Parameters, m::Integer)\n\n\tZ = simulate(parameters, m) # simulate completely-observed data\n\n\tUW = map(Z) do z\n\t\tprop = rand() # sample a missingness proportion \n\t\tz = removedata(z, prop) # randomly remove a proportion of the data\n\t\tuw = encodedata(z) # replace missing entries with zero and encode missingness pattern\n\t\tuw\n\tend\n\n\treturn UW\nend","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Note that the helper functions removedata() and encodedata() facilitate the construction of augmented data sets boldsymbolU boldsymbolW. ","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Next, we construct and train a masked neural Bayes estimator. Here, the first convolutional layer takes two input channels, since we store the augmented data boldsymbolU in the first channel and the missingness pattern boldsymbolW in the second. We construct a point estimator, but the masking approach is applicable with any other kind of estimator (see Estimators): ","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"# Construct DeepSet object \nψ = Chain(\n\tConv((10, 10), 2 => 16, relu),\n\tConv((5, 5), 16 => 32, relu),\n\tConv((3, 3), 32 => 64, relu),\n\tFlux.flatten\n\t)\nϕ = Chain(Dense(64, 256, relu), Dense(256, p, exp))\ndeepset = DeepSet(ψ, ϕ)\n\n# Initialise point estimator \nθ̂ = PointEstimator(deepset)\n\n# Train the masked neural Bayes estimator\nθ̂ = train(θ̂, Parameters, simulatemissing, m = 1, ξ = ξ, K = 1000, epochs = 10)","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Once trained, we can apply our masked neural Bayes estimator to (incomplete) observed data. The data must be encoded in the same manner that was done during training. Below, we use simulated data as a surrogate for real data, with a missingness proportion of 0.25: ","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"θ = Parameters(1, ξ)\nZ = simulate(θ, 1)[1]\nZ = removedata(Z, 0.25) \nUW = encodedata(Z)\nθ̂(UW)","category":"page"},{"location":"workflow/advancedusage/#The-neural-EM-algorithm","page":"Advanced usage","title":"The neural EM algorithm","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Let boldsymbolZ_1 and boldsymbolZ_2 denote the observed and unobserved (i.e., missing) data, respectively, and let boldsymbolZ equiv (boldsymbolZ_1 boldsymbolZ_2) denote the complete data. A classical approach to facilitating inference when data are missing is the expectation-maximisation (EM) algorithm. 
The neural EM algorithm is an approximate version of the conventional (Bayesian) Monte Carlo EM algorithm which, at the lth iteration, updates the parameter vector through","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"boldsymboltheta^(l) = argmax_boldsymboltheta sum_h = 1^H ell(boldsymboltheta boldsymbolZ_1 boldsymbolZ_2^(lh)) + log pi_H(boldsymboltheta)","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"where realisations of the missing-data component, boldsymbolZ_2^(lh) h = 1 dots H, are sampled from the probability distribution of boldsymbolZ_2 given boldsymbolZ_1 and boldsymboltheta^(l-1), and where pi_H(boldsymboltheta) propto pi(boldsymboltheta)^H is a concentrated version of the original prior density. Given the conditionally simulated data, the neural EM algorithm performs the above EM update using a neural network that returns the MAP estimate (i.e., the posterior mode) conditionally simulated data. Such a neural network can be obtained by training a neural Bayes estimator under a continuous relaxation of the 0–1 loss function, such as ","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"First, we construct a neural approximation of the MAP estimator. In this example, we will take H=50. When H is taken to be reasonably large, one may lean on the Bernstein-von Mises theorem to train the neural Bayes estimator under linear or quadratic loss; otherwise, one should train the estimator under a continuous relaxation of the 0–1 loss (e.g., the tanhloss or kpowerloss in the limit kappa to 0):","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"# Construct DeepSet object \nψ = Chain(\n\tConv((10, 10), 1 => 16, relu),\n\tConv((5, 5), 16 => 32, relu),\n\tConv((3, 3), 32 => 64, relu),\n\tFlux.flatten\n\t)\nϕ = Chain(\n\tDense(64, 256, relu),\n\tDense(256, p, exp)\n\t)\ndeepset = DeepSet(ψ, ϕ)\n\n# Initialise point estimator \nθ̂ = PointEstimator(deepset)\n\n# Train neural Bayes estimator\nH = 50\nθ̂ = train(θ̂, Parameters, simulate, m = H, ξ = ξ, K = 1000, epochs = 10)","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Next, we define a function for conditional simulation (see EM for details on the required format of this function): ","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"function simulateconditional(Z::M, θ, ξ; nsims::Integer = 1) where {M <: AbstractMatrix{Union{Missing, T}}} where T\n\n\t# Save the original dimensions\n\tdims = size(Z)\n\n\t# Convert to vector\n\tZ = vec(Z)\n\n\t# Compute the indices of the observed and missing data\n\tI₁ = findall(z -> !ismissing(z), Z) # indices of observed data\n\tI₂ = findall(z -> ismissing(z), Z) # indices of missing data\n\tn₁ = length(I₁)\n\tn₂ = length(I₂)\n\n\t# Extract the observed data and drop Missing from the eltype of the container\n\tZ₁ = Z[I₁]\n\tZ₁ = [Z₁...]\n\n\t# Distance matrices needed for covariance matrices\n\tD = ξ.D # distance matrix for all locations in the grid\n\tD₂₂ = D[I₂, I₂]\n\tD₁₁ = D[I₁, I₁]\n\tD₁₂ = D[I₁, I₂]\n\n\t# Extract the parameters from θ\n\tτ = θ[1]\n\tρ = θ[2]\n\n\t# Compute covariance matrices\n\tν = 1 # fixed smoothness\n\tΣ₂₂ = matern.(UpperTriangular(D₂₂), ρ, ν); Σ₂₂[diagind(Σ₂₂)] .+= τ^2\n\tΣ₁₁ = 
matern.(UpperTriangular(D₁₁), ρ, ν); Σ₁₁[diagind(Σ₁₁)] .+= τ^2\n\tΣ₁₂ = matern.(D₁₂, ρ, ν)\n\n\t# Compute the Cholesky factor of Σ₁₁ and solve the lower triangular system\n\tL₁₁ = cholesky(Symmetric(Σ₁₁)).L\n\tx = L₁₁ \\ Σ₁₂\n\n\t# Conditional covariance matrix, cov(Z₂ ∣ Z₁, θ), and its Cholesky factor\n\tΣ = Σ₂₂ - x'x\n\tL = cholesky(Symmetric(Σ)).L\n\n\t# Conditonal mean, E(Z₂ ∣ Z₁, θ)\n\ty = L₁₁ \\ Z₁\n\tμ = x'y\n\n\t# Simulate from the distribution Z₂ ∣ Z₁, θ ∼ N(μ, Σ)\n\tz = randn(n₂, nsims)\n\tZ₂ = μ .+ L * z\n\n\t# Combine the observed and missing data to form the complete data\n\tZ = map(1:nsims) do l\n\t\tz = Vector{T}(undef, n₁ + n₂)\n\t\tz[I₁] = Z₁\n\t\tz[I₂] = Z₂[:, l]\n\t\tz\n\tend\n\tZ = stackarrays(Z, merge = false)\n\n\t# Convert Z to an array with appropriate dimensions\n\tZ = reshape(Z, dims..., 1, nsims)\n\n\treturn Z\nend","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Now we can use the neural EM algorithm to get parameter point estimates from data containing missing values. The algorithm is implemented with the struct EM. Again, here we use simulated data as a surrogate for real data: ","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"θ = Parameters(1, ξ)\nZ = simulate(θ, 1)[1][:, :] # simulate a single gridded field\nZ = removedata(Z, 0.25) # remove 25% of the data\nθ₀ = mean.([Π...]) # initial estimate, the prior mean\n\nneuralem = EM(simulateconditional, θ̂)\nneuralem(Z, θ₀, ξ = ξ, nsims = H, use_ξ_in_simulateconditional = true)","category":"page"},{"location":"workflow/advancedusage/#Censored-data","page":"Advanced usage","title":"Censored data","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Coming soon, based on the methodology presented in Richards et al. (2023+).","category":"page"},{"location":"workflow/overview/#Overview","page":"Overview","title":"Overview","text":"","category":"section"},{"location":"workflow/overview/","page":"Overview","title":"Overview","text":"To develop a neural estimator with NeuralEstimators,","category":"page"},{"location":"workflow/overview/","page":"Overview","title":"Overview","text":"Sample parameters from the prior distribution. The parameters are stored as p times K matrices, with p the number of parameters in the model and K the number of parameter vectors in the given parameter set (i.e., training, validation, or test set).\nSimulate data from the assumed model over the parameter sets generated above. These data are stored as a Vector{A}, with each element of the vector associated with one parameter configuration, and where A depends on the multivariate structure of the data and the representation of the neural estimator (e.g., an Array for CNN-based estimators, a GNNGraph for GNN-based estimators, etc.).\nInitialise a neural network θ̂. \nTrain θ̂ under the chosen loss function using train().\nAssess θ̂ using assess(), which uses simulation-based methods to assess the estimator with respect to its sampling distribution.","category":"page"},{"location":"workflow/overview/","page":"Overview","title":"Overview","text":"Once the estimator θ̂ has passed our assessments and is therefore deemed to be well calibrated, it may be applied to observed data. 
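For concreteness, the five steps above can be collected into a minimal end-to-end sketch, here using the toy univariate model Z ~ N(θ, 1) with prior θ ~ N(0, 1) that appears elsewhere in this documentation (the architecture, sample sizes, and number of test parameters are purely illustrative):

```julia
using NeuralEstimators, Flux

# 1. Sample parameters from the prior θ ~ N(0, 1), stored as a p × K matrix
p = 1
sample(K) = randn32(p, K)

# 2. Simulate data: a Vector with one d × m matrix per parameter vector
simulate(θ, m) = [μ .+ randn32(1, m) for μ ∈ eachcol(θ)]

# 3. Initialise a neural point estimator based on the DeepSet architecture
ψ = Chain(Dense(1, 32, relu), Dense(32, 32, relu))
ϕ = Chain(Dense(32, 32, relu), Dense(32, p))
θ̂ = PointEstimator(DeepSet(ψ, ϕ))

# 4. Train the estimator with m = 30 independent replicates per data set
θ̂ = train(θ̂, sample, simulate, m = 30)

# 5. Assess the estimator using simulation-based methods
θ_test = sample(1000)
Z_test = simulate(θ_test, 30)
assessment = assess(θ̂, θ_test, Z_test)
bias(assessment)
rmse(assessment)

# Apply the assessed estimator to data
Z = simulate(sample(1), 30)
θ̂(Z)
```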
See the Examples and, once familiar with the basic workflow, see Advanced usage for practical considerations on how to most effectively construct neural estimators.","category":"page"},{"location":"API/core/#Core","page":"Core","title":"Core","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"This page documents the classes and functions that are central to the workflow of NeuralEstimators. Its organisation reflects the order in which these classes and functions appear in a standard implementation; that is, from sampling parameters from the prior distribution, to using a neural Bayes estimator to make inference with observed data sets.","category":"page"},{"location":"API/core/#Sampling-parameters","page":"Core","title":"Sampling parameters","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"Parameters sampled from the prior distribution are stored as a p times K matrix, where p is the number of parameters in the statistical model and K is the number of parameter vectors sampled from the prior distribution.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"It can sometimes be helpful to wrap the parameter matrix in a user-defined type that also stores expensive intermediate objects needed for data simulated (e.g., Cholesky factors). In this case, the user-defined type should be a subtype of the abstract type ParameterConfigurations, whose only requirement is a field θ that stores the matrix of parameters. See Storing expensive intermediate objects for data simulation for further discussion. ","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"ParameterConfigurations","category":"page"},{"location":"API/core/#NeuralEstimators.ParameterConfigurations","page":"Core","title":"NeuralEstimators.ParameterConfigurations","text":"ParameterConfigurations\n\nAn abstract supertype for user-defined types that store parameters and any intermediate objects needed for data simulation.\n\nThe user-defined type must have a field θ that stores the p × K matrix of parameters, where p is the number of parameters in the model and K is the number of parameter vectors sampled from the prior distribution. 
There are no other restrictions.\n\nSee subsetparameters for the generic function for subsetting these objects.\n\nExamples\n\nstruct P <: ParameterConfigurations\n\tθ\n\t# other expensive intermediate objects...\nend\n\n\n\n\n\n","category":"type"},{"location":"API/core/#Simulating-data","page":"Core","title":"Simulating data","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"NeuralEstimators facilitates neural estimation for arbitrary statistical models by having the user implicitly define their model via simulated data, either as fixed instances or via a function that simulates data from the statistical model.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"The data are always stored as a Vector{A}, where each element of the vector corresponds to a data set of m independent replicates associated with one parameter vector (note that m is arbitrary), and where the type A depends on the multivariate structure of the data:","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"For univariate and unstructured multivariate data, A is a d times m matrix where d is the dimension each replicate (e.g., d=1 for univariate data).\nFor data collected over a regular grid, A is a (N + 2)-dimensional array, where N is the dimension of the grid (e.g., N = 1 for time series, N = 2 for two-dimensional spatial grids, etc.). The first N dimensions of the array correspond to the dimensions of the grid; the penultimate dimension stores the so-called \"channels\" (this dimension is singleton for univariate processes, two for bivariate processes, and so on); and the final dimension stores the independent replicates. For example, to store 50 independent replicates of a bivariate spatial process measured over a 10x15 grid, one would construct an array of dimension 10x15x2x50.\nFor spatial data collected over irregular spatial locations, A is a GNNGraph with independent replicates (possibly with differing spatial locations) stored as subgraphs using the function batch.","category":"page"},{"location":"API/core/#Estimators","page":"Core","title":"Estimators","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"Several classes of neural estimators are available in the package.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"The simplest class is PointEstimator, used for constructing arbitrary mappings from the sample space to the parameter space. When constructing a generic point estimator, the user defines the loss function and therefore the Bayes estimator that will be targeted.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"Several classes cater for the estimation of marginal posterior quantiles, based on the quantile loss function (see quantileloss()); in particular, see IntervalEstimator and QuantileEstimatorDiscrete for estimating marginal posterior quantiles for a fixed set of probability levels, and QuantileEstimatorContinuous for estimating marginal posterior quantiles with the probability level as an input to the neural network.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"In addition to point estimation, the package also provides the class RatioEstimator for approximating the so-called likelihood-to-evidence ratio. 
The binary classification problem at the heart of this approach proceeds based on the binary cross-entropy loss.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"Users are free to choose the neural-network architecture of these estimators as they see fit (subject to some class-specific requirements), but the package also provides the convenience constructor initialise_estimator().","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"NeuralEstimator\n\nPointEstimator\n\nIntervalEstimator\n\nQuantileEstimatorDiscrete\n\nQuantileEstimatorContinuous\n\nRatioEstimator\n\nPiecewiseEstimator\n\nEnsemble","category":"page"},{"location":"API/core/#NeuralEstimators.NeuralEstimator","page":"Core","title":"NeuralEstimators.NeuralEstimator","text":"NeuralEstimator\n\nAn abstract supertype for neural estimators.\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.PointEstimator","page":"Core","title":"NeuralEstimators.PointEstimator","text":"PointEstimator(deepset::DeepSet)\n\nA neural point estimator, a mapping from the sample space to the parameter space.\n\nThe estimator leverages the DeepSet architecture. The only requirement is that number of output neurons in the final layer of the inference network (i.e., the outer network) is equal to the number of parameters in the statistical model.\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.IntervalEstimator","page":"Core","title":"NeuralEstimators.IntervalEstimator","text":"IntervalEstimator(u, v = u; probs = [0.025, 0.975], g::Function = exp)\nIntervalEstimator(u, c::Union{Function,Compress}; probs = [0.025, 0.975], g::Function = exp)\nIntervalEstimator(u, v, c::Union{Function,Compress}; probs = [0.025, 0.975], g::Function = exp)\n\nA neural interval estimator which, given data Z, jointly estimates marginal posterior credible intervals based on the probability levels probs.\n\nThe estimator employs a representation that prevents quantile crossing, namely, it constructs marginal posterior credible intervals for each parameter theta_i, i = 1 dots p of the form,\n\nc_i(u_i(boldsymbolZ)) c_i(u_i(boldsymbolZ)) + g(v_i(boldsymbolZ)))\n\nwhere boldsymbolu() equiv (u_1(cdot) dots u_p(cdot)) and boldsymbolv() equiv (v_1(cdot) dots v_p(cdot)) are neural networks that transform data into p-dimensional vectors; g(cdot) is a monotonically increasing function (e.g., exponential or softplus); and each c_i() is a monotonically increasing function that maps its input to the prior support of theta_i.\n\nThe functions c_i() may be defined by a p-dimensional object of type Compress. 
If these functions are unspecified, they will be set to the identity function so that the range of the intervals will be unrestricted.\n\nIf only a single neural-network architecture is provided, it will be used for both boldsymbolu() and boldsymbolv().\n\nThe return value when applied to data is a matrix with 2p rows, where the first and second p rows correspond to the lower and upper bounds, respectively.\n\nSee also QuantileEstimatorDiscrete and QuantileEstimatorContinuous.\n\nExamples\n\nusing NeuralEstimators, Flux\n\n# Generate some toy data\nn = 2 # bivariate data\nm = 100 # number of independent replicates\nZ = rand(n, m)\n\n# prior\np = 3 # number of parameters in the statistical model\nmin_supp = [25, 0.5, -pi/2]\nmax_supp = [500, 2.5, 0]\ng = Compress(min_supp, max_supp)\n\n# Create an architecture\nw = 8 # width of each layer\nψ = Chain(Dense(n, w, relu), Dense(w, w, relu));\nϕ = Chain(Dense(w, w, relu), Dense(w, p));\nu = DeepSet(ψ, ϕ)\n\n# Initialise the interval estimator\nestimator = IntervalEstimator(u, g)\n\n# Apply the (untrained) interval estimator\nestimator(Z)\ninterval(estimator, Z)\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.QuantileEstimatorDiscrete","page":"Core","title":"NeuralEstimators.QuantileEstimatorDiscrete","text":"QuantileEstimatorDiscrete(v::DeepSet; probs = [0.05, 0.25, 0.5, 0.75, 0.95], g = Flux.softplus, i = nothing)\n(estimator::QuantileEstimatorDiscrete)(Z)\n(estimator::QuantileEstimatorDiscrete)(Z, θ₋ᵢ)\n\nA neural estimator that jointly estimates a fixed set of marginal posterior quantiles with probability levels tau_1 dots tau_T, controlled by the keyword argument probs.\n\nBy default, the estimator approximates the marginal quantiles for all parameters in the model, that is, the quantiles of\n\ntheta_i mid boldsymbolZ\n\nfor parameters boldsymboltheta equiv (theta_1 dots theta_p). Alternatively, if initialised with i set to a positive integer, the estimator approximates the quantiles of the full conditional distribution\n\ntheta_i mid boldsymbolZ boldsymboltheta_-i\n\nwhere boldsymboltheta_-i denotes the parameter vector with its ith element removed. For ease of exposition, when targetting marginal posteriors of the form theta_i mid boldsymbolZ (i.e., the default behaviour), we define textdim(boldsymboltheta_-i) 0.\n\nThe estimator leverages the DeepSet architecture, subject to two requirements. First, the number of input neurons in the first layer of the inference network (i.e., the outer network) must be equal to the number of neurons in the final layer of the summary network plus textdim(boldsymboltheta_-i). Second, the number of output neurons in the final layer of the inference network must be equal to p - textdim(boldsymboltheta_-i). The estimator employs a representation that prevents quantile crossing, namely,\n\nbeginaligned\nboldsymbolq^(tau_1)(boldsymbolZ) = boldsymbolv^(tau_1)(boldsymbolZ)\nboldsymbolq^(tau_t)(boldsymbolZ) = boldsymbolv^(tau_1)(boldsymbolZ) + sum_j=2^t g(boldsymbolv^(tau_j)(boldsymbolZ)) quad t = 2 dots T\nendaligned\n\nwhere boldsymbolq^(tau)(boldsymbolZ) denotes the vector of tau-quantiles for parameters boldsymboltheta equiv (theta_1 dots theta_p), and boldsymbolv^(tau_t)(cdot), t = 1 dots T, are unconstrained neural networks that transform data into p-dimensional vectors, and g(cdot) is a non-negative function (e.g., exponential or softplus) applied elementwise to its arguments. 
If g=nothing, the quantiles are estimated independently through the representation,\n\nboldsymbolq^(tau_t)(boldsymbolZ) = boldsymbolv^(tau_t)(boldsymbolZ) quad t = 1 dots T\n\nThe return value is a matrix with (p - textdim(boldsymboltheta_-i)) times T rows, where the first set of T rows corresponds to the estimated quantiles for the first parameter, the second set of T rows corresponds to the estimated quantiles for the second parameter, and so on.\n\nSee also IntervalEstimator and QuantileEstimatorContinuous.\n\nExamples\n\nusing NeuralEstimators, Flux, Distributions\nusing AlgebraOfGraphics, CairoMakie\n\n# Model: Z|θ ~ N(θ, 1) with θ ~ N(0, 1)\nd = 1 # dimension of each independent replicate\np = 1 # number of unknown parameters in the statistical model\nm = 30 # number of independent replicates in each data set\nprior(K) = randn32(p, K)\nsimulate(θ, m) = [μ .+ randn32(1, m) for μ ∈ eachcol(θ)]\n\n# Architecture\nψ = Chain(Dense(d, 64, relu), Dense(64, 64, relu))\nϕ = Chain(Dense(64, 64, relu), Dense(64, p))\nv = DeepSet(ψ, ϕ)\n\n# Initialise the estimator\nτ = [0.05, 0.25, 0.5, 0.75, 0.95]\nq̂ = QuantileEstimatorDiscrete(v; probs = τ)\n\n# Train the estimator\nq̂ = train(q̂, prior, simulate, m = m)\n\n# Assess the estimator\nθ = prior(1000)\nZ = simulate(θ, m)\nassessment = assess(q̂, θ, Z)\nplot(assessment)\n\n# Estimate posterior quantiles\nq̂(Z)\n\n\n# -------------------------------------------------------------\n# --------------------- Full conditionals ---------------------\n# -------------------------------------------------------------\n\n\n# Model: Z|μ,σ ~ N(μ, σ²) with μ ~ N(0, 1), σ ∼ IG(3,1)\nd = 1 # dimension of each independent replicate\np = 2 # number of unknown parameters in the statistical model\nm = 30 # number of independent replicates in each data set\nfunction prior(K)\n\tμ = randn(1, K)\n\tσ = rand(InverseGamma(3, 1), 1, K)\n\tθ = Float32.(vcat(μ, σ))\nend\nsimulate(θ, m) = [ϑ[1] .+ ϑ[2] .* randn32(1, m) for ϑ ∈ eachcol(θ)]\n\n# Architecture\nψ = Chain(Dense(d, 64, relu), Dense(64, 64, relu))\nϕ = Chain(Dense(64 + 1, 64, relu), Dense(64, 1))\nv = DeepSet(ψ, ϕ)\n\n# Initialise estimators respectively targetting quantiles of μ∣Z,σ and σ∣Z,μ\nτ = [0.05, 0.25, 0.5, 0.75, 0.95]\nq₁ = QuantileEstimatorDiscrete(v; probs = τ, i = 1)\nq₂ = QuantileEstimatorDiscrete(v; probs = τ, i = 2)\n\n# Train the estimators\nq₁ = train(q₁, prior, simulate, m = m)\nq₂ = train(q₂, prior, simulate, m = m)\n\n# Assess the estimators\nθ = prior(1000)\nZ = simulate(θ, m)\nassessment = assess([q₁, q₂], θ, Z, parameter_names = [\"μ\", \"σ\"])\nplot(assessment)\n\n# Estimate quantiles of μ∣Z,σ with σ = 0.5 and for many data sets\nθ₋ᵢ = 0.5f0\nq₁(Z, θ₋ᵢ)\n\n# Estimate quantiles of μ∣Z,σ with σ = 0.5 for only a single data set\nq₁(Z[1], θ₋ᵢ)\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.QuantileEstimatorContinuous","page":"Core","title":"NeuralEstimators.QuantileEstimatorContinuous","text":"QuantileEstimatorContinuous(deepset::DeepSet; i = nothing, num_training_probs::Integer = 1)\n(estimator::QuantileEstimatorContinuous)(Z, τ)\n(estimator::QuantileEstimatorContinuous)(Z, θ₋ᵢ, τ)\n\nA neural estimator targetting posterior quantiles.\n\nGiven as input data boldsymbolZ and the desired probability level tau (0 1), by default the estimator approximates the tau-quantile of\n\ntheta_i mid boldsymbolZ\n\nfor parameters boldsymboltheta equiv (theta_1 dots theta_p). 
Alternatively, if initialised with i set to a positive integer, the estimator approximates the tau-quantile of the full conditional distribution\n\ntheta_i mid boldsymbolZ boldsymboltheta_-i\n\nwhere boldsymboltheta_-i denotes the parameter vector with its ith element removed. For ease of exposition, when targetting marginal posteriors of the form theta_i mid boldsymbolZ (i.e., the default behaviour), we define textdim(boldsymboltheta_-i) 0.\n\nThe estimator leverages the DeepSet architecture, subject to two requirements. First, the number of input neurons in the first layer of the inference network (i.e., the outer network) must be equal to the number of neurons in the final layer of the summary network plus 1 + textdim(boldsymboltheta_-i). Second, the number of output neurons in the final layer of the inference network must be equal to p - textdim(boldsymboltheta_-i).\n\nAlthough not a requirement, one may employ a (partially) monotonic neural network to prevent quantile crossing (i.e., to ensure that the tau_1-quantile does not exceed the tau_2-quantile for any tau_2 tau_1). There are several ways to construct such a neural network: one simple yet effective approach is to ensure that all weights associated with tau are strictly positive (see, e.g., Cannon, 2018), and this can be done using the DensePositive layer as illustrated in the examples below.\n\nThe return value is a matrix with p - textdim(boldsymboltheta_-i) rows, corresponding to the estimated quantile for each parameter not in boldsymboltheta_-i.\n\nSee also QuantileEstimatorDiscrete.\n\nExamples\n\nusing NeuralEstimators, Flux, Distributions , InvertedIndices, Statistics\nusing AlgebraOfGraphics, CairoMakie\n\n# Model: Z|θ ~ N(θ, 1) with θ ~ N(0, 1)\nd = 1 # dimension of each independent replicate\np = 1 # number of unknown parameters in the statistical model\nm = 30 # number of independent replicates in each data set\nprior(K) = randn32(p, K)\nsimulateZ(θ, m) = [ϑ .+ randn32(1, m) for ϑ ∈ eachcol(θ)]\nsimulateτ(K) = [rand32(10) for k in 1:K]\nsimulate(θ, m) = simulateZ(θ, m), simulateτ(size(θ, 2))\n\n# Architecture: partially monotonic network to preclude quantile crossing\nw = 64 # width of each hidden layer\nψ = Chain(\n\tDense(d, w, relu),\n\tDense(w, w, relu),\n\tDense(w, w, relu)\n\t)\nϕ = Chain(\n\tDensePositive(Dense(w + 1, w, relu); last_only = true),\n\tDensePositive(Dense(w, w, relu)),\n\tDensePositive(Dense(w, p))\n\t)\ndeepset = DeepSet(ψ, ϕ)\n\n# Initialise the estimator\nq̂ = QuantileEstimatorContinuous(deepset)\n\n# Train the estimator\nq̂ = train(q̂, prior, simulate, m = m)\n\n# Assess the estimator\nθ = prior(1000)\nZ = simulateZ(θ, m)\nassessment = assess(q̂, θ, Z)\nplot(assessment)\n\n# Estimate 0.1-quantile for many data sets\nτ = 0.1f0\nq̂(Z, τ)\n\n# Estimate several quantiles for a single data set\n# (note that τ is given as a row vector)\nz = Z[1]\nτ = Float32.([0.1, 0.25, 0.5, 0.75, 0.9])'\nq̂(z, τ)\n\n# -------------------------------------------------------------\n# --------------------- Full conditionals ---------------------\n# -------------------------------------------------------------\n\n# Model: Z|μ,σ ~ N(μ, σ²) with μ ~ N(0, 1), σ ∼ IG(3,1)\nd = 1 # dimension of each independent replicate\np = 2 # number of unknown parameters in the statistical model\nm = 30 # number of independent replicates in each data set\nfunction prior(K)\n\tμ = randn(1, K)\n\tσ = rand(InverseGamma(3, 1), 1, K)\n\tθ = vcat(μ, σ)\n\tθ = Float32.(θ)\n\treturn θ\nend\nsimulateZ(θ, m) = [ϑ[1] .+ ϑ[2] .* randn32(1, m) for 
ϑ ∈ eachcol(θ)]\nsimulateτ(θ) = [rand32(10) for k in 1:size(θ, 2)]\nsimulate(θ, m) = simulateZ(θ, m), simulateτ(θ)\n\n# Architecture: partially monotonic network to preclude quantile crossing\nw = 64 # width of each hidden layer\nψ = Chain(\n\tDense(d, w, relu),\n\tDense(w, w, relu),\n\tDense(w, w, relu)\n\t)\nϕ = Chain(\n\tDensePositive(Dense(w + 2, w, relu); last_only = true),\n\tDensePositive(Dense(w, w, relu)),\n\tDensePositive(Dense(w, 1))\n\t)\ndeepset = DeepSet(ψ, ϕ)\n\n# Initialise the estimator for the first parameter, targetting μ∣Z,σ\ni = 1\nq̂ = QuantileEstimatorContinuous(deepset; i = i)\n\n# Train the estimator\nq̂ = train(q̂, prior, simulate, m = m)\n\n# Assess the estimator\nθ = prior(1000)\nZ = simulateZ(θ, m)\nassessment = assess(q̂, θ, Z)\nplot(assessment)\n\n# Estimate quantiles of μ∣Z,σ with σ = 0.5 and for many data sets\n# (use θ[Not(i), :] to determine the order in which the conditioned parameters should be given)\nθ = prior(1000)\nZ = simulateZ(θ, m)\nθ₋ᵢ = 0.5f0\nτ = Float32.([0.1, 0.25, 0.5, 0.75, 0.9])\nq̂(Z, θ₋ᵢ, τ)\n\n# Estimate quantiles for a single data set\nq̂(Z[1], θ₋ᵢ, τ)\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.RatioEstimator","page":"Core","title":"NeuralEstimators.RatioEstimator","text":"RatioEstimator(deepset::DeepSet)\n\nA neural estimator that estimates the likelihood-to-evidence ratio,\n\nr(boldsymbolZ boldsymboltheta) equiv p(boldsymbolZ mid boldsymboltheta)p(boldsymbolZ)\n\nwhere p(boldsymbolZ mid boldsymboltheta) is the likelihood and p(boldsymbolZ) is the marginal likelihood, also known as the model evidence.\n\nThe estimator leverages the DeepSet architecture, subject to two requirements. First, the number of input neurons in the first layer of the inference network (i.e., the outer network) must equal the number of output neurons in the final layer of the summary network plus the number of parameters in the statistical model. Second, the number of output neurons in the final layer of the inference network must be equal to one.\n\nThe ratio estimator is trained by solving a relatively straightforward binary classification problem. Specifically, consider the problem of distinguishing dependent parameter–data pairs (boldsymboltheta boldsymbolZ) sim p(boldsymbolZ boldsymboltheta) with class labels Y=1 from independent parameter–data pairs (tildeboldsymboltheta tildeboldsymbolZ) sim p(boldsymboltheta)p(boldsymbolZ) with class labels Y=0, and where the classes are balanced. Then the Bayes classifier under binary cross-entropy loss is given by\n\nc(boldsymbolZ boldsymboltheta) = fracp(boldsymbolZ boldsymboltheta)p(boldsymbolZ boldsymboltheta) + p(boldsymboltheta)p(boldsymbolZ)\n\nand hence,\n\nr(boldsymbolZ boldsymboltheta) = fracc(boldsymbolZ boldsymboltheta)1 - c(boldsymbolZ boldsymboltheta)\n\nFor numerical stability, training is done on the log-scale using log r(boldsymbolZ boldsymboltheta) = textlogit(c(boldsymbolZ boldsymboltheta)).\n\nWhen applying the estimator to data, by default the likelihood-to-evidence ratio r(boldsymbolZ boldsymboltheta) is returned (setting the keyword argument classifier = true will yield class probability estimates). 
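As a small illustration of this identity (the variable names below are placeholders rather than package API), the log-ratio is simply the logit of the class probability, so the ratio can be recovered from the classifier output with a sigmoid and a division:

using Flux

logits = randn(Float32, 5)      # hypothetical raw network outputs for five (Z, θ) pairs
c = Flux.sigmoid.(logits)       # class probabilities c(Z, θ)
r = c ./ (1 .- c)               # likelihood-to-evidence ratios r(Z, θ)
log.(r) ≈ logits                # true: log r(Z, θ) = logit(c(Z, θ))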
The estimated ratio can then be used in various downstream Bayesian (e.g., Hermans et al., 2020) or Frequentist (e.g., Walchessen et al., 2023) inferential algorithms.\n\nSee also mlestimate and mapestimate for obtaining approximate maximum-likelihood and maximum-a-posteriori estimates, and sampleposterior for obtaining approximate posterior samples.\n\nExamples\n\nusing NeuralEstimators, Flux, Statistics\n\n# Generate data from Z|μ,σ ~ N(μ, σ²) with μ, σ ~ U(0, 1)\np = 2 # number of unknown parameters in the statistical model\nd = 1 # dimension of each independent replicate\nm = 100 # number of independent replicates\n\nprior(K) = rand32(p, K)\nsimulate(θ, m) = θ[1] .+ θ[2] .* randn32(d, m)\nsimulate(θ::AbstractMatrix, m) = simulate.(eachcol(θ), m)\n\n# Architecture\nw = 64 # width of each hidden layer\nψ = Chain(\n\tDense(d, w, relu),\n\tDense(w, w, relu),\n\tDense(w, w, relu)\n\t)\nϕ = Chain(\n\tDense(w + p, w, relu),\n\tDense(w, w, relu),\n\tDense(w, 1)\n\t)\ndeepset = DeepSet(ψ, ϕ)\n\n# Initialise the estimator\nr̂ = RatioEstimator(deepset)\n\n# Train the estimator\nr̂ = train(r̂, prior, simulate, m = m)\n\n# Inference with \"observed\" data set\nθ = prior(1)\nz = simulate(θ, m)[1]\nθ₀ = [0.5, 0.5] # initial estimate\nmlestimate(r̂, z; θ₀ = θ₀) # maximum-likelihood estimate\nmapestimate(r̂, z; θ₀ = θ₀) # maximum-a-posteriori estimate\nθ_grid = expandgrid(0:0.01:1, 0:0.01:1)' # fine gridding of the parameter space\nθ_grid = Float32.(θ_grid)\nr̂(z, θ_grid) # likelihood-to-evidence ratios over grid\nsampleposterior(r̂, z; θ_grid = θ_grid) # posterior samples\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.PiecewiseEstimator","page":"Core","title":"NeuralEstimators.PiecewiseEstimator","text":"PiecewiseEstimator(estimators, changepoints)\n\nCreates a piecewise estimator (Sainsbury-Dale et al., 2024, sec. 
2.2.2) from a collection of estimators and sample-size changepoints.\n\nSpecifically, with l estimators and sample-size changepoints m_1 m_2 dots m_l-1, the piecewise etimator takes the form,\n\nhatboldsymboltheta(boldsymbolZ)\n=\nbegincases\nhatboldsymboltheta_1(boldsymbolZ) m leq m_1\nhatboldsymboltheta_2(boldsymbolZ) m_1 m leq m_2\nquad vdots \nhatboldsymboltheta_l(boldsymbolZ) m m_l-1\nendcases\n\nFor example, given an estimator hatboldsymboltheta_1(cdot) trained for small sample sizes (e.g., m ≤ 30) and an estimator hatboldsymboltheta_2(cdot) trained for moderate-to-large sample sizes (e.g., m > 30), we may construct a PiecewiseEstimator that dispatches hatboldsymboltheta_1(cdot) if m ≤ 30 and hatboldsymboltheta_2(cdot) otherwise.\n\nSee also trainx() for training estimators for a range of sample sizes.\n\nExamples\n\nusing NeuralEstimators, Flux\n\nd = 2 # bivariate data\np = 3 # number of parameters in the statistical model\nw = 8 # width of each hidden layer\n\n# Small-sample estimator\nψ₁ = Chain(Dense(d, w, relu), Dense(w, w, relu));\nϕ₁ = Chain(Dense(w, w, relu), Dense(w, p));\nθ̂₁ = PointEstimator(DeepSet(ψ₁, ϕ₁))\n\n# Large-sample estimator\nψ₂ = Chain(Dense(d, w, relu), Dense(w, w, relu));\nϕ₂ = Chain(Dense(w, w, relu), Dense(w, p));\nθ̂₂ = PointEstimator(DeepSet(ψ₂, ϕ₂))\n\n# Piecewise estimator with changepoint m=30\nθ̂ = PiecewiseEstimator([θ̂₁, θ̂₂], 30)\n\n# Apply the (untrained) piecewise estimator to data\nZ = [rand(d, 1, m) for m ∈ (10, 50)]\nθ̂(Z)\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.Ensemble","page":"Core","title":"NeuralEstimators.Ensemble","text":"Ensemble(estimators)\nEnsemble(architecture::Function, J::Integer)\n(ensemble::Ensemble)(Z; aggr = median)\n\nDefines an ensemble based on a collection of estimators which, when applied to data Z, returns the median (or another summary defined by aggr) of the estimates.\n\nThe ensemble can be initialised with a collection of trained estimators and then applied immediately to observed data. Alternatively, the ensemble can be initialised with a collection of untrained estimators (or a function defining the architecture of each estimator, and the number of estimators in the ensemble), trained with train(), and then applied to observed data. In the latter case, where the ensemble is trained directly, if savepath is specified both the ensemble and component estimators will be saved. \n\nNote that the training of ensemble components can be done in parallel; however, currently this needs to be done manually by the user, since train() currently trains the ensemble components sequentially.\n\nThe ensemble components can be accessed by indexing the ensemble directly; the number of component estimators can be obtained using length(). 
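Since train() trains the components sequentially, one hypothetical way to parallelise the training manually is to train each component in its own thread and then collect the trained components into an Ensemble. The sketch below assumes the architecture(), sampler, and simulator defined in the Examples that follow; it is not functionality provided by the package, and use_gpu = false is set to avoid contention for a single GPU.

J = 5                                # ensemble size
components = Vector{Any}(undef, J)
Threads.@threads for j in 1:J
    components[j] = train(architecture(), sampler, simulator, m = 30, use_gpu = false, verbose = false)
end
ensemble = Ensemble(components)      # wrap the independently trained components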
\n\nExamples\n\nusing NeuralEstimators, Flux\n\n# Define the model, Z|θ ~ N(θ, 1), θ ~ N(0, 1)\nd = 1 # dimension of each replicate\np = 1 # number of unknown parameters in the statistical model\nm = 30 # number of independent replicates in each data set\nsampler(K) = randn32(p, K)\nsimulator(θ, m) = [μ .+ randn32(d, m) for μ ∈ eachcol(θ)]\n\n# Architecture of each ensemble component\nfunction architecture()\n\tψ = Chain(Dense(d, 64, relu), Dense(64, 64, relu))\n\tϕ = Chain(Dense(64, 64, relu), Dense(64, p))\n\tdeepset = DeepSet(ψ, ϕ)\n\tPointEstimator(deepset)\nend\n\n# Ensemble size\nJ = 5 \n\n# Initialise ensemble\nensemble = Ensemble(architecture, J)\nensemble[1] # access component estimators by indexing \nlength(ensemble) # number of component estimators \n\n# Training\nensemble = train(ensemble, sampler, simulator, m = m, epochs = 5)\n\n# Assessment\nθ = sampler(1000)\nZ = simulator(θ, m)\nassessment = assess(ensemble, θ, Z)\nrmse(assessment)\n\n# Apply to data\nensemble(Z)\n\n# Testing\nJ = 5 # ensemble size\nensemble = Ensemble(architecture, J)\ntrain(ensemble, sampler, simulator, m = m, epochs = 5, savepath=\"testing-path\")\nensemble = Ensemble(architecture, J)\nensemble(Z)\nloadpath = joinpath(pwd(), \"testing-path\", \"ensemble.bson\")\nFlux.loadparams!(ensemble, load(loadpath, @__MODULE__)[:weights])\nensemble(Z)\n\n# Testing\nJ = 5 # ensemble size\nensemble = Ensemble(architecture, J)\ntrainx(ensemble, sampler, simulator, [30, 50], epochs = 5, savepath=\"testing-path\")\nensemble = Ensemble(architecture, J)\nensemble(Z)\nloadpath = joinpath(pwd(), \"testing-path_m50\", \"ensemble.bson\")\nFlux.loadparams!(ensemble, load(loadpath, @__MODULE__)[:weights])\nensemble(Z)\n\n\n\n\n\n","category":"type"},{"location":"API/core/#Training","page":"Core","title":"Training","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"The function train is used to train a single neural estimator, while the wrapper function trainx is useful for training multiple neural estimators over a range of sample sizes, making using of the technique known as pre-training.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"train\n\ntrainx","category":"page"},{"location":"API/core/#NeuralEstimators.train","page":"Core","title":"NeuralEstimators.train","text":"train(θ̂, sampler::Function, simulator::Function; ...)\ntrain(θ̂, θ_train::P, θ_val::P, simulator::Function; ...) where {P <: Union{AbstractMatrix, ParameterConfigurations}}\ntrain(θ̂, θ_train::P, θ_val::P, Z_train::T, Z_val::T; ...) where {T, P <: Union{AbstractMatrix, ParameterConfigurations}}\n\nTrain a neural estimator θ̂.\n\nThe methods cater for different variants of \"on-the-fly\" simulation. Specifically, a sampler can be provided to continuously sample new parameter vectors from the prior, and a simulator can be provided to continuously simulate new data conditional on the parameters. 
If provided with specific sets of parameters (θ_train and θ_val) and/or data (Z_train and Z_val), they will be held fixed during training.\n\nIn all methods, the validation parameters and data are held fixed to reduce noise when evaluating the validation risk.\n\nKeyword arguments common to all methods:\n\nloss = mae\nepochs::Integer = 100\nbatchsize::Integer = 32\noptimiser = ADAM()\nsavepath::String = \"\": path to save the neural-network weights during training (as bson files) and other information, such as the risk vs epoch (the risk function evaluated over the training and validation sets are saved in the first and second columns of loss_per_epoch.csv). If savepath is an empty string (default), nothing is saved.\nstopping_epochs::Integer = 5: cease training if the risk doesn't improve in this number of epochs.\nuse_gpu::Bool = true\nverbose::Bool = true\n\nKeyword arguments common to train(θ̂, sampler, simulator) and train(θ̂, θ_train, θ_val, simulator):\n\nm: sample sizes (either an Integer or a collection of Integers). The simulator is called as simulator(θ, m).\nepochs_per_Z_refresh::Integer = 1: how often to refresh the training data.\nsimulate_just_in_time::Bool = false: flag indicating whether we should simulate just-in-time, in the sense that only a batchsize number of parameter vectors and corresponding data are in memory at a given time.\n\nKeyword arguments unique to train(θ̂, sampler, simulator):\n\nK::Integer = 10000: number of parameter vectors in the training set; the size of the validation set is K ÷ 5.\nξ = nothing: an arbitrary collection of objects that are fixed (e.g., distance matrices). If provided, the parameter sampler is called as sampler(K, ξ); otherwise, the parameter sampler will be called as sampler(K). Can also be provided as xi.\nepochs_per_θ_refresh::Integer = 1: how often to refresh the training parameters. Must be a multiple of epochs_per_Z_refresh. Can also be provided as epochs_per_theta_refresh.\n\nExamples\n\nusing NeuralEstimators, Flux\n\nfunction sampler(K)\n\tμ = randn(K) # Gaussian prior\n\tσ = rand(K) # Uniform prior\n\tθ = hcat(μ, σ)'\n\treturn θ\nend\n\nfunction simulator(θ_matrix, m)\n\t[θ[1] .+ θ[2] * randn(1, m) for θ ∈ eachcol(θ_matrix)]\nend\n\n# architecture\nd = 1 # dimension of each replicate\np = 2 # number of parameters in the statistical model\nψ = Chain(Dense(1, 32, relu), Dense(32, 32, relu))\nϕ = Chain(Dense(32, 32, relu), Dense(32, p))\nθ̂ = DeepSet(ψ, ϕ)\n\n# number of independent replicates to use during training\nm = 15\n\n# training: full simulation on-the-fly\nθ̂ = train(θ̂, sampler, simulator, m = m, epochs = 5)\n\n# training: simulation on-the-fly with fixed parameters\nK = 10000\nθ_train = sampler(K)\nθ_val = sampler(K ÷ 5)\nθ̂ = train(θ̂, θ_train, θ_val, simulator, m = m, epochs = 5)\n\n# training: fixed parameters and fixed data\nZ_train = simulator(θ_train, m)\nZ_val = simulator(θ_val, m)\nθ̂ = train(θ̂, θ_train, θ_val, Z_train, Z_val, epochs = 5)\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.trainx","page":"Core","title":"NeuralEstimators.trainx","text":"trainx(θ̂, sampler::Function, simulator::Function, m::Vector{Integer}; ...)\ntrainx(θ̂, θ_train, θ_val, simulator::Function, m::Vector{Integer}; ...)\ntrainx(θ̂, θ_train, θ_val, Z_train, Z_val, m::Vector{Integer}; ...)\ntrainx(θ̂, θ_train, θ_val, Z_train::V, Z_val::V; ...) 
where {V <: AbstractVector{AbstractVector{Any}}}\n\nA wrapper around train() to construct neural estimators for different sample sizes.\n\nThe positional argument m specifies the desired sample sizes. Each estimator is pre-trained with the estimator for the previous sample size. For example, if m = [m₁, m₂], the estimator for sample size m₂ is pre-trained with the estimator for sample size m₁.\n\nThe method for Z_train and Z_val subsets the data using subsetdata(Z, 1:mᵢ) for each mᵢ ∈ m. The method for Z_train::V and Z_val::V trains an estimator for each element of Z_train::V and Z_val::V and, hence, it does not need to invoke subsetdata(), which can be slow or difficult to define in some cases (e.g., for graphical data). Note that, in this case, m is inferred from the data.\n\nThe keyword arguments inherit from train(). The keyword arguments epochs, batchsize, stopping_epochs, and optimiser can each be given as vectors. For example, if training two estimators, one may use a different number of epochs for each estimator by providing epochs = [epoch₁, epoch₂].\n\n\n\n\n\n","category":"function"},{"location":"API/core/#Assessment/calibration","page":"Core","title":"Assessment/calibration","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"assess\n\nAssessment\n\nrisk\n\nbias\n\nrmse\n\ncoverage","category":"page"},{"location":"API/core/#NeuralEstimators.assess","page":"Core","title":"NeuralEstimators.assess","text":"assess(estimator, θ, Z)\n\nUsing an estimator (or a collection of estimators), computes estimates from data Z simulated based on true parameter vectors stored in θ.\n\nThe data Z should be a Vector, with each element corresponding to a single simulated data set. If Z contains more data sets than parameter vectors, the parameter matrix θ will be recycled by horizontal concatenation via the call θ = repeat(θ, outer = (1, J)) where J = length(Z) ÷ K is the number of simulated data sets and K = size(θ, 2) is the number of parameter vectors.\n\nThe output is of type Assessment; see ?Assessment for details.\n\nKeyword arguments\n\nestimator_names::Vector{String}: names of the estimators (sensible defaults provided).\nparameter_names::Vector{String}: names of the parameters (sensible defaults provided). If ξ is provided with a field parameter_names, those names will be used.\nξ = nothing: an arbitrary collection of objects that are fixed (e.g., distance matrices). Can also be provided as xi.\nuse_ξ = false: a Bool or a collection of Bool objects with length equal to the number of estimators. Specifies whether or not the estimator uses ξ: if it does, the estimator will be applied as estimator(Z, ξ). This argument is useful when multiple estimators are provided, only some of which need ξ; hence, if only one estimator is provided and ξ is not nothing, use_ξ is automatically set to true. 
Can also be provided as use_xi.\nuse_gpu = true: a Bool or a collection of Bool objects with length equal to the number of estimators.\nprobs = range(0.01, stop=0.99, length=100): (relevant only for estimator::QuantileEstimatorContinuous) a collection of probability levels in (0, 1)\n\nExamples\n\nusing NeuralEstimators, Flux\n\nn = 10 # number of observations in each realisation\np = 4 # number of parameters in the statistical model\n\n# Construct the neural estimator\nw = 32 # width of each layer\nψ = Chain(Dense(n, w, relu), Dense(w, w, relu));\nϕ = Chain(Dense(w, w, relu), Dense(w, p));\nθ̂ = DeepSet(ψ, ϕ)\n\n# Generate testing parameters\nK = 100\nθ = rand32(p, K)\n\n# Data for a single sample size\nm = 30\nZ = [rand32(n, m) for _ ∈ 1:K];\nassessment = assess(θ̂, θ, Z);\nrisk(assessment)\n\n# Multiple data sets for each parameter vector\nJ = 5\nZ = repeat(Z, J);\nassessment = assess(θ̂, θ, Z);\nrisk(assessment)\n\n# With set-level information\nqₓ = 2\nϕ = Chain(Dense(w + qₓ, w, relu), Dense(w, p));\nθ̂ = DeepSet(ψ, ϕ)\nx = [rand(qₓ) for _ ∈ eachindex(Z)]\nassessment = assess(θ̂, θ, (Z, x));\nrisk(assessment)\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.Assessment","page":"Core","title":"NeuralEstimators.Assessment","text":"Assessment(df::DataFrame, runtime::DataFrame)\n\nA type for storing the output of assess(). The field runtime contains the total time taken for each estimator. The field df is a long-form DataFrame with columns:\n\nestimator: the name of the estimator\nparameter: the name of the parameter\ntruth: the true value of the parameter\nestimate: the estimated value of the parameter\nm: the sample size (number of iid replicates) for the given data set\nk: the index of the parameter vector\nj: the index of the data set (in the case that multiple data sets are associated with each parameter vector)\n\nIf estimator is an IntervalEstimator, the column estimate will be replaced by the columns lower and upper, containing the lower and upper bounds of the interval, respectively.\n\nIf estimator is a QuantileEstimator, the df will also contain a column prob indicating the probability level of the corresponding quantile estimate.\n\nMultiple Assessment objects can be combined with merge() (used for combining assessments from multiple point estimators) or join() (used for combining assessments from a point estimator and an interval estimator).\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.risk","page":"Core","title":"NeuralEstimators.risk","text":"risk(assessment::Assessment; ...)\n\nComputes a Monte Carlo approximation of an estimator's Bayes risk,\n\nr(hatboldsymboltheta(cdot))\napprox\nfrac1K sum_k=1^K L(boldsymboltheta^(k) hatboldsymboltheta(boldsymbolZ^(k)))\n\nwhere boldsymboltheta^(k) k = 1 dots K denotes a set of K parameter vectors sampled from the prior and, for each k, data boldsymbolZ^(k) are simulated from the statistical model conditional on boldsymboltheta^(k).\n\nKeyword arguments\n\nloss = (x, y) -> abs(x - y): a binary operator defining the loss function (default absolute-error loss).\naverage_over_parameters::Bool = false: if true, the loss is averaged over all parameters; otherwise (default), the loss is averaged over each parameter separately.\naverage_over_sample_sizes::Bool = true: if true (default), the loss is averaged over all sample sizes m; otherwise, the loss is averaged over each sample size 
separately.\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.bias","page":"Core","title":"NeuralEstimators.bias","text":"bias(assessment::Assessment; ...)\n\nComputes a Monte Carlo approximation of an estimator's bias,\n\nrmbias(hatboldsymboltheta(cdot))\napprox\nfrac1K sum_k=1^K hatboldsymboltheta(boldsymbolZ^(k)) - boldsymboltheta^(k)\n\nwhere boldsymboltheta^(k) k = 1 dots K denotes a set of K parameter vectors sampled from the prior and, for each k, data boldsymbolZ^(k) are simulated from the statistical model conditional on boldsymboltheta^(k).\n\nThis function inherits the keyword arguments of risk (excluding the argument loss).\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.rmse","page":"Core","title":"NeuralEstimators.rmse","text":"rmse(assessment::Assessment; ...)\n\nComputes a Monte Carlo approximation of an estimator's root-mean-squared error,\n\nrmrmse(hatboldsymboltheta(cdot))\napprox\nsqrtfrac1K sum_k=1^K (hatboldsymboltheta(boldsymbolZ^(k)) - boldsymboltheta^(k))^2\n\nwhere boldsymboltheta^(k) k = 1 dots K denotes a set of K parameter vectors sampled from the prior and, for each k, data boldsymbolZ^(k) are simulated from the statistical model conditional on boldsymboltheta^(k).\n\nThis function inherits the keyword arguments of risk (excluding the argument loss).\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.coverage","page":"Core","title":"NeuralEstimators.coverage","text":"coverage(assessment::Assessment; ...)\n\nComputes a Monte Carlo approximation of an interval estimator's expected coverage, as defined in Hermans et al. (2022, Definition 2.1), and the proportion of parameters below and above the lower and upper bounds, respectively.\n\nKeyword arguments\n\naverage_over_parameters::Bool = false: if true, the coverage is averaged over all parameters; otherwise (default), it is computed over each parameter separately.\naverage_over_sample_sizes::Bool = true: if true (default), the coverage is averaged over all sample sizes m; otherwise, it is computed over each sample size separately.\n\n\n\n\n\n","category":"function"},{"location":"API/core/#Inference-with-observed-data","page":"Core","title":"Inference with observed data","text":"","category":"section"},{"location":"API/core/#Inference-using-point-estimators","page":"Core","title":"Inference using point estimators","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"Inference with a neural Bayes (point) estimator proceeds simply by applying the estimator θ̂ to the observed data Z (possibly containing multiple data sets) in a call of the form θ̂(Z). To leverage a GPU, simply move the estimator and the data to the GPU using gpu(); see also estimateinbatches() to apply the estimator over batches of data, which can alleviate memory issues when working with a large number of data sets.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"Uncertainty quantification often proceeds through the bootstrap distribution, which is essentially available \"for free\" when bootstrap data sets can be quickly generated; this is facilitated by bootstrap() and interval(). Alternatively, one may approximate a set of low and high marginal posterior quantiles using a specially constructed neural Bayes estimator, which can then be used to construct credible intervals: see IntervalEstimator, QuantileEstimatorDiscrete, and QuantileEstimatorContinuous. 
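The following hedged sketch makes this workflow concrete; it assumes a trained point estimator θ̂ and a vector of observed data sets Z, as constructed in the examples above.

using Flux: gpu

estimates = θ̂(Z)                       # point estimates for each data set in Z
estimates = estimateinbatches(θ̂, Z)    # as above, but batched to ease memory pressure
θ̂ = gpu(θ̂); Z = gpu(Z)                 # optionally move the estimator and data to the GPU

# Bootstrap-based uncertainty quantification for a single data set
bs = bootstrap(θ̂, Z[1]; B = 400)       # p × B matrix of bootstrap estimates
interval(bs; probs = [0.025, 0.975])   # p × 2 matrix of lower and upper interval bounds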
","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"bootstrap\n\ninterval","category":"page"},{"location":"API/core/#NeuralEstimators.bootstrap","page":"Core","title":"NeuralEstimators.bootstrap","text":"bootstrap(θ̂, parameters::P, Z) where P <: Union{AbstractMatrix, ParameterConfigurations}\nbootstrap(θ̂, parameters::P, simulator, m::Integer; B = 400) where P <: Union{AbstractMatrix, ParameterConfigurations}\nbootstrap(θ̂, Z; B = 400, blocks = nothing)\n\nGenerates B bootstrap estimates from an estimator θ̂.\n\nParametric bootstrapping is facilitated by passing a single parameter configuration, parameters, and corresponding simulated data, Z, whose length implicitly defines B. Alternatively, one may provide a simulator and the desired sample size, in which case the data will be simulated using simulator(parameters, m).\n\nNon-parametric bootstrapping is facilitated by passing a single data set, Z. The argument blocks caters for block bootstrapping, and it should be a vector of integers specifying the block for each replicate. For example, with 5 replicates, the first two corresponding to block 1 and the remaining three corresponding to block 2, blocks should be [1, 1, 2, 2, 2]. The resampling algorithm aims to produce resampled data sets that are of a similar size to Z, but this can only be achieved exactly if all blocks are equal in length.\n\nThe keyword argument use_gpu is a flag determining whether to use the GPU, if it is available (default true).\n\nThe return type is a p × B matrix, where p is the number of parameters in the model.\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.interval","page":"Core","title":"NeuralEstimators.interval","text":"interval(θ::Matrix; probs = [0.05, 0.95], parameter_names = nothing)\ninterval(estimator::IntervalEstimator, Z; parameter_names = nothing, use_gpu = true)\n\nCompute a confidence interval based either on a p × B matrix θ of parameters (typically containing bootstrap estimates or posterior draws) with p the number of parameters in the model, or from an IntervalEstimator and data Z.\n\nWhen given θ, the intervals are constructed by compute quantiles with probability levels controlled by the keyword argument probs.\n\nThe return type is a p × 2 matrix, whose first and second columns respectively contain the lower and upper bounds of the interval. The rows of this matrix can be named by passing a vector of strings to the keyword argument parameter_names.\n\nExamples\n\nusing NeuralEstimators\np = 3\nB = 50\nθ = rand(p, B)\ninterval(θ)\n\n\n\n\n\n","category":"function"},{"location":"API/core/#Inference-using-likelihood-and-likelihood-to-evidence-ratio-estimators","page":"Core","title":"Inference using likelihood and likelihood-to-evidence-ratio estimators","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"mlestimate\n\nmapestimate\n\nsampleposterior","category":"page"},{"location":"API/core/#NeuralEstimators.mlestimate","page":"Core","title":"NeuralEstimators.mlestimate","text":"mlestimate(estimator::RatioEstimator, Z; θ₀ = nothing, θ_grid = nothing, penalty::Function = θ -> 1, use_gpu = true)\n\nComputes the (approximate) maximum likelihood estimate given data boldsymbolZ,\n\nargmax_boldsymboltheta ell(boldsymboltheta boldsymbolZ)\n\nwhere ell(cdot cdot) denotes the approximate log-likelihood function derived from estimator.\n\nIf a vector θ₀ of initial parameter estimates is given, the approximate likelihood is maximised by gradient descent. 
Otherwise, if a matrix of parameters θ_grid is given, the approximate likelihood is maximised by grid search.\n\nA maximum penalised likelihood estimate,\n\nargmax_boldsymboltheta ell(boldsymboltheta boldsymbolZ) + log p(boldsymboltheta)\n\ncan be obtained by specifying the keyword argument penalty that defines the penalty term p(boldsymboltheta).\n\nSee also mapestimate() for computing (approximate) maximum a posteriori estimates.\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.mapestimate","page":"Core","title":"NeuralEstimators.mapestimate","text":"mapestimate(estimator::RatioEstimator, Z; θ₀ = nothing, θ_grid = nothing, prior::Function = θ -> 1, use_gpu = true)\n\nComputes the (approximate) maximum a posteriori estimate given data boldsymbolZ,\n\nargmax_boldsymboltheta ell(boldsymboltheta boldsymbolZ) + log p(boldsymboltheta)\n\nwhere ell(cdot cdot) denotes the approximate log-likelihood function derived from estimator, and p(boldsymboltheta) denotes the prior density function controlled through the keyword argument prior (by default, a uniform prior is used).\n\nIf a vector θ₀ of initial parameter estimates is given, the approximate posterior density is maximised by gradient descent. Otherwise, if a matrix of parameters θ_grid is given, the approximate posterior density is maximised by grid search.\n\nSee also mlestimate() for computing (approximate) maximum likelihood estimates.\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.sampleposterior","page":"Core","title":"NeuralEstimators.sampleposterior","text":"sampleposterior(estimator::RatioEstimator, Z, N::Integer = 1000; θ_grid, prior::Function = θ -> 1f0)\n\nSamples from the approximate posterior distribution p(boldsymboltheta mid boldsymbolZ) implied by estimator.\n\nThe positional argument N controls the size of the posterior sample.\n\nCurrently, the sampling algorithm is based on a fine-gridding of the parameter space, specified through the keyword argument θ_grid (or theta_grid). The approximate posterior density is evaluated over this grid, which is then used to draw samples. This is very effective when making inference with a small number of parameters. For models with a large number of parameters, other sampling algorithms may be needed (please feel free to contact the package maintainer for discussion).\n\nThe prior distribution p(boldsymboltheta) is controlled through the keyword argument prior (by default, a uniform prior is used).\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#Miscellaneous","page":"Miscellaneous","title":"Miscellaneous","text":"","category":"section"},{"location":"API/utility/","page":"Miscellaneous","title":"Miscellaneous","text":"Order = [:type, :function]\nPages = [\"utility.md\"]","category":"page"},{"location":"API/utility/#Core","page":"Miscellaneous","title":"Core","text":"","category":"section"},{"location":"API/utility/","page":"Miscellaneous","title":"Miscellaneous","text":"These functions can appear during the core workflow, and may need to be overloaded in some applications.","category":"page"},{"location":"API/utility/","page":"Miscellaneous","title":"Miscellaneous","text":"numberreplicates\n\nsubsetdata\n\nsubsetparameters","category":"page"},{"location":"API/utility/#NeuralEstimators.numberreplicates","page":"Miscellaneous","title":"NeuralEstimators.numberreplicates","text":"numberofreplicates(Z)\n\nGeneric function that returns the number of replicates in a given object. 
Default implementations are provided for commonly used data formats, namely, data stored as an Array or as a GNNGraph.\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.subsetdata","page":"Miscellaneous","title":"NeuralEstimators.subsetdata","text":"subsetdata(Z::V, i) where {V <: AbstractArray{A}} where {A <: Any}\nsubsetdata(Z::A, i) where {A <: AbstractArray{T, N}} where {T, N}\nsubsetdata(Z::G, i) where {G <: AbstractGraph}\n\nReturn replicate(s) i from each data set in Z.\n\nIf the user is working with data that are not covered by the default methods, simply overload the function with the appropriate type for Z.\n\nFor graphical data, calls getgraph(), where the replicates are assumed to be stored as batched graphs. Since this can be slow, one should consider using a method of train() that does not require the data to be subsetted when working with graphical data (use numberreplicates() to check that the training and validation data sets are equally replicated, which prevents subsetting).\n\nExamples\n\nusing NeuralEstimators\nusing GraphNeuralNetworks\nusing Flux: batch\n\nd = 1 # dimension of the response variable\nn = 4 # number of observations in each realisation\nm = 6 # number of replicates in each data set\nK = 2 # number of data sets\n\n# Array data\nZ = [rand(n, d, m) for k ∈ 1:K]\nsubsetdata(Z, 2) # extract second replicate from each data set\nsubsetdata(Z, 1:3) # extract first 3 replicates from each data set\n\n# Graphical data\ne = 8 # number of edges\nZ = [batch([rand_graph(n, e, ndata = rand(d, n)) for _ ∈ 1:m]) for k ∈ 1:K]\nsubsetdata(Z, 2) # extract second replicate from each data set\nsubsetdata(Z, 1:3) # extract first 3 replicates from each data set\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.subsetparameters","page":"Miscellaneous","title":"NeuralEstimators.subsetparameters","text":"subsetparameters(parameters::M, indices) where {M <: AbstractMatrix}\nsubsetparameters(parameters::P, indices) where {P <: ParameterConfigurations}\n\nSubset parameters using a collection of indices.\n\nArrays in parameters::P with last dimension equal in size to the number of parameter configurations, K, are also subsetted (over their last dimension) using indices. All other fields are left unchanged. To modify this default behaviour, overload subsetparameters.\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#Downstream-inference-algorithms","page":"Miscellaneous","title":"Downstream-inference algorithms","text":"","category":"section"},{"location":"API/utility/","page":"Miscellaneous","title":"Miscellaneous","text":"EM","category":"page"},{"location":"API/utility/#NeuralEstimators.EM","page":"Miscellaneous","title":"NeuralEstimators.EM","text":"EM(simulateconditional::Function, MAP::Union{Function, NeuralEstimator}, θ₀ = nothing)\n\nImplements the (Bayesian) Monte Carlo expectation-maximisation (EM) algorithm, with lth iteration\n\nboldsymboltheta^(l) = \nargmax_boldsymboltheta\nsum_h = 1^H ell(boldsymboltheta boldsymbolZ_1 boldsymbolZ_2^(lh)) + Hlog pi(boldsymboltheta)\n\nwhere ell(cdot) is the complete-data log-likelihood function, boldsymbolZ equiv (boldsymbolZ_1 boldsymbolZ_2) denotes the complete data with boldsymbolZ_1 and boldsymbolZ_2 the observed and missing components, respectively, boldsymbolZ_2^(lh), h = 1 dots H, is simulated from the distribution of boldsymbolZ_2 mid boldsymbolZ_1 boldsymboltheta^(l-1), and pi(boldsymboltheta) denotes the prior density. 
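As a hypothetical toy sketch of these ingredients (the names simulateconditional, mapestimator, and Z₁ are illustrative, and the precise requirements are detailed under Fields and Methods below), consider the model Zⱼ | θ ~ N(θ, 1) with independent replicates; the missing entries are then conditionally independent of the observed entries given θ, so conditional simulation simply fills them in with draws from N(θ, 1).

function simulateconditional(Z, θ; nsims = 1)   # nsims ignored here for simplicity (H = 1)
    Z = copy(Z)
    idx = findall(ismissing, Z)
    Z[idx] = θ[1] .+ randn(Float32, length(idx))
    Float32.(Z)   # completed data, returned in the form expected by the MAP estimator
end

em = EM(simulateconditional, mapestimator)   # mapestimator: a pre-trained neural MAP estimator
em(Z₁, [0f0])                                # apply to incomplete data Z₁ with starting value θ₀ = [0]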
\n\nFields\n\nThe function simulateconditional should have a signature of the form,\n\nsimulateconditional(Z::A, θ; nsims = 1) where {A <: AbstractArray{Union{Missing, T}}} where T\n\nThe output of simulateconditional should be the completed-data Z, and it should be returned in whatever form is appropriate to be passed to the MAP estimator as MAP(Z). For example, if the data are gridded and the MAP is a neural MAP estimator based on a CNN architecture, then Z should be returned as a four-dimensional array.\n\nThe field MAP can be a function (to facilitate the conventional Monte Carlo EM algorithm) or a NeuralEstimator (to facilitate the so-called neural EM algorithm). \n\nThe starting values θ₀ may be provided during initialisation (as a vector), or when applying the EM object to data (see below). The starting values given in a function call take precedence over those stored in the object.\n\nMethods\n\nOnce constructed, objects of type EM can be applied to data via the methods,\n\n(em::EM)(Z::A, θ₀::Union{Nothing, Vector} = nothing; ...) where {A <: AbstractArray{Union{Missing, T}, N}} where {T, N}\n(em::EM)(Z::V, θ₀::Union{Nothing, Vector, Matrix} = nothing; ...) where {V <: AbstractVector{A}} where {A <: AbstractArray{Union{Missing, T}, N}} where {T, N}\n\nwhere Z is the complete-data array, with the missing component encoded as Missing values. Note that the second method caters for the case that one has multiple data sets. The keyword arguments are:\n\nniterations::Integer = 50: the maximum number of iterations.\nϵ = 0.01: tolerance used to assess convergence; the algorithm halts if the relative change in parameter values in successive iterations is less than ϵ.\nreturn_iterates::Bool: if true, the estimate at each iteration of the algorithm is returned; otherwise, only the final estimate is returned.\nnsims::Integer = 1: the number H of conditional simulations in each iteration. 
\nξ = nothing: model information needed for conditional simulation (e.g., distance matrices) or in the MAP estimator.\nuse_ξ_in_simulateconditional::Bool = false: if set to true, the conditional simulator is called as simulateconditional(Z, θ, ξ; nsims = nsims).\nuse_ξ_in_MAP::Bool = false: if set to true, the MAP estimator is called as MAP(Z, ξ).\nuse_gpu::Bool = true\nverbose::Bool = false\n\nExamples\n\n# See the \"Missing data\" section in \"Advanced usage\"\n\n\n\n\n\n","category":"type"},{"location":"API/utility/#Utility-functions","page":"Miscellaneous","title":"Utility functions","text":"","category":"section"},{"location":"API/utility/","page":"Miscellaneous","title":"Miscellaneous","text":"adjacencymatrix\n\ncontainertype\n\nencodedata\n\nestimateinbatches\n\nexpandgrid\n\nIndicatorWeights\n\ninitialise_estimator\n\nloadbestweights\n\nmaternchols\n\nremovedata\n\nrowwisenorm\n\nspatialgraph\n\nstackarrays\n\nvectotril","category":"page"},{"location":"API/utility/#NeuralEstimators.adjacencymatrix","page":"Miscellaneous","title":"NeuralEstimators.adjacencymatrix","text":"adjacencymatrix(S::Matrix, k::Integer; maxmin = false, combined = false)\nadjacencymatrix(S::Matrix, r::AbstractFloat)\nadjacencymatrix(S::Matrix, r::AbstractFloat, k::Integer; random = true)\nadjacencymatrix(M::Matrix; k, r, kwargs...)\n\nComputes a spatially weighted adjacency matrix from spatial locations S based on either the k-nearest neighbours of each location; all nodes within a disc of fixed radius r; or, if both r and k are provided, a subset of k neighbours within a disc of fixed radius r.\n\nSeveral subsampling strategies are possible when choosing a subset of k neighbours within a disc of fixed radius r. If random=true (default), the neighbours are randomly selected from within the disc (note that this also approximately preserves the distribution of distances within the neighbourhood set). If random=false, a deterministic algorithm is used that aims to preserve the distribution of distances within the neighbourhood set, by choosing those nodes with distances to the central node corresponding to the 0 frac1k frac2k dots frack-1k 1 quantiles of the empirical distribution function of distances within the disc. (This algorithm in fact yields k+1 neighbours, since both the closest and furthest nodes are always included.) Otherwise, \n\nIf maxmin=false (default) the k-nearest neighbours are chosen based on all points in the graph. If maxmin=true, a so-called maxmin ordering is applied, whereby an initial point is selected, and each subsequent point is selected to maximise the minimum distance to those points that have already been selected. Then, the neighbours of each point are defined as the k-nearest neighbours amongst the points that have already appeared in the ordering. If combined=true, the neighbours are defined to be the union of the k-nearest neighbours and the k-nearest neighbours subject to a maxmin ordering. \n\nIf S is a square matrix, it is treated as a distance matrix; otherwise, it should be an n x d matrix, where n is the number of spatial locations and d is the spatial dimension (typically d = 2). In the latter case, the distance metric is taken to be the Euclidean distance. Note that use of a maxmin ordering currently requires a matrix of spatial locations (not a distance matrix).\n\nBy convention with the functionality in GraphNeuralNetworks.jl which is based on directed graphs, the neighbours of location i are stored in the column A[:, i] where A is the returned adjacency matrix. 
Therefore, the number of neighbours for each location is given by collect(mapslices(nnz, A; dims = 1)), and the number of times each node is a neighbour of another node is given by collect(mapslices(nnz, A; dims = 2)).\n\nBy convention, we do not consider a location to neighbour itself (i.e., the diagonal elements of the adjacency matrix are zero). \n\nExamples\n\nusing NeuralEstimators, Distances, SparseArrays\n\nn = 250\nd = 2\nS = rand(Float32, n, d)\nk = 10\nr = 0.10\n\n# Memory efficient constructors\nadjacencymatrix(S, k)\nadjacencymatrix(S, k; maxmin = true)\nadjacencymatrix(S, k; maxmin = true, combined = true)\nadjacencymatrix(S, r)\nadjacencymatrix(S, r, k)\nadjacencymatrix(S, r, k; random = false)\n\n# Construct from full distance matrix D\nD = pairwise(Euclidean(), S, dims = 1)\nadjacencymatrix(D, k)\nadjacencymatrix(D, r)\nadjacencymatrix(D, r, k)\nadjacencymatrix(D, r, k; random = false)\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.containertype","page":"Miscellaneous","title":"NeuralEstimators.containertype","text":"containertype(A::Type)\ncontainertype(::Type{A}) where A <: SubArray\ncontainertype(a::A) where A\n\nReturns the container type of its argument.\n\nIf given a SubArray, returns the container type of the parent array.\n\nExamples\n\na = rand(3, 4)\ncontainertype(a)\ncontainertype(typeof(a))\n[containertype(x) for x ∈ eachcol(a)]\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.encodedata","page":"Miscellaneous","title":"NeuralEstimators.encodedata","text":"encodedata(Z::A; c::T = zero(T)) where {A <: AbstractArray{Union{Missing, T}, N}} where T, N\n\nFor data Z with missing entries, returns an encoded data set (U, W) where W encodes the missingness pattern as an indicator vector and U is the original data Z with missing entries replaced by a fixed constant c.\n\nThe indicator vector W is stored in the second-to-last dimension of Z, which should be singleton. 
If the second-to-last dimension is not singleton, then two singleton dimensions will be added to the array, and W will be stored in the new second-to-last dimension.\n\nExamples\n\nusing NeuralEstimators\n\n# Generate some missing data\nZ = rand(16, 16, 1, 1)\nZ = removedata(Z, 0.25)\t # remove 25% of the data\n\n# Encode the data\nUW = encodedata(Z)\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.estimateinbatches","page":"Miscellaneous","title":"NeuralEstimators.estimateinbatches","text":"estimateinbatches(θ̂, z, t = nothing; batchsize::Integer = 32, use_gpu::Bool = true, kwargs...)\n\nApply the estimator θ̂ on minibatches of z (and optionally other set-level information t) of size batchsize.\n\nThis can prevent memory issues that can occur with large data sets, particularly on the GPU.\n\nMinibatching will only be done if there are multiple data sets in z; this will be inferred by z being a vector, or a tuple whose first element is a vector.\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.expandgrid","page":"Miscellaneous","title":"NeuralEstimators.expandgrid","text":"expandgrid(xs, ys)\n\nSame as expand.grid() in R, but currently caters for two dimensions only.\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.IndicatorWeights","page":"Miscellaneous","title":"NeuralEstimators.IndicatorWeights","text":"IndicatorWeights(h_max, n_bins::Integer)\n(w::IndicatorWeights)(h::Matrix)\n\nFor spatial locations boldsymbols and boldsymbolu, creates a spatial weight function defined as\n\nboldsymbolw(boldsymbols boldsymbolu) equiv (mathbbI(h in B_k) k = 1 dots K)\n\nwhere mathbbI(cdot) denotes the indicator function, h equiv boldsymbols - boldsymbolu is the spatial distance between boldsymbols and boldsymbolu, and B_k k = 1 dots K is a set of K =n_bins equally-sized distance bins covering the spatial distances between 0 and h_max. 
\n\nExamples\n\nusing NeuralEstimators \n\nh_max = 1\nn_bins = 10\nw = IndicatorWeights(h_max, n_bins)\nh = rand(1, 30) # distances between 30 pairs of spatial locations \nw(h)\n\n\n\n\n\n","category":"type"},{"location":"API/utility/#NeuralEstimators.initialise_estimator","page":"Miscellaneous","title":"NeuralEstimators.initialise_estimator","text":"initialise_estimator(p::Integer; ...)\n\nInitialise a neural estimator for a statistical model with p unknown parameters.\n\nThe estimator is couched in the DeepSets framework (see DeepSet) so that it can be applied to data sets containing an arbitrary number of independent replicates (including the special case of a single replicate).\n\nNote also that the user is free to initialise their neural estimator however they see fit using arbitrary Flux code; see here for Flux's API reference.\n\nFinally, the method with positional argument data_typeis a wrapper that allows one to specify the type of their data (either \"unstructured\", \"gridded\", or \"irregular_spatial\").\n\nKeyword arguments\n\narchitecture::String: for unstructured multivariate data, one may use a fully-connected multilayer perceptron (\"MLP\"); for data collected over a grid, a convolutional neural network (\"CNN\"); and for graphical or irregular spatial data, a graphical neural network (\"GNN\").\nd::Integer = 1: for unstructured multivariate data (i.e., when architecture = \"MLP\"), the dimension of the data (e.g., d = 3 for trivariate data); otherwise, if architecture ∈ [\"CNN\", \"GNN\"], the argument d controls the number of input channels (e.g., d = 1 for univariate spatial processes).\nestimator_type::String = \"point\": the type of estimator; either \"point\" or \"interval\".\ndepth = 3: the number of hidden layers; either a single integer or an integer vector of length two specifying the depth of the inner (summary) and outer (inference) network of the DeepSets framework.\nwidth = 32: a single integer or an integer vector of length sum(depth) specifying the width (or number of convolutional filters/channels) in each hidden layer.\nactivation::Function = relu: the (non-linear) activation function of each hidden layer.\nactivation_output::Function = identity: the activation function of the output layer.\nvariance_stabiliser::Union{Nothing, Function} = nothing: a function that will be applied directly to the input, usually to stabilise the variance.\nkernel_size = nothing: (applicable only to CNNs) a vector of length depth[1] containing integer tuples of length D, where D is the dimension of the convolution (e.g., D = 2 for two-dimensional convolution).\nweight_by_distance::Bool = true: (applicable only to GNNs) flag indicating whether the estimator will weight by spatial distance; if true, a SpatialGraphConv layer is used in the propagation module; otherwise, a regular GraphConv layer is used.\nprobs = [0.025, 0.975]: (applicable only if estimator_type = \"interval\") probability levels defining the lower and upper endpoints of the posterior credible interval.\n\nExamples\n\n## MLP, GNN, 1D CNN, and 2D CNN for a statistical model with two parameters:\np = 2\ninitialise_estimator(p, architecture = \"MLP\")\ninitialise_estimator(p, architecture = \"GNN\")\ninitialise_estimator(p, architecture = \"CNN\", kernel_size = [10, 5, 3])\ninitialise_estimator(p, architecture = \"CNN\", kernel_size = [(10, 10), (5, 5), (3, 
3)])\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.loadbestweights","page":"Miscellaneous","title":"NeuralEstimators.loadbestweights","text":"loadbestweights(path::String)\n\nReturns the weights of the neural network saved as 'best_network.bson' in the given path.\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.maternchols","page":"Miscellaneous","title":"NeuralEstimators.maternchols","text":"maternchols(D, ρ, ν, σ² = 1; stack = true)\n\nGiven a matrix D of distances, constructs the Cholesky factor of the covariance matrix under the Matérn covariance function with range parameter ρ, smoothness parameter ν, and marginal variance σ².\n\nProviding vectors of parameters will yield a three-dimensional array of Cholesky factors (note that the vectors must of the same length, but a mix of vectors and scalars is allowed). A vector of distance matrices D may also be provided.\n\nIf stack = true, the Cholesky factors will be \"stacked\" into a three-dimensional array (this is only possible if all distance matrices in D are the same size).\n\nExamples\n\nusing NeuralEstimators\nusing LinearAlgebra: norm\nn = 10\nS = rand(n, 2)\nD = [norm(sᵢ - sⱼ) for sᵢ ∈ eachrow(S), sⱼ ∈ eachrow(S)]\nρ = [0.6, 0.5]\nν = [0.7, 1.2]\nσ² = [0.2, 0.4]\nmaternchols(D, ρ, ν)\nmaternchols([D], ρ, ν)\nmaternchols(D, ρ, ν, σ²; stack = false)\n\nS̃ = rand(n, 2)\nD̃ = [norm(sᵢ - sⱼ) for sᵢ ∈ eachrow(S̃), sⱼ ∈ eachrow(S̃)]\nmaternchols([D, D̃], ρ, ν, σ²)\nmaternchols([D, D̃], ρ, ν, σ²; stack = false)\n\nS̃ = rand(2n, 2)\nD̃ = [norm(sᵢ - sⱼ) for sᵢ ∈ eachrow(S̃), sⱼ ∈ eachrow(S̃)]\nmaternchols([D, D̃], ρ, ν, σ²; stack = false)\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.removedata","page":"Miscellaneous","title":"NeuralEstimators.removedata","text":"removedata(Z::Array, Iᵤ::Vector{Integer})\nremovedata(Z::Array, p::Union{Float, Vector{Float}}; prevent_complete_missing = true)\nremovedata(Z::Array, n::Integer; fixed_pattern = false, contiguous_pattern = false, variable_proportion = false)\n\nReplaces elements of Z with missing.\n\nThe simplest method accepts a vector of integers Iᵤ that give the specific indices of the data to be removed.\n\nAlterntivaly, there are two methods available to generate data that are missing completely at random (MCAR).\n\nFirst, a vector p may be given that specifies the proportion of missingness for each element in the response vector. Hence, p should have length equal to the dimension of the response vector. If a single proportion is given, it will be replicated accordingly. If prevent_complete_missing = true, no replicates will contain 100% missingness (note that this can slightly alter the effective values of p).\n\nSecond, if an integer n is provided, all replicates will contain n observations after the data are removed. If fixed_pattern = true, the missingness pattern is fixed for all replicates. If contiguous_pattern = true, the data will be removed in a contiguous block. 
If variable_proportion = true, the proportion of missingness will vary across replicates, with each replicate containing between 1 and n observations after data removal, sampled uniformly (note that variable_proportion overrides fixed_pattern).\n\nThe return type is Array{Union{T, Missing}}.\n\nExamples\n\nd = 5 # dimension of each replicate\nm = 2000 # number of replicates\nZ = rand(d, m) # simulated data\n\n# Passing a desired proportion of missingness\np = rand(d)\nremovedata(Z, p)\n\n# Passing a desired final sample size\nn = 3 # number of observed elements of each replicate: must have n <= d\nremovedata(Z, n)\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.rowwisenorm","page":"Miscellaneous","title":"NeuralEstimators.rowwisenorm","text":"rowwisenorm(A)\n\nComputes the row-wise norm of a matrix A.\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.spatialgraph","page":"Miscellaneous","title":"NeuralEstimators.spatialgraph","text":"spatialgraph(S)\nspatialgraph(S, Z)\nspatialgraph(g::GNNGraph, Z)\n\nGiven spatial data Z measured at spatial locations S, constructs a GNNGraph ready for use in a graph neural network that employs SpatialGraphConv layers. \n\nWhen m independent replicates are collected over the same set of n spatial locations,\n\nboldsymbols_1 dots boldsymbols_n subset mathcalD\n\nwhere mathcalD subset mathbbR^d denotes the spatial domain of interest, Z should be given as an n times m matrix and S should be given as an n times d matrix. Otherwise, when m independent replicates are collected over differing sets of spatial locations,\n\nboldsymbols_ij dots boldsymbols_in_i subset mathcalD quad i = 1 dots m\n\nZ should be given as an m-vector of n_i-vectors, and S should be given as an m-vector of n_i times d matrices.\n\nThe spatial information between neighbours is stored as an edge feature, with the specific information controlled by the keyword arguments stationary and isotropic. Specifically, the edge feature between node j and node j stores the spatial distance boldsymbols_j - boldsymbols_j (if isotropic), the spatial displacement boldsymbols_j - boldsymbols_j (if stationary), or the matrix of locations (boldsymbols_j boldsymbols_j) (if !stationary). \n\nAdditional keyword arguments inherit from adjacencymatrix() to determine the neighbourhood of each node, with the default being a randomly selected set of k=30 neighbours within a disc of radius r=0.15 units.\n\nExamples\n\nusing NeuralEstimators\n\n# Number of replicates and spatial dimension\nm = 5 \nd = 2 \n\n# Spatial locations fixed for all replicates\nn = 100\nS = rand(n, d)\nZ = rand(n, m)\ng = spatialgraph(S, Z)\n\n# Spatial locations varying between replicates\nn = rand(50:100, m)\nS = rand.(n, d)\nZ = rand.(n)\ng = spatialgraph(S, Z)\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.stackarrays","page":"Miscellaneous","title":"NeuralEstimators.stackarrays","text":"stackarrays(v::V; merge = true) where {V <: AbstractVector{A}} where {A <: AbstractArray{T, N}} where {T, N}\n\nStack a vector of arrays v along the last dimension of each array, optionally merging the final dimension of the stacked array.\n\nThe arrays must be of the same size for the first N-1 dimensions. 
However, if merge = true, the size of the final dimension can vary.\n\nExamples\n\n# Vector containing arrays of the same size:\nZ = [rand(2, 3, m) for m ∈ (1, 1)];\nstackarrays(Z)\nstackarrays(Z, merge = false)\n\n# Vector containing arrays with differing final dimension size:\nZ = [rand(2, 3, m) for m ∈ (1, 2)];\nstackarrays(Z)\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.vectotril","page":"Miscellaneous","title":"NeuralEstimators.vectotril","text":"vectotril(v; strict = false)\nvectotriu(v; strict = false)\n\nConverts a vector v of length d(d+1)÷2 (a triangular number) into a d × d lower or upper triangular matrix.\n\nIf strict = true, the matrix will be strictly lower or upper triangular, that is, a (d+1) × (d+1) triangular matrix with zero diagonal.\n\nNote that the triangular matrix is constructed on the CPU, but the returned matrix will be a GPU array if v is a GPU array. Note also that the return type is not a Triangular matrix type (i.e., the zeros are materialised), since Triangular matrices are not always compatible with other GPU operations.\n\nExamples\n\nusing NeuralEstimators\n\nd = 4\nn = d*(d+1)÷2\nv = collect(range(1, n))\nvectotril(v)\nvectotriu(v)\nvectotril(v; strict = true)\nvectotriu(v; strict = true)\n\n\n\n\n\n","category":"function"},{"location":"API/loss/#Loss-functions","page":"Loss functions","title":"Loss functions","text":"","category":"section"},{"location":"API/loss/","page":"Loss functions","title":"Loss functions","text":"In addition to the standard loss functions provided by Flux (e.g., mae, mse, etc.), NeuralEstimators provides the following loss functions.","category":"page"},{"location":"API/loss/","page":"Loss functions","title":"Loss functions","text":"tanhloss\n\nkpowerloss\n\nquantileloss\n\nintervalscore","category":"page"},{"location":"API/loss/#NeuralEstimators.tanhloss","page":"Loss functions","title":"NeuralEstimators.tanhloss","text":"tanhloss(θ̂, θ, k; agg = mean, joint = true)\n\nFor k > 0, computes the loss function,\n\nL(θ̂, θ) = tanh(|θ̂ - θ|/k)\n\nwhich approximates the 0-1 loss as k → 0. Compared with the kpowerloss, which may also be used as a continuous surrogate for the 0-1 loss, the gradient of the tanh loss is bounded as |θ̂ - θ| → 0, which can improve numerical stability during training. \n\nIf joint = true, the L₁ norm is computed over each parameter vector, so that, with k close to zero, the resulting Bayes estimator is the mode of the joint posterior distribution; otherwise, if joint = false, the Bayes estimator is the vector containing the modes of the marginal posterior distributions.\n\nSee also kpowerloss.\n\n\n\n\n\n","category":"function"},{"location":"API/loss/#NeuralEstimators.kpowerloss","page":"Loss functions","title":"NeuralEstimators.kpowerloss","text":"kpowerloss(θ̂, θ, k; agg = mean, joint = true, safeorigin = true, ϵ = 0.1)\n\nFor k > 0, the k-th power absolute-distance loss function,\n\nL(θ̂, θ) = |θ̂ - θ|ᵏ\n\ncontains the squared-error, absolute-error, and 0-1 loss functions as special cases (the latter obtained in the limit as k → 0). 
It is Lipschitz continuous iff k = 1, convex iff k ≥ 1, and strictly convex iff k > 1: it is quasiconvex for all k > 0.\n\nIf joint = true, the L₁ norm is computed over each parameter vector, so that, with k close to zero, the resulting Bayes estimator is the mode of the joint posterior distribution; otherwise, if joint = false, the Bayes estimator is the vector containing the modes of the marginal posterior distributions.\n\nIf safeorigin = true, the loss function is modified to avoid pathologies around the origin, so that the resulting loss function behaves similarly to the absolute-error loss in the ϵ-interval surrounding the origin.\n\nSee also tanhloss.\n\n\n\n\n\n","category":"function"},{"location":"API/loss/#NeuralEstimators.quantileloss","page":"Loss functions","title":"NeuralEstimators.quantileloss","text":"quantileloss(θ̂, θ, τ; agg = mean)\nquantileloss(θ̂, θ, τ::Vector; agg = mean)\n\nThe asymmetric quantile loss function,\n\n L(θ̂, θ; τ) = (θ̂ - θ)(𝕀(θ̂ - θ > 0) - τ)\n\nwhere τ ∈ (0, 1) is a probability level and 𝕀(⋅) is the indicator function.\n\nThe method that takes τ as a vector is useful for jointly approximating several quantiles of the posterior distribution. In this case, the number of rows in θ̂ is assumed to be pr, where p is the number of parameters and r is the number of probability levels in τ (i.e., the length of τ).\n\nExamples\n\np = 1\nK = 10\nθ = rand(p, K)\nθ̂ = rand(p, K)\nquantileloss(θ̂, θ, 0.1)\n\nθ̂ = rand(3p, K)\nquantileloss(θ̂, θ, [0.1, 0.5, 0.9])\n\np = 2\nθ = rand(p, K)\nθ̂ = rand(p, K)\nquantileloss(θ̂, θ, 0.1)\n\nθ̂ = rand(3p, K)\nquantileloss(θ̂, θ, [0.1, 0.5, 0.9])\n\n\n\n\n\n","category":"function"},{"location":"API/loss/#NeuralEstimators.intervalscore","page":"Loss functions","title":"NeuralEstimators.intervalscore","text":"intervalscore(l, u, θ, α; agg = mean)\nintervalscore(θ̂, θ, α; agg = mean)\nintervalscore(assessment::Assessment; average_over_parameters::Bool = false, average_over_sample_sizes::Bool = true)\n\nGiven an interval [l, u] with nominal coverage 100×(1-α)% and true value θ, the interval score is defined by\n\nS(l, u, θ; α) = (u - l) + 2α⁻¹(l - θ)𝕀(θ < l) + 2α⁻¹(θ - u)𝕀(θ > u)\n\nwhere α ∈ (0, 1) and 𝕀(⋅) is the indicator function.\n\nThe method that takes a single value θ̂ assumes that θ̂ is a matrix with 2p rows, where p is the number of parameters in the statistical model. Then, the first and second set of p rows will be used as l and u, respectively.\n\nFor further discussion, see Section 6 of Gneiting, T. and Raftery, A. E. (2007), \"Strictly proper scoring rules, prediction, and estimation\", Journal of the American Statistical Association, 102, 359–378.\n\n\n\n\n\n","category":"function"},{"location":"API/architectures/#Architectures","page":"Architectures","title":"Architectures","text":"","category":"section"},{"location":"API/architectures/#Modules","page":"Architectures","title":"Modules","text":"","category":"section"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"The following high-level modules are often used when constructing a neural-network architecture. In particular, the DeepSet is the building block for most classes of Estimators in the package. 
","category":"page"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"DeepSet\n\nGNNSummary","category":"page"},{"location":"API/architectures/#NeuralEstimators.DeepSet","page":"Architectures","title":"NeuralEstimators.DeepSet","text":"DeepSet(ψ, ϕ, a = mean; S = nothing)\n\nThe DeepSets representation (Zaheer et al., 2017),\n\nθ(𝐙) = ϕ(𝐓(𝐙))\t \t 𝐓(𝐙) = 𝐚(ψ(𝐙ᵢ) i = 1 m)\n\nwhere 𝐙 ≡ (𝐙₁', …, 𝐙ₘ')' are independent replicates from the statistical model, ψ and ϕ are neural networks, and a is a permutation-invariant aggregation function. Expert summary statistics can be incorporated as,\n\nθ(𝐙) = ϕ((𝐓(𝐙) 𝐒(𝐙)))\n\nwhere S is a function that returns a vector of user-defined summary statistics. These user-defined summary statistics are provided either as a Function that returns a Vector, or as a vector of functions. In the case that ψ is set to nothing, only expert summary statistics will be used.\n\nThe aggregation function a can be any function that acts on an array and has a keyword argument dims that allows aggregation over a specific dimension of the array (e.g., sum, mean, maximum, minimum, logsumexp).\n\nDeepSet objects act on data of type Vector{A}, where each element of the vector is associated with one data set (i.e., one set of independent replicates from the statistical model), and where the type A depends on the form of the data and the chosen architecture for ψ. As a rule of thumb, when A is an array, the replicates are stored in the final dimension. For example, with gridded spatial data and ψ a CNN, A should be a 4-dimensional array, with the replicates stored in the 4ᵗʰ dimension. Note that in Flux, the final dimension is usually the \"batch\" dimension, but batching with DeepSet objects is done at the data set level (i.e., sets of replicates are batched together).\n\nData stored as Vector{Arrays} are first concatenated along the replicates dimension before being passed into the summary network ψ. 
This means that ψ is applied to a single large array rather than many small arrays, which can substantially improve computational efficiency.\n\nSet-level information, 𝐱, that is not a function of the data can be passed directly into the inference network ϕ in the following manner,\n\nθ(𝐙) = ϕ((𝐓(𝐙) 𝐱))\t \t \n\nor, in the case that expert summary statistics are also used,\n\nθ(𝐙) = ϕ((𝐓(𝐙) 𝐒(𝐙) 𝐱))\t \n\nThis is done by calling the DeepSet object on a Tuple{Vector{A}, Vector{Vector}}, where the first element of the tuple contains a vector of data sets and the second element contains a vector of set-level information (i.e., one vector for each data set).\n\nExamples\n\nusing NeuralEstimators, Flux\n\n# Two dummy data sets containing 3 and 4 replicates\np = 5 # number of parameters in the statistical model\nn = 10 # dimension of each replicate\nZ = [rand32(n, m) for m ∈ (3, 4)]\n\n# Construct the deepset object\nS = samplesize\nqₛ = 1 # dimension of expert summary statistic\nqₜ = 16 # dimension of neural summary statistic\nw = 32 # width of hidden layers\nψ = Chain(Dense(n, w, relu), Dense(w, qₜ, relu))\nϕ = Chain(Dense(qₜ + qₛ, w, relu), Dense(w, p))\nθ̂ = DeepSet(ψ, ϕ; S = S)\n\n# Apply the deepset object\nθ̂(Z)\n\n# Data with set-level information\nqₓ = 2 # dimension of set-level vector\nϕ = Chain(Dense(qₜ + qₛ + qₓ, w, relu), Dense(w, p))\nθ̂ = DeepSet(ψ, ϕ; S = S)\nx = [rand32(qₓ) for _ ∈ eachindex(Z)]\nθ̂((Z, x))\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#NeuralEstimators.GNNSummary","page":"Architectures","title":"NeuralEstimators.GNNSummary","text":"GNNSummary(propagation, readout; globalfeatures = nothing)\n\nA graph neural network (GNN) module designed to serve as the summary network ψ in the DeepSet representation when the data are graphical (e.g., irregularly observed spatial data).\n\nThe propagation module transforms graphical input data into a set of hidden-feature graphs. The readout module aggregates these feature graphs into a single hidden feature vector of fixed length (i.e., a vector of summary statistics). The summary network is then defined as the composition of the propagation and readout modules.\n\nOptionally, one may also include a module that extracts features directly from the graph, through the keyword argument globalfeatures. This module, when applied to a GNNGraph, should return a matrix of features, where the columns of the matrix correspond to the independent replicates (e.g., a 5x10 matrix is expected for 5 hidden features for each of 10 independent replicates stored in the graph). \n\nThe data should be stored as a GNNGraph or Vector{GNNGraph}, where each graph is associated with a single parameter vector. 
The graphs may contain subgraphs corresponding to independent replicates.\n\nExamples\n\nusing NeuralEstimators, Flux, GraphNeuralNetworks\nusing Flux: batch\nusing Statistics: mean\n\n# Propagation module\nd = 1 # dimension of response variable\nnₕ = 32 # dimension of node feature vectors\npropagation = GNNChain(GraphConv(d => nₕ), GraphConv(nₕ => nₕ))\n\n# Readout module\nreadout = GlobalPool(mean)\nnᵣ = nₕ # dimension of readout vector\n\n# Summary network\nψ = GNNSummary(propagation, readout)\n\n# Inference network\np = 3 # number of parameters in the statistical model\nw = 64 # width of hidden layer\nϕ = Chain(Dense(nᵣ, w, relu), Dense(w, p))\n\n# Construct the estimator\nθ̂ = DeepSet(ψ, ϕ)\n\n# Apply the estimator to a single graph, a single graph with subgraphs\n# (corresponding to independent replicates), and a vector of graphs\n# (corresponding to multiple data sets each with independent replicates)\ng₁ = rand_graph(11, 30, ndata=rand(d, 11))\ng₂ = rand_graph(13, 40, ndata=rand(d, 13))\ng₃ = batch([g₁, g₂])\nθ̂(g₁)\nθ̂(g₃)\nθ̂([g₁, g₂, g₃])\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#User-defined-summary-statistics","page":"Architectures","title":"User-defined summary statistics","text":"","category":"section"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"Order = [:type, :function]\nPages = [\"summarystatistics.md\"]","category":"page"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"The following functions correspond to summary statistics that are often useful as user-defined summary statistics in DeepSet objects.","category":"page"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"samplesize\n\nsamplecorrelation\n\nsamplecovariance\n\nNeighbourhoodVariogram","category":"page"},{"location":"API/architectures/#NeuralEstimators.samplesize","page":"Architectures","title":"NeuralEstimators.samplesize","text":"samplesize(Z::AbstractArray)\n\nComputes the sample size of a set of independent realisations Z.\n\nNote that this function is a wrapper around numberreplicates, but this function returns the number of replicates as the eltype of Z, rather than as an integer.\n\n\n\n\n\n","category":"function"},{"location":"API/architectures/#NeuralEstimators.samplecorrelation","page":"Architectures","title":"NeuralEstimators.samplecorrelation","text":"samplecorrelation(Z::AbstractArray)\n\nComputes the sample correlation matrix, R̂, and returns the vectorised strict lower triangle of R̂.\n\nExamples\n\n# 5 independent replicates of a 3-dimensional vector\nz = rand(3, 5)\nsamplecorrelation(z)\n\n\n\n\n\n","category":"function"},{"location":"API/architectures/#NeuralEstimators.samplecovariance","page":"Architectures","title":"NeuralEstimators.samplecovariance","text":"samplecovariance(Z::AbstractArray)\n\nComputes the sample covariance matrix, Σ̂, and returns the vectorised lower triangle of Σ̂.\n\nExamples\n\n# 5 independent replicates of a 3-dimensional vector\nz = rand(3, 5)\nsamplecovariance(z)\n\n\n\n\n\n","category":"function"},{"location":"API/architectures/#NeuralEstimators.NeighbourhoodVariogram","page":"Architectures","title":"NeuralEstimators.NeighbourhoodVariogram","text":"NeighbourhoodVariogram(h_max, n_bins) \n(l::NeighbourhoodVariogram)(g::GNNGraph)\n\nComputes the empirical variogram,\n\nγ̂(h ± δ) = [2|N(h ± δ)|]⁻¹ Σ₍ᵢ,ⱼ₎ ∈ N(h ± δ) (Zᵢ - Zⱼ)²\n\nwhere N(h ± δ) ≡ {(i, j) : ‖𝐬ᵢ - 𝐬ⱼ‖ ∈ (h - δ, h + δ)} is the set of pairs of locations separated by a distance within (h - δ, h + δ), and |⋅| denotes set cardinality. \n\nThe distance bins are constructed to have constant width 2δ, chosen based on the maximum distance h_max to be considered, and the specified number of bins n_bins. \n\nThe input type is a GNNGraph, and the empirical variogram is computed based on the corresponding graph structure. Specifically, only locations that are considered neighbours will be used when computing the empirical variogram. \n\nExamples\n\nusing NeuralEstimators, Distances, LinearAlgebra\n \n# Simulate Gaussian spatial data with exponential covariance function \nθ = 0.1 # true range parameter \nn = 250 # number of spatial locations \nS = rand(n, 2) # spatial locations \nD = pairwise(Euclidean(), S, dims = 1) # distance matrix \nΣ = exp.(-D ./ θ) # covariance matrix \nL = cholesky(Symmetric(Σ)).L # Cholesky factor \nm = 5 # number of independent replicates \nZ = L * randn(n, m) # simulated data \n\n# Construct the spatial graph \nr = 0.15 # radius of neighbourhood set\ng = spatialgraph(S, Z, r = r)\n\n# Construct the variogram object with 10 bins\nnv = NeighbourhoodVariogram(r, 10) \n\n# Compute the empirical variogram \nnv(g)\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#Layers","page":"Architectures","title":"Layers","text":"","category":"section"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"In addition to the built-in layers provided by Flux, the following layers may be used when constructing a neural-network architecture. ","category":"page"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"DensePositive\n\nPowerDifference\n\nSpatialGraphConv","category":"page"},{"location":"API/architectures/#NeuralEstimators.DensePositive","page":"Architectures","title":"NeuralEstimators.DensePositive","text":"DensePositive(layer::Dense, g::Function)\nDensePositive(layer::Dense; g::Function = Flux.relu)\n\nWrapper around the standard Dense layer that ensures positive weights (biases are left unconstrained).\n\nThis layer can be useful for constructing (partially) monotonic neural networks (see, e.g., QuantileEstimatorContinuous).\n\nExamples\n\nusing NeuralEstimators, Flux\n\nlayer = DensePositive(Dense(5 => 2))\nx = rand32(5, 64)\nlayer(x)\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#NeuralEstimators.PowerDifference","page":"Architectures","title":"NeuralEstimators.PowerDifference","text":"PowerDifference(a, b)\n\nFunction f(x, y) = |ax - (1-a)y|ᵇ for trainable parameters a ∈ [0, 1] and b > 0.\n\nExamples\n\nusing NeuralEstimators, Flux\n\n# Generate some data\nd = 5\nK = 10000\nX = randn32(d, K)\nY = randn32(d, K)\nXY = (X, Y)\na = 0.2f0\nb = 1.3f0\nZ = (abs.(a .* X - (1 .- a) .* Y)).^b\n\n# Initialise layer\nf = PowerDifference([0.5f0], [2.0f0])\n\n# Optimise the layer\nloader = Flux.DataLoader((XY, Z), batchsize=32, shuffle=false)\noptim = Flux.setup(Flux.Adam(0.01), f)\nfor epoch in 1:100\n for (xy, z) in loader\n loss, grads = Flux.withgradient(f) do m\n Flux.mae(m(xy), z)\n end\n Flux.update!(optim, f, grads[1])\n end\nend\n\n# Estimates of a and b\nf.a\nf.b\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#NeuralEstimators.SpatialGraphConv","page":"Architectures","title":"NeuralEstimators.SpatialGraphConv","text":"SpatialGraphConv(in => out, g=relu; args...)\n\nImplements a spatial graph convolution for isotropic processes, \n\n boldsymbolh^(l)_j =\n 
gBig(\n boldsymbolGamma_1^(l) boldsymbolh^(l-1)_j\n +\n boldsymbolGamma_2^(l) barboldsymbolh^(l)_j\n +\n boldsymbolgamma^(l)\n Big)\n quad\n barboldsymbolh^(l)_j = sum_j in mathcalN(j)boldsymbolw^(l)(boldsymbols_j - boldsymbols_j) odot f^(l)(boldsymbolh^(l-1)_j boldsymbolh^(l-1)_j)\n\nwhere boldsymbolh^(l)_j is the hidden feature vector at location boldsymbols_j at layer l, g(cdot) is a non-linear activation function applied elementwise, boldsymbolGamma_1^(l) and boldsymbolGamma_2^(l) are trainable parameter matrices, boldsymbolgamma^(l) is a trainable bias vector, mathcalN(j) denotes the indices of neighbours of boldsymbols_j, boldsymbolw^(l)(cdot) is a (learnable) spatial weighting function, odot denotes elementwise multiplication, and f^(l)(cdot cdot) is a (learnable) function. \n\nBy default, the function f^(l)(cdot cdot) is modelled using a PowerDifference function. One may alternatively employ a nonlearnable function, for example, f = (hᵢ, hⱼ) -> (hᵢ - hⱼ).^2, specified through the keyword argument f. \n\nThe spatial distances between locations must be stored as an edge feature, as facilitated by spatialgraph(). The input to boldsymbolw(cdot) is a 1 times n matrix (i.e., a row vector) of spatial distances. The output of boldsymbolw(cdot) must be either a scalar; a vector of the same dimension as the feature vectors of the previous layer; or, if the features vectors of the previous layer are scalars, a vector of arbitrary dimension. To promote identifiability, the weights are normalised to sum to one (row-wise) within each neighbourhood set. By default, boldsymbolw(cdot) is taken to be a multilayer perceptron with a single hidden layer, although a custom choice for this function can be provided using the keyword argument w. \n\nArguments\n\nin: The dimension of input features.\nout: The dimension of output features.\ng = relu: Activation function.\nbias = true: Add learnable bias?\ninit = glorot_uniform: Initialiser for boldsymbolGamma_1^(l), boldsymbolGamma_2^(l), and boldsymbolgamma^(l). \nf = nothing\nw = nothing \nw_width = 128: (Only applicable if w = nothing) The width of the hidden layer in the MLP used to model boldsymbolw(cdot cdot). \nw_out = in: (Only applicable if w = nothing) The output dimension of boldsymbolw(cdot cdot). \nglob = false: If true, global features will be computed directly from the entire spatial graph. These features are of the form: boldsymbolT = sum_j=1^nsum_j in mathcalN(j)boldsymbolw^(l)(boldsymbols_j - boldsymbols_j) odot f^(l)(boldsymbolh^(l-1)_j boldsymbolh^(l-1)_j). Note that these global features are no longer associated with a graph structure, and should therefore only be used in the final layer of a summary-statistics module. 
\n\nExamples\n\nusing NeuralEstimators, Flux, GraphNeuralNetworks\n\n# Toy spatial data\nm = 5 # number of replicates\nd = 2 # spatial dimension\nn = 250 # number of spatial locations\nS = rand(n, d) # spatial locations\nZ = rand(n, m) # data\ng = spatialgraph(S, Z) # construct the graph\n\n# Construct and apply spatial graph convolution layer\nl = SpatialGraphConv(1 => 10)\nl(g)\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#Output-activation-functions","page":"Architectures","title":"Output activation functions","text":"","category":"section"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"Order = [:type, :function]\nPages = [\"activationfunctions.md\"]","category":"page"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"In addition to the standard activation functions provided by Flux, the following structs can be used at the end of an architecture to act as output activation functions that ensure valid estimates for certain models. NB: Although we refer to the following objects as \"activation functions\", they should be treated as layers that are included in the final stage of a Flux Chain(). ","category":"page"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"Compress\n\nCorrelationMatrix\n\nCovarianceMatrix","category":"page"},{"location":"API/architectures/#NeuralEstimators.Compress","page":"Architectures","title":"NeuralEstimators.Compress","text":"Compress(a, b, k = 1)\n\nLayer that compresses its input to be within the range a and b, where each element of a is less than the corresponding element of b.\n\nThe layer uses a logistic function,\n\nl(θ) = a + (b - a)/(1 + e⁻ᵏᶿ)\n\nwhere the arguments a and b together combine to shift and scale the logistic function to the range (a, b), and the growth rate k controls the steepness of the curve.\n\nThe logistic function given here contains an additional parameter, θ₀, which is the input value corresponding to the function's midpoint. In Compress, we fix θ₀ = 0, since the output of a randomly initialised neural network is typically around zero.\n\nExamples\n\nusing NeuralEstimators, Flux\n\na = [25, 0.5, -pi/2]\nb = [500, 2.5, 0]\np = length(a)\nK = 100\nθ = randn(p, K)\nl = Compress(a, b)\nl(θ)\n\nn = 20\nθ̂ = Chain(Dense(n, p), l)\nZ = randn(n, K)\nθ̂(Z)\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#NeuralEstimators.CorrelationMatrix","page":"Architectures","title":"NeuralEstimators.CorrelationMatrix","text":"CorrelationMatrix(d)\n(object::CorrelationMatrix)(x::Matrix, cholesky::Bool = false)\n\nTransforms a vector 𝐯 ∈ ℝᵈ to the parameters of an unconstrained d×d correlation matrix or, if cholesky = true, the lower Cholesky factor of an unconstrained d×d correlation matrix.\n\nThe expected input is a Matrix with T(d-1) = (d-1)d÷2 rows, where T(d-1) is the (d-1)th triangular number (the number of free parameters in an unconstrained d×d correlation matrix), and the output is a Matrix of the same dimension. The columns of the input and output matrices correspond to independent parameter configurations (i.e., different correlation matrices).\n\nInternally, the layer constructs a valid Cholesky factor 𝐋 for a correlation matrix, and then extracts the strict lower triangle from the correlation matrix 𝐑 = 𝐋𝐋'. 
The lower triangle is extracted and vectorised in line with Julia's column-major ordering: for example, when modelling the correlation matrix\n\nbeginbmatrix\n1 R₁₂ R₁₃ \nR₂₁ 1 R₂₃\nR₃₁ R₃₂ 1\nendbmatrix\n\nthe rows of the matrix returned by a CorrelationMatrix layer are ordered as\n\nbeginbmatrix\nR₂₁ \nR₃₁ \nR₃₂ \nendbmatrix\n\nwhich means that the output can easily be transformed into the implied correlation matrices using vectotril and Symmetric.\n\nSee also CovarianceMatrix.\n\nExamples\n\nusing NeuralEstimators\nusing LinearAlgebra\nusing Flux\n\nd = 4\nl = CorrelationMatrix(d)\np = (d-1)*d÷2\nθ = randn(p, 100)\n\n# Returns a matrix of parameters, which can be converted to correlation matrices\nR = l(θ)\nR = map(eachcol(R)) do r\n\tR = Symmetric(cpu(vectotril(r, strict = true)), :L)\n\tR[diagind(R)] .= 1\n\tR\nend\n\n# Obtain the Cholesky factor directly\nL = l(θ, true)\nL = map(eachcol(L)) do x\n\t# Only the strict lower diagonal elements are returned\n\tL = LowerTriangular(cpu(vectotril(x, strict = true)))\n\n\t# Diagonal elements are determined under the constraint diag(L*L') = 𝟏\n\tL[diagind(L)] .= sqrt.(1 .- rowwisenorm(L).^2)\n\tL\nend\nL[1] * L[1]'\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#NeuralEstimators.CovarianceMatrix","page":"Architectures","title":"NeuralEstimators.CovarianceMatrix","text":"CovarianceMatrix(d)\n(object::CovarianceMatrix)(x::Matrix, cholesky::Bool = false)\n\nTransforms a vector 𝐯 ∈ ℝᵈ to the parameters of an unconstrained d×d covariance matrix or, if cholesky = true, the lower Cholesky factor of an unconstrained d×d covariance matrix.\n\nThe expected input is a Matrix with T(d) = d(d+1)÷2 rows, where T(d) is the dth triangular number (the number of free parameters in an unconstrained d×d covariance matrix), and the output is a Matrix of the same dimension. The columns of the input and output matrices correspond to independent parameter configurations (i.e., different covariance matrices).\n\nInternally, the layer constructs a valid Cholesky factor 𝐋 and then extracts the lower triangle from the positive-definite covariance matrix 𝚺 = 𝐋𝐋'. The lower triangle is extracted and vectorised in line with Julia's column-major ordering: for example, when modelling the covariance matrix\n\nbeginbmatrix\nΣ₁₁ Σ₁₂ Σ₁₃ \nΣ₂₁ Σ₂₂ Σ₂₃ \nΣ₃₁ Σ₃₂ Σ₃₃ \nendbmatrix\n\nthe rows of the matrix returned by a CovarianceMatrix are ordered as\n\nbeginbmatrix\nΣ₁₁ \nΣ₂₁ \nΣ₃₁ \nΣ₂₂ \nΣ₃₂ \nΣ₃₃ \nendbmatrix\n\nwhich means that the output can easily be transformed into the implied covariance matrices using vectotril and Symmetric.\n\nSee also CorrelationMatrix.\n\nExamples\n\nusing NeuralEstimators\nusing Flux\nusing LinearAlgebra\n\nd = 4\nl = CovarianceMatrix(d)\np = d*(d+1)÷2\nθ = randn(p, 50)\n\n# Returns a matrix of parameters, which can be converted to covariance matrices\nΣ = l(θ)\nΣ = [Symmetric(cpu(vectotril(x)), :L) for x ∈ eachcol(Σ)]\n\n# Obtain the Cholesky factor directly\nL = l(θ, true)\nL = [LowerTriangular(cpu(vectotril(x))) for x ∈ eachcol(L)]\nL[1] * L[1]'\n\n\n\n\n\n","category":"type"},{"location":"#NeuralEstimators","page":"NeuralEstimators","title":"NeuralEstimators","text":"","category":"section"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"Neural Bayes estimators are neural networks that transform data into point summaries of the posterior distribution. They are likelihood free and, once constructed, substantially faster than classical methods. 
Uncertainty quantification with neural Bayes estimators is also straightforward through the bootstrap distribution, which is essentially available \"for free\" with a neural estimator, or by training a neural Bayes estimator to approximate a set of marginal posterior quantiles. A related class of methods uses neural networks to approximate the likelihood function, the likelihood-to-evidence ratio, and the full posterior distribution. ","category":"page"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"The package NeuralEstimators facilitates the development of neural Bayes estimators and related neural inferential methods in a user-friendly manner. It caters for arbitrary models by having the user implicitly define their model via simulated data. This makes development particularly straightforward for models with existing implementations (possibly in other programming languages, e.g., R or python). A convenient interface for R users is available here.","category":"page"},{"location":"#Getting-started","page":"NeuralEstimators","title":"Getting started","text":"","category":"section"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"Install NeuralEstimators using the following command inside Julia:","category":"page"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"using Pkg; Pkg.add(url = \"https://github.com/msainsburydale/NeuralEstimators.jl\")","category":"page"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"Once familiar with the details of the Framework, see the Examples.","category":"page"},{"location":"#Supporting-and-citing","page":"NeuralEstimators","title":"Supporting and citing","text":"","category":"section"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"This software was developed as part of academic research. If you would like to support it, please star the repository. If you use it in your research or other activities, please also use the following citation.","category":"page"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"@article{,\n\tauthor = {Sainsbury-Dale, Matthew and Zammit-Mangion, Andrew and Huser, Raphaël},\n\ttitle = {Likelihood-Free Parameter Estimation with Neural {B}ayes Estimators},\n\tjournal = {The American Statistician},\n\tyear = {2024},\n\tvolume = {78},\n\tpages = {1--14},\n\tdoi = {10.1080/00031305.2023.2249522},\n\turl = {https://doi.org/10.1080/00031305.2023.2249522}\n}","category":"page"},{"location":"#Papers-using-NeuralEstimators","page":"NeuralEstimators","title":"Papers using NeuralEstimators","text":"","category":"section"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"Likelihood-free parameter estimation with neural Bayes estimators [paper] [code]\nNeural Bayes estimators for censored inference with peaks-over-threshold models [paper]\nNeural Bayes estimators for irregular spatial data using graph neural networks [paper][code]\nModern extreme value statistics for Utopian extremes [paper]\nNeural Methods for Amortised Inference [paper][code]","category":"page"},{"location":"#Related-packages","page":"NeuralEstimators","title":"Related packages","text":"","category":"section"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"Several other software packages have been developed to facilitate neural likelihood-free inference. 
These include:","category":"page"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"BayesFlow (TensorFlow)\nLAMPE (PyTorch)\nsbi (PyTorch)\nswyft (PyTorch)","category":"page"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"A summary of the functionality in these packages is given in Zammit-Mangion et al. (2024, Section 6.1). Note that this list of related packages was created in July 2024; if you have software to add to this list, please contact the package maintainer. ","category":"page"}] +[{"location":"API/simulation/#Model-specific-functions","page":"Model-specific functions","title":"Model-specific functions","text":"","category":"section"},{"location":"API/simulation/#Data-simulators","page":"Model-specific functions","title":"Data simulators","text":"","category":"section"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"The philosophy of NeuralEstimators is to cater for arbitrary statistical models by having the user define their statistical model implicitly through simulated data. However, the following functions have been included as they may be helpful to others, and their source code illustrates how a user could formulate code for their own model.","category":"page"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"See also Distributions.jl for a large range of distributions implemented in Julia, and the package RCall for calling R functions within Julia. ","category":"page"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"simulategaussian\n\nsimulatepotts\n\nsimulateschlather","category":"page"},{"location":"API/simulation/#NeuralEstimators.simulategaussian","page":"Model-specific functions","title":"NeuralEstimators.simulategaussian","text":"simulategaussian(L::AbstractMatrix, m = 1)\n\nSimulates m independent and identically distributed (i.i.d.) realisations from a mean-zero multivariate Gaussian random variable with associated lower Cholesky factor L. \n\nIf m is not specified, the simulated data are returned as a vector with length equal to the number of spatial locations, n; otherwise, the data are returned as an nxm matrix.\n\nExamples\n\nusing NeuralEstimators, Distances, LinearAlgebra\n\nn = 500\nρ = 0.6\nν = 1.0\nS = rand(n, 2)\nD = pairwise(Euclidean(), S, dims = 1)\nΣ = Symmetric(matern.(D, ρ, ν))\nL = cholesky(Σ).L\nsimulategaussian(L)\n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#NeuralEstimators.simulatepotts","page":"Model-specific functions","title":"NeuralEstimators.simulatepotts","text":"simulatepotts(grid::Matrix{Int}, β)\nsimulatepotts(grid::Matrix{Union{Int, Nothing}}, β)\nsimulatepotts(nrows::Int, ncols::Int, num_states::Int, β)\n\nChequerboard Gibbs sampling from 2D Potts model with parameter β>0.\n\nApproximately independent simulations can be obtained by setting nsims > 1 or num_iterations > burn. The degree to which the resulting simulations can be considered independent depends on the thinning factor (thin) and the burn-in (burn).\n\nKeyword arguments\n\nnsims = 1: number of approximately independent replicates. 
\nnum_iterations = 2000: number of MCMC iterations.\nburn = num_iterations: burn-in.\nthin = 10: thinning factor.\n\nExamples\n\nusing NeuralEstimators \n\n## Marginal simulation \nβ = 0.8\nsimulatepotts(10, 10, 5, β)\n\n## Marginal simulation: approximately independent samples \nsimulatepotts(10, 10, 5, β; nsims = 100, thin = 10)\n\n## Conditional simulation \nβ = 0.8\ncomplete_grid = simulatepotts(50, 50, 2, β) # simulate marginally from the Ising model \nincomplete_grid = removedata(complete_grid, 0.1) # remove 10% of the pixels at random \nimputed_grid = simulatepotts(incomplete_grid, β) # conditionally simulate over missing pixels\n\n## Multiple conditional simulations \nimputed_grids = simulatepotts(incomplete_grid, β; num_iterations = 2000, burn = 1000, thin = 10)\n\n## Recreate Fig. 8.8 of Marin & Robert (2007) “Bayesian Core”\nusing Plots \ngrids = [simulatepotts(100, 100, 2, β) for β ∈ 0.3:0.1:1.2]\nheatmaps = heatmap.(grids, legend = false, aspect_ratio=1)\nPlots.plot(heatmaps...)\n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#NeuralEstimators.simulateschlather","page":"Model-specific functions","title":"NeuralEstimators.simulateschlather","text":"simulateschlather(L::Matrix, m = 1; C = 3.5, Gumbel::Bool = false)\n\nSimulates m independent and identically distributed (i.i.d.) realisations from Schlather's max-stable model using the algorithm for approximate simulation given by Schlather (2002).\n\nRequires the lower Cholesky factor L associated with the covariance matrix of the underlying Gaussian process. \n\nIf m is not specified, the simulated data are returned as a vector with length equal to the number of spatial locations, n; otherwise, the data are returned as an nxm matrix.\n\nKeyword arguments\n\nC = 3.5: a tuning parameter that controls the accuracy of the algorithm: small C favours computational efficiency, while large C favours accuracy. Schlather (2002) recommends the use of C = 3.\nGumbel = true: flag indicating whether the data should be log-transformed from the unit Fréchet scale to the Gumbel scale.\n\nExamples\n\nusing NeuralEstimators, Distances, LinearAlgebra\n\nn = 500\nρ = 0.6\nν = 1.0\nS = rand(n, 2)\nD = pairwise(Euclidean(), S, dims = 1)\nΣ = Symmetric(matern.(D, ρ, ν))\nL = cholesky(Σ).L\nsimulateschlather(L)\n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#Spatial-point-processes","page":"Model-specific functions","title":"Spatial point processes","text":"","category":"section"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"maternclusterprocess","category":"page"},{"location":"API/simulation/#NeuralEstimators.maternclusterprocess","page":"Model-specific functions","title":"NeuralEstimators.maternclusterprocess","text":"maternclusterprocess(; λ=10, μ=10, r=0.1, xmin=0, xmax=1, ymin=0, ymax=1, unit_bounding_box=false)\n\nSimulates a Matérn cluster process with density of parent Poisson point process λ, mean number of daughter points μ, and radius of cluster disk r, over the simulation window defined by xmin and xmax, ymin and ymax.\n\nIf unit_bounding_box is true, then the simulated points will be scaled so that the longest side of their bounding box is equal to one (this may change the simulation window). 
\n\nSee also the R package spatstat, which provides functions for simulating from a range of point processes and which can be interfaced from Julia using RCall.\n\nExamples\n\nusing NeuralEstimators\n\n# Simulate a realisation from a Matérn cluster process\nS = maternclusterprocess()\n\n# Visualise realisation (requires UnicodePlots)\nusing UnicodePlots\nscatterplot(S[:, 1], S[:, 2])\n\n# Visualise realisations from the cluster process with varying parameters\nn = 250\nλ = [10, 25, 50, 90]\nμ = n ./ λ\nplots = map(eachindex(λ)) do i\n\tS = maternclusterprocess(λ = λ[i], μ = μ[i])\n\tscatterplot(S[:, 1], S[:, 2])\nend\n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#Covariance-functions","page":"Model-specific functions","title":"Covariance functions","text":"","category":"section"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"These covariance functions may be of use for various models.","category":"page"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"matern\n\npaciorek","category":"page"},{"location":"API/simulation/#NeuralEstimators.matern","page":"Model-specific functions","title":"NeuralEstimators.matern","text":"matern(h, ρ, ν, σ² = 1)\n\nGiven distance ‖𝐡‖ (h), computes the Matérn covariance function,\n\nC(‖𝐡‖) = σ² [2^(1-ν)/Γ(ν)] (‖𝐡‖/ρ)^ν K_ν(‖𝐡‖/ρ)\n\nwhere ρ is a range parameter, ν is a smoothness parameter, σ² is the marginal variance, Γ(⋅) is the gamma function, and K_ν(⋅) is the modified Bessel function of the second kind of order ν.\n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#NeuralEstimators.paciorek","page":"Model-specific functions","title":"NeuralEstimators.paciorek","text":"paciorek(s, r, ω₁, ω₂, ρ, β)\n\nGiven spatial locations s and r, computes the nonstationary covariance function,\n\nC(𝐬, 𝐫) = |𝚺(𝐬)|^(1/4) |𝚺(𝐫)|^(1/4) |(𝚺(𝐬) + 𝚺(𝐫))/2|^(-1/2) C⁰(√Q(𝐬, 𝐫))\n\nwhere C⁰(h) = exp{-(h/ρ)^(3/2)} for range parameter ρ > 0, the matrix 𝚺(𝐬) = exp(β‖𝐬 - 𝛚‖)𝐈 is a kernel matrix (Paciorek and Schervish, 2006) with scale parameter β > 0 and 𝛚 ≡ (ω₁, ω₂) ∈ 𝒟, and\n\nQ(𝐬, 𝐫) = (𝐬 - 𝐫)′ [(𝚺(𝐬) + 𝚺(𝐫))/2]⁻¹ (𝐬 - 𝐫)\n\nis the squared Mahalanobis distance between 𝐬 and 𝐫. \n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#Density-functions","page":"Model-specific functions","title":"Density functions","text":"","category":"section"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"Density functions are not needed in the workflow of NeuralEstimators. However, as part of a series of comparison studies between neural estimators and likelihood-based estimators given in various papers, we have developed the following functions for evaluating the density function for several popular distributions. 
We include these in NeuralEstimators to cater for the possibility that they may be of use in future comparison studies.","category":"page"},{"location":"API/simulation/","page":"Model-specific functions","title":"Model-specific functions","text":"gaussiandensity\n\nschlatherbivariatedensity","category":"page"},{"location":"API/simulation/#NeuralEstimators.gaussiandensity","page":"Model-specific functions","title":"NeuralEstimators.gaussiandensity","text":"gaussiandensity(y::V, L::LT) where {V <: AbstractVector, LT <: LowerTriangular}\ngaussiandensity(y::A, L::LT) where {A <: AbstractArray, LT <: LowerTriangular}\ngaussiandensity(y::A, Σ::M) where {A <: AbstractArray, M <: AbstractMatrix}\n\nEfficiently computes the density function for y ~ 𝑁(0, Σ) for covariance matrix Σ, and where L is lower Cholesky factor of Σ.\n\nThe method gaussiandensity(y::A, L::LT) assumes that the last dimension of y contains independent and identically distributed (iid) replicates.\n\nThe log-density is returned if the keyword argument logdensity is true (default).\n\n\n\n\n\n","category":"function"},{"location":"API/simulation/#NeuralEstimators.schlatherbivariatedensity","page":"Model-specific functions","title":"NeuralEstimators.schlatherbivariatedensity","text":"schlatherbivariatedensity(z₁, z₂, ψ; logdensity = true)\n\nThe bivariate density function for Schlather's max-stable model.\n\n\n\n\n\n","category":"function"},{"location":"workflow/examples/#Examples","page":"Examples","title":"Examples","text":"","category":"section"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Before proceeding, we first load the required packages. The following packages are used throughout these examples:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"using NeuralEstimators\nusing Flux # Julia's deep-learning library\nusing Distributions # sampling from probability distributions\nusing AlgebraOfGraphics # visualisation\nusing CairoMakie # visualisation","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"The following packages will be used in the examples with Gridded data and Irregular spatial data: ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"using Distances # computing distance matrices \nusing Folds # parallel simulation (start Julia with --threads=auto)\nusing LinearAlgebra # Cholesky factorisation","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"The following packages are used only in the example with Irregular spatial data: ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"using GraphNeuralNetworks # GNN architecture\nusing Statistics # mean()","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Finally, various GPU backends can be used (see the Flux documentation for details). 
For instance, if one wishes to employ an NVIDIA GPU when running the following examples, simply load the following packages: ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"using CUDA\nusing cuDNN","category":"page"},{"location":"workflow/examples/#Univariate-data","page":"Examples","title":"Univariate data","text":"","category":"section"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Here we develop a neural Bayes estimator for θ ≡ (μ, σ) from data Z₁, …, Zₘ that are independent and identically distributed realisations from the distribution N(μ, σ²). ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"First, we define a function to sample parameters from the prior distribution. Here, we assume that the parameters are independent a priori and we adopt the marginal priors μ ~ N(0, 1) and σ ~ IG(3, 1). The sampled parameters are stored as p × K matrices, with p the number of parameters in the model and K the number of sampled parameter vectors:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"function sample(K)\n\tμ = rand(Normal(0, 1), 1, K)\n\tσ = rand(InverseGamma(3, 1), 1, K)\n\tθ = vcat(μ, σ)\n\treturn θ\nend","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next, we implicitly define the statistical model through data simulation. In this package, the data are always stored as a Vector{A}, where each element of the vector is associated with one parameter vector, and where the type A depends on the multivariate structure of the data. Since in this example each replicate Z₁, …, Zₘ is univariate, A should be a Matrix with d=1 row and m columns. Below, we define our simulator, which takes a matrix of parameter vectors and applies the model simulation to each column:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"simulate(θ, m) = [ϑ[1] .+ ϑ[2] .* randn(1, m) for ϑ ∈ eachcol(θ)]","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"We now design our neural-network architecture. The workhorse of the package is the DeepSet architecture, which provides an elegant framework for making inference with an arbitrary number of independent replicates and for incorporating both neural and user-defined statistics. The DeepSets framework consists of two neural networks, a summary network and an inference network. The inference network (also known as the outer network) is always a multilayer perceptron (MLP). However, the architecture of the summary network (also known as the inner network) depends on the multivariate structure of the data. 
With unstructured data (i.e., when there is no spatial or temporal correlation within a replicate), we use an MLP with input dimension equal to the dimension of each replicate of the statistical model (i.e., one for univariate data): ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"p = 2 # number of parameters \nψ = Chain(Dense(1, 64, relu), Dense(64, 64, relu)) # summary network\nϕ = Chain(Dense(64, 64, relu), Dense(64, p)) # inference network\narchitecture = DeepSet(ψ, ϕ)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"In this example, we wish to construct a point estimator for the unknown parameter vector, and we therefore initialise a PointEstimator object based on our chosen architecture (see Estimators for a list of other estimators available in the package): ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ̂ = PointEstimator(architecture)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next, we train the estimator using train(), here using the default absolute-error loss. We'll train the estimator using 50 independent replicates per parameter configuration. Below, we pass our user-defined functions for sampling parameters and simulating data, but one may also pass parameter or data instances, which will be held fixed during training:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"m = 50\nθ̂ = train(θ̂, sample, simulate, m = m)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"To fully exploit the amortised nature of neural estimators, one may wish to save a trained estimator and load it in later sessions: see Saving and loading neural estimators for details on how this can be done. ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"The function assess() can be used to assess the trained estimator. Parametric and non-parametric bootstrap-based uncertainty quantification are facilitated by bootstrap() and interval(), and this can also be included in the assessment stage through the keyword argument boot:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ_test = sample(1000)\nZ_test = simulate(θ_test, m)\nassessment = assess(θ̂, θ_test, Z_test, boot = true)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"The resulting Assessment object contains the sampled parameters, the corresponding point estimates, and the corresponding lower and upper bounds of the bootstrap intervals. This object can be used to compute various diagnostics:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"bias(assessment) # μ = 0.002, σ = 0.017\nrmse(assessment) # μ = 0.086, σ = 0.078\nrisk(assessment) # μ = 0.055, σ = 0.056\nplot(assessment)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"(Image: Univariate Gaussian example: Estimates vs. 
truth)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"As an alternative form of uncertainty quantification, one may approximate a set of marginal posterior quantiles by training a second estimator under the quantile loss function, which allows one to generate approximate marginal posterior credible intervals. This is facilitated with IntervalEstimator which, by default, targets 95% central credible intervals:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"q̂ = IntervalEstimator(architecture)\nq̂ = train(q̂, sample, simulate, m = m)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"The resulting posterior credible-interval estimator can also be assessed with empirical simulation-based methods using assess(), as we did above for the point estimator. Often, these intervals have better coverage than bootstrap-based intervals.","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Once an estimator is deemed to be satisfactorily calibrated, it may be applied to observed data (below, we use simulated data as a substitute for observed data):","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ = sample(1) # true parameters\nZ = simulate(θ, m) # \"observed\" data\nθ̂(Z) # point estimates\ninterval(bootstrap(θ̂, Z)) # 95% non-parametric bootstrap intervals\ninterval(q̂, Z) # 95% marginal posterior credible intervals","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"To utilise a GPU for improved computational efficiency, one may simply move the estimator and the data to the GPU through the calls θ̂ = gpu(θ̂) and Z = gpu(Z) before applying the estimator. Note that GPUs often have limited memory relative to CPUs, and this can sometimes lead to memory issues when working with very large data sets: in these cases, the function estimateinbatches() can be used to apply the estimator over batches of data to circumvent any memory concerns. ","category":"page"},{"location":"workflow/examples/#Unstructured-multivariate-data","page":"Examples","title":"Unstructured multivariate data","text":"","category":"section"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Suppose now that each data set consists of m replicates 𝐙₁, …, 𝐙ₘ of a d-dimensional multivariate distribution. Everything remains as given in the univariate example above, except that we now store each data set as a d × m matrix (previously they were stored as 1 × m matrices), and the summary network of the DeepSets representation takes a d-dimensional input (previously it took a 1-dimensional input).","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Note that, when estimating a full covariance matrix, one may wish to constrain the neural estimator to only produce parameters that imply a valid (i.e., positive definite) covariance matrix. This can be achieved by appending a CovarianceMatrix layer to the end of the outer network of the DeepSets representation, as sketched below. 
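As a rough sketch (the dimension d = 3, the layer widths, and the specific summary and inference networks below are illustrative choices rather than settings prescribed by the package), one might write:\n\nusing NeuralEstimators, Flux\n\nd = 3 # dimension of each replicate\np = d*(d+1)÷2 # number of free parameters in a d×d covariance matrix\nw = 64 # width of hidden layers\nψ = Chain(Dense(d, w, relu), Dense(w, w, relu)) # summary network\nϕ = Chain(Dense(w, w, relu), Dense(w, p), CovarianceMatrix(d)) # inference network with appended CovarianceMatrix layer\nθ̂ = PointEstimator(DeepSet(ψ, ϕ))\n\n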
However, the estimator will often learn to provide valid estimates, even if not constrained to do so.","category":"page"},{"location":"workflow/examples/#Gridded-data","page":"Examples","title":"Gridded data","text":"","category":"section"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"For data collected over a regular grid, neural estimators are typically based on a convolutional neural network (CNN; see, e.g., Dumoulin and Visin, 2016). ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"When using CNNs with NeuralEstimators, each data set must be stored as a multi-dimensional array. The penultimate dimension stores the so-called \"channels\" (this dimension is singleton for univariate processes, two for bivariate processes, etc.), while the final dimension stores independent replicates. For example, to store 50 independent replicates of a bivariate spatial process measured over a 10 × 15 grid, one would construct an array of dimension 10 × 15 × 2 × 50.","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"For illustration, here we develop a neural Bayes estimator for the spatial Gaussian process model with exponential covariance function and unknown range parameter θ. The spatial domain is taken to be the unit square, and we adopt the prior θ ~ U(0.05, 0.5). ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Simulation from Gaussian processes typically involves the computation of an expensive intermediate object, namely, the Cholesky factor of a covariance matrix. Storing intermediate objects can enable the fast simulation of new data sets when the parameters are held fixed. Hence, in this example, we define a custom type Parameters subtyping ParameterConfigurations for storing the matrix of parameters and the corresponding Cholesky factors: ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"struct Parameters{T} <: ParameterConfigurations\n\tθ::Matrix{T}\n\tL\nend","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Further, we define two constructors for our custom type: one that accepts an integer K, and another that accepts a p × K matrix of parameters. 
The former constructor will be useful during the training stage for sampling from the prior distribution, while the latter constructor will be useful for parametric bootstrap (since this involves repeated simulation from the fitted model):","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"function sample(K::Integer)\n\n\t# Sample parameters from the prior \n\tθ = rand(Uniform(0.05, 0.5), 1, K)\n\n\t# Pass to matrix constructor\n\tParameters(θ)\nend\n\nfunction Parameters(θ::Matrix)\n\n\t# Spatial locations, a 16x16 grid over the unit square\n\tpts = range(0, 1, length = 16)\n\tS = expandgrid(pts, pts)\n\n\t# Distance matrix, covariance matrices, and Cholesky factors\n\tD = pairwise(Euclidean(), S, dims = 1)\n\tK = size(θ, 2)\n\tL = Folds.map(1:K) do k\n\t\tΣ = exp.(-D ./ θ[k])\n\t\tcholesky(Symmetric(Σ)).L\n\tend\n\n\tParameters(θ, L)\nend","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next, we define the model simulator: ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"function simulate(parameters::Parameters, m = 1) \n\tZ = Folds.map(parameters.L) do L\n\t\tn = size(L, 1)\n\t\tz = L * randn(n, m)\n\t\tz = reshape(z, 16, 16, 1, m) # reshape to 16x16 images\n\t\tz\n\tend\n\tZ\nend","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"A possible architecture is as follows:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"# Summary network\nψ = Chain(\n\tConv((3, 3), 1 => 32, relu),\n\tMaxPool((2, 2)),\n\tConv((3, 3), 32 => 64, relu),\n\tMaxPool((2, 2)),\n\tFlux.flatten\n\t)\n\n# Inference network\nϕ = Chain(Dense(256, 64, relu), Dense(64, 1))\n\n# DeepSet\narchitecture = DeepSet(ψ, ϕ)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next, we initialise a point estimator and a posterior credible-interval estimator:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ̂ = PointEstimator(architecture)\nq̂ = IntervalEstimator(architecture)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Now we train the estimators, here using fixed parameter instances to avoid repeated Cholesky factorisations (see Storing expensive intermediate objects for data simulation and On-the-fly and just-in-time simulation for further discussion):","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"K = 10000 # number of training parameter vectors\nm = 1 # number of independent replicates in each data set\nθ_train = sample(K)\nθ_val = sample(K ÷ 10)\nθ̂ = train(θ̂, θ_train, θ_val, simulate, m = m)\nq̂ = train(q̂, θ_train, θ_val, simulate, m = m)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Once the estimators have been trained, we assess them using empirical simulation-based methods:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ_test = sample(1000)\nZ_test = simulate(θ_test)\nassessment = assess([θ̂, q̂], θ_test, Z_test)\n\nbias(assessment) # 0.005\nrmse(assessment) # 0.032\ncoverage(assessment) # 0.953\nplot(assessment) ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"(Image: Gridded spatial Gaussian process example: Estimates vs. 
truth)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Finally, we can apply our estimators to observed data. Note that when we have a single replicate only (which is often the case in spatial statistics), non-parametric bootstrap is not possible, and we instead use parametric bootstrap:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ = sample(1) # true parameter\nZ = simulate(θ) # \"observed\" data\nθ̂(Z) # point estimates\ninterval(q̂, Z) # 95% marginal posterior credible intervals\nbs = bootstrap(θ̂, θ̂(Z), simulate, m) # parametric bootstrap intervals\ninterval(bs) # 95% parametric bootstrap intervals","category":"page"},{"location":"workflow/examples/#Irregular-spatial-data","page":"Examples","title":"Irregular spatial data","text":"","category":"section"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"To cater for spatial data collected over arbitrary spatial locations, one may construct a neural estimator with a graph neural network (GNN) architecture (see Sainsbury-Dale, Zammit-Mangion, Richards, and Huser, 2023). The overall workflow remains as given in previous examples, with some key additional steps:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Sampling spatial configurations during the training phase, typically using an appropriately chosen spatial point process: see, for example, maternclusterprocess.\nStoring the spatial data as a graph: see spatialgraph.\nConstructing an appropriate architecture: see GNNSummary and SpatialGraphConv.","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"For illustration, we again consider the spatial Gaussian process model with exponential covariance function, and we define a struct for storing expensive intermediate objects needed for data simulation. 
In this case, these objects include Cholesky factors and spatial graphs (which store the adjacency matrices needed to perform graph convolution): ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"struct Parameters{T} <: ParameterConfigurations\n\tθ::Matrix{T} # true parameters \n\tL # Cholesky factors\n\tg # spatial graphs\n\tS # spatial locations \nend","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Again, we define two constructors, which will be convenient for sampling parameters from the prior during training and assessment, and for performing parametric bootstrap sampling when making inferences from observed data:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"function sample(K::Integer)\n\n\t# Sample parameters from the prior \n\tθ = rand(Uniform(0.05, 0.5), 1, K)\n\n\t# Simulate spatial configurations over the unit square\n\tn = rand(200:300, K)\n\tλ = rand(Uniform(10, 50), K)\n\tS = [maternclusterprocess(λ = λ[k], μ = n[k]/λ[k]) for k ∈ 1:K]\n\n\t# Pass to constructor\n\tParameters(θ, S)\nend\n\nfunction Parameters(θ::Matrix, S)\n\n\t# Number of parameter vectors\n\tK = size(θ, 2)\n\n\t# Distance matrices, covariance matrices, and Cholesky factors\n\tD = pairwise.(Ref(Euclidean()), S, dims = 1)\n\tL = Folds.map(1:K) do k\n\t\tΣ = exp.(-D[k] ./ θ[k])\n\t\tcholesky(Symmetric(Σ)).L\n\tend\n\n\t# Construct spatial graphs\n\tg = spatialgraph.(S)\n\n\tParameters(θ, L, g, S)\nend","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next, we define a function for simulating from the model given an object of type Parameters. The code below enables simulation of an arbitrary number of independent replicates m, and one may provide a single integer for m, or any object that can be sampled using rand(m, K) (e.g., an integer range or some distribution over the possible sample sizes):","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"function simulate(parameters::Parameters, m)\n\tK = size(parameters, 2)\n\tm = rand(m, K)\n\tmap(1:K) do k\n\t\tL = parameters.L[k]\n\t\tg = parameters.g[k]\n\t\tn = size(L, 1)\n\t\tZ = L * randn(n, m[k]) \n\t\tspatialgraph(g, Z) \n\tend\nend\nsimulate(parameters::Parameters, m::Integer = 1) = simulate(parameters, range(m, m))","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next we construct an appropriate GNN architecture, as illustrated below. 
Here, our goal is to construct a point estimator, however any other kind of estimator (see Estimators) can be constructed by simply substituting the appropriate estimator class in the final line below:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"# Spatial weight function constructed using 0-1 basis functions \nh_max = 0.15 # maximum distance to consider \nq = 10 # output dimension of the spatial weights\nw = IndicatorWeights(h_max, q)\n\n# Propagation module\npropagation = GNNChain(\n\tSpatialGraphConv(1 => q, relu, w = w, w_out = q),\n\tSpatialGraphConv(q => q, relu, w = w, w_out = q)\n)\n\n# Readout module\nreadout = GlobalPool(mean)\n\n# Global features \nglobalfeatures = SpatialGraphConv(1 => q, relu, w = w, w_out = q, glob = true)\n\n# Summary network\nψ = GNNSummary(propagation, readout, globalfeatures)\n\n# Mapping module\nϕ = Chain(\n\tDense(2q => 128, relu), \n\tDense(128 => 128, relu), \n\tDense(128 => 1, identity)\n)\n\n# DeepSet object\ndeepset = DeepSet(ψ, ϕ)\n\n# Point estimator\nθ̂ = PointEstimator(deepset)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Next, we train the estimator:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"m = 1\nK = 3000\nθ_train = sample(K)\nθ_val = sample(K÷5)\nθ̂ = train(θ̂, θ_train, θ_val, simulate, m = m, epochs = 5)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Then, we assess our trained estimator as before: ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"θ_test = sample(1000)\nZ_test = simulate(θ_test, m)\nassessment = assess(θ̂, θ_test, Z_test)\nbias(assessment) # 0.001\nrmse(assessment) # 0.037\nrisk(assessment) # 0.029\nplot(assessment) ","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"(Image: Estimates from a graph neural network (GNN) based neural Bayes estimator)","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"Finally, once the estimator has been assessed and is deemed to be performant, it may be applied to observed data, with bootstrap-based uncertainty quantification facilitated by bootstrap and interval. Below, we use simulated data as a substitute for observed data:","category":"page"},{"location":"workflow/examples/","page":"Examples","title":"Examples","text":"parameters = sample(1) # sample a single parameter vector\nZ = simulate(parameters) # simulate data \nθ = parameters.θ # true parameters used to generate data\nS = parameters.S # observed locations\nθ̂(Z) # point estimates\nθ̃ = Parameters(θ̂(Z), S) # construct Parameters object from the point estimates\nbs = bootstrap(θ̂, θ̃, simulate, m) # bootstrap estimates\ninterval(bs) # parametric bootstrap confidence interval ","category":"page"},{"location":"framework/#Framework","page":"Framework","title":"Framework","text":"","category":"section"},{"location":"framework/","page":"Framework","title":"Framework","text":"In this section, we provide an overview of point estimation using neural Bayes estimators. For a more detailed discussion on the framework and its implementation, see the paper Likelihood-Free Parameter Estimation with Neural Bayes Estimators. 
For an accessible introduction to amortised neural inferential methods more broadly, see the review paper Neural Methods for Amortised Inference.","category":"page"},{"location":"framework/#Neural-Bayes-estimators","page":"Framework","title":"Neural Bayes estimators","text":"","category":"section"},{"location":"framework/","page":"Framework","title":"Framework","text":"A parametric statistical model is a set of probability distributions on a sample space mathcalZ subseteq mathbbR^n, where the probability distributions are parameterised via some parameter vector boldsymboltheta on a parameter space Theta subseteq mathbbR^p. Suppose that we have data from one such distribution, which we denote as boldsymbolZ. Then, the goal of parameter point estimation is to come up with an estimate of the unknown boldsymboltheta from boldsymbolZ using an estimator,","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":" hatboldsymboltheta mathcalZ to Theta","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"which is a mapping from the sample space to the parameter space.","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"Estimators can be constructed within a decision-theoretic framework. Consider a nonnegative loss function, L(boldsymboltheta hatboldsymboltheta(boldsymbolZ)), which assesses an estimator hatboldsymboltheta(cdot) for a given boldsymboltheta and data set boldsymbolZ sim f(boldsymbolz mid boldsymboltheta), where f(boldsymbolz mid boldsymboltheta) is the probability density function of the data conditional on boldsymboltheta. An estimator's Bayes risk is its loss averaged over all possible parameter values and data realisations,","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"int_Theta int_mathcalZ L(boldsymboltheta hatboldsymboltheta(boldsymbolz))f(boldsymbolz mid boldsymboltheta) rmd boldsymbolz rmd Pi(boldsymboltheta) ","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"where Pi(cdot) is a prior measure for boldsymboltheta. Any minimiser of the Bayes risk is said to be a Bayes estimator with respect to L(cdot cdot) and Pi(cdot).","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"Bayes estimators are theoretically attractive: for example, unique Bayes estimators are admissible and, under suitable regularity conditions and the squared-error loss, are consistent and asymptotically efficient. Further, for a large class of prior distributions, every set of conditions that imply consistency of the maximum likelihood (ML) estimator also imply consistency of Bayes estimators. Importantly, Bayes estimators are not motivated purely by asymptotics: by construction, they are Bayes irrespective of the sample size and model class. Unfortunately, however, Bayes estimators are typically unavailable in closed form for the complex models often encountered in practice. A way forward is to assume a flexible parametric model for hatboldsymboltheta(cdot), and to optimise the parameters within that model in order to approximate the Bayes estimator. 
Neural networks are ideal candidates, since they are universal function approximators, and because they are also fast to evaluate, usually involving only simple matrix-vector operations.","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"Let hatboldsymboltheta(boldsymbolZ boldsymbolgamma) denote a neural network that returns a point estimate from data boldsymbolZ, where boldsymbolgamma contains the neural-network parameters. Bayes estimators may be approximated with hatboldsymboltheta(cdot boldsymbolgamma^*) by solving the optimisation problem, ","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"boldsymbolgamma^*\nequiv\nundersetboldsymbolgammamathrmargmin \nfrac1K sum_k = 1^K L(boldsymboltheta hatboldsymboltheta(boldsymbolz boldsymbolgamma))","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"whose objective function is a Monte Carlo approximation of the Bayes risk made using a set boldsymboltheta^(k) k = 1 dots K of parameter vectors sampled from the prior Pi(cdot) and, for each k, data boldsymbolZ^(k) simulated from f(boldsymbolz mid boldsymboltheta). Note that this Monte Carlo approximation does not involve evaluation, or knowledge, of the likelihood function.","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"The Monte Carlo approximation of the Bayes risk can be straightforwardly minimised with respect to boldsymbolgamma using back-propagation and stochastic gradient descent. For sufficiently flexible architectures, the point estimator targets a Bayes estimator with respect to L(cdot cdot) and Pi(cdot). We therefore call the fitted neural point estimator a neural Bayes estimator. Like Bayes estimators, neural Bayes estimators target a specific point summary of the posterior distribution. For instance, the absolute-error and squared-error loss functions lead to neural Bayes estimators that approximate the posterior median and mean, respectively.","category":"page"},{"location":"framework/#Construction-of-neural-Bayes-estimators","page":"Framework","title":"Construction of neural Bayes estimators","text":"","category":"section"},{"location":"framework/","page":"Framework","title":"Framework","text":"The neural Bayes estimator is conceptually simple and can be used in a wide range of problems where other approaches, such as maximum-likelihood estimation, are computationally infeasible. The estimator also has marked practical appeal, as the general workflow for its construction is only loosely connected to the statistical or physical model being considered. The workflow is as follows:","category":"page"},{"location":"framework/","page":"Framework","title":"Framework","text":"Define the prior, Pi(cdot).\nChoose a loss function, L(cdot cdot), typically the mean-absolute-error or mean-squared-error loss.\nDesign a suitable neural-network architecture for the neural point estimator hatboldsymboltheta(cdot boldsymbolgamma).\nSample parameters from Pi(cdot) to form training/validation/test parameter sets.\nGiven the above parameter sets, simulate data from the model, to form training/validation/test data sets.\nTrain the neural network (i.e., estimate boldsymbolgamma) by minimising the loss function averaged over the training sets. 
During training, monitor performance and convergence using the validation sets.\nAssess the fitted neural Bayes estimator, hatboldsymboltheta(cdot boldsymbolgamma^*), using the test set.","category":"page"},{"location":"API/#Index","page":"Index","title":"Index","text":"","category":"section"},{"location":"API/","page":"Index","title":"Index","text":"","category":"page"},{"location":"workflow/advancedusage/#Advanced-usage","page":"Advanced usage","title":"Advanced usage","text":"","category":"section"},{"location":"workflow/advancedusage/#Saving-and-loading-neural-estimators","page":"Advanced usage","title":"Saving and loading neural estimators","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"In regards to saving and loading, neural estimators behave in the same manner as regular Flux models. Therefore, the examples and recommendations outlined in the Flux documentation also apply directly to neural estimators. For example, to save the model state of the neural estimator θ̂:","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"using Flux\nusing BSON: @save, @load\nmodel_state = Flux.state(θ̂)\n@save \"estimator.bson\" model_state","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Then, to load it in a new session, one may initialise a neural estimator with the same architecture used previously, and load the saved model state:","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"@load \"estimator.bson\" model_state\nFlux.loadmodel!(θ̂, model_state)","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"It is also straightforward to save the entire neural estimator, including its architecture (see here). However, the first approach outlined above is recommended for long-term storage.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"For convenience, the function train() allows for the automatic saving of the model state during the training stage, via the argument savepath.","category":"page"},{"location":"workflow/advancedusage/#Storing-expensive-intermediate-objects-for-data-simulation","page":"Advanced usage","title":"Storing expensive intermediate objects for data simulation","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Parameters sampled from the prior distribution may be stored in two ways. Most simply, they can be stored as a p times K matrix, where p is the number of parameters in the model and K is the number of parameter vectors sampled from the prior distribution. Alternatively, they can be stored in a user-defined struct subtyping ParameterConfigurations, whose only requirement is a field θ that stores the p times K matrix of parameters. 
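For instance, a minimal sketch of such a type is given below (the type name and the additional field are hypothetical placeholders; only the field θ is required):

using NeuralEstimators

struct GPParameters{T} <: ParameterConfigurations
	θ::Matrix{T} # required: the p × K matrix of parameters
	L            # optional: expensive intermediate objects (e.g., Cholesky factors), one per parameter vector
end
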
With this approach, one may store computationally expensive intermediate objects, such as Cholesky factors, for later use when conducting \"on-the-fly\" simulation, which is discussed below.","category":"page"},{"location":"workflow/advancedusage/#On-the-fly-and-just-in-time-simulation","page":"Advanced usage","title":"On-the-fly and just-in-time simulation","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"When data simulation is (relatively) computationally inexpensive, the training data set, mathcalZ_texttrain, can be simulated continuously during training, a technique coined \"simulation-on-the-fly\". Regularly refreshing mathcalZ_texttrain leads to lower out-of-sample error and to a reduction in overfitting. This strategy therefore facilitates the use of larger, more representationally-powerful networks that are prone to overfitting when mathcalZ_texttrain is fixed. Further, this technique allows for data be simulated \"just-in-time\", in the sense that they can be simulated in small batches, used to train the neural estimator, and then removed from memory. This can substantially reduce pressure on memory resources, particularly when working with large data sets.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"One may also regularly refresh the set vartheta_texttrain of parameter vectors used during training, and doing so leads to similar benefits. However, fixing vartheta_texttrain allows computationally expensive terms, such as Cholesky factors when working with Gaussian process models, to be reused throughout training, which can substantially reduce the training time for some models. Hybrid approaches are also possible, whereby the parameters (and possibly the data) are held fixed for several epochs (i.e., several passes through the training set when performing stochastic gradient descent) before being refreshed.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"The above strategies are facilitated with various methods of train().","category":"page"},{"location":"workflow/advancedusage/#Regularisation","page":"Advanced usage","title":"Regularisation","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"The term regularisation refers to a variety of techniques aimed to reduce overfitting when training a neural network, primarily by discouraging complex models.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"One common regularisation technique is known as dropout (Srivastava et al., 2014), implemented in Flux's Dropout layer. Dropout involves temporarily dropping (\"turning off\") a randomly selected set of neurons (along with their connections) at each iteration of the training stage, and this results in a computationally-efficient form of model (neural-network) averaging.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Another class of regularisation techniques involve modifying the loss function. For instance, L₁ regularisation (sometimes called lasso regression) adds to the loss a penalty based on the absolute value of the neural-network parameters. 
Similarly, L₂ regularisation (sometimes called ridge regression) adds to the loss a penalty based on the square of the neural-network parameters. Note that these penalty terms are not functions of the data or of the statistical-model parameters that we are trying to infer, and therefore do not modify the Bayes risk or the associated Bayes estimator. These regularisation techniques can be implemented straightforwardly by providing a custom optimiser to train that includes a SignDecay object for L₁ regularisation, or a WeightDecay object for L₂ regularisation. See the Flux documentation for further details.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"For example, the following code constructs a neural Bayes estimator using dropout and L₁ regularisation with penalty coefficient lambda = 10^-4:","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"using NeuralEstimators\nusing Flux\n\n# Generate data from the model Z ~ N(θ, 1) and θ ~ N(0, 1)\np = 1 # number of unknown parameters in the statistical model\nm = 5 # number of independent replicates\nd = 1 # dimension of each independent replicate\nK = 3000 # number of training samples\nθ_train = randn(1, K)\nθ_val = randn(1, K)\nZ_train = [μ .+ randn(1, m) for μ ∈ eachcol(θ_train)]\nZ_val = [μ .+ randn(1, m) for μ ∈ eachcol(θ_val)]\n\n# Architecture with dropout layers\nψ = Chain(\n\tDense(1, 32, relu),\n\tDropout(0.1),\n\tDense(32, 32, relu),\n\tDropout(0.5)\n\t) \nϕ = Chain(\n\tDense(32, 32, relu),\n\tDropout(0.5),\n\tDense(32, 1)\n\t) \nθ̂ = DeepSet(ψ, ϕ)\n\n# Optimiser with L₁ regularisation\noptimiser = Flux.setup(OptimiserChain(SignDecay(1e-4), Adam()), θ̂)\n\n# Train the estimator\ntrain(θ̂, θ_train, θ_val, Z_train, Z_val; optimiser = optimiser)","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Note that when the training data and/or parameters are held fixed during training, L₂ regularisation with penalty coefficient lambda = 10^-4 is applied by default.","category":"page"},{"location":"workflow/advancedusage/#Expert-summary-statistics","page":"Advanced usage","title":"Expert summary statistics","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Implicitly, neural estimators involve the learning of summary statistics. However, some summary statistics are available in closed form, simple to compute, and highly informative (e.g., sample quantiles, the empirical variogram, etc.). Often, explicitly incorporating these expert summary statistics in a neural estimator can simplify the optimisation problem, and lead to a better estimator.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"The fusion of learned and expert summary statistics is facilitated by our implementation of the DeepSet framework. Note that this implementation also allows the user to construct a neural estimator using only expert summary statistics, following, for example, Gerber and Nychka (2021) and Rai et al. (2024). 
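For instance, a minimal sketch of this fusion is given below, in which a user-defined function S returns two expert summary statistics alongside a learned summary network (the particular statistics and layer widths chosen here are purely illustrative):

using NeuralEstimators, Flux
using Statistics: mean

d = 1  # dimension of each independent replicate
w = 32 # width of each hidden layer

# Expert summary statistics: sample mean and log sample size (illustrative choices)
S(Z) = Float32[mean(Z), log(size(Z, 2))]

# Learned summary statistics and inference network; the inference network acts on
# the concatenation of the w learned statistics and the 2 expert statistics
ψ = Chain(Dense(d, w, relu), Dense(w, w, relu))
ϕ = Chain(Dense(w + 2, w, relu), Dense(w, 1))

deepset = DeepSet(ψ, ϕ; S = S)
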
Note also that the user may specify arbitrary expert summary statistics, however, for convenience several standard User-defined summary statistics are provided with the package, including a fast approximate version of the empirical variogram.","category":"page"},{"location":"workflow/advancedusage/#Variable-sample-sizes","page":"Advanced usage","title":"Variable sample sizes","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"A neural estimator in the Deep Set representation can be applied to data sets of arbitrary size. However, even when the neural Bayes estimator approximates the true Bayes estimator arbitrarily well, it is conditional on the number of replicates, m, and is not necessarily a Bayes estimator for m^* ne m. Denote a data set comprising m replicates as boldsymbolZ^(m) equiv (boldsymbolZ_1 dots boldsymbolZ_m). There are at least two (non-mutually exclusive) approaches one could adopt if data sets with varying m are envisaged, which we describe below.","category":"page"},{"location":"workflow/advancedusage/#Piecewise-estimators","page":"Advanced usage","title":"Piecewise estimators","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"If data sets with varying m are envisaged, one could train l neural Bayes estimators for different sample sizes, or groups thereof (e.g., a small-sample estimator and a large-sample estimator). Specifically, for sample-size changepoints m_1, m_2, dots, m_l-1, one could construct a piecewise neural Bayes estimator,","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"hatboldsymboltheta(boldsymbolZ^(m) boldsymbolgamma^*)\n=\nbegincases\nhatboldsymboltheta(boldsymbolZ^(m) boldsymbolgamma^*_tildem_1) m leq m_1\nhatboldsymboltheta(boldsymbolZ^(m) boldsymbolgamma^*_tildem_2) m_1 m leq m_2\nquad vdots \nhatboldsymboltheta(boldsymbolZ^(m) boldsymbolgamma^*_tildem_l) m m_l-1\nendcases","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"where, here, boldsymbolgamma^* equiv (boldsymbolgamma^*_tildem_1 dots boldsymbolgamma^*_tildem_l-1), and where boldsymbolgamma^*_tildem are the neural-network parameters optimised for sample size tildem chosen so that hatboldsymboltheta(cdot boldsymbolgamma^*_tildem) is near-optimal over the range of sample sizes in which it is applied. This approach works well in practice, and it is less computationally burdensome than it first appears when used in conjunction with pre-training.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Piecewise neural estimators are implemented with the struct, PiecewiseEstimator, and their construction is facilitated with trainx(). 
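For instance, a minimal sketch of this construction is given below. This is a sketch only: it assumes that trainx accepts a vector of sample sizes and returns the correspondingly trained estimators, and that PiecewiseEstimator is constructed from these estimators together with the sample-size changepoints; here, sample and simulate denote the prior sampler and model simulator defined in the examples, and θ̂ an initialised point estimator:

# Train estimators tailored to small (m ≤ 30) and large (m > 30) sample sizes
estimators = trainx(θ̂, sample, simulate, [30, 1000])

# Combine into a piecewise estimator with a single changepoint at m = 30
θ̂_piecewise = PiecewiseEstimator(estimators, [30])
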
","category":"page"},{"location":"workflow/advancedusage/#Training-with-variable-sample-sizes","page":"Advanced usage","title":"Training with variable sample sizes","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Alternatively, one could treat the sample size as a random variable, M, with support over a set of positive integers, mathcalM, in which case, for the neural Bayes estimator, the risk function becomes","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"sum_m in mathcalM\nP(M=m)left(\nint_Theta int_mathcalZ^m L(boldsymboltheta hatboldsymboltheta(boldsymbolz^(m)))f(boldsymbolz^(m) mid boldsymboltheta) rmd boldsymbolz^(m) rmd Pi(boldsymboltheta)\nright)","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"This approach does not materially alter the workflow, except that one must also sample the number of replicates before simulating the data during the training phase.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"The following pseudocode illustrates how one may modify a general data simulator to train under a range of sample sizes, with the distribution of M defined by passing any object that can be sampled using rand(m, K) (e.g., an integer range like 1:30, an integer-valued distribution from Distributions.jl, etc.):","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"function simulate(parameters, m)\n\n\t## Number of parameter vectors stored in parameters\n\tK = size(parameters, 2)\n\n\t## Generate K sample sizes from the prior distribution for M\n\tm̃ = rand(m, K)\n\n\t## Pseudocode for data simulation\n\tZ = [ for k ∈ 1:K]\n\n\treturn Z\nend\n\n## Method that allows an integer to be passed for m\nsimulate(parameters, m::Integer) = simulate(parameters, range(m, m))","category":"page"},{"location":"workflow/advancedusage/#Missing-data","page":"Advanced usage","title":"Missing data","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Neural networks do not naturally handle missing data, and this property can preclude their use in a broad range of applications. Here, we describe two techniques that alleviate this challenge in the context of parameter point estimation: The masking approach and The neural EM algorithm.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"As a running example, we consider a Gaussian process model where the data are collected over a regular grid, but where some elements of the grid are unobserved. This situation often arises in, for example, remote-sensing applications, where the presence of cloud cover prevents measurement in some places. Below, we load the packages needed in this example, and define some aspects of the model that will remain constant throughout (e.g., the prior, the spatial domain, etc.). 
We also define structs and functions for sampling from the prior distribution and for simulating marginally from the data model.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"using Distances\nusing Distributions\nusing Flux\nusing LinearAlgebra\nusing NeuralEstimators\nusing Statistics: mean\n\n# Set the prior and define the number of parameters in the statistical model\nΠ = (\n\tτ = Uniform(0, 1.0),\n\tρ = Uniform(0, 0.4)\n)\np = length(Π)\n\n# Define the (gridded) spatial domain and compute the distance matrix\npoints = range(0, 1, 16)\nS = expandgrid(points, points)\nD = pairwise(Euclidean(), S, dims = 1)\n\n# Store model information for later use\nξ = (\n\tΠ = Π,\n\tS = S,\n\tD = D\n)\n\n# Struct for storing parameters+Cholesky factors\nstruct Parameters <: ParameterConfigurations\n\tθ\n\tL\nend\n\n# Constructor for above struct\nfunction Parameters(K::Integer, ξ)\n\n\t# Sample parameters from the prior\n\tΠ = ξ.Π\n\tτ = rand(Π.τ, K)\n\tρ = rand(Π.ρ, K)\n\tν = 1 # fixed smoothness\n\n\t# Compute Cholesky factors \n\tL = maternchols(ξ.D, ρ, ν)\n\n\t# Concatenate into matrix\n\tθ = permutedims(hcat(τ, ρ))\n\n\tParameters(θ, L)\nend\n\n# Marginal simulation from the data model\nfunction simulate(parameters::Parameters, m::Integer)\n\n\tK = size(parameters, 2)\n\tτ = parameters.θ[1, :]\n\tL = parameters.L\n\tn = isqrt(size(L, 1))\n\n\tZ = map(1:K) do k\n\t\tz = simulategaussian(L[:, :, k], m)\n\t\tz = z + τ[k] * randn(size(z)...)\n\t\tz = Float32.(z)\n\t\tz = reshape(z, n, n, 1, :)\n\t\tz\n\tend\n\n\treturn Z\nend","category":"page"},{"location":"workflow/advancedusage/#The-masking-approach","page":"Advanced usage","title":"The masking approach","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"The first missing-data technique that we consider is the so-called masking approach of Wang et al. (2024). The strategy involves completing the data by replacing missing values with zeros, and using auxiliary variables to encode the missingness pattern, which are also passed into the network.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Let boldsymbolZ denote the complete-data vector. Then, the masking approach considers inference based on boldsymbolW, a vector of indicator variables that encode the missingness pattern (with elements equal to one or zero if the corresponding element of boldsymbolZ is observed or missing, respectively), and","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"boldsymbolU equiv boldsymbolZ odot boldsymbolW","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"where odot denotes elementwise multiplication and the product of a missing element and zero is defined to be zero. Irrespective of the missingness pattern, boldsymbolU and boldsymbolW have the same fixed dimensions and hence may be processed easily using a single neural network. A neural point estimator is then trained on realisations of boldsymbolU boldsymbolW which, by construction, do not contain any missing elements.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Since the missingness pattern boldsymbolW is now an input to the neural network, it must be incorporated during the training phase. 
When interest lies only in making inference from a single already-observed data set, boldsymbolW is fixed and known, and the Bayes risk remains unchanged. However, amortised inference, whereby one trains a single neural network that will be used to make inference with many data sets, requires a joint model for the data boldsymbolZ and the missingness pattern boldsymbolW:","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"# Marginal simulation from the data model and a MCAR missingness model\nfunction simulatemissing(parameters::Parameters, m::Integer)\n\n\tZ = simulate(parameters, m) # simulate completely-observed data\n\n\tUW = map(Z) do z\n\t\tprop = rand() # sample a missingness proportion\n\t\tz = removedata(z, prop) # randomly remove a proportion of the data\n\t\tuw = encodedata(z) # replace missing entries with zero and encode missingness pattern\n\t\tuw\n\tend\n\n\treturn UW\nend","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Note that the helper functions removedata() and encodedata() facilitate the construction of augmented data sets boldsymbolU boldsymbolW.","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Next, we construct and train a masked neural Bayes estimator. Here, the first convolutional layer takes two input channels, since we store the augmented data boldsymbolU in the first channel and the missingness pattern boldsymbolW in the second. We construct a point estimator, but the masking approach is applicable with any other kind of estimator (see Estimators):","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"# Construct DeepSet object\nψ = Chain(\n\tConv((10, 10), 2 => 16, relu),\n\tConv((5, 5), 16 => 32, relu),\n\tConv((3, 3), 32 => 64, relu),\n\tFlux.flatten\n\t)\nϕ = Chain(Dense(64, 256, relu), Dense(256, p, exp))\ndeepset = DeepSet(ψ, ϕ)\n\n# Initialise point estimator\nθ̂ = PointEstimator(deepset)\n\n# Train the masked neural Bayes estimator\nθ̂ = train(θ̂, Parameters, simulatemissing, m = 1, ξ = ξ, K = 1000, epochs = 10)","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Once trained, we can apply our masked neural Bayes estimator to (incomplete) observed data. The data must be encoded in the same manner that was done during training. Below, we use simulated data as a surrogate for real data, with a missingness proportion of 0.25:","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"θ = Parameters(1, ξ)\nZ = simulate(θ, 1)[1]\nZ = removedata(Z, 0.25)\nUW = encodedata(Z)\nθ̂(UW)","category":"page"},{"location":"workflow/advancedusage/#The-neural-EM-algorithm","page":"Advanced usage","title":"The neural EM algorithm","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Let boldsymbolZ_1 and boldsymbolZ_2 denote the observed and unobserved (i.e., missing) data, respectively, and let boldsymbolZ equiv (boldsymbolZ_1 boldsymbolZ_2) denote the complete data. A classical approach to facilitating inference when data are missing is the expectation-maximisation (EM) algorithm. 
The neural EM algorithm is an approximate version of the conventional (Bayesian) Monte Carlo EM algorithm which, at the lth iteration, updates the parameter vector through","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"boldsymboltheta^(l) = argmax_boldsymboltheta sum_h = 1^H ell(boldsymboltheta boldsymbolZ_1 boldsymbolZ_2^(lh)) + log pi_H(boldsymboltheta)","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"where realisations of the missing-data component, boldsymbolZ_2^(lh) h = 1 dots H, are sampled from the probability distribution of boldsymbolZ_2 given boldsymbolZ_1 and boldsymboltheta^(l-1), and where pi_H(boldsymboltheta) propto pi(boldsymboltheta)^H is a concentrated version of the original prior density. Given the conditionally simulated data, the neural EM algorithm performs the above EM update using a neural network that returns the MAP estimate (i.e., the posterior mode) conditionally simulated data. Such a neural network can be obtained by training a neural Bayes estimator under a continuous relaxation of the 0–1 loss function, such as","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"First, we construct a neural approximation of the MAP estimator. In this example, we will take H=50. When H is taken to be reasonably large, one may lean on the Bernstein-von Mises theorem to train the neural Bayes estimator under linear or quadratic loss; otherwise, one should train the estimator under a continuous relaxation of the 0–1 loss (e.g., the tanhloss or kpowerloss in the limit kappa to 0):","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"# Construct DeepSet object\nψ = Chain(\n\tConv((10, 10), 1 => 16, relu),\n\tConv((5, 5), 16 => 32, relu),\n\tConv((3, 3), 32 => 64, relu),\n\tFlux.flatten\n\t)\nϕ = Chain(\n\tDense(64, 256, relu),\n\tDense(256, p, exp)\n\t)\ndeepset = DeepSet(ψ, ϕ)\n\n# Initialise point estimator\nθ̂ = PointEstimator(deepset)\n\n# Train neural Bayes estimator\nH = 50\nθ̂ = train(θ̂, Parameters, simulate, m = H, ξ = ξ, K = 1000, epochs = 10)","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Next, we define a function for conditional simulation (see EM for details on the required format of this function):","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"function simulateconditional(Z::M, θ, ξ; nsims::Integer = 1) where {M <: AbstractMatrix{Union{Missing, T}}} where T\n\n\t# Save the original dimensions\n\tdims = size(Z)\n\n\t# Convert to vector\n\tZ = vec(Z)\n\n\t# Compute the indices of the observed and missing data\n\tI₁ = findall(z -> !ismissing(z), Z) # indices of observed data\n\tI₂ = findall(z -> ismissing(z), Z) # indices of missing data\n\tn₁ = length(I₁)\n\tn₂ = length(I₂)\n\n\t# Extract the observed data and drop Missing from the eltype of the container\n\tZ₁ = Z[I₁]\n\tZ₁ = [Z₁...]\n\n\t# Distance matrices needed for covariance matrices\n\tD = ξ.D # distance matrix for all locations in the grid\n\tD₂₂ = D[I₂, I₂]\n\tD₁₁ = D[I₁, I₁]\n\tD₁₂ = D[I₁, I₂]\n\n\t# Extract the parameters from θ\n\tτ = θ[1]\n\tρ = θ[2]\n\n\t# Compute covariance matrices\n\tν = 1 # fixed smoothness\n\tΣ₂₂ = matern.(UpperTriangular(D₂₂), ρ, ν); Σ₂₂[diagind(Σ₂₂)] .+= τ^2\n\tΣ₁₁ = 
matern.(UpperTriangular(D₁₁), ρ, ν); Σ₁₁[diagind(Σ₁₁)] .+= τ^2\n\tΣ₁₂ = matern.(D₁₂, ρ, ν)\n\n\t# Compute the Cholesky factor of Σ₁₁ and solve the lower triangular system\n\tL₁₁ = cholesky(Symmetric(Σ₁₁)).L\n\tx = L₁₁ \\ Σ₁₂\n\n\t# Conditional covariance matrix, cov(Z₂ ∣ Z₁, θ), and its Cholesky factor\n\tΣ = Σ₂₂ - x'x\n\tL = cholesky(Symmetric(Σ)).L\n\n\t# Conditonal mean, E(Z₂ ∣ Z₁, θ)\n\ty = L₁₁ \\ Z₁\n\tμ = x'y\n\n\t# Simulate from the distribution Z₂ ∣ Z₁, θ ∼ N(μ, Σ)\n\tz = randn(n₂, nsims)\n\tZ₂ = μ .+ L * z\n\n\t# Combine the observed and missing data to form the complete data\n\tZ = map(1:nsims) do l\n\t\tz = Vector{T}(undef, n₁ + n₂)\n\t\tz[I₁] = Z₁\n\t\tz[I₂] = Z₂[:, l]\n\t\tz\n\tend\n\tZ = stackarrays(Z, merge = false)\n\n\t# Convert Z to an array with appropriate dimensions\n\tZ = reshape(Z, dims..., 1, nsims)\n\n\treturn Z\nend","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Now we can use the neural EM algorithm to get parameter point estimates from data containing missing values. The algorithm is implemented with the struct EM. Again, here we use simulated data as a surrogate for real data:","category":"page"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"θ = Parameters(1, ξ)\nZ = simulate(θ, 1)[1][:, :] # simulate a single gridded field\nZ = removedata(Z, 0.25) # remove 25% of the data\nθ₀ = mean.([Π...]) # initial estimate, the prior mean\n\nneuralem = EM(simulateconditional, θ̂)\nneuralem(Z, θ₀, ξ = ξ, nsims = H, use_ξ_in_simulateconditional = true)","category":"page"},{"location":"workflow/advancedusage/#Censored-data","page":"Advanced usage","title":"Censored data","text":"","category":"section"},{"location":"workflow/advancedusage/","page":"Advanced usage","title":"Advanced usage","text":"Coming soon, based on the methodology presented in Richards et al. (2023+).","category":"page"},{"location":"workflow/overview/#Overview","page":"Overview","title":"Overview","text":"","category":"section"},{"location":"workflow/overview/","page":"Overview","title":"Overview","text":"To develop a neural estimator with NeuralEstimators,","category":"page"},{"location":"workflow/overview/","page":"Overview","title":"Overview","text":"Sample parameters from the prior distribution. The parameters are stored as p times K matrices, with p the number of parameters in the model and K the number of parameter vectors in the given parameter set (i.e., training, validation, or test set).\nSimulate data from the assumed model over the parameter sets generated above. These data are stored as a Vector{A}, with each element of the vector associated with one parameter configuration, and where A depends on the multivariate structure of the data and the representation of the neural estimator (e.g., an Array for CNN-based estimators, a GNNGraph for GNN-based estimators, etc.).\nInitialise a neural network θ̂. \nTrain θ̂ under the chosen loss function using train().\nAssess θ̂ using assess(), which uses simulation-based methods to assess the estimator with respect to its sampling distribution.","category":"page"},{"location":"workflow/overview/","page":"Overview","title":"Overview","text":"Once the estimator θ̂ has passed our assessments and is therefore deemed to be well calibrated, it may be applied to observed data. 
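A compact sketch of this workflow is given below, where sample and simulate denote a user-defined prior sampler and model simulator (as in the examples), and ψ, ϕ, m, and Z are placeholders for a suitable summary network, inference network, sample size, and observed data set:

using NeuralEstimators, Flux

θ_train = sample(10000)             # 1. sample parameters from the prior
θ_val   = sample(1000)
Z_train = simulate(θ_train, m)      # 2. simulate data from the assumed model
Z_val   = simulate(θ_val, m)
θ̂ = PointEstimator(DeepSet(ψ, ϕ))   # 3. initialise a neural network
θ̂ = train(θ̂, θ_train, θ_val, Z_train, Z_val) # 4. train under the chosen loss
θ_test = sample(1000)
Z_test = simulate(θ_test, m)
assessment = assess(θ̂, θ_test, Z_test) # 5. assess with simulation-based methods
θ̂(Z) # once deemed well calibrated, apply to observed data
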
See the Examples and, once familiar with the basic workflow, see Advanced usage for practical considerations on how to most effectively construct neural estimators.","category":"page"},{"location":"API/core/#Core","page":"Core","title":"Core","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"This page documents the classes and functions that are central to the workflow of NeuralEstimators. Its organisation reflects the order in which these classes and functions appear in a standard implementation; that is, from sampling parameters from the prior distribution, to using a neural Bayes estimator to make inference with observed data sets.","category":"page"},{"location":"API/core/#Sampling-parameters","page":"Core","title":"Sampling parameters","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"Parameters sampled from the prior distribution are stored as a p times K matrix, where p is the number of parameters in the statistical model and K is the number of parameter vectors sampled from the prior distribution.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"It can sometimes be helpful to wrap the parameter matrix in a user-defined type that also stores expensive intermediate objects needed for data simulated (e.g., Cholesky factors). In this case, the user-defined type should be a subtype of the abstract type ParameterConfigurations, whose only requirement is a field θ that stores the matrix of parameters. See Storing expensive intermediate objects for data simulation for further discussion. ","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"ParameterConfigurations","category":"page"},{"location":"API/core/#NeuralEstimators.ParameterConfigurations","page":"Core","title":"NeuralEstimators.ParameterConfigurations","text":"ParameterConfigurations\n\nAn abstract supertype for user-defined types that store parameters and any intermediate objects needed for data simulation.\n\nThe user-defined type must have a field θ that stores the p × K matrix of parameters, where p is the number of parameters in the model and K is the number of parameter vectors sampled from the prior distribution. 
There are no other restrictions.\n\nSee subsetparameters for the generic function for subsetting these objects.\n\nExamples\n\nstruct P <: ParameterConfigurations\n\tθ\n\t# other expensive intermediate objects...\nend\n\n\n\n\n\n","category":"type"},{"location":"API/core/#Simulating-data","page":"Core","title":"Simulating data","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"NeuralEstimators facilitates neural estimation for arbitrary statistical models by having the user implicitly define their model via simulated data, either as fixed instances or via a function that simulates data from the statistical model.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"The data are always stored as a Vector{A}, where each element of the vector corresponds to a data set of m independent replicates associated with one parameter vector (note that m is arbitrary), and where the type A depends on the multivariate structure of the data:","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"For univariate and unstructured multivariate data, A is a d times m matrix where d is the dimension each replicate (e.g., d=1 for univariate data).\nFor data collected over a regular grid, A is a (N + 2)-dimensional array, where N is the dimension of the grid (e.g., N = 1 for time series, N = 2 for two-dimensional spatial grids, etc.). The first N dimensions of the array correspond to the dimensions of the grid; the penultimate dimension stores the so-called \"channels\" (this dimension is singleton for univariate processes, two for bivariate processes, and so on); and the final dimension stores the independent replicates. For example, to store 50 independent replicates of a bivariate spatial process measured over a 10x15 grid, one would construct an array of dimension 10x15x2x50.\nFor spatial data collected over irregular spatial locations, A is a GNNGraph with independent replicates (possibly with differing spatial locations) stored as subgraphs using the function batch.","category":"page"},{"location":"API/core/#Estimators","page":"Core","title":"Estimators","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"Several classes of neural estimators are available in the package.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"The simplest class is PointEstimator, used for constructing arbitrary mappings from the sample space to the parameter space. When constructing a generic point estimator, the user defines the loss function and therefore the Bayes estimator that will be targeted.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"Several classes cater for the estimation of marginal posterior quantiles, based on the quantile loss function (see quantileloss()); in particular, see IntervalEstimator and QuantileEstimatorDiscrete for estimating marginal posterior quantiles for a fixed set of probability levels, and QuantileEstimatorContinuous for estimating marginal posterior quantiles with the probability level as an input to the neural network.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"In addition to point estimation, the package also provides the class RatioEstimator for approximating the so-called likelihood-to-evidence ratio. 
The binary classification problem at the heart of this approach proceeds based on the binary cross-entropy loss.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"Users are free to choose the neural-network architecture of these estimators as they see fit (subject to some class-specific requirements), but the package also provides the convenience constructor initialise_estimator().","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"NeuralEstimator\n\nPointEstimator\n\nIntervalEstimator\n\nQuantileEstimatorDiscrete\n\nQuantileEstimatorContinuous\n\nRatioEstimator\n\nPiecewiseEstimator\n\nEnsemble","category":"page"},{"location":"API/core/#NeuralEstimators.NeuralEstimator","page":"Core","title":"NeuralEstimators.NeuralEstimator","text":"NeuralEstimator\n\nAn abstract supertype for neural estimators.\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.PointEstimator","page":"Core","title":"NeuralEstimators.PointEstimator","text":"PointEstimator(deepset::DeepSet)\n\nA neural point estimator, a mapping from the sample space to the parameter space.\n\nThe estimator leverages the DeepSet architecture. The only requirement is that number of output neurons in the final layer of the inference network (i.e., the outer network) is equal to the number of parameters in the statistical model.\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.IntervalEstimator","page":"Core","title":"NeuralEstimators.IntervalEstimator","text":"IntervalEstimator(u, v = u; probs = [0.025, 0.975], g::Function = exp)\nIntervalEstimator(u, c::Union{Function,Compress}; probs = [0.025, 0.975], g::Function = exp)\nIntervalEstimator(u, v, c::Union{Function,Compress}; probs = [0.025, 0.975], g::Function = exp)\n\nA neural interval estimator which, given data Z, jointly estimates marginal posterior credible intervals based on the probability levels probs.\n\nThe estimator employs a representation that prevents quantile crossing, namely, it constructs marginal posterior credible intervals for each parameter theta_i, i = 1 dots p of the form,\n\nc_i(u_i(boldsymbolZ)) c_i(u_i(boldsymbolZ)) + g(v_i(boldsymbolZ)))\n\nwhere boldsymbolu() equiv (u_1(cdot) dots u_p(cdot)) and boldsymbolv() equiv (v_1(cdot) dots v_p(cdot)) are neural networks that transform data into p-dimensional vectors; g(cdot) is a monotonically increasing function (e.g., exponential or softplus); and each c_i() is a monotonically increasing function that maps its input to the prior support of theta_i.\n\nThe functions c_i() may be defined by a p-dimensional object of type Compress. 
If these functions are unspecified, they will be set to the identity function so that the range of the intervals will be unrestricted.\n\nIf only a single neural-network architecture is provided, it will be used for both boldsymbolu() and boldsymbolv().\n\nThe return value when applied to data is a matrix with 2p rows, where the first and second p rows correspond to the lower and upper bounds, respectively.\n\nSee also QuantileEstimatorDiscrete and QuantileEstimatorContinuous.\n\nExamples\n\nusing NeuralEstimators, Flux\n\n# Generate some toy data\nn = 2 # bivariate data\nm = 100 # number of independent replicates\nZ = rand(n, m)\n\n# prior\np = 3 # number of parameters in the statistical model\nmin_supp = [25, 0.5, -pi/2]\nmax_supp = [500, 2.5, 0]\ng = Compress(min_supp, max_supp)\n\n# Create an architecture\nw = 8 # width of each layer\nψ = Chain(Dense(n, w, relu), Dense(w, w, relu));\nϕ = Chain(Dense(w, w, relu), Dense(w, p));\nu = DeepSet(ψ, ϕ)\n\n# Initialise the interval estimator\nestimator = IntervalEstimator(u, g)\n\n# Apply the (untrained) interval estimator\nestimator(Z)\ninterval(estimator, Z)\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.QuantileEstimatorDiscrete","page":"Core","title":"NeuralEstimators.QuantileEstimatorDiscrete","text":"QuantileEstimatorDiscrete(v::DeepSet; probs = [0.05, 0.25, 0.5, 0.75, 0.95], g = Flux.softplus, i = nothing)\n(estimator::QuantileEstimatorDiscrete)(Z)\n(estimator::QuantileEstimatorDiscrete)(Z, θ₋ᵢ)\n\nA neural estimator that jointly estimates a fixed set of marginal posterior quantiles with probability levels tau_1 dots tau_T, controlled by the keyword argument probs.\n\nBy default, the estimator approximates the marginal quantiles for all parameters in the model, that is, the quantiles of\n\ntheta_i mid boldsymbolZ\n\nfor parameters boldsymboltheta equiv (theta_1 dots theta_p). Alternatively, if initialised with i set to a positive integer, the estimator approximates the quantiles of the full conditional distribution\n\ntheta_i mid boldsymbolZ boldsymboltheta_-i\n\nwhere boldsymboltheta_-i denotes the parameter vector with its ith element removed. For ease of exposition, when targetting marginal posteriors of the form theta_i mid boldsymbolZ (i.e., the default behaviour), we define textdim(boldsymboltheta_-i) 0.\n\nThe estimator leverages the DeepSet architecture, subject to two requirements. First, the number of input neurons in the first layer of the inference network (i.e., the outer network) must be equal to the number of neurons in the final layer of the summary network plus textdim(boldsymboltheta_-i). Second, the number of output neurons in the final layer of the inference network must be equal to p - textdim(boldsymboltheta_-i). The estimator employs a representation that prevents quantile crossing, namely,\n\nbeginaligned\nboldsymbolq^(tau_1)(boldsymbolZ) = boldsymbolv^(tau_1)(boldsymbolZ)\nboldsymbolq^(tau_t)(boldsymbolZ) = boldsymbolv^(tau_1)(boldsymbolZ) + sum_j=2^t g(boldsymbolv^(tau_j)(boldsymbolZ)) quad t = 2 dots T\nendaligned\n\nwhere boldsymbolq^(tau)(boldsymbolZ) denotes the vector of tau-quantiles for parameters boldsymboltheta equiv (theta_1 dots theta_p), and boldsymbolv^(tau_t)(cdot), t = 1 dots T, are unconstrained neural networks that transform data into p-dimensional vectors, and g(cdot) is a non-negative function (e.g., exponential or softplus) applied elementwise to its arguments. 
If g=nothing, the quantiles are estimated independently through the representation,\n\nboldsymbolq^(tau_t)(boldsymbolZ) = boldsymbolv^(tau_t)(boldsymbolZ) quad t = 1 dots T\n\nThe return value is a matrix with (p - textdim(boldsymboltheta_-i)) times T rows, where the first set of T rows corresponds to the estimated quantiles for the first parameter, the second set of T rows corresponds to the estimated quantiles for the second parameter, and so on.\n\nSee also IntervalEstimator and QuantileEstimatorContinuous.\n\nExamples\n\nusing NeuralEstimators, Flux, Distributions\nusing AlgebraOfGraphics, CairoMakie\n\n# Model: Z|θ ~ N(θ, 1) with θ ~ N(0, 1)\nd = 1 # dimension of each independent replicate\np = 1 # number of unknown parameters in the statistical model\nm = 30 # number of independent replicates in each data set\nprior(K) = randn32(p, K)\nsimulate(θ, m) = [μ .+ randn32(1, m) for μ ∈ eachcol(θ)]\n\n# Architecture\nψ = Chain(Dense(d, 64, relu), Dense(64, 64, relu))\nϕ = Chain(Dense(64, 64, relu), Dense(64, p))\nv = DeepSet(ψ, ϕ)\n\n# Initialise the estimator\nτ = [0.05, 0.25, 0.5, 0.75, 0.95]\nq̂ = QuantileEstimatorDiscrete(v; probs = τ)\n\n# Train the estimator\nq̂ = train(q̂, prior, simulate, m = m)\n\n# Assess the estimator\nθ = prior(1000)\nZ = simulate(θ, m)\nassessment = assess(q̂, θ, Z)\nplot(assessment)\n\n# Estimate posterior quantiles\nq̂(Z)\n\n\n# -------------------------------------------------------------\n# --------------------- Full conditionals ---------------------\n# -------------------------------------------------------------\n\n\n# Model: Z|μ,σ ~ N(μ, σ²) with μ ~ N(0, 1), σ ∼ IG(3,1)\nd = 1 # dimension of each independent replicate\np = 2 # number of unknown parameters in the statistical model\nm = 30 # number of independent replicates in each data set\nfunction prior(K)\n\tμ = randn(1, K)\n\tσ = rand(InverseGamma(3, 1), 1, K)\n\tθ = Float32.(vcat(μ, σ))\nend\nsimulate(θ, m) = [ϑ[1] .+ ϑ[2] .* randn32(1, m) for ϑ ∈ eachcol(θ)]\n\n# Architecture\nψ = Chain(Dense(d, 64, relu), Dense(64, 64, relu))\nϕ = Chain(Dense(64 + 1, 64, relu), Dense(64, 1))\nv = DeepSet(ψ, ϕ)\n\n# Initialise estimators respectively targetting quantiles of μ∣Z,σ and σ∣Z,μ\nτ = [0.05, 0.25, 0.5, 0.75, 0.95]\nq₁ = QuantileEstimatorDiscrete(v; probs = τ, i = 1)\nq₂ = QuantileEstimatorDiscrete(v; probs = τ, i = 2)\n\n# Train the estimators\nq₁ = train(q₁, prior, simulate, m = m)\nq₂ = train(q₂, prior, simulate, m = m)\n\n# Assess the estimators\nθ = prior(1000)\nZ = simulate(θ, m)\nassessment = assess([q₁, q₂], θ, Z, parameter_names = [\"μ\", \"σ\"])\nplot(assessment)\n\n# Estimate quantiles of μ∣Z,σ with σ = 0.5 and for many data sets\nθ₋ᵢ = 0.5f0\nq₁(Z, θ₋ᵢ)\n\n# Estimate quantiles of μ∣Z,σ with σ = 0.5 for only a single data set\nq₁(Z[1], θ₋ᵢ)\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.QuantileEstimatorContinuous","page":"Core","title":"NeuralEstimators.QuantileEstimatorContinuous","text":"QuantileEstimatorContinuous(deepset::DeepSet; i = nothing, num_training_probs::Integer = 1)\n(estimator::QuantileEstimatorContinuous)(Z, τ)\n(estimator::QuantileEstimatorContinuous)(Z, θ₋ᵢ, τ)\n\nA neural estimator targetting posterior quantiles.\n\nGiven as input data boldsymbolZ and the desired probability level tau (0 1), by default the estimator approximates the tau-quantile of\n\ntheta_i mid boldsymbolZ\n\nfor parameters boldsymboltheta equiv (theta_1 dots theta_p). 
Alternatively, if initialised with i set to a positive integer, the estimator approximates the tau-quantile of the full conditional distribution\n\ntheta_i mid boldsymbolZ boldsymboltheta_-i\n\nwhere boldsymboltheta_-i denotes the parameter vector with its ith element removed. For ease of exposition, when targetting marginal posteriors of the form theta_i mid boldsymbolZ (i.e., the default behaviour), we define textdim(boldsymboltheta_-i) 0.\n\nThe estimator leverages the DeepSet architecture, subject to two requirements. First, the number of input neurons in the first layer of the inference network (i.e., the outer network) must be equal to the number of neurons in the final layer of the summary network plus 1 + textdim(boldsymboltheta_-i). Second, the number of output neurons in the final layer of the inference network must be equal to p - textdim(boldsymboltheta_-i).\n\nAlthough not a requirement, one may employ a (partially) monotonic neural network to prevent quantile crossing (i.e., to ensure that the tau_1-quantile does not exceed the tau_2-quantile for any tau_2 tau_1). There are several ways to construct such a neural network: one simple yet effective approach is to ensure that all weights associated with tau are strictly positive (see, e.g., Cannon, 2018), and this can be done using the DensePositive layer as illustrated in the examples below.\n\nThe return value is a matrix with p - textdim(boldsymboltheta_-i) rows, corresponding to the estimated quantile for each parameter not in boldsymboltheta_-i.\n\nSee also QuantileEstimatorDiscrete.\n\nExamples\n\nusing NeuralEstimators, Flux, Distributions , InvertedIndices, Statistics\nusing AlgebraOfGraphics, CairoMakie\n\n# Model: Z|θ ~ N(θ, 1) with θ ~ N(0, 1)\nd = 1 # dimension of each independent replicate\np = 1 # number of unknown parameters in the statistical model\nm = 30 # number of independent replicates in each data set\nprior(K) = randn32(p, K)\nsimulateZ(θ, m) = [ϑ .+ randn32(1, m) for ϑ ∈ eachcol(θ)]\nsimulateτ(K) = [rand32(10) for k in 1:K]\nsimulate(θ, m) = simulateZ(θ, m), simulateτ(size(θ, 2))\n\n# Architecture: partially monotonic network to preclude quantile crossing\nw = 64 # width of each hidden layer\nψ = Chain(\n\tDense(d, w, relu),\n\tDense(w, w, relu),\n\tDense(w, w, relu)\n\t)\nϕ = Chain(\n\tDensePositive(Dense(w + 1, w, relu); last_only = true),\n\tDensePositive(Dense(w, w, relu)),\n\tDensePositive(Dense(w, p))\n\t)\ndeepset = DeepSet(ψ, ϕ)\n\n# Initialise the estimator\nq̂ = QuantileEstimatorContinuous(deepset)\n\n# Train the estimator\nq̂ = train(q̂, prior, simulate, m = m)\n\n# Assess the estimator\nθ = prior(1000)\nZ = simulateZ(θ, m)\nassessment = assess(q̂, θ, Z)\nplot(assessment)\n\n# Estimate 0.1-quantile for many data sets\nτ = 0.1f0\nq̂(Z, τ)\n\n# Estimate several quantiles for a single data set\n# (note that τ is given as a row vector)\nz = Z[1]\nτ = Float32.([0.1, 0.25, 0.5, 0.75, 0.9])'\nq̂(z, τ)\n\n# -------------------------------------------------------------\n# --------------------- Full conditionals ---------------------\n# -------------------------------------------------------------\n\n# Model: Z|μ,σ ~ N(μ, σ²) with μ ~ N(0, 1), σ ∼ IG(3,1)\nd = 1 # dimension of each independent replicate\np = 2 # number of unknown parameters in the statistical model\nm = 30 # number of independent replicates in each data set\nfunction prior(K)\n\tμ = randn(1, K)\n\tσ = rand(InverseGamma(3, 1), 1, K)\n\tθ = vcat(μ, σ)\n\tθ = Float32.(θ)\n\treturn θ\nend\nsimulateZ(θ, m) = [ϑ[1] .+ ϑ[2] .* randn32(1, m) for 
ϑ ∈ eachcol(θ)]\nsimulateτ(θ) = [rand32(10) for k in 1:size(θ, 2)]\nsimulate(θ, m) = simulateZ(θ, m), simulateτ(θ)\n\n# Architecture: partially monotonic network to preclude quantile crossing\nw = 64 # width of each hidden layer\nψ = Chain(\n\tDense(d, w, relu),\n\tDense(w, w, relu),\n\tDense(w, w, relu)\n\t)\nϕ = Chain(\n\tDensePositive(Dense(w + 2, w, relu); last_only = true),\n\tDensePositive(Dense(w, w, relu)),\n\tDensePositive(Dense(w, 1))\n\t)\ndeepset = DeepSet(ψ, ϕ)\n\n# Initialise the estimator for the first parameter, targetting μ∣Z,σ\ni = 1\nq̂ = QuantileEstimatorContinuous(deepset; i = i)\n\n# Train the estimator\nq̂ = train(q̂, prior, simulate, m = m)\n\n# Assess the estimator\nθ = prior(1000)\nZ = simulateZ(θ, m)\nassessment = assess(q̂, θ, Z)\nplot(assessment)\n\n# Estimate quantiles of μ∣Z,σ with σ = 0.5 and for many data sets\n# (use θ[Not(i), :] to determine the order in which the conditioned parameters should be given)\nθ = prior(1000)\nZ = simulateZ(θ, m)\nθ₋ᵢ = 0.5f0\nτ = Float32.([0.1, 0.25, 0.5, 0.75, 0.9])\nq̂(Z, θ₋ᵢ, τ)\n\n# Estimate quantiles for a single data set\nq̂(Z[1], θ₋ᵢ, τ)\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.RatioEstimator","page":"Core","title":"NeuralEstimators.RatioEstimator","text":"RatioEstimator(deepset::DeepSet)\n\nA neural estimator that estimates the likelihood-to-evidence ratio,\n\nr(boldsymbolZ boldsymboltheta) equiv p(boldsymbolZ mid boldsymboltheta)p(boldsymbolZ)\n\nwhere p(boldsymbolZ mid boldsymboltheta) is the likelihood and p(boldsymbolZ) is the marginal likelihood, also known as the model evidence.\n\nThe estimator leverages the DeepSet architecture, subject to two requirements. First, the number of input neurons in the first layer of the inference network (i.e., the outer network) must equal the number of output neurons in the final layer of the summary network plus the number of parameters in the statistical model. Second, the number of output neurons in the final layer of the inference network must be equal to one.\n\nThe ratio estimator is trained by solving a relatively straightforward binary classification problem. Specifically, consider the problem of distinguishing dependent parameter–data pairs (boldsymboltheta boldsymbolZ) sim p(boldsymbolZ boldsymboltheta) with class labels Y=1 from independent parameter–data pairs (tildeboldsymboltheta tildeboldsymbolZ) sim p(boldsymboltheta)p(boldsymbolZ) with class labels Y=0, and where the classes are balanced. Then the Bayes classifier under binary cross-entropy loss is given by\n\nc(boldsymbolZ boldsymboltheta) = fracp(boldsymbolZ boldsymboltheta)p(boldsymbolZ boldsymboltheta) + p(boldsymboltheta)p(boldsymbolZ)\n\nand hence,\n\nr(boldsymbolZ boldsymboltheta) = fracc(boldsymbolZ boldsymboltheta)1 - c(boldsymbolZ boldsymboltheta)\n\nFor numerical stability, training is done on the log-scale using log r(boldsymbolZ boldsymboltheta) = textlogit(c(boldsymbolZ boldsymboltheta)).\n\nWhen applying the estimator to data, by default the likelihood-to-evidence ratio r(boldsymbolZ boldsymboltheta) is returned (setting the keyword argument classifier = true will yield class probability estimates). 
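As a quick numerical illustration of the identity above (plain Julia, with a hypothetical classifier output c; no package functionality is assumed), the ratio is recovered from the class probability via r = c/(1 - c), and log r is simply the logit of c:

logit(c) = log(c) - log1p(-c)  # numerically stable logit
c = 0.8                        # hypothetical classifier output c(Z, θ)
r = c / (1 - c)                # likelihood-to-evidence ratio, here 4.0
isapprox(log(r), logit(c))     # true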
The estimated ratio can then be used in various downstream Bayesian (e.g., Hermans et al., 2020) or Frequentist (e.g., Walchessen et al., 2023) inferential algorithms.\n\nSee also mlestimate and mapestimate for obtaining approximate maximum-likelihood and maximum-a-posteriori estimates, and sampleposterior for obtaining approximate posterior samples.\n\nExamples\n\nusing NeuralEstimators, Flux, Statistics\n\n# Generate data from Z|μ,σ ~ N(μ, σ²) with μ, σ ~ U(0, 1)\np = 2 # number of unknown parameters in the statistical model\nd = 1 # dimension of each independent replicate\nm = 100 # number of independent replicates\n\nprior(K) = rand32(p, K)\nsimulate(θ, m) = θ[1] .+ θ[2] .* randn32(d, m)\nsimulate(θ::AbstractMatrix, m) = simulate.(eachcol(θ), m)\n\n# Architecture\nw = 64 # width of each hidden layer\nψ = Chain(\n\tDense(d, w, relu),\n\tDense(w, w, relu),\n\tDense(w, w, relu)\n\t)\nϕ = Chain(\n\tDense(w + p, w, relu),\n\tDense(w, w, relu),\n\tDense(w, 1)\n\t)\ndeepset = DeepSet(ψ, ϕ)\n\n# Initialise the estimator\nr̂ = RatioEstimator(deepset)\n\n# Train the estimator\nr̂ = train(r̂, prior, simulate, m = m)\n\n# Inference with \"observed\" data set\nθ = prior(1)\nz = simulate(θ, m)[1]\nθ₀ = [0.5, 0.5] # initial estimate\nmlestimate(r̂, z; θ₀ = θ₀) # maximum-likelihood estimate\nmapestimate(r̂, z; θ₀ = θ₀) # maximum-a-posteriori estimate\nθ_grid = expandgrid(0:0.01:1, 0:0.01:1)' # fine gridding of the parameter space\nθ_grid = Float32.(θ_grid)\nr̂(z, θ_grid) # likelihood-to-evidence ratios over grid\nsampleposterior(r̂, z; θ_grid = θ_grid) # posterior samples\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.PiecewiseEstimator","page":"Core","title":"NeuralEstimators.PiecewiseEstimator","text":"PiecewiseEstimator(estimators, changepoints)\n\nCreates a piecewise estimator (Sainsbury-Dale et al., 2024, sec. 
2.2.2) from a collection of estimators and sample-size changepoints.\n\nSpecifically, with l estimators and sample-size changepoints m_1 m_2 dots m_l-1, the piecewise estimator takes the form,\n\nhatboldsymboltheta(boldsymbolZ)\n=\nbegincases\nhatboldsymboltheta_1(boldsymbolZ) m leq m_1\nhatboldsymboltheta_2(boldsymbolZ) m_1 m leq m_2\nquad vdots \nhatboldsymboltheta_l(boldsymbolZ) m m_l-1\nendcases\n\nFor example, given an estimator hatboldsymboltheta_1(cdot) trained for small sample sizes (e.g., m ≤ 30) and an estimator hatboldsymboltheta_2(cdot) trained for moderate-to-large sample sizes (e.g., m > 30), we may construct a PiecewiseEstimator that dispatches hatboldsymboltheta_1(cdot) if m ≤ 30 and hatboldsymboltheta_2(cdot) otherwise.\n\nSee also trainx() for training estimators for a range of sample sizes.\n\nExamples\n\nusing NeuralEstimators, Flux\n\nd = 2 # bivariate data\np = 3 # number of parameters in the statistical model\nw = 8 # width of each hidden layer\n\n# Small-sample estimator\nψ₁ = Chain(Dense(d, w, relu), Dense(w, w, relu));\nϕ₁ = Chain(Dense(w, w, relu), Dense(w, p));\nθ̂₁ = PointEstimator(DeepSet(ψ₁, ϕ₁))\n\n# Large-sample estimator\nψ₂ = Chain(Dense(d, w, relu), Dense(w, w, relu));\nϕ₂ = Chain(Dense(w, w, relu), Dense(w, p));\nθ̂₂ = PointEstimator(DeepSet(ψ₂, ϕ₂))\n\n# Piecewise estimator with changepoint m=30\nθ̂ = PiecewiseEstimator([θ̂₁, θ̂₂], 30)\n\n# Apply the (untrained) piecewise estimator to data\nZ = [rand(d, 1, m) for m ∈ (10, 50)]\nθ̂(Z)\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.Ensemble","page":"Core","title":"NeuralEstimators.Ensemble","text":"Ensemble(estimators)\nEnsemble(architecture::Function, J::Integer)\n(ensemble::Ensemble)(Z; aggr = median)\n\nDefines an ensemble based on a collection of estimators which, when applied to data Z, returns the median (or another summary defined by aggr) of the estimates.\n\nThe ensemble can be initialised with a collection of trained estimators and then applied immediately to observed data. Alternatively, the ensemble can be initialised with a collection of untrained estimators (or a function defining the architecture of each estimator, and the number of estimators in the ensemble), trained with train(), and then applied to observed data. 
In the latter case, where the ensemble is trained directly, if savepath is specified both the ensemble and component estimators will be saved.\n\nNote that train() currently acts sequentially on the component estimators.\n\nThe ensemble components can be accessed by indexing the ensemble directly; the number of component estimators can be obtained using length().\n\nExamples\n\nusing NeuralEstimators, Flux\n\n# Define the model, Z|θ ~ N(θ, 1), θ ~ N(0, 1)\nd = 1 # dimension of each replicate\np = 1 # number of unknown parameters in the statistical model\nm = 30 # number of independent replicates in each data set\nsampler(K) = randn32(p, K)\nsimulator(θ, m) = [μ .+ randn32(d, m) for μ ∈ eachcol(θ)]\n\n# Architecture of each ensemble component\nfunction architecture()\n\tψ = Chain(Dense(d, 64, relu), Dense(64, 64, relu))\n\tϕ = Chain(Dense(64, 64, relu), Dense(64, p))\n\tdeepset = DeepSet(ψ, ϕ)\n\tPointEstimator(deepset)\nend\n\n# Ensemble size\nJ = 3\n\n# Initialise ensemble\nensemble = Ensemble(architecture, J)\nensemble[1] # access component estimators by indexing\nlength(ensemble) # number of component estimators\n\n# Training\nensemble = train(ensemble, sampler, simulator, m = m, epochs = 5)\n\n# Assessment\nθ = sampler(1000)\nZ = simulator(θ, m)\nassessment = assess(ensemble, θ, Z)\nrmse(assessment)\n\n# Apply to data\nensemble(Z)\n\n\n\n\n\n","category":"type"},{"location":"API/core/#Training","page":"Core","title":"Training","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"The function train is used to train a single neural estimator, while the wrapper function trainx is useful for training multiple neural estimators over a range of sample sizes, making using of the technique known as pre-training.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"train\n\ntrainx","category":"page"},{"location":"API/core/#NeuralEstimators.train","page":"Core","title":"NeuralEstimators.train","text":"train(θ̂, sampler::Function, simulator::Function; ...)\ntrain(θ̂, θ_train::P, θ_val::P, simulator::Function; ...) where {P <: Union{AbstractMatrix, ParameterConfigurations}}\ntrain(θ̂, θ_train::P, θ_val::P, Z_train::T, Z_val::T; ...) where {T, P <: Union{AbstractMatrix, ParameterConfigurations}}\n\nTrain a neural estimator θ̂.\n\nThe methods cater for different variants of \"on-the-fly\" simulation. Specifically, a sampler can be provided to continuously sample new parameter vectors from the prior, and a simulator can be provided to continuously simulate new data conditional on the parameters. If provided with specific sets of parameters (θ_train and θ_val) and/or data (Z_train and Z_val), they will be held fixed during training.\n\nIn all methods, the validation parameters and data are held fixed to reduce noise when evaluating the validation risk.\n\nKeyword arguments common to all methods:\n\nloss = mae\nepochs = 100\nbatchsize = 32\noptimiser = ADAM()\nsavepath::String = \"\": path to save the trained estimator and other information; if an empty string (default), nothing is saved. Otherwise, the neural-network parameters (i.e., the weights and biases) will be saved during training as bson files; the risk function evaluated over the training and validation sets will also be saved, in the first and second columns of loss_per_epoch.csv, respectively; the best parameters (as measured by validation risk) will be saved as best_network.bson. 
\nstopping_epochs = 5: cease training if the risk doesn't improve in this number of epochs.\nuse_gpu = true\nverbose = true\n\nKeyword arguments common to train(θ̂, sampler, simulator) and train(θ̂, θ_train, θ_val, simulator):\n\nm: sample sizes (either an Integer or a collection of Integers). The simulator is called as simulator(θ, m).\nepochs_per_Z_refresh = 1: the number of passes to make through the training set before the training data are refreshed.\nsimulate_just_in_time = false: flag indicating whether we should simulate just-in-time, in the sense that only a batchsize number of parameter vectors and corresponding data are in memory at a given time.\n\nKeyword arguments unique to train(θ̂, sampler, simulator):\n\nK = 10000: number of parameter vectors in the training set; the size of the validation set is K ÷ 5.\nξ = nothing: an arbitrary collection of objects that, if provided, will be passed to the parameter sampler as sampler(K, ξ); otherwise, the parameter sampler will be called as sampler(K). Can also be provided as xi.\nepochs_per_θ_refresh = 1: the number of passes to make through the training set before the training parameters are refreshed. Must be a multiple of epochs_per_Z_refresh. Can also be provided as epochs_per_theta_refresh.\n\nExamples\n\nusing NeuralEstimators, Flux\n\nfunction sampler(K)\n\tμ = randn(K) # Gaussian prior\n\tσ = rand(K) # Uniform prior\n\tθ = hcat(μ, σ)'\n\treturn θ\nend\n\nfunction simulator(θ_matrix, m)\n\t[θ[1] .+ θ[2] * randn(1, m) for θ ∈ eachcol(θ_matrix)]\nend\n\n# architecture\nd = 1 # dimension of each replicate\np = 2 # number of parameters in the statistical model\nψ = Chain(Dense(1, 32, relu), Dense(32, 32, relu))\nϕ = Chain(Dense(32, 32, relu), Dense(32, p))\nθ̂ = DeepSet(ψ, ϕ)\n\n# number of independent replicates to use during training\nm = 15\n\n# training: full simulation on-the-fly\nθ̂ = train(θ̂, sampler, simulator, m = m, epochs = 5)\n\n# training: simulation on-the-fly with fixed parameters\nK = 10000\nθ_train = sampler(K)\nθ_val = sampler(K ÷ 5)\nθ̂ = train(θ̂, θ_train, θ_val, simulator, m = m, epochs = 5)\n\n# training: fixed parameters and fixed data\nZ_train = simulator(θ_train, m)\nZ_val = simulator(θ_val, m)\nθ̂ = train(θ̂, θ_train, θ_val, Z_train, Z_val, epochs = 5)\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.trainx","page":"Core","title":"NeuralEstimators.trainx","text":"trainx(θ̂, sampler::Function, simulator::Function, m::Vector{Integer}; ...)\ntrainx(θ̂, θ_train, θ_val, simulator::Function, m::Vector{Integer}; ...)\ntrainx(θ̂, θ_train, θ_val, Z_train, Z_val, m::Vector{Integer}; ...)\ntrainx(θ̂, θ_train, θ_val, Z_train::V, Z_val::V; ...) where {V <: AbstractVector{AbstractVector{Any}}}\n\nA wrapper around train() to construct neural estimators for different sample sizes.\n\nThe positional argument m specifies the desired sample sizes. Each estimator is pre-trained with the estimator for the previous sample size. For example, if m = [m₁, m₂], the estimator for sample size m₂ is pre-trained with the estimator for sample size m₁.\n\nThe method for Z_train and Z_val subsets the data using subsetdata(Z, 1:mᵢ) for each mᵢ ∈ m. The method for Z_train::V and Z_val::V trains an estimator for each element of Z_train::V and Z_val::V and, hence, it does not need to invoke subsetdata(), which can be slow or difficult to define in some cases (e.g., for graphical data). Note that, in this case, m is inferred from the data.\n\nThe keyword arguments inherit from train(). 
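A rough usage sketch (assuming an estimator θ̂ and functions sampler and simulator defined as in the train() example above; the returned collection is assumed to contain one trained estimator per sample size):

m = [5, 15, 30]                                  # sample sizes, in increasing order
estimators = trainx(θ̂, sampler, simulator, m, epochs = 5)
estimators[2]                                    # estimator trained for sample size m = 15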
The keyword arguments epochs, batchsize, stopping_epochs, and optimiser can each be given as vectors. For example, if training two estimators, one may use a different number of epochs for each estimator by providing epochs = [epoch₁, epoch₂].\n\n\n\n\n\n","category":"function"},{"location":"API/core/#Assessment/calibration","page":"Core","title":"Assessment/calibration","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"assess\n\nAssessment\n\nrisk\n\nbias\n\nrmse\n\ncoverage","category":"page"},{"location":"API/core/#NeuralEstimators.assess","page":"Core","title":"NeuralEstimators.assess","text":"assess(estimator, θ, Z)\n\nUsing an estimator (or a collection of estimators), computes estimates from data Z simulated based on true parameter vectors stored in θ.\n\nThe data Z should be a Vector, with each element corresponding to a single simulated data set. If Z contains more data sets than parameter vectors, the parameter matrix θ will be recycled by horizontal concatenation via the call θ = repeat(θ, outer = (1, J)) where J = length(Z) ÷ K is the number of simulated data sets and K = size(θ, 2) is the number of parameter vectors.\n\nThe output is of type Assessment; see ?Assessment for details.\n\nKeyword arguments\n\nestimator_names::Vector{String}: names of the estimators (sensible defaults provided).\nparameter_names::Vector{String}: names of the parameters (sensible defaults provided). If ξ is provided with a field parameter_names, those names will be used.\nξ = nothing: an arbitrary collection of objects that are fixed (e.g., distance matrices). Can also be provided as xi.\nuse_ξ = false: a Bool or a collection of Bool objects with length equal to the number of estimators. Specifies whether or not the estimator uses ξ: if it does, the estimator will be applied as estimator(Z, ξ). This argument is useful when multiple estimators are provided, only some of which need ξ; hence, if only one estimator is provided and ξ is not nothing, use_ξ is automatically set to true. Can also be provided as use_xi.\nuse_gpu = true: a Bool or a collection of Bool objects with length equal to the number of estimators.\nprobs = range(0.01, stop=0.99, length=100): (relevant only for estimator::QuantileEstimatorContinuous) a collection of probability levels in (0, 1)\n\nExamples\n\nusing NeuralEstimators, Flux\n\nn = 10 # number of observations in each realisation\np = 4 # number of parameters in the statistical model\n\n# Construct the neural estimator\nw = 32 # width of each layer\nψ = Chain(Dense(n, w, relu), Dense(w, w, relu));\nϕ = Chain(Dense(w, w, relu), Dense(w, p));\nθ̂ = DeepSet(ψ, ϕ)\n\n# Generate testing parameters\nK = 100\nθ = rand32(p, K)\n\n# Data for a single sample size\nm = 30\nZ = [rand32(n, m) for _ ∈ 1:K];\nassessment = assess(θ̂, θ, Z);\nrisk(assessment)\n\n# Multiple data sets for each parameter vector\nJ = 5\nZ = repeat(Z, J);\nassessment = assess(θ̂, θ, Z);\nrisk(assessment)\n\n# With set-level information\nqₓ = 2\nϕ = Chain(Dense(w + qₓ, w, relu), Dense(w, p));\nθ̂ = DeepSet(ψ, ϕ)\nx = [rand(qₓ) for _ ∈ eachindex(Z)]\nassessment = assess(θ̂, θ, (Z, x));\nrisk(assessment)\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.Assessment","page":"Core","title":"NeuralEstimators.Assessment","text":"Assessment(df::DataFrame, runtime::DataFrame)\n\nA type for storing the output of assess(). The field runtime contains the total time taken for each estimator. 
The field df is a long-form DataFrame with columns:\n\nestimator: the name of the estimator\nparameter: the name of the parameter\ntruth: the true value of the parameter\nestimate: the estimated value of the parameter\nm: the sample size (number of iid replicates) for the given data set\nk: the index of the parameter vector\nj: the index of the data set (in the case that multiple data sets are associated with each parameter vector)\n\nIf estimator is an IntervalEstimator, the column estimate will be replaced by the columns lower and upper, containing the lower and upper bounds of the interval, respectively.\n\nIf estimator is a QuantileEstimator, the df will also contain a column prob indicating the probability level of the corresponding quantile estimate.\n\nMultiple Assessment objects can be combined with merge() (used for combining assessments from multiple point estimators) or join() (used for combining assessments from a point estimator and an interval estimator).\n\n\n\n\n\n","category":"type"},{"location":"API/core/#NeuralEstimators.risk","page":"Core","title":"NeuralEstimators.risk","text":"risk(assessment::Assessment; ...)\n\nComputes a Monte Carlo approximation of an estimator's Bayes risk,\n\nr(hatboldsymboltheta(cdot))\napprox\nfrac1K sum_k=1^K L(boldsymboltheta^(k) hatboldsymboltheta(boldsymbolZ^(k)))\n\nwhere boldsymboltheta^(k) k = 1 dots K denotes a set of K parameter vectors sampled from the prior and, for each k, data boldsymbolZ^(k) are simulated from the statistical model conditional on boldsymboltheta^(k).\n\nKeyword arguments\n\nloss = (x, y) -> abs(x - y): a binary operator defining the loss function (default absolute-error loss).\naverage_over_parameters::Bool = false: if true, the loss is averaged over all parameters; otherwise (default), the loss is averaged over each parameter separately.\naverage_over_sample_sizes::Bool = true: if true (default), the loss is averaged over all sample sizes m; otherwise, the loss is averaged over each sample size separately.\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.bias","page":"Core","title":"NeuralEstimators.bias","text":"bias(assessment::Assessment; ...)\n\nComputes a Monte Carlo approximation of an estimator's bias,\n\nrmbias(hatboldsymboltheta(cdot))\napprox\nfrac1K sum_k=1^K hatboldsymboltheta(boldsymbolZ^(k)) - boldsymboltheta^(k)\n\nwhere boldsymboltheta^(k) k = 1 dots K denotes a set of K parameter vectors sampled from the prior and, for each k, data boldsymbolZ^(k) are simulated from the statistical model conditional on boldsymboltheta^(k).\n\nThis function inherits the keyword arguments of risk (excluding the argument loss).\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.rmse","page":"Core","title":"NeuralEstimators.rmse","text":"rmse(assessment::Assessment; ...)\n\nComputes a Monte Carlo approximation of an estimator's root-mean-squared error,\n\nrmrmse(hatboldsymboltheta(cdot))\napprox\nsqrtfrac1K sum_k=1^K (hatboldsymboltheta(boldsymbolZ^(k)) - boldsymboltheta^(k))^2\n\nwhere boldsymboltheta^(k) k = 1 dots K denotes a set of K parameter vectors sampled from the prior and, for each k, data boldsymbolZ^(k) are simulated from the statistical model conditional on boldsymboltheta^(k).\n\nThis function inherits the keyword arguments of risk (excluding the argument loss).\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.coverage","page":"Core","title":"NeuralEstimators.coverage","text":"coverage(assessment::Assessment; 
...)\n\nComputes a Monte Carlo approximation of an interval estimator's expected coverage, as defined in Hermans et al. (2022, Definition 2.1), and the proportion of parameters below and above the lower and upper bounds, respectively.\n\nKeyword arguments\n\naverage_over_parameters::Bool = false: if true, the coverage is averaged over all parameters; otherwise (default), it is computed over each parameter separately.\naverage_over_sample_sizes::Bool = true: if true (default), the coverage is averaged over all sample sizes m; otherwise, it is computed over each sample size separately.\n\n\n\n\n\n","category":"function"},{"location":"API/core/#Inference-with-observed-data","page":"Core","title":"Inference with observed data","text":"","category":"section"},{"location":"API/core/#Inference-using-point-estimators","page":"Core","title":"Inference using point estimators","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"Inference with a neural Bayes (point) estimator proceeds simply by applying the estimator θ̂ to the observed data Z (possibly containing multiple data sets) in a call of the form θ̂(Z). To leverage a GPU, simply move the estimator and the data to the GPU using gpu(); see also estimateinbatches() to apply the estimator over batches of data, which can alleviate memory issues when working with a large number of data sets.","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"Uncertainty quantification often proceeds through the bootstrap distribution, which is essentially available \"for free\" when bootstrap data sets can be quickly generated; this is facilitated by bootstrap() and interval(). Alternatively, one may approximate a set of low and high marginal posterior quantiles using a specially constructed neural Bayes estimator, which can then be used to construct credible intervals: see IntervalEstimator, QuantileEstimatorDiscrete, and QuantileEstimatorContinuous. ","category":"page"},{"location":"API/core/","page":"Core","title":"Core","text":"bootstrap\n\ninterval","category":"page"},{"location":"API/core/#NeuralEstimators.bootstrap","page":"Core","title":"NeuralEstimators.bootstrap","text":"bootstrap(θ̂, parameters::P, Z) where P <: Union{AbstractMatrix, ParameterConfigurations}\nbootstrap(θ̂, parameters::P, simulator, m::Integer; B = 400) where P <: Union{AbstractMatrix, ParameterConfigurations}\nbootstrap(θ̂, Z; B = 400, blocks = nothing)\n\nGenerates B bootstrap estimates from an estimator θ̂.\n\nParametric bootstrapping is facilitated by passing a single parameter configuration, parameters, and corresponding simulated data, Z, whose length implicitly defines B. Alternatively, one may provide a simulator and the desired sample size, in which case the data will be simulated using simulator(parameters, m).\n\nNon-parametric bootstrapping is facilitated by passing a single data set, Z. The argument blocks caters for block bootstrapping, and it should be a vector of integers specifying the block for each replicate. For example, with 5 replicates, the first two corresponding to block 1 and the remaining three corresponding to block 2, blocks should be [1, 1, 2, 2, 2]. 
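For instance, bootstrap-based uncertainty quantification might look as follows (a minimal sketch assuming a trained point estimator θ̂ and a single observed data set Z of independent replicates):

θ̃ = bootstrap(θ̂, Z; B = 400)         # p × B matrix of non-parametric bootstrap estimates
interval(θ̃; probs = [0.025, 0.975])   # p × 2 matrix of lower and upper interval bounds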
The resampling algorithm aims to produce resampled data sets that are of a similar size to Z, but this can only be achieved exactly if all blocks are equal in length.\n\nThe keyword argument use_gpu is a flag determining whether to use the GPU, if it is available (default true).\n\nThe return type is a p × B matrix, where p is the number of parameters in the model.\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.interval","page":"Core","title":"NeuralEstimators.interval","text":"interval(θ::Matrix; probs = [0.05, 0.95], parameter_names = nothing)\ninterval(estimator::IntervalEstimator, Z; parameter_names = nothing, use_gpu = true)\n\nCompute a confidence interval based either on a p × B matrix θ of parameters (typically containing bootstrap estimates or posterior draws) with p the number of parameters in the model, or from an IntervalEstimator and data Z.\n\nWhen given θ, the intervals are constructed by compute quantiles with probability levels controlled by the keyword argument probs.\n\nThe return type is a p × 2 matrix, whose first and second columns respectively contain the lower and upper bounds of the interval. The rows of this matrix can be named by passing a vector of strings to the keyword argument parameter_names.\n\nExamples\n\nusing NeuralEstimators\np = 3\nB = 50\nθ = rand(p, B)\ninterval(θ)\n\n\n\n\n\n","category":"function"},{"location":"API/core/#Inference-using-likelihood-and-likelihood-to-evidence-ratio-estimators","page":"Core","title":"Inference using likelihood and likelihood-to-evidence-ratio estimators","text":"","category":"section"},{"location":"API/core/","page":"Core","title":"Core","text":"mlestimate\n\nmapestimate\n\nsampleposterior","category":"page"},{"location":"API/core/#NeuralEstimators.mlestimate","page":"Core","title":"NeuralEstimators.mlestimate","text":"mlestimate(estimator::RatioEstimator, Z; θ₀ = nothing, θ_grid = nothing, penalty::Function = θ -> 1, use_gpu = true)\n\nComputes the (approximate) maximum likelihood estimate given data boldsymbolZ,\n\nargmax_boldsymboltheta ell(boldsymboltheta boldsymbolZ)\n\nwhere ell(cdot cdot) denotes the approximate log-likelihood function derived from estimator.\n\nIf a vector θ₀ of initial parameter estimates is given, the approximate likelihood is maximised by gradient descent. 
Otherwise, if a matrix of parameters θ_grid is given, the approximate likelihood is maximised by grid search.\n\nA maximum penalised likelihood estimate,\n\nargmax_boldsymboltheta ell(boldsymboltheta boldsymbolZ) + log p(boldsymboltheta)\n\ncan be obtained by specifying the keyword argument penalty that defines the penalty term p(boldsymboltheta).\n\nSee also mapestimate() for computing (approximate) maximum a posteriori estimates.\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.mapestimate","page":"Core","title":"NeuralEstimators.mapestimate","text":"mapestimate(estimator::RatioEstimator, Z; θ₀ = nothing, θ_grid = nothing, prior::Function = θ -> 1, use_gpu = true)\n\nComputes the (approximate) maximum a posteriori estimate given data boldsymbolZ,\n\nargmax_boldsymboltheta ell(boldsymboltheta boldsymbolZ) + log p(boldsymboltheta)\n\nwhere ell(cdot cdot) denotes the approximate log-likelihood function derived from estimator, and p(boldsymboltheta) denotes the prior density function controlled through the keyword argument prior (by default, a uniform prior is used).\n\nIf a vector θ₀ of initial parameter estimates is given, the approximate posterior density is maximised by gradient descent. Otherwise, if a matrix of parameters θ_grid is given, the approximate posterior density is maximised by grid search.\n\nSee also mlestimate() for computing (approximate) maximum likelihood estimates.\n\n\n\n\n\n","category":"function"},{"location":"API/core/#NeuralEstimators.sampleposterior","page":"Core","title":"NeuralEstimators.sampleposterior","text":"sampleposterior(estimator::RatioEstimator, Z, N::Integer = 1000; θ_grid, prior::Function = θ -> 1f0)\n\nSamples from the approximate posterior distribution p(boldsymboltheta mid boldsymbolZ) implied by estimator.\n\nThe positional argument N controls the size of the posterior sample.\n\nCurrently, the sampling algorithm is based on a fine-gridding of the parameter space, specified through the keyword argument θ_grid (or theta_grid). The approximate posterior density is evaluated over this grid, which is then used to draw samples. This is very effective when making inference with a small number of parameters. For models with a large number of parameters, other sampling algorithms may be needed (please feel free to contact the package maintainer for discussion).\n\nThe prior distribution p(boldsymboltheta) is controlled through the keyword argument prior (by default, a uniform prior is used).\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#Miscellaneous","page":"Miscellaneous","title":"Miscellaneous","text":"","category":"section"},{"location":"API/utility/","page":"Miscellaneous","title":"Miscellaneous","text":"Order = [:type, :function]\nPages = [\"utility.md\"]","category":"page"},{"location":"API/utility/#Core","page":"Miscellaneous","title":"Core","text":"","category":"section"},{"location":"API/utility/","page":"Miscellaneous","title":"Miscellaneous","text":"These functions can appear during the core workflow, and may need to be overloaded in some applications.","category":"page"},{"location":"API/utility/","page":"Miscellaneous","title":"Miscellaneous","text":"numberreplicates\n\nsubsetdata\n\nsubsetparameters","category":"page"},{"location":"API/utility/#NeuralEstimators.numberreplicates","page":"Miscellaneous","title":"NeuralEstimators.numberreplicates","text":"numberreplicates(Z)\n\nGeneric function that returns the number of replicates in a given object. 
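For example (a small sketch; the Array method is assumed to count the replicates stored in the final dimension, consistent with the data conventions used throughout the package):

using NeuralEstimators
Z = rand(16, 16, 1, 10)   # one data set of gridded data with 10 replicates
numberreplicates(Z)       # expected: 10 (the size of the final dimension)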
Default implementations are provided for commonly used data formats, namely, data stored as an Array or as a GNNGraph.\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.subsetdata","page":"Miscellaneous","title":"NeuralEstimators.subsetdata","text":"subsetdata(Z::V, i) where {V <: AbstractArray{A}} where {A <: Any}\nsubsetdata(Z::A, i) where {A <: AbstractArray{T, N}} where {T, N}\nsubsetdata(Z::G, i) where {G <: AbstractGraph}\n\nReturn replicate(s) i from each data set in Z.\n\nIf the user is working with data that are not covered by the default methods, simply overload the function with the appropriate type for Z.\n\nFor graphical data, calls getgraph(), where the replicates are assumed be to stored as batched graphs. Since this can be slow, one should consider using a method of train() that does not require the data to be subsetted when working with graphical data (use numberreplicates() to check that the training and validation data sets are equally replicated, which prevents subsetting).\n\nExamples\n\nusing NeuralEstimators\nusing GraphNeuralNetworks\nusing Flux: batch\n\nd = 1 # dimension of the response variable\nn = 4 # number of observations in each realisation\nm = 6 # number of replicates in each data set\nK = 2 # number of data sets\n\n# Array data\nZ = [rand(n, d, m) for k ∈ 1:K]\nsubsetdata(Z, 2) # extract second replicate from each data set\nsubsetdata(Z, 1:3) # extract first 3 replicates from each data set\n\n# Graphical data\ne = 8 # number of edges\nZ = [batch([rand_graph(n, e, ndata = rand(d, n)) for _ ∈ 1:m]) for k ∈ 1:K]\nsubsetdata(Z, 2) # extract second replicate from each data set\nsubsetdata(Z, 1:3) # extract first 3 replicates from each data set\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.subsetparameters","page":"Miscellaneous","title":"NeuralEstimators.subsetparameters","text":"subsetparameters(parameters::M, indices) where {M <: AbstractMatrix}\nsubsetparameters(parameters::P, indices) where {P <: ParameterConfigurations}\n\nSubset parameters using a collection of indices.\n\nArrays in parameters::P with last dimension equal in size to the number of parameter configurations, K, are also subsetted (over their last dimension) using indices. All other fields are left unchanged. To modify this default behaviour, overload subsetparameters.\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#Downstream-inference-algorithms","page":"Miscellaneous","title":"Downstream-inference algorithms","text":"","category":"section"},{"location":"API/utility/","page":"Miscellaneous","title":"Miscellaneous","text":"EM","category":"page"},{"location":"API/utility/#NeuralEstimators.EM","page":"Miscellaneous","title":"NeuralEstimators.EM","text":"EM(simulateconditional::Function, MAP::Union{Function, NeuralEstimator}, θ₀ = nothing)\n\nImplements the (Bayesian) Monte Carlo expectation-maximisation (EM) algorithm, with lth iteration\n\nboldsymboltheta^(l) = \nargmax_boldsymboltheta\nsum_h = 1^H ell(boldsymboltheta boldsymbolZ_1 boldsymbolZ_2^(lh)) + Hlog pi(boldsymboltheta)\n\nwhere ell(cdot) is the complete-data log-likelihood function, boldsymbolZ equiv (boldsymbolZ_1 boldsymbolZ_2) denotes the complete data with boldsymbolZ_1 and boldsymbolZ_2 the observed and missing components, respectively, boldsymbolZ_2^(lh), h = 1 dots H, is simulated from the distribution of boldsymbolZ_2 mid boldsymbolZ_1 boldsymboltheta^(l-1), and pi(boldsymboltheta) denotes the prior density. 
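Conceptually, a single iteration of the (Bayesian) Monte Carlo EM update above can be sketched as follows; em_iteration is a hypothetical helper written only for illustration, with simulateconditional and MAP as described in the Fields section below.

# Hypothetical helper mirroring one EM update: impute the missing data conditionally on the
# current estimate θ, then re-estimate θ from the completed data via the (neural) MAP estimator
function em_iteration(Z, θ, simulateconditional, MAP; H = 1)
	Z_complete = simulateconditional(Z, θ; nsims = H)  # conditional simulation of the missing values
	return MAP(Z_complete)                             # updated estimate θ⁽ˡ⁾
end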
\n\nFields\n\nThe function simulateconditional should have a signature of the form,\n\nsimulateconditional(Z::A, θ; nsims = 1) where {A <: AbstractArray{Union{Missing, T}}} where T\n\nThe output of simulateconditional should be the completed-data Z, and it should be returned in whatever form is appropriate to be passed to the MAP estimator as MAP(Z). For example, if the data are gridded and the MAP is a neural MAP estimator based on a CNN architecture, then Z should be returned as a four-dimensional array.\n\nThe field MAP can be a function (to facilitate the conventional Monte Carlo EM algorithm) or a NeuralEstimator (to facilitate the so-called neural EM algorithm). \n\nThe starting values θ₀ may be provided during initialisation (as a vector), or when applying the EM object to data (see below). The starting values given in a function call take precedence over those stored in the object.\n\nMethods\n\nOnce constructed, obects of type EM can be applied to data via the methods,\n\n(em::EM)(Z::A, θ₀::Union{Nothing, Vector} = nothing; ...) where {A <: AbstractArray{Union{Missing, T}, N}} where {T, N}\n(em::EM)(Z::V, θ₀::Union{Nothing, Vector, Matrix} = nothing; ...) where {V <: AbstractVector{A}} where {A <: AbstractArray{Union{Missing, T}, N}} where {T, N}\n\nwhere Z is the complete data containing the observed data and Missing values. Note that the second method caters for the case that one has multiple data sets. The keyword arguments are:\n\nniterations::Integer = 50: the maximum number of iterations.\nϵ = 0.01: tolerance used to assess convergence; the algorithm halts if the relative change in parameter values in successive iterations is less than ϵ.\nreturn_iterates::Bool: if true, the estimate at each iteration of the algorithm is returned; otherwise, only the final estimate is returned.\nnsims::Integer = 1: the number H of conditional simulations in each iteration. 
\nξ = nothing: model information needed for conditional simulation (e.g., distance matrices) or in the MAP estimator.\nuse_ξ_in_simulateconditional::Bool = false: if set to true, the conditional simulator is called as simulateconditional(Z, θ, ξ; nsims = nsims).\nuse_ξ_in_MAP::Bool = false: if set to true, the MAP estimator is called as MAP(Z, ξ).\nuse_gpu::Bool = true\nverbose::Bool = false\n\nExamples\n\n# See the \"Missing data\" section in \"Advanced usage\"\n\n\n\n\n\n","category":"type"},{"location":"API/utility/#Utility-functions","page":"Miscellaneous","title":"Utility functions","text":"","category":"section"},{"location":"API/utility/","page":"Miscellaneous","title":"Miscellaneous","text":"adjacencymatrix\n\ncontainertype\n\nencodedata\n\nestimateinbatches\n\nexpandgrid\n\nIndicatorWeights\n\ninitialise_estimator\n\nloadbestweights\n\nmaternchols\n\nremovedata\n\nrowwisenorm\n\nspatialgraph\n\nstackarrays\n\nvectotril","category":"page"},{"location":"API/utility/#NeuralEstimators.adjacencymatrix","page":"Miscellaneous","title":"NeuralEstimators.adjacencymatrix","text":"adjacencymatrix(S::Matrix, k::Integer; maxmin = false, combined = false)\nadjacencymatrix(S::Matrix, r::AbstractFloat)\nadjacencymatrix(S::Matrix, r::AbstractFloat, k::Integer; random = true)\nadjacencymatrix(M::Matrix; k, r, kwargs...)\n\nComputes a spatially weighted adjacency matrix from spatial locations S based on either the k-nearest neighbours of each location; all nodes within a disc of fixed radius r; or, if both r and k are provided, a subset of k neighbours within a disc of fixed radius r.\n\nSeveral subsampling strategies are possible when choosing a subset of k neighbours within a disc of fixed radius r. If random=true (default), the neighbours are randomly selected from within the disc (note that this also approximately preserves the distribution of distances within the neighbourhood set). If random=false, a deterministic algorithm is used that aims to preserve the distribution of distances within the neighbourhood set, by choosing those nodes with distances to the central node corresponding to the 0 frac1k frac2k dots frack-1k 1 quantiles of the empirical distribution function of distances within the disc. (This algorithm in fact yields k+1 neighbours, since both the closest and furthest nodes are always included.) Otherwise, \n\nIf maxmin=false (default) the k-nearest neighbours are chosen based on all points in the graph. If maxmin=true, a so-called maxmin ordering is applied, whereby an initial point is selected, and each subsequent point is selected to maximise the minimum distance to those points that have already been selected. Then, the neighbours of each point are defined as the k-nearest neighbours amongst the points that have already appeared in the ordering. If combined=true, the neighbours are defined to be the union of the k-nearest neighbours and the k-nearest neighbours subject to a maxmin ordering. \n\nIf S is a square matrix, it is treated as a distance matrix; otherwise, it should be an n x d matrix, where n is the number of spatial locations and d is the spatial dimension (typically d = 2). In the latter case, the distance metric is taken to be the Euclidean distance. Note that use of a maxmin ordering currently requires a matrix of spatial locations (not a distance matrix).\n\nBy convention with the functionality in GraphNeuralNetworks.jl which is based on directed graphs, the neighbours of location i are stored in the column A[:, i] where A is the returned adjacency matrix. 
Therefore, the number of neighbours for each location is given by collect(mapslices(nnz, A; dims = 1)), and the number of times each node is a neighbour of another node is given by collect(mapslices(nnz, A; dims = 2)).\n\nBy convention, we do not consider a location to neighbour itself (i.e., the diagonal elements of the adjacency matrix are zero). \n\nExamples\n\nusing NeuralEstimators, Distances, SparseArrays\n\nn = 250\nd = 2\nS = rand(Float32, n, d)\nk = 10\nr = 0.10\n\n# Memory efficient constructors\nadjacencymatrix(S, k)\nadjacencymatrix(S, k; maxmin = true)\nadjacencymatrix(S, k; maxmin = true, combined = true)\nadjacencymatrix(S, r)\nadjacencymatrix(S, r, k)\nadjacencymatrix(S, r, k; random = false)\n\n# Construct from full distance matrix D\nD = pairwise(Euclidean(), S, dims = 1)\nadjacencymatrix(D, k)\nadjacencymatrix(D, r)\nadjacencymatrix(D, r, k)\nadjacencymatrix(D, r, k; random = false)\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.containertype","page":"Miscellaneous","title":"NeuralEstimators.containertype","text":"containertype(A::Type)\ncontainertype(::Type{A}) where A <: SubArray\ncontainertype(a::A) where A\n\nReturns the container type of its argument.\n\nIf given a SubArray, returns the container type of the parent array.\n\nExamples\n\na = rand(3, 4)\ncontainertype(a)\ncontainertype(typeof(a))\n[containertype(x) for x ∈ eachcol(a)]\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.encodedata","page":"Miscellaneous","title":"NeuralEstimators.encodedata","text":"encodedata(Z::A; c::T = zero(T)) where {A <: AbstractArray{Union{Missing, T}, N}} where T, N\n\nFor data Z with missing entries, returns an encoded data set (U, W) where W encodes the missingness pattern as an indicator vector and U is the original data Z with missing entries replaced by a fixed constant c.\n\nThe indicator vector W is stored in the second-to-last dimension of Z, which should be singleton. 
If the second-to-last dimension is not singleton, then two singleton dimensions will be added to the array, and W will be stored in the new second-to-last dimension.\n\nExamples\n\nusing NeuralEstimators\n\n# Generate some missing data\nZ = rand(16, 16, 1, 1)\nZ = removedata(Z, 0.25)\t # remove 25% of the data\n\n# Encode the data\nUW = encodedata(Z)\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.estimateinbatches","page":"Miscellaneous","title":"NeuralEstimators.estimateinbatches","text":"estimateinbatches(θ̂, z, t = nothing; batchsize::Integer = 32, use_gpu::Bool = true, kwargs...)\n\nApply the estimator θ̂ on minibatches of z (and optionally other set-level information t) of size batchsize.\n\nThis can prevent memory issues that can occur with large data sets, particularly on the GPU.\n\nMinibatching will only be done if there are multiple data sets in z; this will be inferred by z being a vector, or a tuple whose first element is a vector.\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.expandgrid","page":"Miscellaneous","title":"NeuralEstimators.expandgrid","text":"expandgrid(xs, ys)\n\nSame as expand.grid() in R, but currently caters for two dimensions only.\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.IndicatorWeights","page":"Miscellaneous","title":"NeuralEstimators.IndicatorWeights","text":"IndicatorWeights(h_max, n_bins::Integer)\n(w::IndicatorWeights)(h::Matrix)\n\nFor spatial locations boldsymbols and boldsymbolu, creates a spatial weight function defined as\n\nboldsymbolw(boldsymbols boldsymbolu) equiv (mathbbI(h in B_k) k = 1 dots K)\n\nwhere mathbbI(cdot) denotes the indicator function, h equiv boldsymbols - boldsymbolu is the spatial distance between boldsymbols and boldsymbolu, and B_k k = 1 dots K is a set of K =n_bins equally-sized distance bins covering the spatial distances between 0 and h_max. 
\n\nExamples\n\nusing NeuralEstimators \n\nh_max = 1\nn_bins = 10\nw = IndicatorWeights(h_max, n_bins)\nh = rand(1, 30) # distances between 30 pairs of spatial locations \nw(h)\n\n\n\n\n\n","category":"type"},{"location":"API/utility/#NeuralEstimators.initialise_estimator","page":"Miscellaneous","title":"NeuralEstimators.initialise_estimator","text":"initialise_estimator(p::Integer; ...)\n\nInitialise a neural estimator for a statistical model with p unknown parameters.\n\nThe estimator is couched in the DeepSets framework (see DeepSet) so that it can be applied to data sets containing an arbitrary number of independent replicates (including the special case of a single replicate).\n\nNote also that the user is free to initialise their neural estimator however they see fit using arbitrary Flux code; see here for Flux's API reference.\n\nFinally, the method with positional argument data_typeis a wrapper that allows one to specify the type of their data (either \"unstructured\", \"gridded\", or \"irregular_spatial\").\n\nKeyword arguments\n\narchitecture::String: for unstructured multivariate data, one may use a fully-connected multilayer perceptron (\"MLP\"); for data collected over a grid, a convolutional neural network (\"CNN\"); and for graphical or irregular spatial data, a graphical neural network (\"GNN\").\nd::Integer = 1: for unstructured multivariate data (i.e., when architecture = \"MLP\"), the dimension of the data (e.g., d = 3 for trivariate data); otherwise, if architecture ∈ [\"CNN\", \"GNN\"], the argument d controls the number of input channels (e.g., d = 1 for univariate spatial processes).\nestimator_type::String = \"point\": the type of estimator; either \"point\" or \"interval\".\ndepth = 3: the number of hidden layers; either a single integer or an integer vector of length two specifying the depth of the inner (summary) and outer (inference) network of the DeepSets framework.\nwidth = 32: a single integer or an integer vector of length sum(depth) specifying the width (or number of convolutional filters/channels) in each hidden layer.\nactivation::Function = relu: the (non-linear) activation function of each hidden layer.\nactivation_output::Function = identity: the activation function of the output layer.\nvariance_stabiliser::Union{Nothing, Function} = nothing: a function that will be applied directly to the input, usually to stabilise the variance.\nkernel_size = nothing: (applicable only to CNNs) a vector of length depth[1] containing integer tuples of length D, where D is the dimension of the convolution (e.g., D = 2 for two-dimensional convolution).\nweight_by_distance::Bool = true: (applicable only to GNNs) flag indicating whether the estimator will weight by spatial distance; if true, a SpatialGraphConv layer is used in the propagation module; otherwise, a regular GraphConv layer is used.\nprobs = [0.025, 0.975]: (applicable only if estimator_type = \"interval\") probability levels defining the lower and upper endpoints of the posterior credible interval.\n\nExamples\n\n## MLP, GNN, 1D CNN, and 2D CNN for a statistical model with two parameters:\np = 2\ninitialise_estimator(p, architecture = \"MLP\")\ninitialise_estimator(p, architecture = \"GNN\")\ninitialise_estimator(p, architecture = \"CNN\", kernel_size = [10, 5, 3])\ninitialise_estimator(p, architecture = \"CNN\", kernel_size = [(10, 10), (5, 5), (3, 
3)])\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.loadbestweights","page":"Miscellaneous","title":"NeuralEstimators.loadbestweights","text":"loadbestweights(path::String)\n\nReturns the weights of the neural network saved as 'best_network.bson' in the given path.\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.maternchols","page":"Miscellaneous","title":"NeuralEstimators.maternchols","text":"maternchols(D, ρ, ν, σ² = 1; stack = true)\n\nGiven a matrix D of distances, constructs the Cholesky factor of the covariance matrix under the Matérn covariance function with range parameter ρ, smoothness parameter ν, and marginal variance σ².\n\nProviding vectors of parameters will yield a three-dimensional array of Cholesky factors (note that the vectors must of the same length, but a mix of vectors and scalars is allowed). A vector of distance matrices D may also be provided.\n\nIf stack = true, the Cholesky factors will be \"stacked\" into a three-dimensional array (this is only possible if all distance matrices in D are the same size).\n\nExamples\n\nusing NeuralEstimators\nusing LinearAlgebra: norm\nn = 10\nS = rand(n, 2)\nD = [norm(sᵢ - sⱼ) for sᵢ ∈ eachrow(S), sⱼ ∈ eachrow(S)]\nρ = [0.6, 0.5]\nν = [0.7, 1.2]\nσ² = [0.2, 0.4]\nmaternchols(D, ρ, ν)\nmaternchols([D], ρ, ν)\nmaternchols(D, ρ, ν, σ²; stack = false)\n\nS̃ = rand(n, 2)\nD̃ = [norm(sᵢ - sⱼ) for sᵢ ∈ eachrow(S̃), sⱼ ∈ eachrow(S̃)]\nmaternchols([D, D̃], ρ, ν, σ²)\nmaternchols([D, D̃], ρ, ν, σ²; stack = false)\n\nS̃ = rand(2n, 2)\nD̃ = [norm(sᵢ - sⱼ) for sᵢ ∈ eachrow(S̃), sⱼ ∈ eachrow(S̃)]\nmaternchols([D, D̃], ρ, ν, σ²; stack = false)\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.removedata","page":"Miscellaneous","title":"NeuralEstimators.removedata","text":"removedata(Z::Array, Iᵤ::Vector{Integer})\nremovedata(Z::Array, p::Union{Float, Vector{Float}}; prevent_complete_missing = true)\nremovedata(Z::Array, n::Integer; fixed_pattern = false, contiguous_pattern = false, variable_proportion = false)\n\nReplaces elements of Z with missing.\n\nThe simplest method accepts a vector of integers Iᵤ that give the specific indices of the data to be removed.\n\nAlterntivaly, there are two methods available to generate data that are missing completely at random (MCAR).\n\nFirst, a vector p may be given that specifies the proportion of missingness for each element in the response vector. Hence, p should have length equal to the dimension of the response vector. If a single proportion is given, it will be replicated accordingly. If prevent_complete_missing = true, no replicates will contain 100% missingness (note that this can slightly alter the effective values of p).\n\nSecond, if an integer n is provided, all replicates will contain n observations after the data are removed. If fixed_pattern = true, the missingness pattern is fixed for all replicates. If contiguous_pattern = true, the data will be removed in a contiguous block. 
If variable_proportion = true, the proportion of missingness will vary across replicates, with each replicate containing between 1 and n observations after data removal, sampled uniformly (note that variable_proportion overrides fixed_pattern).\n\nThe return type is Array{Union{T, Missing}}.\n\nExamples\n\nd = 5 # dimension of each replicate\nm = 2000 # number of replicates\nZ = rand(d, m) # simulated data\n\n# Passing a desired proportion of missingness\np = rand(d)\nremovedata(Z, p)\n\n# Passing a desired final sample size\nn = 3 # number of observed elements of each replicate: must have n <= d\nremovedata(Z, n)\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.rowwisenorm","page":"Miscellaneous","title":"NeuralEstimators.rowwisenorm","text":"rowwisenorm(A)\n\nComputes the row-wise norm of a matrix A.\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.spatialgraph","page":"Miscellaneous","title":"NeuralEstimators.spatialgraph","text":"spatialgraph(S)\nspatialgraph(S, Z)\nspatialgraph(g::GNNGraph, Z)\n\nGiven spatial data Z measured at spatial locations S, constructs a GNNGraph ready for use in a graph neural network that employs SpatialGraphConv layers. \n\nWhen m independent replicates are collected over the same set of n spatial locations,\n\nboldsymbols_1 dots boldsymbols_n subset mathcalD\n\nwhere mathcalD subset mathbbR^d denotes the spatial domain of interest, Z should be given as an n times m matrix and S should be given as an n times d matrix. Otherwise, when m independent replicates are collected over differing sets of spatial locations,\n\nboldsymbols_ij dots boldsymbols_in_i subset mathcalD quad i = 1 dots m\n\nZ should be given as an m-vector of n_i-vectors, and S should be given as an m-vector of n_i times d matrices.\n\nThe spatial information between neighbours is stored as an edge feature, with the specific information controlled by the keyword arguments stationary and isotropic. Specifically, the edge feature between node j and node j stores the spatial distance boldsymbols_j - boldsymbols_j (if isotropic), the spatial displacement boldsymbols_j - boldsymbols_j (if stationary), or the matrix of locations (boldsymbols_j boldsymbols_j) (if !stationary). \n\nAdditional keyword arguments inherit from adjacencymatrix() to determine the neighbourhood of each node, with the default being a randomly selected set of k=30 neighbours within a disc of radius r=0.15 units.\n\nExamples\n\nusing NeuralEstimators\n\n# Number of replicates and spatial dimension\nm = 5 \nd = 2 \n\n# Spatial locations fixed for all replicates\nn = 100\nS = rand(n, d)\nZ = rand(n, m)\ng = spatialgraph(S, Z)\n\n# Spatial locations varying between replicates\nn = rand(50:100, m)\nS = rand.(n, d)\nZ = rand.(n)\ng = spatialgraph(S, Z)\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.stackarrays","page":"Miscellaneous","title":"NeuralEstimators.stackarrays","text":"stackarrays(v::V; merge = true) where {V <: AbstractVector{A}} where {A <: AbstractArray{T, N}} where {T, N}\n\nStack a vector of arrays v along the last dimension of each array, optionally merging the final dimension of the stacked array.\n\nThe arrays must be of the same size for the first N-1 dimensions. 
However, if merge = true, the size of the final dimension can vary.\n\nExamples\n\n# Vector containing arrays of the same size:\nZ = [rand(2, 3, m) for m ∈ (1, 1)];\nstackarrays(Z)\nstackarrays(Z, merge = false)\n\n# Vector containing arrays with differing final dimension size:\nZ = [rand(2, 3, m) for m ∈ (1, 2)];\nstackarrays(Z)\n\n\n\n\n\n","category":"function"},{"location":"API/utility/#NeuralEstimators.vectotril","page":"Miscellaneous","title":"NeuralEstimators.vectotril","text":"vectotril(v; strict = false)\nvectotriu(v; strict = false)\n\nConverts a vector v of length d(d+1)2 (a triangular number) into a d d lower or upper triangular matrix.\n\nIf strict = true, the matrix will be strictly lower or upper triangular, that is, a (d+1) (d+1) triangular matrix with zero diagonal.\n\nNote that the triangular matrix is constructed on the CPU, but the returned matrix will be a GPU array if v is a GPU array. Note also that the return type is not of type Triangular matrix (i.e., the zeros are materialised) since Traingular matrices are not always compatible with other GPU operations.\n\nExamples\n\nusing NeuralEstimators\n\nd = 4\nn = d*(d+1)÷2\nv = collect(range(1, n))\nvectotril(v)\nvectotriu(v)\nvectotril(v; strict = true)\nvectotriu(v; strict = true)\n\n\n\n\n\n","category":"function"},{"location":"API/loss/#Loss-functions","page":"Loss functions","title":"Loss functions","text":"","category":"section"},{"location":"API/loss/","page":"Loss functions","title":"Loss functions","text":"In addition to the standard loss functions provided by Flux (e.g., mae, mse, etc.), NeuralEstimators provides the following loss functions.","category":"page"},{"location":"API/loss/","page":"Loss functions","title":"Loss functions","text":"tanhloss\n\nkpowerloss\n\nquantileloss\n\nintervalscore","category":"page"},{"location":"API/loss/#NeuralEstimators.tanhloss","page":"Loss functions","title":"NeuralEstimators.tanhloss","text":"tanhloss(θ̂, θ, k; agg = mean, joint = true)\n\nFor k > 0, computes the loss function,\n\nL(θ θ) = tanh(θ - θk)\n\nwhich approximates the 0-1 loss as k → 0. Compared with the kpowerloss, which may also be used as a continuous surrogate for the 0-1 loss, the gradient of the tanh loss is bounded as |θ̂ - θ| → 0, which can improve numerical stability during training. \n\nIf joint = true, the L₁ norm is computed over each parameter vector, so that, with k close to zero, the resulting Bayes estimator is the mode of the joint posterior distribution; otherwise, if joint = false, the Bayes estimator is the vector containing the modes of the marginal posterior distributions.\n\nSee also kpowerloss.\n\n\n\n\n\n","category":"function"},{"location":"API/loss/#NeuralEstimators.kpowerloss","page":"Loss functions","title":"NeuralEstimators.kpowerloss","text":"kpowerloss(θ̂, θ, k; agg = mean, joint = true, safeorigin = true, ϵ = 0.1)\n\nFor k > 0, the k-th power absolute-distance loss function,\n\nL(θ θ) = θ - θᵏ\n\ncontains the squared-error, absolute-error, and 0-1 loss functions as special cases (the latter obtained in the limit as k → 0). 
It is Lipschitz continuous iff k = 1, convex iff k ≥ 1, and strictly convex iff k > 1: it is quasiconvex for all k > 0.\n\nIf joint = true, the L₁ norm is computed over each parameter vector, so that, with k close to zero, the resulting Bayes estimator is the mode of the joint posterior distribution; otherwise, if joint = false, the Bayes estimator is the vector containing the modes of the marginal posterior distributions.\n\nIf safeorigin = true, the loss function is modified to avoid pathologies around the origin, so that the resulting loss function behaves similarly to the absolute-error loss in the ϵ-interval surrounding the origin.\n\nSee also tanhloss.\n\n\n\n\n\n","category":"function"},{"location":"API/loss/#NeuralEstimators.quantileloss","page":"Loss functions","title":"NeuralEstimators.quantileloss","text":"quantileloss(θ̂, θ, τ; agg = mean)\nquantileloss(θ̂, θ, τ::Vector; agg = mean)\n\nThe asymmetric quantile loss function,\n\n L(θ θ τ) = (θ - θ)(𝕀(θ - θ 0) - τ)\n\nwhere τ ∈ (0, 1) is a probability level and 𝕀(⋅) is the indicator function.\n\nThe method that takes τ as a vector is useful for jointly approximating several quantiles of the posterior distribution. In this case, the number of rows in θ̂ is assumed to be pr, where p is the number of parameters and r is the number probability levels in τ (i.e., the length of τ).\n\nExamples\n\np = 1\nK = 10\nθ = rand(p, K)\nθ̂ = rand(p, K)\nquantileloss(θ̂, θ, 0.1)\n\nθ̂ = rand(3p, K)\nquantileloss(θ̂, θ, [0.1, 0.5, 0.9])\n\np = 2\nθ = rand(p, K)\nθ̂ = rand(p, K)\nquantileloss(θ̂, θ, 0.1)\n\nθ̂ = rand(3p, K)\nquantileloss(θ̂, θ, [0.1, 0.5, 0.9])\n\n\n\n\n\n","category":"function"},{"location":"API/loss/#NeuralEstimators.intervalscore","page":"Loss functions","title":"NeuralEstimators.intervalscore","text":"intervalscore(l, u, θ, α; agg = mean)\nintervalscore(θ̂, θ, α; agg = mean)\nintervalscore(assessment::Assessment; average_over_parameters::Bool = false, average_over_sample_sizes::Bool = true)\n\nGiven an interval [l, u] with nominal coverage 100×(1-α)% and true value θ, the interval score is defined by\n\nS(l u θ α) = (u - l) + 2α¹(l - θ)𝕀(θ l) + 2α¹(θ - u)𝕀(θ u)\n\nwhere α ∈ (0, 1) and 𝕀(⋅) is the indicator function.\n\nThe method that takes a single value θ̂ assumes that θ̂ is a matrix with 2p rows, where p is the number of parameters in the statistical model. Then, the first and second set of p rows will be used as l and u, respectively.\n\nFor further discussion, see Section 6 of Gneiting, T. and Raftery, A. E. (2007), \"Strictly proper scoring rules, prediction, and estimation\", Journal of the American statistical Association, 102, 359–378.\n\n\n\n\n\n","category":"function"},{"location":"API/architectures/#Architectures","page":"Architectures","title":"Architectures","text":"","category":"section"},{"location":"API/architectures/#Modules","page":"Architectures","title":"Modules","text":"","category":"section"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"The following high-level modules are often used when constructing a neural-network architecture. 
In particular, the DeepSet is the building block for most classes of Estimators in the package.","category":"page"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"DeepSet\n\nGNNSummary","category":"page"},{"location":"API/architectures/#NeuralEstimators.DeepSet","page":"Architectures","title":"NeuralEstimators.DeepSet","text":"DeepSet(ψ, ϕ, a = mean; S = nothing)\n\nThe DeepSets representation (Zaheer et al., 2017),\n\nθ(𝐙) = ϕ(𝐓(𝐙))\t \t 𝐓(𝐙) = 𝐚(ψ(𝐙ᵢ) i = 1 m)\n\nwhere 𝐙 ≡ (𝐙₁', …, 𝐙ₘ')' are independent replicates from the statistical model, ψ and ϕ are neural networks, and a is a permutation-invariant aggregation function. Expert summary statistics can be incorporated as,\n\nθ(𝐙) = ϕ((𝐓(𝐙) 𝐒(𝐙)))\n\nwhere S is a function that returns a vector of user-defined summary statistics. These user-defined summary statistics are provided either as a Function that returns a Vector, or as a vector of functions. In the case that ψ is set to nothing, only expert summary statistics will be used.\n\nThe aggregation function a can be any function that acts on an array and has a keyword argument dims that allows aggregation over a specific dimension of the array (e.g., sum, mean, maximum, minimum, logsumexp).\n\nDeepSet objects act on data of type Vector{A}, where each element of the vector is associated with one data set (i.e., one set of independent replicates from the statistical model), and where the type A depends on the form of the data and the chosen architecture for ψ. As a rule of thumb, when A is an array, the replicates are stored in the final dimension. For example, with gridded spatial data and ψ a CNN, A should be a 4-dimensional array, with the replicates stored in the 4ᵗʰ dimension. Note that in Flux, the final dimension is usually the \"batch\" dimension, but batching with DeepSet objects is done at the data set level (i.e., sets of replicates are batched together).\n\nData stored as Vector{Arrays} are first concatenated along the replicates dimension before being passed into the summary network ψ. 
This means that ψ is applied to a single large array rather than many small arrays, which can substantially improve computational efficiency.\n\nSet-level information, 𝐱, that is not a function of the data can be passed directly into the inference network ϕ in the following manner,\n\nθ(𝐙) = ϕ((𝐓(𝐙) 𝐱))\t \t \n\nor, in the case that expert summary statistics are also used,\n\nθ(𝐙) = ϕ((𝐓(𝐙) 𝐒(𝐙) 𝐱))\t \n\nThis is done by calling the DeepSet object on a Tuple{Vector{A}, Vector{Vector}}, where the first element of the tuple contains a vector of data sets and the second element contains a vector of set-level information (i.e., one vector for each data set).\n\nExamples\n\nusing NeuralEstimators, Flux\n\n# Two dummy data sets containing 3 and 4 replicates\np = 5 # number of parameters in the statistical model\nn = 10 # dimension of each replicate\nZ = [rand32(n, m) for m ∈ (3, 4)]\n\n# Construct the deepset object\nS = samplesize\nqₛ = 1 # dimension of expert summary statistic\nqₜ = 16 # dimension of neural summary statistic\nw = 32 # width of hidden layers\nψ = Chain(Dense(n, w, relu), Dense(w, qₜ, relu))\nϕ = Chain(Dense(qₜ + qₛ, w, relu), Dense(w, p))\nθ̂ = DeepSet(ψ, ϕ; S = S)\n\n# Apply the deepset object\nθ̂(Z)\n\n# Data with set-level information\nqₓ = 2 # dimension of set-level vector\nϕ = Chain(Dense(qₜ + qₛ + qₓ, w, relu), Dense(w, p))\nθ̂ = DeepSet(ψ, ϕ; S = S)\nx = [rand32(qₓ) for _ ∈ eachindex(Z)]\nθ̂((Z, x))\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#NeuralEstimators.GNNSummary","page":"Architectures","title":"NeuralEstimators.GNNSummary","text":"GNNSummary(propagation, readout; globalfeatures = nothing)\n\nA graph neural network (GNN) module designed to serve as the summary network ψ in the DeepSet representation when the data are graphical (e.g., irregularly observed spatial data).\n\nThe propagation module transforms graphical input data into a set of hidden-feature graphs. The readout module aggregates these feature graphs into a single hidden feature vector of fixed length (i.e., a vector of summary statistics). The summary network is then defined as the composition of the propagation and readout modules.\n\nOptionally, one may also include a module that extracts features directly from the graph, through the keyword argument globalfeatures. This module, when applied to a GNNGraph, should return a matrix of features, where the columns of the matrix correspond to the independent replicates (e.g., a 5x10 matrix is expected for 5 hidden features for each of 10 independent replicates stored in the graph). \n\nThe data should be stored as a GNNGraph or Vector{GNNGraph}, where each graph is associated with a single parameter vector. 
The graphs may contain subgraphs corresponding to independent replicates.\n\nExamples\n\nusing NeuralEstimators, Flux, GraphNeuralNetworks\nusing Flux: batch\nusing Statistics: mean\n\n# Propagation module\nd = 1 # dimension of response variable\nnₕ = 32 # dimension of node feature vectors\npropagation = GNNChain(GraphConv(d => nₕ), GraphConv(nₕ => nₕ))\n\n# Readout module\nreadout = GlobalPool(mean)\nnᵣ = nₕ # dimension of readout vector\n\n# Summary network\nψ = GNNSummary(propagation, readout)\n\n# Inference network\np = 3 # number of parameters in the statistical model\nw = 64 # width of hidden layer\nϕ = Chain(Dense(nᵣ, w, relu), Dense(w, p))\n\n# Construct the estimator\nθ̂ = DeepSet(ψ, ϕ)\n\n# Apply the estimator to a single graph, a single graph with subgraphs\n# (corresponding to independent replicates), and a vector of graphs\n# (corresponding to multiple data sets each with independent replicates)\ng₁ = rand_graph(11, 30, ndata=rand(d, 11))\ng₂ = rand_graph(13, 40, ndata=rand(d, 13))\ng₃ = batch([g₁, g₂])\nθ̂(g₁)\nθ̂(g₃)\nθ̂([g₁, g₂, g₃])\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#User-defined-summary-statistics","page":"Architectures","title":"User-defined summary statistics","text":"","category":"section"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"Order = [:type, :function]\nPages = [\"summarystatistics.md\"]","category":"page"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"The following functions correspond to summary statistics that are often useful as user-defined summary statistics in DeepSet objects.","category":"page"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"samplesize\n\nsamplecorrelation\n\nsamplecovariance\n\nNeighbourhoodVariogram","category":"page"},{"location":"API/architectures/#NeuralEstimators.samplesize","page":"Architectures","title":"NeuralEstimators.samplesize","text":"samplesize(Z::AbstractArray)\n\nComputes the sample size of a set of independent realisations Z.\n\nNote that this function is a wrapper around numberreplicates, but this function returns the number of replicates as the eltype of Z, rather than as an integer.\n\n\n\n\n\n","category":"function"},{"location":"API/architectures/#NeuralEstimators.samplecorrelation","page":"Architectures","title":"NeuralEstimators.samplecorrelation","text":"samplecorrelation(Z::AbstractArray)\n\nComputes the sample correlation matrix, R̂, and returns the vectorised strict lower triangle of R̂.\n\nExamples\n\n# 5 independent replicates of a 3-dimensional vector\nz = rand(3, 5)\nsamplecorrelation(z)\n\n\n\n\n\n","category":"function"},{"location":"API/architectures/#NeuralEstimators.samplecovariance","page":"Architectures","title":"NeuralEstimators.samplecovariance","text":"samplecovariance(Z::AbstractArray)\n\nComputes the sample covariance matrix, Σ̂, and returns the vectorised lower triangle of Σ̂.\n\nExamples\n\n# 5 independent replicates of a 3-dimensional vector\nz = rand(3, 5)\nsamplecovariance(z)\n\n\n\n\n\n","category":"function"},{"location":"API/architectures/#NeuralEstimators.NeighbourhoodVariogram","page":"Architectures","title":"NeuralEstimators.NeighbourhoodVariogram","text":"NeighbourhoodVariogram(h_max, n_bins) \n(l::NeighbourhoodVariogram)(g::GNNGraph)\n\nComputes the empirical variogram, \n\nhatgamma(h pm delta) = frac12N(h pm delta) sum_(ij) in N(h pm delta) (Z_i - Z_j)^2\n\nwhere N(h pm delta) equiv left(ij) boldsymbols_i - boldsymbols_j in 
(h-delta h+delta)right is the set of pairs of locations separated by a distance within (h-delta h+delta), and cdot denotes set cardinality. \n\nThe distance bins are constructed to have constant width 2delta, chosen based on the maximum distance h_max to be considered, and the specified number of bins n_bins. \n\nThe input type is a GNNGraph, and the empirical variogram is computed based on the corresponding graph structure. Specifically, only locations that are considered neighbours will be used when computing the empirical variogram. \n\nExamples\n\nusing NeuralEstimators, Distances, LinearAlgebra\n \n# Simulate Gaussian spatial data with exponential covariance function \nθ = 0.1 # true range parameter \nn = 250 # number of spatial locations \nS = rand(n, 2) # spatial locations \nD = pairwise(Euclidean(), S, dims = 1) # distance matrix \nΣ = exp.(-D ./ θ) # covariance matrix \nL = cholesky(Symmetric(Σ)).L # Cholesky factor \nm = 5 # number of independent replicates \nZ = L * randn(n, m) # simulated data \n\n# Construct the spatial graph \nr = 0.15 # radius of neighbourhood set\ng = spatialgraph(S, Z, r = r)\n\n# Construct the variogram object wth 10 bins\nnv = NeighbourhoodVariogram(r, 10) \n\n# Compute the empirical variogram \nnv(g)\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#Layers","page":"Architectures","title":"Layers","text":"","category":"section"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"In addition to the built-in layers provided by Flux, the following layers may be used when constructing a neural-network architecture.","category":"page"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"DensePositive\n\nPowerDifference\n\nResidualBlock\n\nSpatialGraphConv","category":"page"},{"location":"API/architectures/#NeuralEstimators.DensePositive","page":"Architectures","title":"NeuralEstimators.DensePositive","text":"DensePositive(layer::Dense, g::Function)\nDensePositive(layer::Dense; g::Function = Flux.relu)\n\nWrapper around the standard Dense layer that ensures positive weights (biases are left unconstrained).\n\nThis layer can be useful for constucting (partially) monotonic neural networks (see, e.g., QuantileEstimatorContinuous).\n\nExamples\n\nusing NeuralEstimators, Flux\n\nlayer = DensePositive(Dense(5 => 2))\nx = rand32(5, 64)\nlayer(x)\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#NeuralEstimators.PowerDifference","page":"Architectures","title":"NeuralEstimators.PowerDifference","text":"PowerDifference(a, b)\n\nFunction f(x y) = ax - (1-a)y^b for trainable parameters a ∈ [0, 1] and b > 0.\n\nExamples\n\nusing NeuralEstimators, Flux\n\n# Generate some data\nd = 5\nK = 10000\nX = randn32(d, K)\nY = randn32(d, K)\nXY = (X, Y)\na = 0.2f0\nb = 1.3f0\nZ = (abs.(a .* X - (1 .- a) .* Y)).^b\n\n# Initialise layer\nf = PowerDifference([0.5f0], [2.0f0])\n\n# Optimise the layer\nloader = Flux.DataLoader((XY, Z), batchsize=32, shuffle=false)\noptim = Flux.setup(Flux.Adam(0.01), f)\nfor epoch in 1:100\n for (xy, z) in loader\n loss, grads = Flux.withgradient(f) do m\n Flux.mae(m(xy), z)\n end\n Flux.update!(optim, f, grads[1])\n end\nend\n\n# Estimates of a and b\nf.a\nf.b\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#NeuralEstimators.ResidualBlock","page":"Architectures","title":"NeuralEstimators.ResidualBlock","text":"ResidualBlock(filter, in => out; stride = 1)\n\nBasic residual block (see here), consisting of two sequential convolutional layers 
and a skip (shortcut) connection that connects the input of the block directly to the output, facilitating the training of deep networks.\n\nExamples\n\nusing NeuralEstimators\nz = rand(16, 16, 1, 1)\nb = ResidualBlock((3, 3), 1 => 32)\nb(z)\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#NeuralEstimators.SpatialGraphConv","page":"Architectures","title":"NeuralEstimators.SpatialGraphConv","text":"SpatialGraphConv(in => out, g=relu; args...)\n\nImplements a spatial graph convolution for isotropic processes, \n\n boldsymbolh^(l)_j =\n gBig(\n boldsymbolGamma_1^(l) boldsymbolh^(l-1)_j\n +\n boldsymbolGamma_2^(l) barboldsymbolh^(l)_j\n +\n boldsymbolgamma^(l)\n Big)\n quad\n barboldsymbolh^(l)_j = sum_j in mathcalN(j)boldsymbolw^(l)(boldsymbols_j - boldsymbols_j) odot f^(l)(boldsymbolh^(l-1)_j boldsymbolh^(l-1)_j)\n\nwhere boldsymbolh^(l)_j is the hidden feature vector at location boldsymbols_j at layer l, g(cdot) is a non-linear activation function applied elementwise, boldsymbolGamma_1^(l) and boldsymbolGamma_2^(l) are trainable parameter matrices, boldsymbolgamma^(l) is a trainable bias vector, mathcalN(j) denotes the indices of neighbours of boldsymbols_j, boldsymbolw^(l)(cdot) is a (learnable) spatial weighting function, odot denotes elementwise multiplication, and f^(l)(cdot cdot) is a (learnable) function. \n\nBy default, the function f^(l)(cdot cdot) is modelled using a PowerDifference function. One may alternatively employ a nonlearnable function, for example, f = (hᵢ, hⱼ) -> (hᵢ - hⱼ).^2, specified through the keyword argument f. \n\nThe spatial distances between locations must be stored as an edge feature, as facilitated by spatialgraph(). The input to boldsymbolw(cdot) is a 1 times n matrix (i.e., a row vector) of spatial distances. The output of boldsymbolw(cdot) must be either a scalar; a vector of the same dimension as the feature vectors of the previous layer; or, if the features vectors of the previous layer are scalars, a vector of arbitrary dimension. To promote identifiability, the weights are normalised to sum to one (row-wise) within each neighbourhood set. By default, boldsymbolw(cdot) is taken to be a multilayer perceptron with a single hidden layer, although a custom choice for this function can be provided using the keyword argument w. \n\nArguments\n\nin: The dimension of input features.\nout: The dimension of output features.\ng = relu: Activation function.\nbias = true: Add learnable bias?\ninit = glorot_uniform: Initialiser for boldsymbolGamma_1^(l), boldsymbolGamma_2^(l), and boldsymbolgamma^(l). \nf = nothing\nw = nothing \nw_width = 128: (Only applicable if w = nothing) The width of the hidden layer in the MLP used to model boldsymbolw(cdot cdot). \nw_out = in: (Only applicable if w = nothing) The output dimension of boldsymbolw(cdot cdot). \nglob = false: If true, global features will be computed directly from the entire spatial graph. These features are of the form: boldsymbolT = sum_j=1^nsum_j in mathcalN(j)boldsymbolw^(l)(boldsymbols_j - boldsymbols_j) odot f^(l)(boldsymbolh^(l-1)_j boldsymbolh^(l-1)_j). Note that these global features are no longer associated with a graph structure, and should therefore only be used in the final layer of a summary-statistics module. 
\n\nExamples\n\nusing NeuralEstimators, Flux, GraphNeuralNetworks\n\n# Toy spatial data\nm = 5 # number of replicates\nd = 2 # spatial dimension\nn = 250 # number of spatial locations\nS = rand(n, d) # spatial locations\nZ = rand(n, m) # data\ng = spatialgraph(S, Z) # construct the graph\n\n# Construct and apply spatial graph convolution layer\nl = SpatialGraphConv(1 => 10)\nl(g)\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#Output-activation-functions","page":"Architectures","title":"Output activation functions","text":"","category":"section"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"Order = [:type, :function]\nPages = [\"activationfunctions.md\"]","category":"page"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"In addition to the standard activation functions provided by Flux, the following structs can be used at the end of an architecture to act as output activation functions that ensure valid estimates for certain models. NB: Although we refer to the following objects as \"activation functions\", they should be treated as layers that are included in the final stage of a Flux Chain().","category":"page"},{"location":"API/architectures/","page":"Architectures","title":"Architectures","text":"Compress\n\nCorrelationMatrix\n\nCovarianceMatrix","category":"page"},{"location":"API/architectures/#NeuralEstimators.Compress","page":"Architectures","title":"NeuralEstimators.Compress","text":"Compress(a, b, k = 1)\n\nLayer that compresses its input to be within the range a and b, where each element of a is less than the corresponding element of b.\n\nThe layer uses a logistic function,\n\nl(θ) = a + fracb - a1 + e^-kθ\n\nwhere the arguments a and b together combine to shift and scale the logistic function to the range (a, b), and the growth rate k controls the steepness of the curve.\n\nThe logistic function given here contains an additional parameter, θ₀, which is the input value corresponding to the functions midpoint. In Compress, we fix θ₀ = 0, since the output of a randomly initialised neural network is typically around zero.\n\nExamples\n\nusing NeuralEstimators, Flux\n\na = [25, 0.5, -pi/2]\nb = [500, 2.5, 0]\np = length(a)\nK = 100\nθ = randn(p, K)\nl = Compress(a, b)\nl(θ)\n\nn = 20\nθ̂ = Chain(Dense(n, p), l)\nZ = randn(n, K)\nθ̂(Z)\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#NeuralEstimators.CorrelationMatrix","page":"Architectures","title":"NeuralEstimators.CorrelationMatrix","text":"CorrelationMatrix(d)\n(object::CorrelationMatrix)(x::Matrix, cholesky::Bool = false)\n\nTransforms a vector 𝐯 ∈ ℝᵈ to the parameters of an unconstrained d×d correlation matrix or, if cholesky = true, the lower Cholesky factor of an unconstrained d×d correlation matrix.\n\nThe expected input is a Matrix with T(d-1) = (d-1)d÷2 rows, where T(d-1) is the (d-1)th triangular number (the number of free parameters in an unconstrained d×d correlation matrix), and the output is a Matrix of the same dimension. The columns of the input and output matrices correspond to independent parameter configurations (i.e., different correlation matrices).\n\nInternally, the layer constructs a valid Cholesky factor 𝐋 for a correlation matrix, and then extracts the strict lower triangle from the correlation matrix 𝐑 = 𝐋𝐋'. 
The lower triangle is extracted and vectorised in line with Julia's column-major ordering: for example, when modelling the correlation matrix\n\nbeginbmatrix\n1 R₁₂ R₁₃ \nR₂₁ 1 R₂₃\nR₃₁ R₃₂ 1\nendbmatrix\n\nthe rows of the matrix returned by a CorrelationMatrix layer are ordered as\n\nbeginbmatrix\nR₂₁ \nR₃₁ \nR₃₂ \nendbmatrix\n\nwhich means that the output can easily be transformed into the implied correlation matrices using vectotril and Symmetric.\n\nSee also CovarianceMatrix.\n\nExamples\n\nusing NeuralEstimators\nusing LinearAlgebra\nusing Flux\n\nd = 4\nl = CorrelationMatrix(d)\np = (d-1)*d÷2\nθ = randn(p, 100)\n\n# Returns a matrix of parameters, which can be converted to correlation matrices\nR = l(θ)\nR = map(eachcol(R)) do r\n\tR = Symmetric(cpu(vectotril(r, strict = true)), :L)\n\tR[diagind(R)] .= 1\n\tR\nend\n\n# Obtain the Cholesky factor directly\nL = l(θ, true)\nL = map(eachcol(L)) do x\n\t# Only the strict lower diagonal elements are returned\n\tL = LowerTriangular(cpu(vectotril(x, strict = true)))\n\n\t# Diagonal elements are determined under the constraint diag(L*L') = 𝟏\n\tL[diagind(L)] .= sqrt.(1 .- rowwisenorm(L).^2)\n\tL\nend\nL[1] * L[1]'\n\n\n\n\n\n","category":"type"},{"location":"API/architectures/#NeuralEstimators.CovarianceMatrix","page":"Architectures","title":"NeuralEstimators.CovarianceMatrix","text":"CovarianceMatrix(d)\n(object::CovarianceMatrix)(x::Matrix, cholesky::Bool = false)\n\nTransforms a vector 𝐯 ∈ ℝᵈ to the parameters of an unconstrained d×d covariance matrix or, if cholesky = true, the lower Cholesky factor of an unconstrained d×d covariance matrix.\n\nThe expected input is a Matrix with T(d) = d(d+1)÷2 rows, where T(d) is the dth triangular number (the number of free parameters in an unconstrained d×d covariance matrix), and the output is a Matrix of the same dimension. The columns of the input and output matrices correspond to independent parameter configurations (i.e., different covariance matrices).\n\nInternally, the layer constructs a valid Cholesky factor 𝐋 and then extracts the lower triangle from the positive-definite covariance matrix 𝚺 = 𝐋𝐋'. The lower triangle is extracted and vectorised in line with Julia's column-major ordering: for example, when modelling the covariance matrix\n\nbeginbmatrix\nΣ₁₁ Σ₁₂ Σ₁₃ \nΣ₂₁ Σ₂₂ Σ₂₃ \nΣ₃₁ Σ₃₂ Σ₃₃ \nendbmatrix\n\nthe rows of the matrix returned by a CovarianceMatrix are ordered as\n\nbeginbmatrix\nΣ₁₁ \nΣ₂₁ \nΣ₃₁ \nΣ₂₂ \nΣ₃₂ \nΣ₃₃ \nendbmatrix\n\nwhich means that the output can easily be transformed into the implied covariance matrices using vectotril and Symmetric.\n\nSee also CorrelationMatrix.\n\nExamples\n\nusing NeuralEstimators\nusing Flux\nusing LinearAlgebra\n\nd = 4\nl = CovarianceMatrix(d)\np = d*(d+1)÷2\nθ = randn(p, 50)\n\n# Returns a matrix of parameters, which can be converted to covariance matrices\nΣ = l(θ)\nΣ = [Symmetric(cpu(vectotril(x)), :L) for x ∈ eachcol(Σ)]\n\n# Obtain the Cholesky factor directly\nL = l(θ, true)\nL = [LowerTriangular(cpu(vectotril(x))) for x ∈ eachcol(L)]\nL[1] * L[1]'\n\n\n\n\n\n","category":"type"},{"location":"#NeuralEstimators","page":"NeuralEstimators","title":"NeuralEstimators","text":"","category":"section"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"Neural Bayes estimators are neural networks that transform data into point summaries of the posterior distribution. They are likelihood free and, once constructed, substantially faster than classical methods. 
Uncertainty quantification with neural Bayes estimators is also straightforward through the bootstrap distribution, which is essentially available \"for free\" with a neural estimator, or by training a neural Bayes estimator to approximate a set of marginal posterior quantiles. A related class of methods uses neural networks to approximate the likelihood function, the likelihood-to-evidence ratio, and the full posterior distribution. ","category":"page"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"The package NeuralEstimators facilitates the development of neural Bayes estimators and related neural inferential methods in a user-friendly manner. It caters for arbitrary models by having the user implicitly define their model via simulated data. This makes development particularly straightforward for models with existing implementations (possibly in other programming languages, e.g., R or python). A convenient interface for R users is available here.","category":"page"},{"location":"#Getting-started","page":"NeuralEstimators","title":"Getting started","text":"","category":"section"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"Install NeuralEstimators using the following command inside Julia:","category":"page"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"using Pkg; Pkg.add(url = \"https://github.com/msainsburydale/NeuralEstimators.jl\")","category":"page"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"Once familiar with the details of the Framework, see the Examples.","category":"page"},{"location":"#Supporting-and-citing","page":"NeuralEstimators","title":"Supporting and citing","text":"","category":"section"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"This software was developed as part of academic research. If you would like to support it, please star the repository. If you use it in your research or other activities, please also use the following citation.","category":"page"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"@article{,\n\tauthor = {Sainsbury-Dale, Matthew and Zammit-Mangion, Andrew and Huser, Raphaël},\n\ttitle = {Likelihood-Free Parameter Estimation with Neural {B}ayes Estimators},\n\tjournal = {The American Statistician},\n\tyear = {2024},\n\tvolume = {78},\n\tpages = {1--14},\n\tdoi = {10.1080/00031305.2023.2249522},\n\turl = {https://doi.org/10.1080/00031305.2023.2249522}\n}","category":"page"},{"location":"#Papers-using-NeuralEstimators","page":"NeuralEstimators","title":"Papers using NeuralEstimators","text":"","category":"section"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"Likelihood-free parameter estimation with neural Bayes estimators [paper] [code]\nNeural Bayes estimators for censored inference with peaks-over-threshold models [paper]\nNeural Bayes estimators for irregular spatial data using graph neural networks [paper][code]\nModern extreme value statistics for Utopian extremes [paper]\nNeural Methods for Amortised Inference [paper][code]","category":"page"},{"location":"#Related-packages","page":"NeuralEstimators","title":"Related packages","text":"","category":"section"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"Several other software packages have been developed to facilitate neural likelihood-free inference. 
These include:","category":"page"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"BayesFlow (TensorFlow)\nLAMPE (PyTorch)\nsbi (PyTorch)\nswyft (PyTorch)","category":"page"},{"location":"","page":"NeuralEstimators","title":"NeuralEstimators","text":"A summary of the functionality in these packages is given in Zammit-Mangion et al. (2024, Section 6.1). Note that this list of related packages was created in July 2024; if you have software to add to this list, please contact the package maintainer. ","category":"page"}] } diff --git a/dev/workflow/advancedusage/index.html b/dev/workflow/advancedusage/index.html index c24a0dfc..46f630af 100644 --- a/dev/workflow/advancedusage/index.html +++ b/dev/workflow/advancedusage/index.html @@ -1,10 +1,9 @@ -Advanced usage · NeuralEstimators.jl

          Advanced usage

          Saving and loading neural estimators

          As training is by far the most computationally demanding part of the workflow, one often trains an estimator and then saves it for later use. As discussed in the Flux documentation, there are a number of ways to do this. Perhaps the simplest approach is to save the parameters (i.e., weights and biases) of the neural network in a BSON file:

          using Flux
          +Advanced usage · NeuralEstimators.jl

          Advanced usage

          Saving and loading neural estimators

          Regarding saving and loading, neural estimators behave in the same manner as regular Flux models. Therefore, the examples and recommendations outlined in the Flux documentation also apply directly to neural estimators. For example, to save the model state of the neural estimator θ̂:

          using Flux
           using BSON: @save, @load
           model_state = Flux.state(θ̂)
          -@save "estimator.bson" model_state

          Then, in a later session, one may initialise a neural network with the same architecture used previously, and load the saved parameters:

          @load "estimator.bson" model_state
          -Flux.loadmodel!(θ̂, model_state)

          Note that the estimator θ̂ must be already defined (i.e., only the network parameters are saved, not the architecture).

          For convenience, the function train() allows for the automatic saving of the neural-network parameters during the training stage, via the argument savepath. Specifically, if savepath is specified, neural estimator's parameters will be saved in the folder savepath and, to load the optimal parameters post training, one may use the following code, or similar:

          using NeuralEstimators
          -Flux.loadparams!(θ̂, loadbestweights(savepath))

          Above, the function loadparams!() loads the parameters of the best (as determined by loadbestweights()) neural estimator saved in savepath.

          Storing expensive intermediate objects for data simulation

          Parameters sampled from the prior distribution may be stored in two ways. Most simply, they can be stored as a $p \times K$ matrix, where $p$ is the number of parameters in the model and $K$ is the number of parameter vectors sampled from the prior distribution. Alternatively, they can be stored in a user-defined struct subtyping ParameterConfigurations, whose only requirement is a field θ that stores the $p \times K$ matrix of parameters. With this approach, one may store computationally expensive intermediate objects, such as Cholesky factors, for later use when conducting "on-the-fly" simulation, which is discussed below.

          On-the-fly and just-in-time simulation

          When data simulation is (relatively) computationally inexpensive, the training data set, $\mathcal{Z}_{\text{train}}$, can be simulated continuously during training, a technique coined "simulation-on-the-fly". Regularly refreshing $\mathcal{Z}_{\text{train}}$ leads to lower out-of-sample error and to a reduction in overfitting. This strategy therefore facilitates the use of larger, more representationally-powerful networks that are prone to overfitting when $\mathcal{Z}_{\text{train}}$ is fixed. Further, this technique allows for data be simulated "just-in-time", in the sense that they can be simulated in small batches, used to train the neural estimator, and then removed from memory. This can substantially reduce pressure on memory resources, particularly when working with large data sets.

          One may also regularly refresh the set $\vartheta_{\text{train}}$ of parameter vectors used during training, and doing so leads to similar benefits. However, fixing $\vartheta_{\text{train}}$ allows computationally expensive terms, such as Cholesky factors when working with Gaussian process models, to be reused throughout training, which can substantially reduce the training time for some models. Hybrid approaches are also possible, whereby the parameters (and possibly the data) are held fixed for several epochs (i.e., several passes through the training set when performing stochastic gradient descent) before being refreshed.

          The above strategies are facilitated with various methods of train().

          Regularisation

          The term regularisation refers to a variety of techniques aimed to reduce overfitting when training a neural network, primarily by discouraging complex models.

          One common regularisation technique is known as dropout (Srivastava et al., 2014), implemented in Flux's Dropout layer. Dropout involves temporarily dropping ("turning off") a randomly selected set of neurons (along with their connections) at each iteration of the training stage, and this results in a computationally-efficient form of model (neural-network) averaging.

          Another class of regularisation techniques involve modifying the loss function. For instance, L₁ regularisation (sometimes called lasso regression) adds to the loss a penalty based on the absolute value of the neural-network parameters. Similarly, L₂ regularisation (sometimes called ridge regression) adds to the loss a penalty based on the square of the neural-network parameters. Note that these penalty terms are not functions of the data or of the statistical-model parameters that we are trying to infer, and therefore do not modify the Bayes risk or the associated Bayes estimator. These regularisation techniques can be implemented straightforwardly by providing a custom optimiser to train that includes a SignDecay object for L₁ regularisation, or a WeightDecay object for L₂ regularisation. See the Flux documentation for further details.

          For example, the following code constructs a neural Bayes estimator using dropout and L₁ regularisation with penalty coefficient $\lambda = 10^{-4}$:

          using NeuralEstimators
          +@save "estimator.bson" model_state

          Then, to load it in a new session, one may initialise a neural estimator with the same architecture used previously, and load the saved model state:

          @load "estimator.bson" model_state
          +Flux.loadmodel!(θ̂, model_state)

          It is also straightforward to save the entire neural estimator, including its architecture (see here). However, the first approach outlined above is recommended for long-term storage.

          For convenience, the function train() allows for the automatic saving of the model state during the training stage, via the argument savepath.
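
          A minimal sketch of this workflow is given below; the folder name is arbitrary, and it is assumed that the model state saved under savepath can be retrieved with the helper loadbestweights() and restored with Flux.loadmodel!(), in the same manner as above:

          θ̂ = train(θ̂, θ_train, θ_val, Z_train, Z_val; savepath = "saved_runs")
          Flux.loadmodel!(θ̂, loadbestweights("saved_runs"))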

          Storing expensive intermediate objects for data simulation

          Parameters sampled from the prior distribution may be stored in two ways. Most simply, they can be stored as a $p \times K$ matrix, where $p$ is the number of parameters in the model and $K$ is the number of parameter vectors sampled from the prior distribution. Alternatively, they can be stored in a user-defined struct subtyping ParameterConfigurations, whose only requirement is a field θ that stores the $p \times K$ matrix of parameters. With this approach, one may store computationally expensive intermediate objects, such as Cholesky factors, for later use when conducting "on-the-fly" simulation, which is discussed below.
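
          For instance, a minimal sketch of such a struct is given below; the name GPParameters and the field L are illustrative (only the field θ is required), and a fuller example appears in the missing-data illustration later on this page:

          using NeuralEstimators

          struct GPParameters <: ParameterConfigurations
          	θ    # p × K matrix of parameters sampled from the prior
          	L    # expensive intermediate objects (e.g., Cholesky factors), one per parameter vector
          end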

          On-the-fly and just-in-time simulation

          When data simulation is (relatively) computationally inexpensive, the training data set, $\mathcal{Z}_{\text{train}}$, can be simulated continuously during training, a technique coined "simulation-on-the-fly". Regularly refreshing $\mathcal{Z}_{\text{train}}$ leads to lower out-of-sample error and to a reduction in overfitting. This strategy therefore facilitates the use of larger, more representationally-powerful networks that are prone to overfitting when $\mathcal{Z}_{\text{train}}$ is fixed. Further, this technique allows data to be simulated "just-in-time", in the sense that they can be simulated in small batches, used to train the neural estimator, and then removed from memory. This can substantially reduce pressure on memory resources, particularly when working with large data sets.

          One may also regularly refresh the set $\vartheta_{\text{train}}$ of parameter vectors used during training, and doing so leads to similar benefits. However, fixing $\vartheta_{\text{train}}$ allows computationally expensive terms, such as Cholesky factors when working with Gaussian process models, to be reused throughout training, which can substantially reduce the training time for some models. Hybrid approaches are also possible, whereby the parameters (and possibly the data) are held fixed for several epochs (i.e., several passes through the training set when performing stochastic gradient descent) before being refreshed.

          The above strategies are facilitated with various methods of train().
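
          For concreteness, the two styles of call used elsewhere on this page are sketched below: the first uses fixed, pre-simulated training and validation sets, while the second passes a parameter sampler and a data simulator so that the training sets can be refreshed during training (the keyword values are illustrative and mirror the masked-estimator example further below):

          # Fixed parameter and data sets, simulated once before training
          θ̂ = train(θ̂, θ_train, θ_val, Z_train, Z_val)

          # Parameter sampler and data simulator, allowing training sets to be refreshed
          θ̂ = train(θ̂, Parameters, simulate, m = 1, ξ = ξ, K = 1000, epochs = 10)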

          Regularisation

          The term regularisation refers to a variety of techniques aimed at reducing overfitting when training a neural network, primarily by discouraging complex models.

          One common regularisation technique is known as dropout (Srivastava et al., 2014), implemented in Flux's Dropout layer. Dropout involves temporarily dropping ("turning off") a randomly selected set of neurons (along with their connections) at each iteration of the training stage, and this results in a computationally-efficient form of model (neural-network) averaging.

          Another class of regularisation techniques involves modifying the loss function. For instance, L₁ regularisation (sometimes called lasso regression) adds to the loss a penalty based on the absolute value of the neural-network parameters. Similarly, L₂ regularisation (sometimes called ridge regression) adds to the loss a penalty based on the square of the neural-network parameters. Note that these penalty terms are not functions of the data or of the statistical-model parameters that we are trying to infer, and therefore do not modify the Bayes risk or the associated Bayes estimator. These regularisation techniques can be implemented straightforwardly by providing a custom optimiser to train that includes a SignDecay object for L₁ regularisation, or a WeightDecay object for L₂ regularisation. See the Flux documentation for further details.

          For example, the following code constructs a neural Bayes estimator using dropout and L₁ regularisation with penalty coefficient $\lambda = 10^{-4}$:

          using NeuralEstimators
           using Flux
           
           # Generate data from the model Z ~ N(θ, 1) and θ ~ N(0, 1)
          @@ -35,7 +34,7 @@
           optimiser = Flux.setup(OptimiserChain(SignDecay(1e-4), Adam()), θ̂)
           
           # Train the estimator
          -train(θ̂, θ_train, θ_val, Z_train, Z_val; optimiser = optimiser)

          Note that when the training data and/or parameters are held fixed during training, L₂ regularisation with penalty coefficient $\lambda = 10^{-4}$ is applied by default.

          Expert summary statistics

          Implicitly, neural estimators involve the learning of summary statistics. However, some summary statistics are available in closed form, simple to compute, and highly informative (e.g., sample quantiles, the empirical variogram, etc.). Often, explicitly incorporating these expert summary statistics in a neural estimator can simplify the optimisation problem, and lead to a better estimator.

          The fusion of learned and expert summary statistics is facilitated by our implementation of the DeepSet framework. Note that this implementation also allows the user to construct a neural estimator using only expert summary statistics, following, for example, Gerber and Nychka (2021) and Rai et al. (2024). Note also that the user may specify arbitrary expert summary statistics, however, for convenience several standard User-defined summary statistics are provided with the package, including a fast approximate version of the empirical variogram.

          Variable sample sizes

          A neural estimator in the Deep Set representation can be applied to data sets of arbitrary size. However, even when the neural Bayes estimator approximates the true Bayes estimator arbitrarily well, it is conditional on the number of replicates, $m$, and is not necessarily a Bayes estimator for $m^* \ne m$. Denote a data set comprising $m$ replicates as $\boldsymbol{Z}^{(m)} \equiv (\boldsymbol{Z}_1', \dots, \boldsymbol{Z}_m')'$. There are at least two (non-mutually exclusive) approaches one could adopt if data sets with varying $m$ are envisaged, which we describe below.

          Piecewise estimators

          If data sets with varying $m$ are envisaged, one could train $l$ neural Bayes estimators for different sample sizes, or groups thereof (e.g., a small-sample estimator and a large-sample estimator). Specifically, for sample-size changepoints $m_1$, $m_2$, $\dots$, $m_{l-1}$, one could construct a piecewise neural Bayes estimator,

          \[\hat{\boldsymbol{\theta}}(\boldsymbol{Z}^{(m)}; \boldsymbol{\gamma}^*) +train(θ̂, θ_train, θ_val, Z_train, Z_val; optimiser = optimiser)

          Note that when the training data and/or parameters are held fixed during training, L₂ regularisation with penalty coefficient $\lambda = 10^{-4}$ is applied by default.
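
          If explicit L₂ regularisation is desired instead, a sketch under the same setup as above simply swaps the decay object in the optimiser chain:

          optimiser = Flux.setup(OptimiserChain(WeightDecay(1e-4), Adam()), θ̂)
          train(θ̂, θ_train, θ_val, Z_train, Z_val; optimiser = optimiser)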

          Expert summary statistics

          Implicitly, neural estimators involve the learning of summary statistics. However, some summary statistics are available in closed form, simple to compute, and highly informative (e.g., sample quantiles, the empirical variogram, etc.). Often, explicitly incorporating these expert summary statistics in a neural estimator can simplify the optimisation problem, and lead to a better estimator.

          The fusion of learned and expert summary statistics is facilitated by our implementation of the DeepSet framework. Note that this implementation also allows the user to construct a neural estimator using only expert summary statistics, following, for example, Gerber and Nychka (2021) and Rai et al. (2024). Note also that the user may specify arbitrary expert summary statistics; however, for convenience, several standard User-defined summary statistics are provided with the package, including a fast approximate version of the empirical variogram.
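
          As a brief sketch (mirroring the DeepSet documentation, with the dimensions and layer widths below chosen arbitrarily), an expert statistic such as samplesize is supplied through the keyword argument S:

          using NeuralEstimators, Flux

          n  = 10    # dimension of each replicate
          p  = 2     # number of parameters in the statistical model
          w  = 32    # width of hidden layers
          qₜ = 16    # dimension of the learned (neural) summary statistic
          qₛ = 1     # dimension of the expert summary statistic (samplesize)
          ψ  = Chain(Dense(n, w, relu), Dense(w, qₜ, relu))
          ϕ  = Chain(Dense(qₜ + qₛ, w, relu), Dense(w, p))
          θ̂  = DeepSet(ψ, ϕ; S = samplesize)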

          Variable sample sizes

          A neural estimator in the Deep Set representation can be applied to data sets of arbitrary size. However, even when the neural Bayes estimator approximates the true Bayes estimator arbitrarily well, it is conditional on the number of replicates, $m$, and is not necessarily a Bayes estimator for $m^* \ne m$. Denote a data set comprising $m$ replicates as $\boldsymbol{Z}^{(m)} \equiv (\boldsymbol{Z}_1', \dots, \boldsymbol{Z}_m')'$. There are at least two (non-mutually exclusive) approaches one could adopt if data sets with varying $m$ are envisaged, which we describe below.

          Piecewise estimators

          If data sets with varying $m$ are envisaged, one could train $l$ neural Bayes estimators for different sample sizes, or groups thereof (e.g., a small-sample estimator and a large-sample estimator). Specifically, for sample-size changepoints $m_1$, $m_2$, $\dots$, $m_{l-1}$, one could construct a piecewise neural Bayes estimator,

          \[\hat{\boldsymbol{\theta}}(\boldsymbol{Z}^{(m)}; \boldsymbol{\gamma}^*) = \begin{cases} \hat{\boldsymbol{\theta}}(\boldsymbol{Z}^{(m)}; \boldsymbol{\gamma}^*_{\tilde{m}_1}) & m \leq m_1,\\ @@ -45,7 +44,7 @@ \end{cases}\]

          where, here, $\boldsymbol{\gamma}^* \equiv (\boldsymbol{\gamma}^*_{\tilde{m}_1}, \dots, \boldsymbol{\gamma}^*_{\tilde{m}_{l-1}})$, and where $\boldsymbol{\gamma}^*_{\tilde{m}}$ are the neural-network parameters optimised for sample size $\tilde{m}$ chosen so that $\hat{\boldsymbol{\theta}}(\cdot; \boldsymbol{\gamma}^*_{\tilde{m}})$ is near-optimal over the range of sample sizes in which it is applied. This approach works well in practice, and it is less computationally burdensome than it first appears when used in conjunction with pre-training.

          Piecewise neural estimators are implemented with the struct PiecewiseEstimator, and their construction is facilitated with trainx().
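
          The sketch below illustrates the intended workflow; the exact signatures are assumed here (trainx() returning one trained estimator per sample size in the supplied vector, and PiecewiseEstimator() taking the collection of estimators together with the sample-size changepoints), so consult the corresponding docstrings before use:

          # Assumed signatures; see the trainx() and PiecewiseEstimator docstrings
          estimators = trainx(θ̂, Parameters, simulate, [30, 1000])    # small- and large-sample estimators
          θ̂_piecewise = PiecewiseEstimator(estimators, [30])           # changepoint m₁ = 30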

          Training with variable sample sizes

          Alternatively, one could treat the sample size as a random variable, $M$, with support over a set of positive integers, $\mathcal{M}$, in which case, for the neural Bayes estimator, the risk function becomes

          \[\sum_{m \in \mathcal{M}} P(M=m)\left( \int_\Theta \int_{\mathcal{Z}^m} L(\boldsymbol{\theta}, \hat{\boldsymbol{\theta}}(\boldsymbol{z}^{(m)}))f(\boldsymbol{z}^{(m)} \mid \boldsymbol{\theta}) \rm{d} \boldsymbol{z}^{(m)} \rm{d} \Pi(\boldsymbol{\theta}) -\right).\]

          This approach does not materially alter the workflow, except that one must also sample the number of replicates before simulating the data during the training phase.

          The following pseudocode illustrates how one may modify a general data simulator to train under a range of sample sizes, with the distribution of $M$ defined by passing any object that can be sampled using rand(m, K) (e.g., an integer range like 1:30, an integer-valued distribution from Distributions.jl, etc.):

          function simulate(parameters, m) 
          +\right).\]

          This approach does not materially alter the workflow, except that one must also sample the number of replicates before simulating the data during the training phase.

          The following pseudocode illustrates how one may modify a general data simulator to train under a range of sample sizes, with the distribution of $M$ defined by passing any object that can be sampled using rand(m, K) (e.g., an integer range like 1:30, an integer-valued distribution from Distributions.jl, etc.):

          function simulate(parameters, m)
           
           	## Number of parameter vectors stored in parameters
           	K = size(parameters, 2)
          @@ -60,8 +59,8 @@
           end
           
           ## Method that allows an integer to be passed for m
          -simulate(parameters, m::Integer) = simulate(parameters, range(m, m))

          Missing data

          Neural networks do not naturally handle missing data, and this property can preclude their use in a broad range of applications. Here, we describe two techniques that alleviate this challenge in the context of parameter point estimation: The masking approach and The neural EM algorithm.

          As a running example, we consider a Gaussian process model where the data are collected over a regular grid, but where some elements of the grid are unobserved. This situation often arises in, for example, remote-sensing applications, where the presence of cloud cover prevents measurement in some places. Below, we load the packages needed in this example, and define some aspects of the model that will remain constant throughout (e.g., the prior, the spatial domain, etc.). We also define structs and functions for sampling from the prior distribution and for simulating marginally from the data model.

          using Distances 
          -using Distributions 
          +simulate(parameters, m::Integer) = simulate(parameters, range(m, m))

          Missing data

          Neural networks do not naturally handle missing data, and this property can preclude their use in a broad range of applications. Here, we describe two techniques that alleviate this challenge in the context of parameter point estimation: The masking approach and The neural EM algorithm.

          As a running example, we consider a Gaussian process model where the data are collected over a regular grid, but where some elements of the grid are unobserved. This situation often arises in, for example, remote-sensing applications, where the presence of cloud cover prevents measurement in some places. Below, we load the packages needed in this example, and define some aspects of the model that will remain constant throughout (e.g., the prior, the spatial domain, etc.). We also define structs and functions for sampling from the prior distribution and for simulating marginally from the data model.

          using Distances
          +using Distributions
           using Flux
           using LinearAlgebra
           using NeuralEstimators
          @@ -69,12 +68,12 @@
           
           # Set the prior and define the number of parameters in the statistical model
           Π = (
          -	τ = Uniform(0, 1.0), 
          +	τ = Uniform(0, 1.0),
           	ρ = Uniform(0, 0.4)
           )
          -p = length(Π) 
          +p = length(Π)
           
          -# Define the (gridded) spatial domain and compute the distance matrix 
          +# Define the (gridded) spatial domain and compute the distance matrix
           points = range(0, 1, 16)
           S = expandgrid(points, points)
           D = pairwise(Euclidean(), S, dims = 1)
          @@ -86,7 +85,7 @@
           	D = D
           )
           
          -# Struct for storing parameters+Cholesky factors 
          +# Struct for storing parameters+Cholesky factors
           struct Parameters <: ParameterConfigurations
           	θ
           	L
          @@ -127,20 +126,20 @@
           	end
           
           	return Z
          -end

          The masking approach

          The first missing-data technique that we consider is the so-called masking approach of Wang et al. (2024). The strategy involves completing the data by replacing missing values with zeros, and using auxiliary variables to encode the missingness pattern, which are also passed into the network.

          Let $\boldsymbol{Z}$ denote the complete-data vector. Then, the masking approach considers inference based on $\boldsymbol{W}$, a vector of indicator variables that encode the missingness pattern (with elements equal to one or zero if the corresponding element of $\boldsymbol{Z}$ is observed or missing, respectively), and

          \[\boldsymbol{U} \equiv \boldsymbol{Z} \odot \boldsymbol{W},\]

          where $\odot$ denotes elementwise multiplication and the product of a missing element and zero is defined to be zero. Irrespective of the missingness pattern, $\boldsymbol{U}$ and $\boldsymbol{W}$ have the same fixed dimensions and hence may be processed easily using a single neural network. A neural point estimator is then trained on realisations of $\{\boldsymbol{U}, \boldsymbol{W}\}$ which, by construction, do not contain any missing elements.

          Since the missingness pattern $\boldsymbol{W}$ is now an input to the neural network, it must be incorporated during the training phase. When interest lies only in making inference from a single already-observed data set, $\boldsymbol{W}$ is fixed and known, and the Bayes risk remains unchanged. However, amortised inference, whereby one trains a single neural network that will be used to make inference with many data sets, requires a joint model for the data $\boldsymbol{Z}$ and the missingness pattern $\boldsymbol{W}$:

          # Marginal simulation from the data model and a MCAR missingness model
          +end

          The masking approach

          The first missing-data technique that we consider is the so-called masking approach of Wang et al. (2024). The strategy involves completing the data by replacing missing values with zeros, and using auxiliary variables to encode the missingness pattern, which are also passed into the network.

          Let $\boldsymbol{Z}$ denote the complete-data vector. Then, the masking approach considers inference based on $\boldsymbol{W}$, a vector of indicator variables that encode the missingness pattern (with elements equal to one or zero if the corresponding element of $\boldsymbol{Z}$ is observed or missing, respectively), and

          \[\boldsymbol{U} \equiv \boldsymbol{Z} \odot \boldsymbol{W},\]

          where $\odot$ denotes elementwise multiplication and the product of a missing element and zero is defined to be zero. Irrespective of the missingness pattern, $\boldsymbol{U}$ and $\boldsymbol{W}$ have the same fixed dimensions and hence may be processed easily using a single neural network. A neural point estimator is then trained on realisations of $\{\boldsymbol{U}, \boldsymbol{W}\}$ which, by construction, do not contain any missing elements.
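
          Concretely, given a data array Z, the augmented pair can be constructed with the helper functions removedata() and encodedata() that are used later in this example (the 25% missingness proportion below is arbitrary):

          Z₁ = removedata(Z, 0.25)   # randomly remove 25% of the entries of Z
          UW = encodedata(Z₁)        # augmented data: U in the first channel, W in the second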

          Since the missingness pattern $\boldsymbol{W}$ is now an input to the neural network, it must be incorporated during the training phase. When interest lies only in making inference from a single already-observed data set, $\boldsymbol{W}$ is fixed and known, and the Bayes risk remains unchanged. However, amortised inference, whereby one trains a single neural network that will be used to make inference with many data sets, requires a joint model for the data $\boldsymbol{Z}$ and the missingness pattern $\boldsymbol{W}$:

          # Marginal simulation from the data model and a MCAR missingness model
           function simulatemissing(parameters::Parameters, m::Integer)
           
           	Z = simulate(parameters, m)   # simulate completely-observed data
           
           	UW = map(Z) do z
          -		prop = rand()             # sample a missingness proportion 
          +		prop = rand()             # sample a missingness proportion
           		z = removedata(z, prop)   # randomly remove a proportion of the data
           		uw = encodedata(z)        # replace missing entries with zero and encode missingness pattern
           		uw
           	end
           
           	return UW
          -end

          Note that the helper functions removedata() and encodedata() facilitate the construction of augmented data sets $\{\boldsymbol{U}, \boldsymbol{W}\}$.

          Next, we construct and train a masked neural Bayes estimator. Here, the first convolutional layer takes two input channels, since we store the augmented data $\boldsymbol{U}$ in the first channel and the missingness pattern $\boldsymbol{W}$ in the second. We construct a point estimator, but the masking approach is applicable with any other kind of estimator (see Estimators):

          # Construct DeepSet object 
          +end

          Note that the helper functions removedata() and encodedata() facilitate the construction of augmented data sets $\{\boldsymbol{U}, \boldsymbol{W}\}$.

          Next, we construct and train a masked neural Bayes estimator. Here, the first convolutional layer takes two input channels, since we store the augmented data $\boldsymbol{U}$ in the first channel and the missingness pattern $\boldsymbol{W}$ in the second. We construct a point estimator, but the masking approach is applicable with any other kind of estimator (see Estimators):

          # Construct DeepSet object
           ψ = Chain(
           	Conv((10, 10), 2 => 16,  relu),
           	Conv((5, 5),  16 => 32,  relu),
          @@ -150,15 +149,15 @@
           ϕ = Chain(Dense(64, 256, relu), Dense(256, p, exp))
           deepset = DeepSet(ψ, ϕ)
           
          -# Initialise point estimator 
          +# Initialise point estimator
           θ̂ = PointEstimator(deepset)
           
           # Train the masked neural Bayes estimator
          -θ̂ = train(θ̂, Parameters, simulatemissing, m = 1, ξ = ξ, K = 1000, epochs = 10)

          Once trained, we can apply our masked neural Bayes estimator to (incomplete) observed data. The data must be encoded in the same manner that was done during training. Below, we use simulated data as a surrogate for real data, with a missingness proportion of 0.25:

          θ = Parameters(1, ξ)
           Z = simulate(θ, 1)[1]
Z = removedata(Z, 0.25)
           UW = encodedata(Z)
θ̂(UW)

          The neural EM algorithm

          Let $\boldsymbol{Z}_1$ and $\boldsymbol{Z}_2$ denote the observed and unobserved (i.e., missing) data, respectively, and let $\boldsymbol{Z} \equiv (\boldsymbol{Z}_1', \boldsymbol{Z}_2')'$ denote the complete data. A classical approach to facilitating inference when data are missing is the expectation-maximisation (EM) algorithm. The neural EM algorithm is an approximate version of the conventional (Bayesian) Monte Carlo EM algorithm which, at the $l$th iteration, updates the parameter vector through

          \[\boldsymbol{\theta}^{(l)} = \argmax_{\boldsymbol{\theta}} \sum_{h = 1}^H \ell(\boldsymbol{\theta}; \boldsymbol{Z}_1, \boldsymbol{Z}_2^{(lh)}) + \log \pi_H(\boldsymbol{\theta}),\]

where realisations of the missing-data component, $\{\boldsymbol{Z}_2^{(lh)} : h = 1, \dots, H\}$, are sampled from the probability distribution of $\boldsymbol{Z}_2$ given $\boldsymbol{Z}_1$ and $\boldsymbol{\theta}^{(l-1)}$, and where $\pi_H(\boldsymbol{\theta}) \propto \{\pi(\boldsymbol{\theta})\}^H$ is a concentrated version of the original prior density. The neural EM algorithm performs the above EM update using a neural network that returns the MAP estimate (i.e., the posterior mode) given the conditionally simulated data. Such a neural network can be obtained by training a neural Bayes estimator under a continuous relaxation of the 0–1 loss function, such as the tanhloss or kpowerloss discussed below.
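
Conceptually, each iteration therefore alternates between conditional simulation of the missing data and neural MAP estimation from the completed data sets. The following rough sketch uses hypothetical names and omits convergence checks; in practice these steps are handled by the EM struct used below:

# Rough sketch of the neural EM loop (illustration only; use the EM struct in practice)
function neural_em_sketch(Z₁, θ₀, θ̂, simulateconditional, ξ; H = 50, niterations = 20)
	θ = θ₀
	for l in 1:niterations
		Z = simulateconditional(Z₁, θ, ξ, nsims = H)  # sample Z₂ given Z₁ and the current θ, completing the data
		θ = vec(θ̂(Z))                                 # neural MAP estimate from the H completed data sets
	end
	return θ
end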

          First, we construct a neural approximation of the MAP estimator. In this example, we will take $H=50$. When $H$ is taken to be reasonably large, one may lean on the Bernstein-von Mises theorem to train the neural Bayes estimator under linear or quadratic loss; otherwise, one should train the estimator under a continuous relaxation of the 0–1 loss (e.g., the tanhloss or kpowerloss in the limit $\kappa \to 0$):

          # Construct DeepSet object
           ψ = Chain(
           	Conv((10, 10), 1 => 16,  relu),
           	Conv((5, 5),  16 => 32,  relu),
          @@ -171,12 +170,12 @@
           	)
           deepset = DeepSet(ψ, ϕ)
           
# Initialise point estimator
           θ̂ = PointEstimator(deepset)
           
           # Train neural Bayes estimator
           H = 50
θ̂ = train(θ̂, Parameters, simulate, m = H, ξ = ξ, K = 1000, epochs = 10)
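
Alternatively, instead of the training call above, one could hypothetically target the posterior mode directly by training under a continuous relaxation of the 0–1 loss. The snippet below assumes that train() accepts a loss keyword of the form loss(θ̂, θ) and that tanhloss has the three-argument form tanhloss(θ̂, θ, κ) described in the loss-function documentation:

# Hypothetical alternative: train under the tanhloss relaxation of the 0–1 loss
κ = 0.1                                  # relaxation parameter; the 0–1 loss is recovered as κ → 0
θ̂ = train(θ̂, Parameters, simulate, m = H, ξ = ξ, K = 1000, epochs = 10,
           loss = (x, y) -> tanhloss(x, y, κ))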

          Next, we define a function for conditional simulation (see EM for details on the required format of this function):

          function simulateconditional(Z::M, θ, ξ; nsims::Integer = 1) where {M <: AbstractMatrix{Union{Missing, T}}} where T
           
           	# Save the original dimensions
           	dims = size(Z)
          @@ -239,10 +238,10 @@
           	Z = reshape(Z, dims..., 1, nsims)
           
           	return Z
end

          Now we can use the neural EM algorithm to get parameter point estimates from data containing missing values. The algorithm is implemented with the struct EM. Again, here we use simulated data as a surrogate for real data:

          θ = Parameters(1, ξ)
           Z = simulate(θ, 1)[1][:, :]     # simulate a single gridded field
           Z = removedata(Z, 0.25)         # remove 25% of the data
           θ₀ = mean.([Π...])              # initial estimate, the prior mean
           
           neuralem = EM(simulateconditional, θ̂)
neuralem(Z, θ₀, ξ = ξ, nsims = H, use_ξ_in_simulateconditional = true)

          Censored data

          Coming soon, based on the methodology presented in Richards et al. (2023+).

Examples

(Final steps from the Examples page: point estimation followed by parametric bootstrap.)

θ̂(Z)                                # point estimates
θ̃ = Parameters(θ̂(Z), S)             # construct Parameters object from the point estimates
bs = bootstrap(θ̂, θ̃, simulate, m)   # bootstrap estimates
interval(bs)                        # parametric bootstrap confidence interval

          Overview

          To develop a neural estimator with NeuralEstimators,

          • Sample parameters from the prior distribution. The parameters are stored as $p \times K$ matrices, with $p$ the number of parameters in the model and $K$ the number of parameter vectors in the given parameter set (i.e., training, validation, or test set).
          • Simulate data from the assumed model over the parameter sets generated above. These data are stored as a Vector{A}, with each element of the vector associated with one parameter configuration, and where A depends on the multivariate structure of the data and the representation of the neural estimator (e.g., an Array for CNN-based estimators, a GNNGraph for GNN-based estimators, etc.).
          • Initialise a neural network θ̂.
          • Train θ̂ under the chosen loss function using train().
          • Assess θ̂ using assess(), which uses simulation-based methods to assess the estimator with respect to its sampling distribution.

          Once the estimator θ̂ has passed our assessments and is therefore deemed to be well calibrated, it may be applied to observed data. See the Examples and, once familiar with the basic workflow, see Advanced usage for practical considerations on how to most effectively construct neural estimators.
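
For orientation, a minimal end-to-end skeleton of these steps might look as follows. The prior sampler, data simulator, and all dimensions below are hypothetical stand-ins (a toy Gaussian model with unknown mean and standard deviation); complete, runnable examples are given on the Examples page:

using NeuralEstimators, Flux

p = 2      # number of parameters in the (hypothetical) model
n = 100    # dimension of each independent replicate
m = 30     # number of independent replicates per data set

# 1. Sample parameters from the prior: returns a p × K matrix
sample_prior(K) = rand(Float32, p, K)

# 2. Simulate data: returns a Vector of n × m matrices, one per parameter vector
simulate(θ, m) = [θ[1, k] .+ θ[2, k] .* randn(Float32, n, m) for k in 1:size(θ, 2)]

# 3. Initialise a neural point estimator based on the DeepSet representation
ψ = Chain(Dense(n, 64, relu), Dense(64, 64, relu))
ϕ = Chain(Dense(64, 64, relu), Dense(64, p))
θ̂ = PointEstimator(DeepSet(ψ, ϕ))

# 4. Train the estimator, then 5. assess it with simulation-based diagnostics
θ̂ = train(θ̂, sample_prior, simulate, m = m, K = 1000, epochs = 10)
θ_test = sample_prior(1000)
Z_test = simulate(θ_test, m)
assessment = assess(θ̂, θ_test, Z_test)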
