introduce Dataset data type (#115)
joelberkeley authored Aug 17, 2021
1 parent 7304558 commit f5ae6ec
Showing 4 changed files with 16 additions and 19 deletions.
src/BayesianOptimization/Acquisition.idr (5 changes: 2 additions & 3 deletions)
@@ -29,8 +29,7 @@ import Util
 ||| @out The type of the value constructed by the `Empiric`.
 public export 0
 Empiric : Distribution targets marginal => (0 features : Shape) -> (0 out : Type) -> Type
-Empiric features out = {s : _} ->
-  Data {samples=S s} features targets -> ProbabilisticModel features {marginal} -> out
+Empiric features out = Dataset features targets -> ProbabilisticModel features {marginal} -> out

 ||| An `Acquisition` function quantifies how useful it would be to query the objective at a given
 ||| set of points, towards the goal of optimizing the objective.
@@ -62,7 +61,7 @@ expectedImprovement predict best at =
 ||| the observation value at each point.
 export
 expectedImprovementByModel : Empiric features {marginal=Gaussian [1]} $ Acquisition 1 features
-expectedImprovementByModel (query_points, _) predict at =
+expectedImprovementByModel (MkDataset query_points _) predict at =
   let best = squeeze $ reduce_min 0 $ mean $ predict query_points
   in expectedImprovement predict best at

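To make the new `Empiric` shape concrete, here is a minimal sketch of partially applying `expectedImprovementByModel`: the empiric now consumes a `Dataset` value directly, with no `{s : _}` implicit for the sample count and no tuple pattern. The module names are assumed from the file paths in this commit, `expectedImprovementAcq` and `gpModel` are hypothetical names, and `predict_latent` is used as in the tutorial below to obtain the probabilistic model; this is an illustration of the new argument shape, not code from the repository.

```idris
-- Module names assumed from the file paths in this commit.
import Tensor
import Model
import Model.GaussianProcess
import BayesianOptimization.Acquisition

-- Hypothetical helper: build the expected-improvement acquisition over a single
-- query point from observed data and a fitted conjugate GP. The `Dataset` is
-- passed straight to the `Empiric`; there is no `{s=_}` implicit any more.
expectedImprovementAcq : Dataset [2] [1] -> ConjugateGPRegression [2] -> Acquisition 1 [2]
expectedImprovementAcq observed gpModel =
  expectedImprovementByModel observed (predict_latent gpModel)
```

Compare this with the pre-commit form, where every such call site had to supply `Data {samples=S s}` along with the matching `{s=_}` implicit.
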
src/Model.idr (10 changes: 5 additions & 5 deletions)
@@ -22,13 +22,13 @@ import Tensor
 ||| Observed pairs of data points from feature and target domains. Data sets such as this are
 ||| commonly used in supervised learning settings.
 |||
-||| @samples The number of points in each of the feature and target data.
 ||| @features The shape of the feature domain.
 ||| @targets The shape of the target domain.
-public export 0
-Data : {0 samples : Nat} -> (0 features : Shape) -> (0 targets : Shape) -> Type
-Data features targets =
-  (Tensor (Vect.(::) samples features) Double, Tensor (Vect.(::) samples targets) Double)
+public export
+data Dataset : (0 features : Shape) -> (0 targets : Shape) -> Type where
+  MkDataset : {s : _} -> Tensor (Vect.(::) (S s) features) Double
+                      -> Tensor (Vect.(::) (S s) targets) Double
+                      -> Dataset features targets

 ||| A `ProbabilisticModel` is a mapping from a feature domain to a probability distribution over
 ||| a target domain.
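As a quick sketch of the new type in use (mirroring `historicData` from the tutorial change below; the name `observed`, the values, and the module names are assumptions), a `Dataset` is built with `MkDataset` from a feature tensor and a target tensor whose leading dimensions agree:

```idris
-- Module names assumed from the file paths in this commit.
import Tensor
import Model

-- Two observations of a two-dimensional feature and a scalar target (values are
-- arbitrary). The leading dimension of both tensors is the constructor's `S s`,
-- so a `Dataset` is never empty, and the sample count no longer needs to be
-- threaded through downstream signatures as a separate `samples` argument.
observed : Dataset [2] [1]
observed = MkDataset (const [[0.0, 1.0], [2.0, 3.0]])
                     (const [[0.5], [1.5]])
```

Packaging the sample count existentially inside the constructor is what lets the other files in this commit drop their `{s : _}` parameters.
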
src/Model/GaussianProcess.idr (8 changes: 3 additions & 5 deletions)
@@ -93,19 +93,17 @@ predict_latent (MkConjugateGPR mk_gp gp_params _) x =
 export
 fit : ConjugateGPRegression features
    -> (forall n . Tensor [n] Double -> Optimizer $ Tensor [n] Double)
-   -> {s : _} -> Data {samples=S s} features [1]
+   -> Dataset features [1]
    -> ConjugateGPRegression features
-fit (MkConjugateGPR {p} mk_prior gp_params noise) optimizer {s} training_data =
+fit (MkConjugateGPR {p} mk_prior gp_params noise) optimizer (MkDataset x y) =
   let objective : Tensor [S p] Double -> Tensor [] Double
       objective params = let (noise, prior_params) = split 1 params
-                             (x, y) = training_data
                           in log_marginal_likelihood (mk_prior prior_params)
                                                      (squeeze noise) (x, squeeze y)

       (noise, gp_params) := split 1 $ optimizer (concat (expand 0 noise) gp_params) objective

       mk_posterior : Tensor [p] Double -> GaussianProcess features
-      mk_posterior params' = let (x, y) = training_data
-                              in posterior (mk_prior params') (squeeze noise) (x, squeeze y)
+      mk_posterior params' = posterior (mk_prior params') (squeeze noise) (x, squeeze y)

    in MkConjugateGPR mk_posterior gp_params (squeeze noise)
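
For the call-site view of this change, here is a hedged sketch of invoking `fit`: the training data arrives as a single `Dataset` argument rather than an `(x, y)` pair plus an implicit sample count. The names `refit`, `myGP` and `anyOptimizer` and the shapes are hypothetical; the rank-2 optimiser type is copied from the signature above.

```idris
-- Module names assumed from the file paths in this commit; `refit`, `myGP` and
-- `anyOptimizer` are hypothetical names, and the shapes are arbitrary.
import Tensor
import Model
import Model.GaussianProcess
import Optimize

-- Wrap fresh observations in a `Dataset` at the call boundary and re-fit the
-- model. The old `{s : _}` sample-count implicit and `(x, y)` pair are gone.
refit : ConjugateGPRegression [2]
     -> (forall n . Tensor [n] Double -> Optimizer $ Tensor [n] Double)
     -> Tensor [3, 2] Double -> Tensor [3, 1] Double
     -> ConjugateGPRegression [2]
refit myGP anyOptimizer x y = fit myGP anyOptimizer (MkDataset x y)
```

Here the implicit sample count of the `Dataset` is inferred from the leading dimension of the tensors, so the caller never names it.
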
tutorials/BayesianOptimization.md (12 changes: 6 additions & 6 deletions)
@@ -53,8 +53,8 @@ import Optimize
 import Util
 -->
 ```idris
-historicData : Data {samples=3} [2] [1]
-historicData = (const [[0.3, 0.4], [0.5, 0.2], [0.3, 0.9]], const [[1.2], [-0.5], [0.7]])
+historicData : Dataset [2] [1]
+historicData = MkDataset (const [[0.3, 0.4], [0.5, 0.2], [0.3, 0.9]]) (const [[1.2], [-0.5], [0.7]])
 ```

 and model that data
@@ -173,8 +173,8 @@ The `Morphism i o`, or `i ~> o` type has proven flexible in allowing us to const
 With this new functionality at hand, we'll return to our objective with failure regions. We'll need some data on failure regions, and to model that data. Recall that we can represent this in any form we like, and we'll simply use a dedicated `Data` set and `ProbabilisticModel`:

 ```idris
-failureData : Data {samples=4} [2] [1]
-failureData = (const [[0.3, 0.4], [0.5, 0.2], [0.3, 0.9], [0.7, 0.1]], const [[0], [0], [0], [1]])
+failureData : Dataset [2] [1]
+failureData = MkDataset (const [[0.3, 0.4], [0.5, 0.2], [0.3, 0.9], [0.7, 0.1]]) (const [[0], [0], [0], [1]])

 failureModel : ConjugateGPRegression [2]
 failureModel = let mk_gp = \len => MkGP zero (rbf $ squeeze len)
@@ -197,8 +197,8 @@ Idris generates two methods `objective` and `failure` from this `record`, which

 ```idris
 newPoint'' : Tensor [1, 2] Double
-newPoint'' = let eci = objective >>> expectedConstrainedImprovement (const 0.5) {s=_}
-                 pof = failure >>> probabilityOfFeasibility (const 0.5) {s=_}
+newPoint'' = let eci = objective >>> expectedConstrainedImprovement (const 0.5)
+                 pof = failure >>> probabilityOfFeasibility (const 0.5)
                  acquisition = map optimizer (eci <*> pof)
                  dataAndModel = Label (historicData, predict_latent model)
                                       (failureData, predict_latent failureModel)