Merge pull request #79 from deepskies/issue/yaml
Issue/yaml
beckynevin authored Apr 29, 2024
2 parents 3fdcbcb + 5ea71fd commit aed0acc
Showing 25 changed files with 870 additions and 683 deletions.
23 changes: 17 additions & 6 deletions README.md
@@ -12,22 +12,33 @@ DeepUQ is a package for injecting and measuring different types of uncertainty i
![GitHub Workflow Status](https://img.shields.io/github/workflow/status/owner/repo/test-repo?label=test)

## Workflow
-![Folder structure overview](images/folders_deepUQ.png)
+![Folder structure overview](images/DeepUQWorkflow_Maggie.png)

 Getting a little more specific:

-![python module overview](images/workflow_deepUQ.png)
-These modules can be accessed via the ipython example notebooks or via the model modules (i.e. `DeepEnsemble.py`). For example, to ingest data and train a Deep Ensemble:
+The scripts can be accessed via the ipython example notebooks or via the model modules (i.e. `DeepEnsemble.py`). For example, to ingest data and train a Deep Ensemble:
 > cd src/scripts/

-> python DeepEnsemble.py low 10 /Users/rnevin/Documents/DeepUQ/ --save_final_checkpoint --savefig --n_epochs=10
+> python DeepEnsemble.py
+
+With no config file specified, this command will pull settings from the `default.py` file within `utils`. For the `DeepEnsemble.py` script, it will automatically select the `DefaultsDE` dictionary.
+
+Another option is to specify your own config file:
+
+> python DeepEnsemble.py --config "path/to/config/myconfig.yaml"
+
+where you would replace "path/to/config/myconfig.yaml" with the path to your own yaml.
+
+The third option is to input settings on the command line. These choices are then combined with the default settings and output in a temporary yaml.
+
+> python DeepEnsemble.py --noise_level "low" --n_models 10 --out_dir ./DeepUQResources/results/ --save_final_checkpoint True --savefig True --n_epochs 10

 This command will train a 10 network, 10 epoch ensemble on the low noise data and will save figures and final checkpoints to the specified directory. Required arguments are the noise setting (low/medium/high), the number of ensembles, and the working directory.

 For more information on the arguments:
 > python DeepEnsemble.py --help
+
+The other available script is the `DeepEvidentialRegression.py` script:
+> python DeepEvidentialRegression.py --help
## Installation

### Clone this repo
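To make the three configuration routes concrete: a minimal sketch, assuming hypothetical flag names and a stand-in `DefaultsDE` dictionary, of how command-line settings might be merged over the defaults and written to a temporary yaml. This is an illustration of the mechanism described above, not the package's verified implementation:

```python
# Hypothetical sketch: merge CLI flags over default settings, dump a temp yaml.
import argparse
import tempfile

import yaml

DefaultsDE = {"noise_level": "low", "n_models": 5, "n_epochs": 100}  # stand-in defaults

parser = argparse.ArgumentParser()
parser.add_argument("--noise_level", type=str)
parser.add_argument("--n_models", type=int)
parser.add_argument("--n_epochs", type=int)
args = parser.parse_args()

# Flags supplied on the command line override defaults; omitted flags stay None.
config = {**DefaultsDE, **{k: v for k, v in vars(args).items() if v is not None}}

# Persist the merged settings so downstream training code reads a single yaml.
with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as f:
    yaml.safe_dump(config, f)
    print("merged settings written to", f.name)
```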
Binary file added images/DeepUQWorkflow_Maggie.png
Binary file removed ms.pdf
4 changes: 4 additions & 0 deletions pyproject.toml
@@ -7,6 +7,10 @@ authors = ["beckynevin <beckynevin@gmail.com>"]
readme = "README.md"
license = "MIT"

[tool.poetry.scripts]
ensamble = "src.scripts.DeepEnsemble:main"
der = "src.scripts.DeepEvidentialRegression:main"

[tool.poetry.dependencies]
python = ">=3.9,<3.11"
jupyter = "^1.0.0"
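Assuming a standard Poetry setup, these entry points let you invoke the scripts by name instead of `python path/to/script.py`:

> poetry run ensamble --help

> poetry run der --help

(`poetry run` resolves each registered name to its `module:function` target; `ensamble` reproduces the spelling registered above.)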
7 changes: 0 additions & 7 deletions showyourwork.yml

This file was deleted.

6 changes: 6 additions & 0 deletions src/data/__init__.py
@@ -0,0 +1,6 @@
from data.data import MyDataLoader, DataPreparation

DataModules = {
    "MyDataLoader": MyDataLoader,
    "DataPreparation": DataPreparation
}
130 changes: 4 additions & 126 deletions src/scripts/io.py → src/data/data.py
@@ -1,107 +1,17 @@
# Contains modules used to prepare a dataset
# with varying noise properties
import argparse
import numpy as np
from sklearn.model_selection import train_test_split
import pickle
from torch.distributions import Uniform
from torch.utils.data import TensorDataset, DataLoader as TorchDataLoader  # aliased to avoid clashing with the local DataLoader class below
import torch
import h5py


def parse_args():
    parser = argparse.ArgumentParser(description="data handling module")
    parser.add_argument(
        "size_df",
        type=int,  # was float; the value is interpolated into an .h5 filename
        nargs="?",  # positionals cannot take required=False; nargs="?" makes the default apply
        default=1000,
        help="Used to load the associated .h5 data file",
    )
    parser.add_argument(
        "noise_level",
        type=str,
        nargs="?",
        default="low",
        help="low, medium, high or vhigh, \
            used to look up associated sigma value",
    )
    parser.add_argument(
        # Hypothetical rename: the original repeated "size_df" here,
        # which argparse rejects as a duplicate argument name.
        "data_path",
        type=str,
        nargs="?",
        default="/repo/embargo",
        help="Butler Repository path from which data is transferred. \
            Input str. Default = '/repo/embargo'",
    )
    parser.add_argument(
        "--normalize",
        required=False,
        action="store_true",
        help="If set, normalize the dataset",
    )
    parser.add_argument(
        "--val_proportion",
        type=float,
        required=False,
        default=0.1,
        help="Proportion of the dataset to use as validation",
    )
    parser.add_argument(
        "--randomseed",
        type=int,  # was float; a seed should be an integer
        required=False,
        default=42,
        help="Random seed used for shuffling the training and validation set",
    )
    parser.add_argument(
        "--batchsize",
        type=int,  # was float; torch's DataLoader expects an int batch size
        required=False,
        default=100,
        help="Size of batches used in the train dataloader",
    )
    return parser.parse_args()


class ModelLoader:
    def save_model_pkl(self, path, model_name, posterior):
        """
        Save the pkl'ed saved posterior model
        :param path: Location to save the model
        :param model_name: Name of the model
        :param posterior: Model object to be saved
        """
        file_name = path + model_name + ".pkl"
        with open(file_name, "wb") as file:
            pickle.dump(posterior, file)

    def load_model_pkl(self, path, model_name):
        """
        Load the pkl'ed saved posterior model
        :param path: Location to load the model from
        :param model_name: Name of the model
        :return: Loaded model object that can be used with the predict function
        """
        print(path)
        with open(path + model_name + ".pkl", "rb") as file:
            posterior = pickle.load(file)
        return posterior

    def predict(self, input, model):  # `self` was missing in the original signature
        """
        :param input: loaded object used for inference
        :param model: loaded model
        :return: Prediction
        """
        return 0  # placeholder; no inference is implemented yet

class MyDataLoader:
    def __init__(self):
        self.data = None


class DataLoader:
    def save_data_pkl(self, data_name, data, path="../data/"):
        """
        Save and load the pkl'ed training/test set
@@ -197,7 +107,7 @@ def simulate_data(
    sigma,
    simulation_name,
    x=np.linspace(0, 100, 101),
-    seed=13
+    seed=42
):
    if simulation_name == "linear_homogeneous":
        # convert to numpy array (if tensor):
@@ -300,35 +210,3 @@ def train_val_split(
        random_state=random_state,
    )
    return x_train, x_val, y_train, y_val


# Example usage:
if __name__ == "__main__":
    namespace = parse_args()
    size_df = namespace.size_df
    noise = namespace.noise_level
    norm = namespace.normalize
    val_prop = namespace.val_proportion
    rs = namespace.randomseed
    BATCH_SIZE = namespace.batchsize
    sigma = DataPreparation.get_sigma(noise)
    loader = DataLoader()
    data = loader.load_data_h5("linear_sigma_" + str(sigma) +
                               "_size_" + str(size_df))
    len_df = len(data["params"][:, 0].numpy())
    len_x = len(data["inputs"].numpy())
    ms_array = np.repeat(data["params"][:, 0].numpy(), len_x)
    bs_array = np.repeat(data["params"][:, 1].numpy(), len_x)
    xs_array = np.tile(data["inputs"].numpy(), len_df)
    ys_array = np.reshape(data["output"].numpy(), (len_df * len_x))
    inputs = np.array([xs_array, ms_array, bs_array]).T
    model_inputs, model_outputs = DataPreparation.normalize(inputs,
                                                            ys_array,
                                                            norm)
    x_train, x_val, y_train, y_val = DataPreparation.train_val_split(
        model_inputs, model_outputs, test_size=val_prop, random_state=rs
    )
    trainData = TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train))
    # This must be torch's DataLoader (aliased above): the local DataLoader
    # class is a pickle/h5 helper and cannot batch a TensorDataset.
    trainDataLoader = TorchDataLoader(trainData,
                                      batch_size=BATCH_SIZE,
                                      shuffle=True)
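The repeat/tile pattern above flattens a (number of simulations × number of x values) grid into one row per data point. A tiny worked example of the assumed shapes:

```python
# Illustration of the repeat/tile alignment used in the example block above.
import numpy as np

params = np.array([[1.0, 0.0], [2.0, 5.0]])   # two (m, b) pairs -> len_df = 2
xs = np.array([0.0, 1.0, 2.0])                # three x values  -> len_x = 3

ms = np.repeat(params[:, 0], len(xs))  # [1, 1, 1, 2, 2, 2]
bs = np.repeat(params[:, 1], len(xs))  # [0, 0, 0, 5, 5, 5]
x_flat = np.tile(xs, len(params))      # [0, 1, 2, 0, 1, 2]

inputs = np.array([x_flat, ms, bs]).T  # one row per (x, m, b) combination
print(inputs.shape)                    # (6, 3)
```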
5 changes: 5 additions & 0 deletions src/models/__init__.py
@@ -0,0 +1,5 @@
from models.models import ModelLoader

ModelModules = {
    "ModelLoader": ModelLoader
}
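`DataModules` (in `src/data/__init__.py`, above) and `ModelModules` here share a registry pattern: a string key, for example from a yaml config, selects the class. A minimal sketch of the assumed lookup, with illustrative config keys:

```python
# Hypothetical config-driven lookup against the two registries.
from data import DataModules
from models import ModelModules

config = {"data": "DataPreparation", "model": "ModelLoader"}  # e.g. parsed from yaml

data_cls = DataModules[config["data"]]      # class object, looked up by name
model_cls = ModelModules[config["model"]]

data_prep = data_cls()      # instantiate whatever the config selected
model_loader = model_cls()
```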
65 changes: 40 additions & 25 deletions src/scripts/models.py → src/models/models.py
@@ -1,9 +1,48 @@
# Contains modules used to prepare a dataset
# with varying noise properties
import numpy as np
import pickle
import torch
import torch.nn as nn  # the original listed this import twice
import math


class ModelLoader:
    def save_model_pkl(self, path, model_name, posterior):
        """
        Save the pkl'ed saved posterior model
        :param path: Location to save the model
        :param model_name: Name of the model
        :param posterior: Model object to be saved
        """
        file_name = path + model_name + ".pkl"
        with open(file_name, "wb") as file:
            pickle.dump(posterior, file)

    def load_model_pkl(self, path, model_name):
        """
        Load the pkl'ed saved posterior model
        :param path: Location to load the model from
        :param model_name: Name of the model
        :return: Loaded model object that can be used with the predict function
        """
        print(path)
        with open(path + model_name + ".pkl", "rb") as file:
            posterior = pickle.load(file)
        return posterior

    def predict(self, input, model):  # added `self` so this works as a method
        """
        :param input: loaded object used for inference
        :param model: loaded model
        :return: Prediction
        """
        return 0  # placeholder; no inference is implemented yet
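A short usage sketch for the pickle helpers above; the posterior object is an arbitrary picklable stand-in:

```python
# Hypothetical round trip with ModelLoader's pickle helpers.
loader = ModelLoader()
posterior = {"weights": [0.1, 0.2]}  # any picklable object works here
loader.save_model_pkl("./", "my_posterior", posterior)  # writes ./my_posterior.pkl
restored = loader.load_model_pkl("./", "my_posterior")  # prints the path, then loads
assert restored == posterior
```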


class DERLayer(nn.Module):
    def __init__(self):
        super().__init__()
@@ -209,27 +248,3 @@ def loss_bnll(mean, variance, target, beta):  # beta=0.5):
    if beta > 0:
        loss = loss * (variance.detach() ** beta)
    return loss.sum(axis=-1)


'''
def get_loss(transform, beta=None):
    if beta:
        def beta_nll_loss(targets, outputs, beta=beta):
            """Compute beta-NLL loss
            """
            mu = outputs[..., 0:1]
            var = transform(outputs[..., 1:2])
            loss = (K.square((targets - mu)) / var + K.log(var))
            loss = loss * K.stop_gradient(var) ** beta
            return loss
        return beta_nll_loss
    else:
        def negative_log_likelihood(targets, outputs):
            """Calculate the negative loglikelihood."""
            mu = outputs[..., 0:1]
            var = transform(outputs[..., 1:2])
            y = targets[..., 0:1]
            loglik = - K.log(var) - K.square((y - mu)) / var
            return - loglik
        return negative_log_likelihood
'''
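The deleted Keras block encodes the same beta-NLL idea as `loss_bnll` above: a Gaussian negative log-likelihood, `(target - mean)**2 / variance + log(variance)`, down-weighted by a gradient-stopped power of the variance. A self-contained PyTorch sketch of that formula (the 0.5 factor and shapes are assumptions; only the `beta` tail of `loss_bnll` is visible above):

```python
# Minimal beta-NLL sketch in PyTorch, mirroring the visible tail of loss_bnll.
import torch


def beta_nll(mean, variance, target, beta=0.5):
    # Per-element Gaussian NLL (up to additive constants).
    loss = 0.5 * ((target - mean) ** 2 / variance + variance.log())
    if beta > 0:
        # Gradient-stopped variance weighting keeps confident (low-variance)
        # points from dominating the gradient signal.
        loss = loss * variance.detach() ** beta
    return loss.sum(axis=-1)


mean, variance, target = torch.zeros(4, 1), torch.ones(4, 1), torch.randn(4, 1)
print(beta_nll(mean, variance, target))
```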