Merge pull request #79 from deepskies/issue/yaml
Issue/yaml
beckynevin authored Apr 29, 2024
2 parents 3fdcbcb + 5ea71fd commit aed0acc
Showing 25 changed files with 870 additions and 683 deletions.
23 changes: 17 additions & 6 deletions README.md
@@ -12,22 +12,33 @@ DeepUQ is a package for injecting and measuring different types of uncertainty i
![GitHub Workflow Status](https://img.shields.io/github/workflow/status/owner/repo/test-repo?label=test)

## Workflow
-![Folder structure overview](images/folders_deepUQ.png)
+![Folder structure overview](images/DeepUQWorkflow_Maggie.png)

 Getting a little more specific:

-![python module overview](images/workflow_deepUQ.png)
-These modules can be accessed via the ipython example notebooks or via the model modules (i.e. `DeepEnsemble.py`). For example, to ingest data and train a Deep Ensemble:
+The scripts can be accessed via the ipython example notebooks or via the model modules (i.e. `DeepEnsemble.py`). For example, to ingest data and train a Deep Ensemble:
 > cd src/scripts/

-> python DeepEnsemble.py low 10 /Users/rnevin/Documents/DeepUQ/ --save_final_checkpoint --savefig --n_epochs=10
+> python DeepEnsemble.py
+
+With no config file specified, this command will pull settings from the `default.py` file within `utils`. For the `DeepEnsemble.py` script, it will automatically select the `DefaultsDE` dictionary.
+
+Another option is to specify your own config file:
+
+> python DeepEnsemble.py --config "path/to/config/myconfig.yaml"
+
+where you would replace "path/to/config/myconfig.yaml" with the path to your own yaml.
+
+The third option is to input settings on the command line. These choices are then combined with the default settings and output in a temporary yaml.
+
+> python DeepEnsemble.py --noise_level "low" --n_models 10 --out_dir ./DeepUQResources/results/ --save_final_checkpoint True --savefig True --n_epochs 10

 This command will train a 10 network, 10 epoch ensemble on the low noise data and will save figures and final checkpoints to the specified directory. Required arguments are the noise setting (low/medium/high), the number of ensembles, and the working directory.

 For more information on the arguments:
 > python DeepEnsemble.py --help
+
+The other available script is the `DeepEvidentialRegression.py` script:
+> python DeepEvidentialRegression.py --help
## Installation

### Clone this repo
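To make the three configuration routes concrete: a minimal sketch, assuming hypothetical flag names and a stand-in `DefaultsDE` dictionary, of how command-line settings might be merged over the defaults and written to a temporary yaml. This is an illustration of the mechanism described above, not the package's verified implementation:

```python
# Hypothetical sketch: merge CLI flags over default settings, dump a temp yaml.
import argparse
import tempfile

import yaml

DefaultsDE = {"noise_level": "low", "n_models": 5, "n_epochs": 100}  # stand-in defaults

parser = argparse.ArgumentParser()
parser.add_argument("--noise_level", type=str)
parser.add_argument("--n_models", type=int)
parser.add_argument("--n_epochs", type=int)
args = parser.parse_args()

# Flags supplied on the command line override defaults; omitted flags stay None.
config = {**DefaultsDE, **{k: v for k, v in vars(args).items() if v is not None}}

# Persist the merged settings so downstream training code reads a single yaml.
with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as f:
    yaml.safe_dump(config, f)
    print("merged settings written to", f.name)
```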
Binary file added images/DeepUQWorkflow_Maggie.png
Binary file removed ms.pdf
4 changes: 4 additions & 0 deletions pyproject.toml
@@ -7,6 +7,10 @@ authors = ["beckynevin <beckynevin@gmail.com>"]
readme = "README.md"
license = "MIT"

[tool.poetry.scripts]
ensamble = "src.scripts.DeepEnsemble:main"
der = "src.scripts.DeepEvidentialRegression:main"

[tool.poetry.dependencies]
python = ">=3.9,<3.11"
jupyter = "^1.0.0"
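Assuming a standard Poetry setup, these entry points let you invoke the scripts by name instead of `python path/to/script.py`:

> poetry run ensamble --help

> poetry run der --help

(`poetry run` resolves each registered name to its `module:function` target; `ensamble` reproduces the spelling registered above.)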
7 changes: 0 additions & 7 deletions showyourwork.yml

This file was deleted.

6 changes: 6 additions & 0 deletions src/data/__init__.py
@@ -0,0 +1,6 @@
from data.data import MyDataLoader, DataPreparation

DataModules = {
    "MyDataLoader": MyDataLoader,
    "DataPreparation": DataPreparation
}
130 changes: 4 additions & 126 deletions src/scripts/io.py → src/data/data.py
@@ -1,107 +1,17 @@
# Contains modules used to prepare a dataset
# with varying noise properties
import argparse
import numpy as np
from sklearn.model_selection import train_test_split
import pickle
from torch.distributions import Uniform
from torch.utils.data import TensorDataset, DataLoader as TorchDataLoader  # aliased to avoid clashing with the local DataLoader class below
import torch
import h5py


def parse_args():
    parser = argparse.ArgumentParser(description="data handling module")
    parser.add_argument(
        "size_df",
        type=int,  # was float; the value is interpolated into an .h5 filename
        nargs="?",  # positionals cannot take required=False; nargs="?" makes the default apply
        default=1000,
        help="Used to load the associated .h5 data file",
    )
    parser.add_argument(
        "noise_level",
        type=str,
        nargs="?",
        default="low",
        help="low, medium, high or vhigh, \
            used to look up associated sigma value",
    )
    parser.add_argument(
        # Hypothetical rename: the original repeated "size_df" here,
        # which argparse rejects as a duplicate argument name.
        "data_path",
        type=str,
        nargs="?",
        default="/repo/embargo",
        help="Butler Repository path from which data is transferred. \
            Input str. Default = '/repo/embargo'",
    )
    parser.add_argument(
        "--normalize",
        required=False,
        action="store_true",
        help="If set, normalize the dataset",
    )
    parser.add_argument(
        "--val_proportion",
        type=float,
        required=False,
        default=0.1,
        help="Proportion of the dataset to use as validation",
    )
    parser.add_argument(
        "--randomseed",
        type=int,  # was float; a seed should be an integer
        required=False,
        default=42,
        help="Random seed used for shuffling the training and validation set",
    )
    parser.add_argument(
        "--batchsize",
        type=int,  # was float; torch's DataLoader expects an int batch size
        required=False,
        default=100,
        help="Size of batches used in the train dataloader",
    )
    return parser.parse_args()


class ModelLoader:
    def save_model_pkl(self, path, model_name, posterior):
        """
        Save the pkl'ed saved posterior model
        :param path: Location to save the model
        :param model_name: Name of the model
        :param posterior: Model object to be saved
        """
        file_name = path + model_name + ".pkl"
        with open(file_name, "wb") as file:
            pickle.dump(posterior, file)

    def load_model_pkl(self, path, model_name):
        """
        Load the pkl'ed saved posterior model
        :param path: Location to load the model from
        :param model_name: Name of the model
        :return: Loaded model object that can be used with the predict function
        """
        print(path)
        with open(path + model_name + ".pkl", "rb") as file:
            posterior = pickle.load(file)
        return posterior

    def predict(self, input, model):  # `self` was missing in the original signature
        """
        :param input: loaded object used for inference
        :param model: loaded model
        :return: Prediction
        """
        return 0  # placeholder; no inference is implemented yet

class MyDataLoader:
    def __init__(self):
        self.data = None


class DataLoader:
    def save_data_pkl(self, data_name, data, path="../data/"):
        """
        Save and load the pkl'ed training/test set
@@ -197,7 +107,7 @@ def simulate_data(
    sigma,
    simulation_name,
    x=np.linspace(0, 100, 101),
-    seed=13
+    seed=42
):
    if simulation_name == "linear_homogeneous":
        # convert to numpy array (if tensor):
@@ -300,35 +210,3 @@ def train_val_split(
        random_state=random_state,
    )
    return x_train, x_val, y_train, y_val


# Example usage:
if __name__ == "__main__":
    namespace = parse_args()
    size_df = namespace.size_df
    noise = namespace.noise_level
    norm = namespace.normalize
    val_prop = namespace.val_proportion
    rs = namespace.randomseed
    BATCH_SIZE = namespace.batchsize
    sigma = DataPreparation.get_sigma(noise)
    loader = DataLoader()
    data = loader.load_data_h5("linear_sigma_" + str(sigma) +
                               "_size_" + str(size_df))
    len_df = len(data["params"][:, 0].numpy())
    len_x = len(data["inputs"].numpy())
    ms_array = np.repeat(data["params"][:, 0].numpy(), len_x)
    bs_array = np.repeat(data["params"][:, 1].numpy(), len_x)
    xs_array = np.tile(data["inputs"].numpy(), len_df)
    ys_array = np.reshape(data["output"].numpy(), (len_df * len_x))
    inputs = np.array([xs_array, ms_array, bs_array]).T
    model_inputs, model_outputs = DataPreparation.normalize(inputs,
                                                            ys_array,
                                                            norm)
    x_train, x_val, y_train, y_val = DataPreparation.train_val_split(
        model_inputs, model_outputs, test_size=val_prop, random_state=rs
    )
    trainData = TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train))
    # This must be torch's DataLoader (aliased above): the local DataLoader
    # class is a pickle/h5 helper and cannot batch a TensorDataset.
    trainDataLoader = TorchDataLoader(trainData,
                                      batch_size=BATCH_SIZE,
                                      shuffle=True)
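The repeat/tile pattern above flattens a (number of simulations × number of x values) grid into one row per data point. A tiny worked example of the assumed shapes:

```python
# Illustration of the repeat/tile alignment used in the example block above.
import numpy as np

params = np.array([[1.0, 0.0], [2.0, 5.0]])   # two (m, b) pairs -> len_df = 2
xs = np.array([0.0, 1.0, 2.0])                # three x values  -> len_x = 3

ms = np.repeat(params[:, 0], len(xs))  # [1, 1, 1, 2, 2, 2]
bs = np.repeat(params[:, 1], len(xs))  # [0, 0, 0, 5, 5, 5]
x_flat = np.tile(xs, len(params))      # [0, 1, 2, 0, 1, 2]

inputs = np.array([x_flat, ms, bs]).T  # one row per (x, m, b) combination
print(inputs.shape)                    # (6, 3)
```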
5 changes: 5 additions & 0 deletions src/models/__init__.py
@@ -0,0 +1,5 @@
from models.models import ModelLoader

ModelModules = {
    "ModelLoader": ModelLoader
}
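`DataModules` (in `src/data/__init__.py`, above) and `ModelModules` here share a registry pattern: a string key, for example from a yaml config, selects the class. A minimal sketch of the assumed lookup, with illustrative config keys:

```python
# Hypothetical config-driven lookup against the two registries.
from data import DataModules
from models import ModelModules

config = {"data": "DataPreparation", "model": "ModelLoader"}  # e.g. parsed from yaml

data_cls = DataModules[config["data"]]      # class object, looked up by name
model_cls = ModelModules[config["model"]]

data_prep = data_cls()      # instantiate whatever the config selected
model_loader = model_cls()
```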
65 changes: 40 additions & 25 deletions src/scripts/models.py → src/models/models.py
@@ -1,9 +1,48 @@
# Contains modules used to prepare a dataset
# with varying noise properties
import numpy as np
import pickle
import torch
import torch.nn as nn  # the original listed this import twice
import math


class ModelLoader:
    def save_model_pkl(self, path, model_name, posterior):
        """
        Save the pkl'ed saved posterior model
        :param path: Location to save the model
        :param model_name: Name of the model
        :param posterior: Model object to be saved
        """
        file_name = path + model_name + ".pkl"
        with open(file_name, "wb") as file:
            pickle.dump(posterior, file)

    def load_model_pkl(self, path, model_name):
        """
        Load the pkl'ed saved posterior model
        :param path: Location to load the model from
        :param model_name: Name of the model
        :return: Loaded model object that can be used with the predict function
        """
        print(path)
        with open(path + model_name + ".pkl", "rb") as file:
            posterior = pickle.load(file)
        return posterior

    def predict(self, input, model):  # added `self` so this works as a method
        """
        :param input: loaded object used for inference
        :param model: loaded model
        :return: Prediction
        """
        return 0  # placeholder; no inference is implemented yet
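A short usage sketch for the pickle helpers above; the posterior object is an arbitrary picklable stand-in:

```python
# Hypothetical round trip with ModelLoader's pickle helpers.
loader = ModelLoader()
posterior = {"weights": [0.1, 0.2]}  # any picklable object works here
loader.save_model_pkl("./", "my_posterior", posterior)  # writes ./my_posterior.pkl
restored = loader.load_model_pkl("./", "my_posterior")  # prints the path, then loads
assert restored == posterior
```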


class DERLayer(nn.Module):
    def __init__(self):
        super().__init__()
@@ -209,27 +248,3 @@ def loss_bnll(mean, variance, target, beta):  # beta=0.5):
    if beta > 0:
        loss = loss * (variance.detach() ** beta)
    return loss.sum(axis=-1)


'''
def get_loss(transform, beta=None):
    if beta:
        def beta_nll_loss(targets, outputs, beta=beta):
            """Compute beta-NLL loss
            """
            mu = outputs[..., 0:1]
            var = transform(outputs[..., 1:2])
            loss = (K.square((targets - mu)) / var + K.log(var))
            loss = loss * K.stop_gradient(var) ** beta
            return loss
        return beta_nll_loss
    else:
        def negative_log_likelihood(targets, outputs):
            """Calculate the negative loglikelihood."""
            mu = outputs[..., 0:1]
            var = transform(outputs[..., 1:2])
            y = targets[..., 0:1]
            loglik = - K.log(var) - K.square((y - mu)) / var
            return - loglik
        return negative_log_likelihood
'''
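The deleted Keras block encodes the same beta-NLL idea as `loss_bnll` above: a Gaussian negative log-likelihood, `(target - mean)**2 / variance + log(variance)`, down-weighted by a gradient-stopped power of the variance. A self-contained PyTorch sketch of that formula (the 0.5 factor and shapes are assumptions; only the `beta` tail of `loss_bnll` is visible above):

```python
# Minimal beta-NLL sketch in PyTorch, mirroring the visible tail of loss_bnll.
import torch


def beta_nll(mean, variance, target, beta=0.5):
    # Per-element Gaussian NLL (up to additive constants).
    loss = 0.5 * ((target - mean) ** 2 / variance + variance.log())
    if beta > 0:
        # Gradient-stopped variance weighting keeps confident (low-variance)
        # points from dominating the gradient signal.
        loss = loss * variance.detach() ** beta
    return loss.sum(axis=-1)


mean, variance, target = torch.zeros(4, 1), torch.ones(4, 1), torch.randn(4, 1)
print(beta_nll(mean, variance, target))
```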