APT Loss Functions #58

Open · wants to merge 7 commits into main
31 changes: 31 additions & 0 deletions paltas/Analysis/dataset_generation.py
@@ -120,6 +120,37 @@ def unnormalize_outputs(input_norm_path,learning_params,mean,standard_dev=None,
		if cov_mat is not None:
			cov_mat[:,lpi,:] *= param_std
			cov_mat[:,:,lpi] *= param_std


# TODO: write test after moving (make sure identity operation w/ unnormalized)
Owner: I think you've added a test for this now, so please remove the TODO.

def normalize_mu_prec(mu,prec_mat,input_norm_path):
	"""Helper function to convert mu, prec_matrix to normalized parameter
	space

	Args:
Owner: I'm not familiar with this string formatting. Does that shape information translate to Read the Docs?

Owner: Remove the indentation from Returns (it should have the same indentation as Args).

		mu ([float]), shape (dim): array of means for each param
		prec_mat ([float]), shape (dim,dim): precision matrix
		input_norm_path (string): path to norms.csv

	Returns:
		(np.array, np.array): normalized mu, shape (dim), and normalized
			precision matrix, shape (dim,dim)
	"""
	mu_copy = np.copy(mu)
	norm_dict = pd.read_csv(input_norm_path)
	norm_means = norm_dict['mean'].to_numpy()
	norm_std = norm_dict['std'].to_numpy()

	cov_mat = np.linalg.inv(prec_mat)

	# Do the opposite of dataset_generation.unnormalize_outputs: subtract the
	# mean and divide by the standard deviation of each parameter.
	for i in range(len(mu)):
		mu_copy[i] -= norm_means[i]
		mu_copy[i] /= norm_std[i]

		cov_mat[i,:] /= norm_std[i]
		cov_mat[:,i] /= norm_std[i]

	return mu_copy, np.linalg.inv(cov_mat)
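A quick round-trip check of this helper (not part of the diff; the norms.csv contents below are invented for illustration, but the 'mean' and 'std' columns match what the function reads):

```python
import numpy as np
import pandas as pd

from paltas.Analysis.dataset_generation import normalize_mu_prec

# Hypothetical norms.csv with the 'mean' and 'std' columns the helper reads.
pd.DataFrame({'parameter':['p0','p1'],'mean':[0.5,-1.0],
	'std':[2.0,0.25]}).to_csv('norms.csv',index=False)

mu = np.array([1.0,0.0])
cov = np.array([[0.4,0.1],[0.1,0.3]])
mu_norm, prec_norm = normalize_mu_prec(mu,np.linalg.inv(cov),'norms.csv')

# Normalizing a parameter is (x - mean)/std, so the covariance picks up a
# factor of 1/(std_i*std_j) and the mean is shifted and rescaled.
assert np.allclose(mu_norm,(mu-[0.5,-1.0])/[2.0,0.25])
assert np.allclose(np.linalg.inv(prec_norm),
	cov/np.outer([2.0,0.25],[2.0,0.25]))
```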


def kwargs_detector_to_tf_noise(image,kwargs_detector):
155 changes: 155 additions & 0 deletions paltas/Analysis/loss_functions.py
@@ -9,6 +9,7 @@
import tensorflow as tf
import numpy as np
import itertools
from paltas.Analysis.dataset_generation import normalize_mu_prec


class BaseLoss():
@@ -425,3 +426,157 @@ def loss(self,y_true,output):
			tf.matmul(y_pred,flip_mat),prec_mat,L_diag))
		loss_stack = tf.stack(loss_list,axis=-1)
		return tf.reduce_min(loss_stack,axis=-1)


class FullCovarianceAPTLoss(FullCovarianceLoss):
	""" Automatic Posterior Transformation (APT) loss w/ full covariance matrix

	Args:
		num_params (int): The number of parameters to predict.
		prior_means ([float]): Means of the initial Gaussian training prior
		prior_prec ([[float]]): Precision matrix of the initial Gaussian
			training prior
		proposal_means ([float]): Means of the updated proposal Gaussian
		proposal_prec ([[float]]): Precision matrix of the updated proposal
			Gaussian
		input_norm_path (str): Path to norms.csv. If provided, the prior and
			proposal are converted to the normalized parameter space.
		flip_pairs ([[int,...],...]): A list of lists. Each list contains
			the indices of parameters that, when flipped together, return an
			equivalent lens model.
		weight_terms: Passed through unchanged to FullCovarianceLoss.

	Notes:
		If multiple lists are provided, all possible combinations of
		flips will be considered. For example, if flip_pairs is
		[[0,1],[2,3]] then flipping 0,1,2,3 all at the same time will
		also be considered.
	"""

	def __init__(self, num_params, prior_means, prior_prec, proposal_means,
		proposal_prec, input_norm_path=None, flip_pairs=None, weight_terms=None):

		super().__init__(num_params,flip_pairs=flip_pairs,
			weight_terms=weight_terms)

		# If the parameters are normalized with norms.csv, the prior and
		# proposal must be mapped into the same normalized space.
		if input_norm_path is not None:
			print('normalizing prior/proposal')
			prior_means,prior_prec = normalize_mu_prec(prior_means,
				prior_prec,input_norm_path)
			proposal_means,proposal_prec = normalize_mu_prec(proposal_means,
				proposal_prec,input_norm_path)

		# Store the prior & proposal parameters needed to compute the loss.
		self.prior_mu = tf.constant(prior_means,dtype=tf.float32)
		self.prior_prec = tf.constant(prior_prec,dtype=tf.float32)
		self.proposal_mu = tf.constant(proposal_means,dtype=tf.float32)
		self.proposal_prec = tf.constant(proposal_prec,dtype=tf.float32)

	@staticmethod
	def log_gauss_full(y_true,y_pred,prec_mat):

Owner: Is this the same function as in the class you're inheriting from? If so, there's no need to redefine it. Or am I missing an important detail?

""" Return the negative log posterior of a Gaussian with full
covariance matrix

Args:
y_true (tf.Tensor): The true values of the parameters
y_pred (tf.Tensor): The predicted value of the parameters
prec_mat: The precision matrix

Returns:
(tf.Tensor): The TF graph for calculating the nlp

Notes:
This loss does not include the constant factor of 1/(2*pi)^(d/2).
"""
y_dif = y_true - y_pred
# TODO: check that this is correct (reducing along right axes, etc.)
# A/B test: FullCovariance w/ this prefactor vs. FullCovariance w/ original prefactor
prefactor = -0.5*tf.math.log(tf.linalg.det(prec_mat))
return prefactor + 0.5 * tf.reduce_sum(
tf.multiply(y_dif,tf.reduce_sum(tf.multiply(tf.expand_dims(
y_dif,-1),prec_mat),axis=-2)),-1)
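As a sanity check on this expression (again, not part of the diff), the returned value should agree with scipy's Gaussian log-pdf once the dropped (d/2)*log(2*pi) constant is added back:

```python
import numpy as np
import tensorflow as tf
from scipy.stats import multivariate_normal
from paltas.Analysis.loss_functions import FullCovarianceAPTLoss

rng = np.random.default_rng(0)
d = 3
y_true = rng.normal(size=(1,d)).astype(np.float32)
y_pred = rng.normal(size=(1,d)).astype(np.float32)
A = rng.normal(size=(d,d))
cov = (A @ A.T + d*np.eye(d)).astype(np.float32)  # SPD covariance
prec = np.linalg.inv(cov)

nlp = FullCovarianceAPTLoss.log_gauss_full(tf.constant(y_true),
	tf.constant(y_pred),tf.constant(prec)).numpy()

# scipy includes the (d/2)*log(2*pi) constant that the Notes say is dropped.
ref = -multivariate_normal(mean=y_pred[0],cov=cov).logpdf(y_true[0])
assert np.allclose(nlp[0],ref - 0.5*d*np.log(2*np.pi),atol=1e-3)
```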

	def loss(self,y_true,output):

		# Extract the outputs
		y_pred, prec_mat, _ = self.convert_output(output)

		# Combined precision: network posterior precision plus proposal
		# precision minus prior precision.
		prec_comb = prec_mat + self.proposal_prec - self.prior_prec

		# Add each possible flip to the loss list. We will then take the
		# minimum.
		loss_list = []
		for flip_mat in self.flip_mat_list:
			y_pred_flip = tf.matmul(y_pred,flip_mat)
			# Add a dimension to y_pred to facilitate matmul.
			rhs = (tf.matmul(prec_mat,tf.expand_dims(y_pred_flip,-1)) +
				tf.matmul(self.proposal_prec,tf.expand_dims(self.proposal_mu,-1)) -
				tf.matmul(self.prior_prec,tf.expand_dims(self.prior_mu,-1)))
			mu_comb = tf.matmul(tf.linalg.inv(prec_comb),rhs)
			# Remove the extra dimension.
			mu_comb = tf.squeeze(mu_comb,axis=-1)
			loss_list.append(self.log_gauss_full(y_true,mu_comb,prec_comb))
		loss_stack = tf.stack(loss_list,axis=-1)
		return tf.reduce_min(loss_stack,axis=-1)
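The algebra behind prec_comb and mu_comb: for a Gaussian prior N(mu_0, P_0^-1), proposal N(mu_prop, P_prop^-1), and network output N(mu, P^-1), the APT-corrected density q(theta)*proposal(theta)/prior(theta) is, up to normalization, Gaussian with precision P + P_prop - P_0 and mean (P + P_prop - P_0)^-1 (P mu + P_prop mu_prop - P_0 mu_0), which is what the loop computes (with the flip applied to the predicted mean first). A NumPy check of that identity with made-up values:

```python
import numpy as np

rng = np.random.default_rng(1)
d = 2

def random_spd(scale):
	A = rng.normal(size=(d,d))
	return A @ A.T + scale*np.eye(d)

# Network posterior and proposal precisions/means, plus a broad prior.
P_pred, P_prop = random_spd(5.0), random_spd(5.0)
P_prior = 0.1*np.eye(d)  # broad prior, i.e. small precision
mu_pred, mu_prop, mu_prior = (rng.normal(size=d) for _ in range(3))

# Combined Gaussian, exactly as in `loss` above.
P_comb = P_pred + P_prop - P_prior
mu_comb = np.linalg.solve(P_comb,
	P_pred @ mu_pred + P_prop @ mu_prop - P_prior @ mu_prior)

# N(mu_pred,P_pred)*N(mu_prop,P_prop)/N(mu_prior,P_prior) should match
# N(mu_comb,P_comb) up to a theta-independent constant, so the gradients
# of the log densities must agree at any theta.
theta = rng.normal(size=d)
grad_lhs = (-P_pred @ (theta-mu_pred) - P_prop @ (theta-mu_prop)
	+ P_prior @ (theta-mu_prior))
assert np.allclose(grad_lhs, -P_comb @ (theta-mu_comb))
```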


class DiagonalCovarianceAPTLoss(DiagonalCovarianceLoss):

Owner: I think if you inherit from FullCovarianceAPTLoss you don't have to repeat the __init__.

""" Automatic Posterior Transformation (APT) Loss w/ diagonal covariance matrix

Args:
num_params (int): The number of parameters to predict.
prior_means ([float]): Means of initial Gaussian training prior
prior_scatters ([float]): Standard deviations of initial Gaussian training prior
proposal_means ([float]): Means of updated proposal Gaussian training prior
proposal_scatters ([float]): Standard deviations of updated proposal Gaussian training prior
flip_pairs ([[int,...],...]): A list of lists. Each list contains
the index of parameters that when flipped together return an
equivalent lens model.

Notes:
If multiple lists are provided, all possible combinations of
flips will be considered. For example, if flip_pairs is
[[0,1],[2,3]] then flipping 0,1,2,3 all at the same time will
also be considered.
"""

	def __init__(self, num_params, prior_means, prior_prec, proposal_means,
		proposal_prec, input_norm_path=None, flip_pairs=None, weight_terms=None):

		super().__init__(num_params,flip_pairs=flip_pairs,
			weight_terms=weight_terms)

		# If the parameters are normalized with norms.csv, the prior and
		# proposal must be mapped into the same normalized space.
		if input_norm_path is not None:
			print('normalizing prior/proposal')
			prior_means,prior_prec = normalize_mu_prec(prior_means,
				prior_prec,input_norm_path)
			proposal_means,proposal_prec = normalize_mu_prec(proposal_means,
				proposal_prec,input_norm_path)

		# Store the prior & proposal parameters needed to compute the loss.
		self.prior_mu = tf.constant(prior_means,dtype=tf.float32)
		self.prior_prec = tf.constant(prior_prec,dtype=tf.float32)
		self.proposal_mu = tf.constant(proposal_means,dtype=tf.float32)
		self.proposal_prec = tf.constant(proposal_prec,dtype=tf.float32)


	def loss(self,y_true,output):

		# Extract the outputs
		y_pred, log_var_pred = self.convert_output(output)

		# Build the diagonal precision matrix from the predicted log variances.
		prec_mat = tf.linalg.diag(tf.math.reciprocal(tf.exp(log_var_pred)))
		prec_comb = prec_mat + self.proposal_prec - self.prior_prec

		# Add each possible flip to the loss list. We will then take the
		# minimum.
		loss_list = []
		for flip_mat in self.flip_mat_list:
			y_pred_flip = tf.matmul(y_pred,flip_mat)
			# Add a dimension to y_pred to facilitate matmul.
			rhs = (tf.matmul(prec_mat,tf.expand_dims(y_pred_flip,-1)) +
				tf.matmul(self.proposal_prec,tf.expand_dims(self.proposal_mu,-1)) -
				tf.matmul(self.prior_prec,tf.expand_dims(self.prior_mu,-1)))
			mu_comb = tf.matmul(tf.linalg.inv(prec_comb),rhs)
			# Remove the extra dimension.
			mu_comb = tf.squeeze(mu_comb,axis=-1)
			loss_list.append(FullCovarianceAPTLoss.log_gauss_full(y_true,mu_comb,prec_comb))
		loss_stack = tf.stack(loss_list,axis=-1)
		return tf.reduce_min(loss_stack,axis=-1)
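For orientation (not part of the diff), a minimal usage sketch, assuming the constructor and loss signatures shown above and that convert_output splits the network output into means and log variances; the output layout matches num_outputs = num_params*2 in train_model.py below:

```python
import numpy as np
import tensorflow as tf
from paltas.Analysis.loss_functions import DiagonalCovarianceAPTLoss

num_params = 2
# Illustrative broad prior and tighter proposal, both diagonal.
prior_means = np.zeros(num_params)
prior_prec = np.diag([0.1,0.1])
proposal_means = np.array([0.2,-0.1])
proposal_prec = np.diag([4.0,4.0])

apt_loss = DiagonalCovarianceAPTLoss(num_params,prior_means,prior_prec,
	proposal_means,proposal_prec)

# One example in the batch: 2 predicted means followed by 2 log variances.
y_true = tf.constant([[0.3,-0.2]],dtype=tf.float32)
output = tf.constant([[0.25,-0.15,-2.0,-2.0]],dtype=tf.float32)
print(apt_loss.loss(y_true,output))  # one loss value per batch element
```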
14 changes: 14 additions & 0 deletions paltas/Analysis/train_model.py
@@ -103,6 +103,12 @@ def main():
	norm_images = config_module.norm_images
	# A string with which loss function to use.
	loss_function = config_module.loss_function
	# If using an APT loss, load the necessary prior & proposal information.
	if loss_function in {'fullapt','diagapt'}:
		prior_means = config_module.prior_means
		prior_prec = config_module.prior_prec
		proposal_means = config_module.proposal_means
		proposal_prec = config_module.proposal_prec
	# A string specifying which model to use
	model_type = config_module.model_type
	# A string specifying which optimizer to use
@@ -189,10 +195,18 @@
		num_outputs = num_params*2
		loss = loss_functions.DiagonalCovarianceLoss(num_params,
			flip_pairs,weight_terms).loss
	elif loss_function == 'diagapt':
		num_outputs = num_params*2
		loss = loss_functions.DiagonalCovarianceAPTLoss(num_params,prior_means,
			prior_prec,proposal_means,proposal_prec,
			input_norm_path=input_norm_path).loss
	elif loss_function == 'full':
		num_outputs = num_params + int(num_params*(num_params+1)/2)
		loss = loss_functions.FullCovarianceLoss(num_params,flip_pairs,
			weight_terms).loss
	elif loss_function == 'fullapt':
		num_outputs = num_params + int(num_params*(num_params+1)/2)
		loss = loss_functions.FullCovarianceAPTLoss(num_params,prior_means,
			prior_prec,proposal_means,proposal_prec,
			input_norm_path=input_norm_path).loss
	else:
		raise ValueError('%s loss not in the list of supported losses'%(
			loss_function))
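For the new branches to work, the training config module has to define the four attributes read in the first hunk above; a hypothetical excerpt (values are placeholders):

```python
# Hypothetical excerpt from a training config module using the APT losses.
import numpy as np

loss_function = 'fullapt'  # or 'diagapt'

# Gaussian prior the initial training set was drawn from, and the narrower
# proposal used to generate the current round of training data.
prior_means = np.array([0.0,0.0])
prior_prec = np.linalg.inv(np.diag([1.0,1.0]))
proposal_means = np.array([0.1,-0.05])
proposal_prec = np.linalg.inv(np.diag([0.25,0.25]))
```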