APT Loss Functions #58

Open · wants to merge 7 commits into main
31 changes: 31 additions & 0 deletions paltas/Analysis/dataset_generation.py
@@ -120,6 +120,37 @@ def unnormalize_outputs(input_norm_path,learning_params,mean,standard_dev=None,
		if cov_mat is not None:
			cov_mat[:,lpi,:] *= param_std
			cov_mat[:,:,lpi] *= param_std


# TODO: write test after moving (make sure identity operation w/ unnormalized)
Owner: I think you've added a test for this now, so please remove the TODO.

def normalize_mu_prec(mu,prec_mat,input_norm_path):
	"""Helper function to convert mu, prec_matrix to normalized parameter
	space

	Args:
Owner: I'm not familiar with this string formatting. Does that shape information translate to Read the Docs?

Owner: Remove the indentation from Returns (it should have the same indentation as Args).

		mu ([float]), shape (dim): array of means for each param
		prec_mat ([float]), shape (dim,dim): precision matrix
		input_norm_path (string): path to norms.csv

	Returns:
		(np.array, np.array): normalized mu, shape (dim), and normalized
			precision matrix, shape (dim,dim)
	"""
	mu_copy = np.copy(mu)
	norm_dict = pd.read_csv(input_norm_path)
	norm_means = norm_dict['mean'].to_numpy()
	norm_std = norm_dict['std'].to_numpy()

	cov_mat = np.linalg.inv(prec_mat)

	# Do the opposite of dataset_generation.unnormalize_outputs: subtract the
	# mean and divide by the standard deviation of each parameter.
	for i in range(len(mu)):
		mu_copy[i] -= norm_means[i]
		mu_copy[i] /= norm_std[i]

		cov_mat[i,:] /= norm_std[i]
		cov_mat[:,i] /= norm_std[i]

	return mu_copy, np.linalg.inv(cov_mat)
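A quick round-trip check of this helper (not part of the diff; the norms.csv contents below are invented for illustration, but the 'mean' and 'std' columns match what the function reads):

```python
import numpy as np
import pandas as pd

from paltas.Analysis.dataset_generation import normalize_mu_prec

# Hypothetical norms.csv with the 'mean' and 'std' columns the helper reads.
pd.DataFrame({'parameter':['p0','p1'],'mean':[0.5,-1.0],
	'std':[2.0,0.25]}).to_csv('norms.csv',index=False)

mu = np.array([1.0,0.0])
cov = np.array([[0.4,0.1],[0.1,0.3]])
mu_norm, prec_norm = normalize_mu_prec(mu,np.linalg.inv(cov),'norms.csv')

# Normalizing a parameter is (x - mean)/std, so the covariance picks up a
# factor of 1/(std_i*std_j) and the mean is shifted and rescaled.
assert np.allclose(mu_norm,(mu-[0.5,-1.0])/[2.0,0.25])
assert np.allclose(np.linalg.inv(prec_norm),
	cov/np.outer([2.0,0.25],[2.0,0.25]))
```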


def kwargs_detector_to_tf_noise(image,kwargs_detector):
155 changes: 155 additions & 0 deletions paltas/Analysis/loss_functions.py
@@ -9,6 +9,7 @@
import tensorflow as tf
import numpy as np
import itertools
from paltas.Analysis.dataset_generation import normalize_mu_prec


class BaseLoss():
@@ -425,3 +426,157 @@ def loss(self,y_true,output):
			tf.matmul(y_pred,flip_mat),prec_mat,L_diag))
		loss_stack = tf.stack(loss_list,axis=-1)
		return tf.reduce_min(loss_stack,axis=-1)


class FullCovarianceAPTLoss(FullCovarianceLoss):
	""" Automatic Posterior Transformation (APT) loss w/ full covariance matrix

	Args:
		num_params (int): The number of parameters to predict.
		prior_means ([float]): Means of the initial Gaussian training prior
		prior_prec ([[float]]): Precision matrix of the initial Gaussian
			training prior
		proposal_means ([float]): Means of the updated proposal Gaussian
		proposal_prec ([[float]]): Precision matrix of the updated proposal
			Gaussian
		input_norm_path (str): Path to norms.csv. If provided, the prior and
			proposal are converted to the normalized parameter space.
		flip_pairs ([[int,...],...]): A list of lists. Each list contains
			the indices of parameters that, when flipped together, return an
			equivalent lens model.
		weight_terms: Passed through unchanged to FullCovarianceLoss.

	Notes:
		If multiple lists are provided, all possible combinations of
		flips will be considered. For example, if flip_pairs is
		[[0,1],[2,3]] then flipping 0,1,2,3 all at the same time will
		also be considered.
	"""

	def __init__(self, num_params, prior_means, prior_prec, proposal_means,
		proposal_prec, input_norm_path=None, flip_pairs=None, weight_terms=None):

		super().__init__(num_params,flip_pairs=flip_pairs,
			weight_terms=weight_terms)

		# If the parameters are normalized with norms.csv, the prior and
		# proposal must be mapped into the same normalized space.
		if input_norm_path is not None:
			print('normalizing prior/proposal')
			prior_means,prior_prec = normalize_mu_prec(prior_means,
				prior_prec,input_norm_path)
			proposal_means,proposal_prec = normalize_mu_prec(proposal_means,
				proposal_prec,input_norm_path)

		# Store the prior & proposal parameters needed to compute the loss.
		self.prior_mu = tf.constant(prior_means,dtype=tf.float32)
		self.prior_prec = tf.constant(prior_prec,dtype=tf.float32)
		self.proposal_mu = tf.constant(proposal_means,dtype=tf.float32)
		self.proposal_prec = tf.constant(proposal_prec,dtype=tf.float32)

	@staticmethod
	def log_gauss_full(y_true,y_pred,prec_mat):

Owner: Is this the same function as in the class you're inheriting from? If so, there's no need to redefine it. Or am I missing an important detail?

""" Return the negative log posterior of a Gaussian with full
covariance matrix

Args:
y_true (tf.Tensor): The true values of the parameters
y_pred (tf.Tensor): The predicted value of the parameters
prec_mat: The precision matrix

Returns:
(tf.Tensor): The TF graph for calculating the nlp

Notes:
This loss does not include the constant factor of 1/(2*pi)^(d/2).
"""
y_dif = y_true - y_pred
# TODO: check that this is correct (reducing along right axes, etc.)
# A/B test: FullCovariance w/ this prefactor vs. FullCovariance w/ original prefactor
prefactor = -0.5*tf.math.log(tf.linalg.det(prec_mat))
return prefactor + 0.5 * tf.reduce_sum(
tf.multiply(y_dif,tf.reduce_sum(tf.multiply(tf.expand_dims(
y_dif,-1),prec_mat),axis=-2)),-1)
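As a sanity check on this expression (again, not part of the diff), the returned value should agree with scipy's Gaussian log-pdf once the dropped (d/2)*log(2*pi) constant is added back:

```python
import numpy as np
import tensorflow as tf
from scipy.stats import multivariate_normal
from paltas.Analysis.loss_functions import FullCovarianceAPTLoss

rng = np.random.default_rng(0)
d = 3
y_true = rng.normal(size=(1,d)).astype(np.float32)
y_pred = rng.normal(size=(1,d)).astype(np.float32)
A = rng.normal(size=(d,d))
cov = (A @ A.T + d*np.eye(d)).astype(np.float32)  # SPD covariance
prec = np.linalg.inv(cov)

nlp = FullCovarianceAPTLoss.log_gauss_full(tf.constant(y_true),
	tf.constant(y_pred),tf.constant(prec)).numpy()

# scipy includes the (d/2)*log(2*pi) constant that the Notes say is dropped.
ref = -multivariate_normal(mean=y_pred[0],cov=cov).logpdf(y_true[0])
assert np.allclose(nlp[0],ref - 0.5*d*np.log(2*np.pi),atol=1e-3)
```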

	def loss(self,y_true,output):

		# Extract the outputs
		y_pred, prec_mat, _ = self.convert_output(output)

		# Combined precision: network posterior precision plus proposal
		# precision minus prior precision.
		prec_comb = prec_mat + self.proposal_prec - self.prior_prec

		# Add each possible flip to the loss list. We will then take the
		# minimum.
		loss_list = []
		for flip_mat in self.flip_mat_list:
			y_pred_flip = tf.matmul(y_pred,flip_mat)
			# Add a dimension to y_pred to facilitate matmul.
			rhs = (tf.matmul(prec_mat,tf.expand_dims(y_pred_flip,-1)) +
				tf.matmul(self.proposal_prec,tf.expand_dims(self.proposal_mu,-1)) -
				tf.matmul(self.prior_prec,tf.expand_dims(self.prior_mu,-1)))
			mu_comb = tf.matmul(tf.linalg.inv(prec_comb),rhs)
			# Remove the extra dimension.
			mu_comb = tf.squeeze(mu_comb,axis=-1)
			loss_list.append(self.log_gauss_full(y_true,mu_comb,prec_comb))
		loss_stack = tf.stack(loss_list,axis=-1)
		return tf.reduce_min(loss_stack,axis=-1)
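The algebra behind prec_comb and mu_comb: for a Gaussian prior N(mu_0, P_0^-1), proposal N(mu_prop, P_prop^-1), and network output N(mu, P^-1), the APT-corrected density q(theta)*proposal(theta)/prior(theta) is, up to normalization, Gaussian with precision P + P_prop - P_0 and mean (P + P_prop - P_0)^-1 (P mu + P_prop mu_prop - P_0 mu_0), which is what the loop computes (with the flip applied to the predicted mean first). A NumPy check of that identity with made-up values:

```python
import numpy as np

rng = np.random.default_rng(1)
d = 2

def random_spd(scale):
	A = rng.normal(size=(d,d))
	return A @ A.T + scale*np.eye(d)

# Network posterior and proposal precisions/means, plus a broad prior.
P_pred, P_prop = random_spd(5.0), random_spd(5.0)
P_prior = 0.1*np.eye(d)  # broad prior, i.e. small precision
mu_pred, mu_prop, mu_prior = (rng.normal(size=d) for _ in range(3))

# Combined Gaussian, exactly as in `loss` above.
P_comb = P_pred + P_prop - P_prior
mu_comb = np.linalg.solve(P_comb,
	P_pred @ mu_pred + P_prop @ mu_prop - P_prior @ mu_prior)

# N(mu_pred,P_pred)*N(mu_prop,P_prop)/N(mu_prior,P_prior) should match
# N(mu_comb,P_comb) up to a theta-independent constant, so the gradients
# of the log densities must agree at any theta.
theta = rng.normal(size=d)
grad_lhs = (-P_pred @ (theta-mu_pred) - P_prop @ (theta-mu_prop)
	+ P_prior @ (theta-mu_prior))
assert np.allclose(grad_lhs, -P_comb @ (theta-mu_comb))
```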


class DiagonalCovarianceAPTLoss(DiagonalCovarianceLoss):

Owner: I think if you inherit from FullCovarianceAPTLoss you don't have to repeat the __init__.

""" Automatic Posterior Transformation (APT) Loss w/ diagonal covariance matrix

Args:
num_params (int): The number of parameters to predict.
prior_means ([float]): Means of initial Gaussian training prior
prior_scatters ([float]): Standard deviations of initial Gaussian training prior
proposal_means ([float]): Means of updated proposal Gaussian training prior
proposal_scatters ([float]): Standard deviations of updated proposal Gaussian training prior
flip_pairs ([[int,...],...]): A list of lists. Each list contains
the index of parameters that when flipped together return an
equivalent lens model.

Notes:
If multiple lists are provided, all possible combinations of
flips will be considered. For example, if flip_pairs is
[[0,1],[2,3]] then flipping 0,1,2,3 all at the same time will
also be considered.
"""

	def __init__(self, num_params, prior_means, prior_prec, proposal_means,
		proposal_prec, input_norm_path=None, flip_pairs=None, weight_terms=None):

		super().__init__(num_params,flip_pairs=flip_pairs,
			weight_terms=weight_terms)

		# If the parameters are normalized with norms.csv, the prior and
		# proposal must be mapped into the same normalized space.
		if input_norm_path is not None:
			print('normalizing prior/proposal')
			prior_means,prior_prec = normalize_mu_prec(prior_means,
				prior_prec,input_norm_path)
			proposal_means,proposal_prec = normalize_mu_prec(proposal_means,
				proposal_prec,input_norm_path)

		# Store the prior & proposal parameters needed to compute the loss.
		self.prior_mu = tf.constant(prior_means,dtype=tf.float32)
		self.prior_prec = tf.constant(prior_prec,dtype=tf.float32)
		self.proposal_mu = tf.constant(proposal_means,dtype=tf.float32)
		self.proposal_prec = tf.constant(proposal_prec,dtype=tf.float32)


	def loss(self,y_true,output):

		# Extract the outputs
		y_pred, log_var_pred = self.convert_output(output)

		# Build the diagonal precision matrix from the predicted log variances.
		prec_mat = tf.linalg.diag(tf.math.reciprocal(tf.exp(log_var_pred)))
		prec_comb = prec_mat + self.proposal_prec - self.prior_prec

		# Add each possible flip to the loss list. We will then take the
		# minimum.
		loss_list = []
		for flip_mat in self.flip_mat_list:
			y_pred_flip = tf.matmul(y_pred,flip_mat)
			# Add a dimension to y_pred to facilitate matmul.
			rhs = (tf.matmul(prec_mat,tf.expand_dims(y_pred_flip,-1)) +
				tf.matmul(self.proposal_prec,tf.expand_dims(self.proposal_mu,-1)) -
				tf.matmul(self.prior_prec,tf.expand_dims(self.prior_mu,-1)))
			mu_comb = tf.matmul(tf.linalg.inv(prec_comb),rhs)
			# Remove the extra dimension.
			mu_comb = tf.squeeze(mu_comb,axis=-1)
			loss_list.append(FullCovarianceAPTLoss.log_gauss_full(y_true,mu_comb,prec_comb))
		loss_stack = tf.stack(loss_list,axis=-1)
		return tf.reduce_min(loss_stack,axis=-1)
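For orientation (not part of the diff), a minimal usage sketch, assuming the constructor and loss signatures shown above and that convert_output splits the network output into means and log variances; the output layout matches num_outputs = num_params*2 in train_model.py below:

```python
import numpy as np
import tensorflow as tf
from paltas.Analysis.loss_functions import DiagonalCovarianceAPTLoss

num_params = 2
# Illustrative broad prior and tighter proposal, both diagonal.
prior_means = np.zeros(num_params)
prior_prec = np.diag([0.1,0.1])
proposal_means = np.array([0.2,-0.1])
proposal_prec = np.diag([4.0,4.0])

apt_loss = DiagonalCovarianceAPTLoss(num_params,prior_means,prior_prec,
	proposal_means,proposal_prec)

# One example in the batch: 2 predicted means followed by 2 log variances.
y_true = tf.constant([[0.3,-0.2]],dtype=tf.float32)
output = tf.constant([[0.25,-0.15,-2.0,-2.0]],dtype=tf.float32)
print(apt_loss.loss(y_true,output))  # one loss value per batch element
```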
14 changes: 14 additions & 0 deletions paltas/Analysis/train_model.py
@@ -103,6 +103,12 @@ def main():
	norm_images = config_module.norm_images
	# A string with which loss function to use.
	loss_function = config_module.loss_function
	# If using an APT loss, load the necessary prior & proposal information.
	if loss_function in {'fullapt','diagapt'}:
		prior_means = config_module.prior_means
		prior_prec = config_module.prior_prec
		proposal_means = config_module.proposal_means
		proposal_prec = config_module.proposal_prec
	# A string specifying which model to use
	model_type = config_module.model_type
	# A string specifying which optimizer to use
@@ -189,10 +195,18 @@
		num_outputs = num_params*2
		loss = loss_functions.DiagonalCovarianceLoss(num_params,
			flip_pairs,weight_terms).loss
	elif loss_function == 'diagapt':
		num_outputs = num_params*2
		loss = loss_functions.DiagonalCovarianceAPTLoss(num_params,prior_means,
			prior_prec,proposal_means,proposal_prec,
			input_norm_path=input_norm_path).loss
	elif loss_function == 'full':
		num_outputs = num_params + int(num_params*(num_params+1)/2)
		loss = loss_functions.FullCovarianceLoss(num_params,flip_pairs,
			weight_terms).loss
	elif loss_function == 'fullapt':
		num_outputs = num_params + int(num_params*(num_params+1)/2)
		loss = loss_functions.FullCovarianceAPTLoss(num_params,prior_means,
			prior_prec,proposal_means,proposal_prec,
			input_norm_path=input_norm_path).loss
	else:
		raise ValueError('%s loss not in the list of supported losses'%(
			loss_function))
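For the new branches to work, the training config module has to define the four attributes read in the first hunk above; a hypothetical excerpt (values are placeholders):

```python
# Hypothetical excerpt from a training config module using the APT losses.
import numpy as np

loss_function = 'fullapt'  # or 'diagapt'

# Gaussian prior the initial training set was drawn from, and the narrower
# proposal used to generate the current round of training data.
prior_means = np.array([0.0,0.0])
prior_prec = np.linalg.inv(np.diag([1.0,1.0]))
proposal_means = np.array([0.1,-0.05])
proposal_prec = np.linalg.inv(np.diag([0.25,0.25]))
```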