diff --git a/easyvvuq/analysis/sc_analysis.py b/easyvvuq/analysis/sc_analysis.py old mode 100644 new mode 100755 index 60ec89eaa..002e130fd --- a/easyvvuq/analysis/sc_analysis.py +++ b/easyvvuq/analysis/sc_analysis.py @@ -1,14 +1,12 @@ -"""Analysis element for Stochastic Collocation (SC). - -Method: 'Global Sensitivity Analysis for Stochastic Collocation' - G. Tang and G. Iaccarino, AIAA 2922, 2010 -""" import numpy as np import chaospy as cp from itertools import product, chain, combinations +import pickle +import copy from easyvvuq import OutputType from .base import BaseAnalysisElement import logging +from scipy.special import comb __author__ = "Wouter Edeling" __copyright__ = """ @@ -37,15 +35,11 @@ class SCAnalysis(BaseAnalysisElement): def __init__(self, sampler=None, qoi_cols=None): - """Analysis element for Stochastic Collocation (SC). - - Method: 'Global Sensitivity Analysis for Stocastic Collocation' - G. Tang and G. Iaccarino, AIAA 2922, 2010 - + """ Parameters ---------- sampler : :obj:`easyvvuq.sampling.stochastic_collocation.SCSampler` - Sampler used to initiate the PCE analysis + Sampler used to initiate the SC analysis qoi_cols : list or None Column names for quantities of interest (for which analysis is performed). @@ -62,7 +56,9 @@ def __init__(self, sampler=None, qoi_cols=None): self.qoi_cols = qoi_cols self.output_type = OutputType.SUMMARY self.sampler = sampler - self._number_of_samples = sampler._number_of_samples + self.dimension_adaptive = sampler.dimension_adaptive + if self.dimension_adaptive: + self.adaptation_errors = [] self.sparse = sampler.sparse def element_name(self): @@ -71,10 +67,53 @@ def element_name(self): def element_version(self): """Version of this element for logging purposes""" - return "0.3" + return "0.5" + + def save_state(self, filename): + """ + Saves the complete state of the analysis object to a pickle file, + except the sampler object (self.sampler). - def analyse(self, data_frame=None): - """Perform PCE analysis on input `data_frame`. + Parameters + ---------- + filename : (string) name of the file to write the state to + + Returns + ------- + None. + + """ + print("Saving analysis state to %s" % filename) + #make a copy of the state, but do not store the sampler + state = copy.copy(self.__dict__) + del state['sampler'] + file = open(filename, 'wb') + pickle.dump(state, file) + file.close() + + def load_state(self, filename): + """ + Loads the complete state of the analysis object from a + pickle file, stored using save_state. + + Parameters + ---------- + filename : (string) name of the file to load + + Returns + ------- + None. + + """ + print("Loading analysis state from %s" % filename) + file = open(filename, 'rb') + state = pickle.load(file) + for key in state.keys(): + self.__dict__[key] = state[key] + file.close() + + def analyse(self, data_frame=None, compute_results=True): + """Perform SC analysis on input `data_frame`.
Parameters ---------- @@ -96,11 +135,12 @@ def analyse(self, data_frame=None): raise RuntimeError( "No data in data frame passed to analyse element") - # the maximum level (quad order) of the (sparse) grid - self.L = self.sampler.L - # the number of uncertain parameters self.N = self.sampler.N + #tensor grid + self.xi_d = self.sampler.xi_d + # the maximum level (quad order) of the (sparse) grid + self.L = self.sampler.L # if L < L_min: quadratures and interpolations are zero # For full tensor grid: there is only one level: L_min = L @@ -108,14 +148,18 @@ def analyse(self, data_frame=None): self.L_min = self.L self.l_norm = np.array([self.sampler.polynomial_order]) self.l_norm_min = self.l_norm - # For sparse grid: multiple levels, L >= N must hold + # For sparse grid: one or more levels else: self.L_min = 1 - self.l_norm = self.sampler.compute_sparse_multi_idx(self.L, self.N) - self.l_norm_min = np.ones(self.N, dtype=int) + #multi indices (stored in l_norm) for isotropic sparse grid or + #dimension-adaptive grid before the 1st refinement. + #If dimension_adaptive and number_of_adaptations > 0: l_norm + #is computed in self.adaptation_metric + if not self.dimension_adaptive or self.sampler.number_of_adaptations == 0: + # the maximum level (quad order) of the (sparse) grid + self.l_norm = self.sampler.compute_sparse_multi_idx(self.L, self.N) - # full tensor grid - self.xi_d = self.sampler.xi_d + self.l_norm_min = np.ones(self.N, dtype=int) # 1d weights and points per level self.xi_1d = self.sampler.xi_1d @@ -127,14 +171,14 @@ def analyse(self, data_frame=None): self.Map = {} self.surr_lm1 = {} - self.foo = [] - + print('Computing collocation points and level indices...') for level in range(self.L_min, self.L + 1): - self.Map[level] = self.create_map(self.N, level) - + self.Map[level] = self.create_map(level) + print('done.') self.clear_surr_lm1() # Extract output values for each quantity of interest from Dataframe + print('Loading samples...') qoi_cols = self.qoi_cols samples = {k: [] for k in qoi_cols} for run_id in data_frame.run_id.unique(): @@ -142,31 +186,33 @@ def analyse(self, data_frame=None): values = data_frame.loc[data_frame['run_id'] == run_id][k].values samples[k].append(values) self.samples = samples + print('done') # size of one code sample self.N_qoi = self.samples[qoi_cols[0]][0].size - results = {'statistical_moments': {}, - 'sobols_first': {k: {} for k in self.qoi_cols}, - 'sobols': {k: {} for k in self.qoi_cols}} - - # Compute descriptive statistics for each quantity of interest - for qoi_k in qoi_cols: - mean_k, var_k = self.get_moments(qoi_k) - std_k = np.sqrt(var_k) - # compute statistical moments - results['statistical_moments'][qoi_k] = {'mean': mean_k, - 'var': var_k, - 'std': std_k} - # compute all Sobol indices - results['sobols'][qoi_k] = self.get_sobol_indices(qoi_k, 'all') - for idx, param_name in enumerate(self.sampler.vary.get_keys()): - results['sobols_first'][qoi_k][param_name] = \ - results['sobols'][qoi_k][(idx,)] - - return results - - def create_map(self, N, L): + if compute_results: + results = {'statistical_moments': {}, + 'sobols_first': {k: {} for k in self.qoi_cols}, + 'sobols': {k: {} for k in self.qoi_cols}} + + # Compute descriptive statistics for each quantity of interest + for qoi_k in qoi_cols: + mean_k, var_k = self.get_moments(qoi_k) + std_k = np.sqrt(var_k) + # compute statistical moments + results['statistical_moments'][qoi_k] = {'mean': mean_k, + 'var': var_k, + 'std': std_k} + # compute all Sobol indices + results['sobols'][qoi_k] = 
self.get_sobol_indices(qoi_k, 'first_order') + for idx, param_name in enumerate(self.sampler.vary.get_keys()): + results['sobols_first'][qoi_k][param_name] = \ + results['sobols'][qoi_k][(idx,)] + + return results + + def create_map(self, L): """ Create a map from a unique integer k to each (level multi index l, collocation point X) combination. Also @@ -182,7 +228,7 @@ def create_map(self, N, L): - Map: a dict for level L containing k, l, X, and f """ # unique index - logging.debug('Creating multi-index map for level %d ...', L) + logging.debug('Creating multi-index map for level %d', L) # full tensor product if not self.sparse: # l = (np.ones(N) * L).astype('int') @@ -195,14 +241,16 @@ def create_map(self, N, L): k += 1 # sparse grid else: - # all sparse grid multi indices l with |l| <= L - l_norm_le_L = self.sampler.compute_sparse_multi_idx(L, N) + # all sparse grid multi indices l + # l_norm_le_L = self.sampler.compute_sparse_multi_idx(L, N) + idx_le_L = np.where(np.sum(self.l_norm, axis=1) - self.N + 1 <= L) + l_norm_le_L = self.l_norm[idx_le_L] k = 0 map_ = {} # loop over all multi indices for l in l_norm_le_L: # colloc point of current level index l - X_l = [self.xi_1d[n][l[n]] for n in range(N)] + X_l = [self.xi_1d[n][l[n]] for n in range(self.N)] X_l = np.array(list(product(*X_l))) for x in X_l: j = np.where((x == self.xi_d).all(axis=1))[0][0] @@ -211,6 +259,91 @@ def create_map(self, N, L): logging.debug('done.') return map_ + def adapt_dimension(self, qoi, data_frame): + """ + Compute the adaptation metric and decide which of the admissible + level indices to include in next iteration of the sparse grid. The + adaptation metric is based on the hierarchical surplus, defined as the + difference between the new code values of the admissible level indices, + and the SC surrogate of the previous iteration. Important: this + subroutine must therefore be called AFTER the code is evaluated at + the new points, but BEFORE the analysis is performed. + + Parameters + ---------- + qoi : (string) the name of the quantity of interest which is used + to base the adaptation metric on. + data_frame : the data frame from the EasyVVUQ Campaign + + Returns + ------- + None. 
+ + """ + #load the code samples + samples = [] + for run_id in data_frame.run_id.unique(): + values = data_frame.loc[data_frame['run_id'] == run_id][qoi].values + samples.append(values) + + #the currently accepted grid points + xi_d_accepted = self.sampler.generate_grid(self.l_norm) + + #compute the hierarchical surplus based error for every admissible l + error = {} + for l in self.sampler.admissible_idx: + error[tuple(l)] = [] + # collocation points of current level index l + X_l = [self.sampler.xi_1d[n][l[n]] for n in range(self.N)] + X_l = np.array(list(product(*X_l))) + #only consider new points, subtract the accepted points + X_l = setdiff2d(X_l, xi_d_accepted) + for xi in X_l: + #find the location of the current xi in the global grid + idx = np.where((xi == self.sampler.xi_d).all(axis=1))[0][0] + #hierarchical surplus error at xi + hier_surplus = samples[idx] - self.surrogate(qoi, xi) + error[tuple(l)].append(np.linalg.norm(hier_surplus)) + #compute mean error over all points in X_l + error[tuple(l)] = np.mean(error[tuple(l)]) + for key in error.keys(): + print("Surplus error when l =", key, "=", error[key]) + #find the admissble index with the largest error + l_star = np.array(max(error, key=error.get)).reshape([1, self.N]) + print('Selecting', l_star, 'for refinement.') + #add max error to list + self.adaptation_errors.append(max(error.values())) + + #add l_star to the current accepted level indices + self.l_norm = np.concatenate((self.l_norm, l_star)) + #if someone executes this function twice for some reason, + #remove the duplicate l_star entry + self.l_norm = np.unique(self.l_norm, axis=0) + + self.analyse(data_frame, compute_results=False) + + # self.L = np.max(np.sum(self.l_norm, axis = 1) - self.N + 1) + # self.xi_d = self.sampler.generate_grid(self.l_norm) + # self.run_id = [] + + # #names of the uncertain variables + # uncertain_vars = list(self.sampler.vary.get_keys()) + # #loop over all samples in the data frame + # for run_id in data_frame["run_id"].unique(): + # #find the value of the input params at current run_id + # xi = data_frame.loc[data_frame["run_id"] == run_id][uncertain_vars].values[0] + # #see if this point is also in self.xi_d + # idx = np.where((xi == self.xi_d).all(axis = 1))[0] + # #if so, add run_id to self.run_id + # if idx.size != 0: + # self.run_id.append(run_id) + + def get_adaptation_errors(self): + """ + Returns self.adaptation_errors + """ + return self.adaptation_errors + def surrogate(self, qoi, x, L=None): """ Use sc_expansion UQP as a surrogate @@ -247,18 +380,83 @@ def quadrature(self, qoi, samples=None): # compute the Delta Q := # (Q^1_l_1 - Q^1_{l_1 - 1}) X ... X (Q^1_{l_N} - Q^1_{L_N - 1}) # tensor product - return np.array([self.compute_Q_diff(l, samples) for l in self.l_norm]).sum(axis=0) + + if not self.sparse or self.dimension_adaptive: + return np.array([self.compute_Q_diff(l, samples) for l in self.l_norm]).sum(axis=0) + else: + return self.combination_technique(qoi, samples) + + def combination_technique(self, qoi, samples=None): + """ + Efficient quadrature formulation for isotropic sparse grids. See: + + Gerstner, Griebel, "Numerical integration using sparse grids" + page 7. + + Parameters + ---------- + - qoi (str): name of the qoi + + - samples (optional in kwargs): Default: compute the mean + by setting samples = self.samples. 
To compute the variance, + set samples = (self.samples - mean)**2 + """ + + if samples is None: + samples = self.samples[qoi] + + #quadrature Q + Q = 0.0 + + #loop over l + for l in self.l_norm: + + #for sum(l) < L, combination technique formula shows that weights + #are zero + if np.sum(l) >= self.L: + + # compute the tensor product of parameter and weight values + X_k = [self.xi_1d[n][l[n]] for n in range(self.N)] + W_k = [self.wi_1d[n][l[n]] for n in range(self.N)] + + X_k = np.array(list(product(*X_k))) + W_k = np.array(list(product(*W_k))) + W_k = np.prod(W_k, axis=1) + W_k = W_k.reshape([W_k.shape[0], 1]) + + #scaling factor of combination technique + scaling_factor = (-1)**(self.L + self.N - np.sum(l) - 1) * \ + comb(self.N - 1, np.sum(l) - self.L) + W_k *= scaling_factor + + # find corresponding code values + f_k = np.array([samples[np.where((x == self.xi_d).all(axis=1))[0][0]] for x in X_k]) + + # quadrature of Q^1_{k1} X ... X Q^1_{kN} product + Q += np.sum(f_k * W_k, axis=0).T + + return Q def compute_Q_diff(self, l, samples): """ + Brute force computation of quadrature difference operators \Delta_l + Note: superseded by combination_technique for sparse grids, but might + still be useful for anisotropic sparse grids. Becomes very slow though + for a large number of parameters, and is therefore in need of + replacement. ======================================================================= For every multi index l = (l1, l2, ..., ld), Smolyak sums over tensor products difference quadrature rules: (Q^1_{l1} - Q^1_{l1-1}) X ... X (Q^1_{lN) - Q^1_{lN-1}) Below this product is expanded into individual tensor products, each of which is then computed as: - Q^1_{k1} X ... X Q^1_{kN} = sum...sum w_{k1}*...*w{kN}*f(x_{k1},...,x_{kN}) + Q^1_{k1} X ... X Q^1_{kN} = sum...sum w_{k1}*...*w_{kN}*f(x_{k1},...,x_{kN}) ======================================================================= + Parameters + - l : multi index of quadrature orders + - samples: array of samples to use in the quadrature + + Returns: value of the difference operator """ # expand the multi-index indices of the tensor product @@ -266,7 +464,7 @@ def compute_Q_diff(self, l, samples): diff_idx = np.array(list(product(*[[k, -(k - 1)] for k in l]))) # Delta will be the sum of all expanded tensor products - # Q^1_{k1} X ... X Q^1_{kd} = sum...sum w_{k1}*...*w{kN}*f(x_{k1},...,x_{kd}) + # Q^1_{k1} X ... X Q^1_{kd} = sum...sum w_{k1}*...*w_{kN}*f(x_{k1},...,x_{kd}) Delta = 0.0 # each diff contains the level indices of to a single @@ -305,11 +503,13 @@ def get_moments(self, qoi): - mean and variance of qoi (float (N_qoi,)) """ + print('Computing moments...') # compute mean mean_f = self.quadrature(qoi) # compute variance variance_samples = [(sample - mean_f)**2 for sample in self.samples[qoi]] var_f = self.quadrature(qoi, samples=variance_samples) + print('done') return mean_f, var_f def sc_expansion(self, L, samples, x): @@ -366,12 +566,15 @@ def sc_expansion(self, L, samples, x): # Recursively computed. 
if self.sparse: - Lm1 = np.sum(l) - 1 + #In case of sparse grid: level = sum(l) - N + 1 + #so prev level is sum(l) - N + Lm1 = np.sum(l) - self.N else: + #previous level of full tensor grid = L - 1, will yield a zero Lm1 = self.L - 1 if k in self.surr_lm1[L]: - # print('surrogate already computed') + #print('surrogate already computed') surr_lm1 = self.surr_lm1[L][k] else: surr_lm1 = self.sc_expansion(Lm1, samples, x=Map[k]['X']) @@ -482,11 +685,42 @@ def get_sample_array(self, qoi): ------- - array of all samples of qoi """ - return np.array([self.samples[qoi][k] for k in range(self._number_of_samples)]) + return np.array([self.samples[qoi][k] for k in range(len(self.samples[qoi]))]) + + def adaptation_histogram(self): + """ + Parameters + ---------- + None + + Returns + ------- + Plots a bar chart of the maximum order of the quadrature rule + that is used in each dimension. Use in case of the dimension adaptive + sampler to get an idea of which parameters were more refined than others. + This gives only a first-order idea, as it only plots the max quad + order independently per input parameter, so higher-order refinements + that were made do not show up in the bar chart. + """ + import matplotlib.pyplot as plt + + fig = plt.figure(figsize=[4, 8]) + ax = fig.add_subplot(111, ylabel='max quadrature order', + title='Number of refinements = %d' + % self.sampler.number_of_adaptations) + #find max quad order for every parameter + adapt_measure = np.max(self.l_norm, axis=0) + ax.bar(range(adapt_measure.size), height=adapt_measure) + params = list(self.sampler.vary.get_keys()) + ax.set_xticks(range(adapt_measure.size)) + ax.set_xticklabels(params) + plt.xticks(rotation=90) + plt.tight_layout() + plt.show() def plot_grid(self): """ - If N = 2 or N = 3 plot the (sparse) grid + Plots the collocation points for 2 and 3 dimensional problems """ import matplotlib.pyplot as plt @@ -494,18 +728,19 @@ def plot_grid(self): fig = plt.figure() ax = fig.add_subplot(111, xlabel=r'$x_1$', ylabel=r'$x_2$') ax.plot(self.xi_d[:, 0], self.xi_d[:, 1], 'ro') + plt.tight_layout() + plt.show() elif self.N == 3: from mpl_toolkits.mplot3d import Axes3D fig = plt.figure() ax = fig.add_subplot(111, projection='3d', xlabel=r'$x_1$', ylabel=r'$x_2$', zlabel=r'$x_3$') ax.scatter(self.xi_d[:, 0], self.xi_d[:, 1], self.xi_d[:, 2]) + plt.tight_layout() + plt.show() else: print('Will only plot for N = 2 or N = 3.') - plt.tight_layout() - plt.show() - # Start SC specific methods @staticmethod @@ -597,6 +832,7 @@ def get_sobol_indices(self, qoi, typ='first_order'): ------- Either the first order or all Sobol indices of qoi """ + print('Computing Sobol indices...') # multi indices U = np.arange(self.N) @@ -652,6 +888,7 @@ def get_sobol_indices(self, qoi, typ='first_order'): # compute Sobol index, only include points where D > 0 # sobol[u] = D_u[u][idx_gt0]/D[idx_gt0] sobol[u] = D_u[u] / D + print('done.') return sobol @@ -697,3 +934,21 @@ def lagrange_poly(x, x_i, j): """ x_i_ = np.delete(x_i, j) return np.prod((x - x_i_) / (x_i[j] - x_i_)) + + +def setdiff2d(X, Y): + """ + Computes the difference of two 2D arrays X \ Y + + Parameters + ---------- + X : 2D numpy array + Y : 2D numpy array + + Returns + ------- + The difference X \ Y as a 2D array + + """ + diff = set(map(tuple, X)) - set(map(tuple, Y)) + return np.array(list(diff)) diff --git a/easyvvuq/collate/aggregate_samples.py b/easyvvuq/collate/aggregate_samples.py index 9a36ca8e8..a4191b796 100644 --- a/easyvvuq/collate/aggregate_samples.py +++ 
b/easyvvuq/collate/aggregate_samples.py @@ -75,6 +75,7 @@ def collate(self, campaign, app_id): # Use decoder to check if run has completed (in general application-specific) if decoder.sim_complete(run_info=run_info): + run_data = decoder.parse_sim_output(run_info=run_info) if self.average: diff --git a/easyvvuq/decoders/simple_csv.py b/easyvvuq/decoders/simple_csv.py index e2bf1c5c3..884cfb605 100644 --- a/easyvvuq/decoders/simple_csv.py +++ b/easyvvuq/decoders/simple_csv.py @@ -32,7 +32,8 @@ class SimpleCSV(BaseDecoder, decoder_name="csv"): - def __init__(self, target_filename=None, output_columns=None, header=0): + def __init__(self, target_filename=None, output_columns=None, header=0, + delimiter=","): if target_filename is None: msg = ( @@ -59,6 +60,7 @@ def __init__(self, target_filename=None, output_columns=None, header=0): self.target_filename = target_filename self.output_columns = output_columns self.header = header + self.delimiter = delimiter self.output_type = OutputType('sample') @@ -88,6 +90,7 @@ def parse_sim_output(self, run_info={}): data = pd.read_csv( out_path, usecols=self.output_columns, + sep=self.delimiter, header=self.header) return data @@ -95,7 +98,8 @@ def parse_sim_output(self, run_info={}): def get_restart_dict(self): return {"target_filename": self.target_filename, "output_columns": self.output_columns, - "header": self.header} + "header": self.header, + "delimiter": self.delimiter} def element_version(self): return "0.1" diff --git a/easyvvuq/sampling/stochastic_collocation.py b/easyvvuq/sampling/stochastic_collocation.py index 8cbb2bb4f..290aa8fd2 100644 --- a/easyvvuq/sampling/stochastic_collocation.py +++ b/easyvvuq/sampling/stochastic_collocation.py @@ -1,6 +1,7 @@ from .base import BaseSamplingElement, Vary import chaospy as cp import numpy as np +import pickle from itertools import product, chain import logging @@ -29,6 +30,9 @@ class SCSampler(BaseSamplingElement, sampler_name="sc_sampler"): + """ + Stochastic Collocation sampler + """ def __init__(self, vary=None, @@ -36,7 +40,9 @@ def __init__(self, quadrature_rule="G", count=0, growth=False, - sparse=False): + sparse=False, + midpoint_level1=False, + dimension_adaptive=False): """ Create the sampler for the Stochastic Collocation method. @@ -82,89 +88,246 @@ def __init__(self, self.quad_rule = quadrature_rule self.sparse = sparse + #determines how many points the 1st level of a sparse grid will have. 
+ #If midpoint_level1 = True, order 0 quadrature will be generated + #determines whether to use an isotropic sparse grid, or to adapt + #the levels in the sparse grid based on a hierarchical error measure + self.dimension_adaptive = dimension_adaptive + self.number_of_adaptations = 0 self.quad_sparse = sparse self.growth = growth self.params_distribution = params_distribution + #determine if a nested sparse grid is used + if self.sparse is True and self.growth is True and \ + (self.quad_rule == "C" or self.quad_rule == "clenshaw_curtis"): + self.nested = True + elif self.sparse is True and self.growth is False and self.quad_rule == "gauss_patterson": + self.nested = True + elif self.sparse is True and self.growth is True and self.quad_rule == "newton_cotes": + self.nested = True + else: + self.nested = False + # L = level of (sparse) grid L = np.max(self.polynomial_order) + self.L = L + self.N = N + + #compute the 1D collocation points (and quad weights) + self.compute_1D_points_weights(L, N) + + #compute N-dimensional collocation points + if not self.sparse: + + # generate collocation grid locally + l_norm = np.array([self.polynomial_order]) + self.xi_d = self.generate_grid(l_norm) + + # sparse grid = a linear combination of tensor products of 1D rules + # of different order. Use chaospy to compute these 1D quadrature rules + else: + + # simplex set of multi indices + multi_idx = self.compute_sparse_multi_idx(L, N) + + # create sparse grid of dimension N and level q using the 1d + #rules in self.xi_1d + self.xi_d = self.generate_grid(multi_idx) + + self._number_of_samples = self.xi_d.shape[0] + + self.count = 0 + + # This gives an error when storing and loading campaigns in the + #dimension adaptive setting - seems not required anyway - commented it out + # Fast forward to specified count, if possible + # if self.count >= self._number_of_samples: + # msg = (f"Attempt to start sampler fastforwarded to count {self.count}, " + # f"but sampler only has {self._number_of_samples} samples, therefore" + # f"this sampler will not provide any more samples.") + # logging.warning(msg) + # else: + # for i in range(count): + # self.__next__() + + def compute_1D_points_weights(self, L, N): + """ + Computes 1D collocation points and quad weights, + and stores these in self.xi_1d, self.wi_1d. + + Parameters + ---------- + L : (int) the max level of the (sparse) grid + N : (int) the number of uncertain parameters + + Returns + ------- + None. + """ # for every dimension (parameter), create a hierarchy of 1D # quadrature rules of increasing order self.xi_1d = [{} for n in range(N)] self.wi_1d = [{} for n in range(N)] - #for n in range(N): # self.xi_1d[n] = {} # self.wi_1d[n] = {} + if self.sparse: + + #if level one of the sparse grid is a midpoint rule, generate + #the quadrature with order 0 (1 quad point).
Else set order at + #level 1 to 1 + if self.midpoint_level1: + j = 0 + else: + j = 1 - if sparse: for n in range(N): - for i in range(1, L + 1): - xi_i, wi_i = cp.generate_quadrature(i + 1, - params_distribution[n], + for i in range(L): + xi_i, wi_i = cp.generate_quadrature(i + j, + self.params_distribution[n], rule=self.quad_rule, growth=self.growth) - self.xi_1d[n][i] = xi_i[0] - self.wi_1d[n][i] = wi_i + self.xi_1d[n][i + 1] = xi_i[0] + self.wi_1d[n][i + 1] = wi_i else: for n in range(N): xi_i, wi_i = cp.generate_quadrature(self.polynomial_order[n], - params_distribution[n], + self.params_distribution[n], rule=self.quad_rule, growth=self.growth) self.xi_1d[n][self.polynomial_order[n]] = xi_i[0] self.wi_1d[n][self.polynomial_order[n]] = wi_i - if not sparse: - # Generate collocation grid via chaospy - # NOTE: different poly orders per dimension does not work for all - # guadarture rules - use self.generate_grid subroutine instead - # # the nodes of the collocation grid - # xi_d, _ = cp.generate_quadrature(self.polynomial_order, - # self.joint_dist, - # rule=quadrature_rule) - # self.xi_d = xi_d.T + def next_level_sparse_grid(self): + """ + Adds the points of the next level for isotropic hierarchical sparse grids. - # generate collocation grid locally - l_norm = np.array([self.polynomial_order]) - self.xi_d = self.generate_grid(L, N, l_norm) + Returns + ------- + None. - # sparse grid = a linear combination of tensor products of 1D rules - # of different order. Use chaospy to compute these 1D quadrature rules - else: + """ - # L >= N must hold - if L < N: - raise RuntimeError(("Sparse grid level is lower than the number of params. " - "Increase level (via polynomial_order) p such that p-1 >= N")) + if self.nested is False: + logging.debug('Only works for nested sparse grids') + return - # multi-index l, such that |l| <= L - l_norm_le_L = self.compute_sparse_multi_idx(L, N) + #update level of sparse grid + L = np.max(self.polynomial_order) + 1 + self.polynomial_order = [p + 1 for p in self.polynomial_order] - # create sparse grid of dimension N and level q using the 1d - #rules in self.xi_1d - self.xi_d = self.generate_grid(L, N, l_norm_le_L) + print('Moving grid from level %d to level %d' % (L - 1, L)) - self.L = L - self.N = N - self._number_of_samples = self.xi_d.shape[0] + #compute all multi indices + multi_idx = self.compute_sparse_multi_idx(L, self.N) - # Fast forward to specified count, if possible - self.count = 0 - if self.count >= self._number_of_samples: - msg = (f"Attempt to start sampler fastforwarded to count {self.count}, " - f"but sampler only has {self._number_of_samples} samples, therefore" - f"this sampler will not provide any more samples.") - logging.warning(msg) - else: - for i in range(count): - self.__next__() + #find only the indices of the new level (|l| = L + N - 1) + new = np.where(np.sum(multi_idx, axis=1) == L + self.N - 1)[0] + + #update the 1D points and weights + self.compute_1D_points_weights(L, self.N) + + #generate the new N-dimensional collocation points + new_grid = self.generate_grid(multi_idx[new]) + + #find the new points unique to the new grid + new_points = setdiff2d(new_grid, self.xi_d) + + print('%d new points added' % new_points.shape[0]) + + #update the number of samples + self._number_of_samples += new_points.shape[0] + + #update the N-dimensional sparse grid + self.xi_d = np.concatenate((self.xi_d, new_points)) + + def look_ahead(self, current_multi_idx): + """ + The look-ahead step in dimension-adaptive sparse grid sampling. 
Allows + for anisotropic sampling plans. + + Computes the admissible forward neighbors with respect to the current level + multi-indices. The admissible level indices l are added to self.admissible_idx. + The code will be evaluated next iteration at the new collocation points + corresponding to the levels in admissble_idx. + + Source: Gerstner, Griebel, "Numerical integration using sparse grids" + + Parameters + ---------- + current_multi_idx : array of the levels in the current iteration + of the sparse grid. + + Returns + ------- + None. + + """ + if not self.dimension_adaptive: + print('Dimension adaptivity is not selected') + return + + #compute all forward neighbors for every l in current_multi_idx + forward_neighbor = [] + e_n = np.eye(self.N, dtype=int) + for l in current_multi_idx: + for n in range(self.N): + #the forward neighbor is l plus a unit vector + forward_neighbor.append(l + e_n[n]) + + #remove duplicates + forward_neighbor = np.unique(np.array(forward_neighbor), axis=0) + #remove those which are already in the grid + forward_neighbor = setdiff2d(forward_neighbor, current_multi_idx) + #make sure the final candidates are admissible (all backward neighbors + #must be in the current multi indices) + print('Computing admissible levels...') + admissible_idx = [] + for l in forward_neighbor: + admissible = True + for n in range(self.N): + backward_neighbor = l - e_n[n] + #find backward_neighbor in current_multi_idx + idx = np.where((backward_neighbor == current_multi_idx).all(axis=1))[0] + #if backward neighbor is not in the current index set + #and it is 'on the interior' (contains no 0): not admissible + if idx.size == 0 and backward_neighbor[n] != 0: + admissible = False + break + #if all backward neighbors are in the current index set: l is admissible + if admissible: + admissible_idx.append(l) + print('done') + + self.admissible_idx = np.array(admissible_idx) + print('Admissible multi-indices:\n', self.admissible_idx) + + #determine the maximum level L of the new index set L = |l| - N + 1 + self.L = np.max(np.sum(self.admissible_idx, axis=1) - self.N + 1) + #recompute the 1D weights and collocation points + self.compute_1D_points_weights(self.L, self.N) + #compute collocation grid based on the admissible level indices + admissible_grid = self.generate_grid(self.admissible_idx) + #remove collocation points which have already been computed + new_points = setdiff2d(admissible_grid, self.xi_d) + + print('%d new points added' % new_points.shape[0]) + + #update the number of samples + self._number_of_samples += new_points.shape[0] + + #update the N-dimensional sparse grid if unsampled points are added + if new_points.shape[0] > 0: + self.xi_d = np.concatenate((self.xi_d, new_points)) + + #count the number of times the dimensions were adapted + self.number_of_adaptations += 1 def element_version(self): - return "0.4" + return "0.5" def is_finite(self): return True @@ -193,7 +356,21 @@ def get_restart_dict(self): "quadrature_rule": self.quadrature_rule, "count": self.count, "growth": self.growth, - "sparse": self.sparse} + "sparse": self.sparse, + "midpoint_level1": self.midpoint_level1, + "dimension_adaptive": self.dimension_adaptive} + + def save_state(self, filename): + print("Saving sampler state to %s" % filename) + file = open(filename, 'wb') + pickle.dump(self.__dict__, file) + file.close() + + def load_state(self, filename): + print("Loading sampler state from %s" % filename) + file = open(filename, 'rb') + self.__dict__ = pickle.load(file) + file.close() """ 
========================= @@ -201,9 +378,9 @@ def get_restart_dict(self): ========================= """ - def generate_grid(self, L, N, l_norm, dimensions=None): - if dimensions is None: - dimensions = range(N) + def generate_grid(self, l_norm): + + dimensions = range(self.N) H_L_N = [] # loop over all multi indices i for l in l_norm: @@ -218,8 +395,31 @@ def generate_grid(self, L, N, l_norm, dimensions=None): def compute_sparse_multi_idx(self, L, N): """ computes all N dimensional multi-indices l = (l1,...,lN) such that - |l| <= Q. Here |l| is the internal sum of i (l1+...+lN) + |l| <= L + N - 1, i.e. a simplex set: + 3 * + 2 * * (L=3 and N=2) + 1 * * * + 1 2 3 + Here |l| is the internal sum of i (l1+...+lN) """ P = np.array(list(product(range(1, L + 1), repeat=N))) - l_norm_le_q = P[np.where(np.sum(P, axis=1) <= L)[0]] - return l_norm_le_q + multi_idx = P[np.where(np.sum(P, axis=1) <= L + N - 1)[0]] + return multi_idx + + +def setdiff2d(X, Y): + """ + Computes the difference of two 2D arrays X \ Y + + Parameters + ---------- + X : 2D numpy array + Y : 2D numpy array + + Returns + ------- + The difference X \ Y as a 2D array + + """ + diff = set(map(tuple, X)) - set(map(tuple, Y)) + return np.array(list(diff)) diff --git a/log b/log new file mode 100644 index 000000000..e69de29bb diff --git a/tests/sc/poly_model.py b/tests/sc/poly_model.py new file mode 100755 index 000000000..535f4a8e6 --- /dev/null +++ b/tests/sc/poly_model.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +""" +Analytic isotropic function to test the SC sampler on +""" + +# scalar analytic test function, defined on [0, 1]**d +import json +import sys +import numpy as np + + +def poly_model(theta): + """ + A simple analytic test function + + Parameters + ---------- + theta : array of input parameters in [0, 1] + + Returns + ------- + (float) function value + """ + return np.prod(3 * theta**2 + 1) / 2**d + +# stocastic dimension of the problem +d = 2 + +# the json input file containing the values of the parameters, and the +# output file +json_input = sys.argv[1] + +with open(json_input, "r") as f: + inputs = json.load(f) +output_filename = inputs['outfile'] + +theta = [] +for i in range(d): + theta.append(float(inputs['x' + str(i + 1)])) +theta = np.array(theta) + +print(theta) + +# result = sobol_g_func(theta) +result = poly_model(theta) +# print(result) + +# output csv file +header = 'f' +np.savetxt(output_filename, np.array([result]), + delimiter=",", comments='', + header=header) diff --git a/tests/sc/poly_model_anisotropic.py b/tests/sc/poly_model_anisotropic.py new file mode 100755 index 000000000..1d593fa79 --- /dev/null +++ b/tests/sc/poly_model_anisotropic.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +""" +Analytic anisotropic function to test the SC sampler on +""" + +# scalar analytic test function, defined on [0, 1]**d +import json +import sys +import numpy as np + + +def poly_model(theta): + """ + Analytic test function where some parameters are more important than others. 
+ + Parameters + ---------- + theta : array of input parameters in [0, 1] + + Returns + ------- + (float) function value + """ + sol = 1.0 + for i in range(d): + sol *= 3 * a[i] * theta[i]**2 + 1.0 + return sol / 2**d + +# stochastic dimension of the problem +d = 3 +a = np.ones(d) * 0.01 +# effective dimension of the problem +effective_d = 1 +a[0:effective_d] = 1.0 + +# the json input file containing the values of the parameters, and the +# output file +json_input = sys.argv[1] + +with open(json_input, "r") as f: + inputs = json.load(f) +output_filename = inputs['outfile'] + +theta = [] +for i in range(d): + theta.append(float(inputs['x' + str(i + 1)])) +theta = np.array(theta) + +result = poly_model(theta) + +# output csv file +header = 'f' +np.savetxt(output_filename, np.array([result]), + delimiter=",", comments='', + header=header) diff --git a/tests/sc/poly_model_anisotropic.template b/tests/sc/poly_model_anisotropic.template new file mode 100644 index 000000000..6cf3d44f8 --- /dev/null +++ b/tests/sc/poly_model_anisotropic.template @@ -0,0 +1,2 @@ +{"outfile": "$out_file", "x1": "$x1", "x2": "$x2", "x3": "$x3", "x4": "$x4", "x5": "$x5", + "x6": "$x6", "x7": "$x7", "x8": "$x8", "x9": "$x9", "x10": "$x10"} diff --git a/tests/test_dimension_adaptive_SC.py b/tests/test_dimension_adaptive_SC.py new file mode 100755 index 000000000..07cbe2543 --- /dev/null +++ b/tests/test_dimension_adaptive_SC.py @@ -0,0 +1,126 @@ +import chaospy as cp +import numpy as np +import easyvvuq as uq +import matplotlib.pyplot as plt + +plt.close('all') + +# author: Wouter Edeling +__license__ = "LGPL" + + +def run_campaign(d, number_of_adaptations): + """ + Runs an EasyVVUQ campaign with the dimension-adaptive SC sampler + + Parameters + ---------- + d : int (max 10) the number of uncertain variables + number_of_adaptations : (int) how many adaptation steps are taken + + Returns + ------- + None.
+ + """ + # Set up a fresh campaign called "sc" + my_campaign = uq.Campaign(name='sc', work_dir='/tmp') + + # Define parameter space + params = {} + for i in range(10): + params["x%d" % (i + 1)] = {"type": "float", + "min": 0.0, + "max": 1.0, + "default": 0.5} + params["out_file"] = {"type": "string", "default": "output.csv"} + + output_filename = params["out_file"]["default"] + output_columns = ["f"] + + # Create an encoder, decoder and collation element + encoder = uq.encoders.GenericEncoder( + template_fname='tests/sc/poly_model_anisotropic.template', + delimiter='$', + target_filename='poly_in.json') + decoder = uq.decoders.SimpleCSV(target_filename=output_filename, + output_columns=output_columns, + header=0) + collater = uq.collate.AggregateSamples(average=False) + + # Add the SC app (automatically set as current app) + my_campaign.add_app(name="sc", + params=params, + encoder=encoder, + decoder=decoder, + collater=collater) + + # Create the sampler + vary = {} + for i in range(d): + vary["x%d" % (i + 1)] = cp.Uniform(0, 1) + + my_sampler = uq.sampling.SCSampler(vary=vary, polynomial_order=1, + quadrature_rule="C", + sparse=True, growth=True, + midpoint_level1=True, + dimension_adaptive=True) + + # Associate the sampler with the campaign + my_campaign.set_sampler(my_sampler) + + # Will draw all (of the finite set of samples) + my_campaign.draw_samples() + my_campaign.populate_runs_dir() + + # Run the samples using EasyVVUQ on the localhost + my_campaign.apply_for_each_run_dir(uq.actions.ExecuteLocal( + "tests/sc/poly_model_anisotropic.py poly_in.json")) + + my_campaign.collate() + data_frame = my_campaign.get_collation_result() + + # Post-processing analysis + analysis = uq.analysis.SCAnalysis(sampler=my_sampler, qoi_cols=output_columns) + + my_campaign.apply_analysis(analysis) + + for i in range(number_of_adaptations): + my_sampler.look_ahead(analysis.l_norm) + + my_campaign.draw_samples() + my_campaign.populate_runs_dir() + my_campaign.apply_for_each_run_dir(uq.actions.ExecuteLocal( + "tests/sc/poly_model_anisotropic.py poly_in.json")) + my_campaign.collate() + data_frame = my_campaign.get_collation_result() + analysis.adapt_dimension('f', data_frame) + + my_campaign.apply_analysis(analysis) + + results = my_campaign.get_last_analysis() + + analysis.plot_grid() + + #analytic mean and standard deviation (derived in the sketch after this diff) + a = np.ones(d) * 0.01 + effective_d = 1 + a[0:effective_d] = 1.0 + + ref_mean = np.prod(a[0:d] + 1) / 2**d + ref_std = np.sqrt(np.prod(9 * a[0:d]**2 / 5 + 2 * a[0:d] + 1) / 2**(2 * d) - ref_mean**2) + + print("======================================") + print("Number of samples = %d" % my_sampler._number_of_samples) + print("--------------------------------------") + print("Analytic mean = %.4f" % ref_mean) + print("Computed mean = %.4f" % results['statistical_moments']['f']['mean']) + print("--------------------------------------") + print("Analytic standard deviation = %.4f" % ref_std) + print("Computed standard deviation = %.4f" % results['statistical_moments']['f']['std']) + print("--------------------------------------") + print("First order Sobol indices =", results['sobols_first']['f']) + print("--------------------------------------") + +if __name__ == '__main__': + run_campaign(3, 6) diff --git a/tests/test_hierarchical_sparse_grid_sc.py b/tests/test_hierarchical_sparse_grid_sc.py new file mode 100755 index 000000000..bee07be77 --- /dev/null +++ b/tests/test_hierarchical_sparse_grid_sc.py @@ -0,0 +1,143 @@ +import os +import easyvvuq as uq +import numpy as np +import chaospy
as cp + + +def exact_sobols_poly_model(): + """ + Exact Sobol indices for the polynomial test model + """ + S_i = np.zeros(d) + + for i in range(d): + S_i[i] = 5**-(i + 1) / ((6 / 5)**d - 1) + + return S_i + +# number of unknown variables +d = 2 + +# author: Wouter Edeling +__license__ = "LGPL" + +HOME = os.path.abspath(os.path.dirname(__file__)) + + +#An EasyVVUQ campaign on sobol_model.py. Takes the polynomial order as input +def run_campaign(poly_order, work_dir='/tmp'): + # Set up a fresh campaign called "sc" + my_campaign = uq.Campaign(name='sc', work_dir=work_dir) + + # Define parameter space + params = { + "x1": { + "type": "float", + "min": 0.0, + "max": 1.0, + "default": 0.5}, + "x2": { + "type": "float", + "min": 0.0, + "max": 1.0, + "default": 0.5}, + "x3": { + "type": "float", + "min": 0.0, + "max": 1.0, + "default": 0.5}, + "x4": { + "type": "float", + "min": 0.0, + "max": 1.0, + "default": 0.5}, + "x5": { + "type": "float", + "min": 0.0, + "max": 1.0, + "default": 0.5}, + "x6": { + "type": "float", + "min": 0.0, + "max": 1.0, + "default": 0.5}, + "out_file": { + "type": "string", + "default": "output.csv"}} + + output_filename = params["out_file"]["default"] + output_columns = ["f"] + + # Create an encoder, decoder and collation element + encoder = uq.encoders.GenericEncoder( + template_fname=HOME + '/sc/sobol.template', + delimiter='$', + target_filename='poly_in.json') + decoder = uq.decoders.SimpleCSV(target_filename=output_filename, + output_columns=output_columns, + header=0) + collater = uq.collate.AggregateSamples(average=False) + + # Add the SC app (automatically set as current app) + my_campaign.add_app(name="sc", + params=params, + encoder=encoder, + decoder=decoder, + collater=collater) + + # Create the sampler + vary = { + "x1": cp.Uniform(0.0, 1.0), + "x2": cp.Uniform(0.0, 1.0)} + + #To use 'next_level_sparse_grid' below, we must select a nested + #sparse grid here + my_sampler = uq.sampling.SCSampler(vary=vary, polynomial_order=poly_order, + quadrature_rule="C", sparse=True, + growth=True) + + # Associate the sampler with the campaign + my_campaign.set_sampler(my_sampler) + + print('Number of samples:', my_sampler._number_of_samples) + + # Will draw all (of the finite set of samples) + my_campaign.draw_samples() + my_campaign.populate_runs_dir() + + # Use this instead to run the samples using EasyVVUQ on the localhost + my_campaign.apply_for_each_run_dir(uq.actions.ExecuteLocal( + "tests/sc/poly_model.py poly_in.json")) + + my_campaign.collate() + + # Post-processing analysis + analysis = uq.analysis.SCAnalysis(sampler=my_sampler, qoi_cols=output_columns) + my_campaign.apply_analysis(analysis) + results = my_campaign.get_last_analysis() + + #update the sparse grid to the next level + my_sampler.next_level_sparse_grid() + + #draw the new samples + my_campaign.draw_samples() + my_campaign.populate_runs_dir() + + my_campaign.apply_for_each_run_dir(uq.actions.ExecuteLocal( + "tests/sc/poly_model.py poly_in.json")) + + my_campaign.collate() + my_campaign.apply_analysis(analysis) + results = my_campaign.get_last_analysis() + + #check the computed Sobol indices against the analytical result + for i in range(ref_sobols.size): + print('Exact Sobol indices order %d = %.4f' % (i + 1, ref_sobols[i])) + print('Computed Sobol indices', results['sobols']['f']) + +if __name__ == '__main__': + + #analytic Sobol indices + ref_sobols = exact_sobols_poly_model() + + run_campaign(poly_order=2) diff --git a/tests/test_stochastic_collocation.py b/tests/test_stochastic_collocation.py 
index a30b80332..10843f1de 100644 --- a/tests/test_stochastic_collocation.py +++ b/tests/test_stochastic_collocation.py @@ -5,14 +5,14 @@ import pickle import numpy as np - -def test_l_n_exception(): - vary = { - "gravity": cp.Uniform(9.8, 1.0), - "mass": cp.Uniform(2.0, 10.0), - } - with pytest.raises(RuntimeError): - sampler = uq.sampling.SCSampler(vary=vary, polynomial_order=1, sparse=True) +#No longer required +# def test_l_n_exception(): +# vary = { +# "gravity": cp.Uniform(9.8, 1.0), +# "mass": cp.Uniform(2.0, 10.0), +# } +# with pytest.raises(RuntimeError): +# sampler = uq.sampling.SCSampler(vary=vary, polynomial_order=1, sparse=True) def test_lagrange_poly(): diff --git a/tests/test_stochastic_collocation_sampler.py b/tests/test_stochastic_collocation_sampler.py index 53ec7e462..0890c8688 100644 --- a/tests/test_stochastic_collocation_sampler.py +++ b/tests/test_stochastic_collocation_sampler.py @@ -38,7 +38,7 @@ def test_sampler(sc_sampler): def test_generate_grid(sc_sampler): - grid = sc_sampler.generate_grid(5, 2, np.array([[2, 5]])) + grid = sc_sampler.generate_grid(np.array([[2, 5]])) assert((grid == np.array([(111.27016653792582, 0.9533765242898424), (111.27016653792582, 0.9669395306766867), (111.27016653792582, 0.98806904069584), @@ -64,9 +64,14 @@ def test_cmpute_sparse_multi_idx(sc_sampler): [1, 2], [1, 3], [1, 4], + [1, 5], [2, 1], [2, 2], [2, 3], + [2, 4], [3, 1], [3, 2], - [4, 1]])).all()) + [3, 3], + [4, 1], + [4, 2], + [5, 1]])).all())
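
A minimal standalone sketch (not part of the patch above) of the combination-technique weights used in combination_technique: for the isotropic simplex index set {l >= 1 : |l| <= L + N - 1}, only indices with |l| >= L carry a nonzero coefficient (-1)**(L + N - |l| - 1) * C(N - 1, |l| - L), and these coefficients sum to 1, which is why the sparse rule reproduces constants exactly. The helper name combination_coefficients is illustrative, not EasyVVUQ API.

import numpy as np
from itertools import product
from scipy.special import comb


def combination_coefficients(L, N):
    """Simplex multi-indices and their Smolyak combination coefficients."""
    # simplex set of multi indices, as in SCSampler.compute_sparse_multi_idx
    P = np.array(list(product(range(1, L + 1), repeat=N)))
    multi_idx = P[np.sum(P, axis=1) <= L + N - 1]
    coef = np.zeros(multi_idx.shape[0])
    for i, l in enumerate(multi_idx):
        # indices with |l| < L have zero weight in the combination technique
        if np.sum(l) >= L:
            coef[i] = (-1)**(L + N - np.sum(l) - 1) * comb(N - 1, np.sum(l) - L)
    return multi_idx, coef


if __name__ == '__main__':
    for L, N in [(3, 2), (2, 3), (4, 3)]:
        _, coef = combination_coefficients(L, N)
        # the nonzero coefficients always sum to 1
        print(L, N, coef.sum())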
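The reference values in test_dimension_adaptive_SC.py follow from the product structure of the test function f(theta) = prod_i (3*a_i*theta_i**2 + 1) / 2**d with independent theta_i ~ U(0, 1): E[3*a*x**2 + 1] = a + 1 and E[(3*a*x**2 + 1)**2] = 9*a**2/5 + 2*a + 1, so the exact mean is prod(a + 1) / 2**d and the exact variance is prod(9*a**2/5 + 2*a + 1) / 2**(2*d) minus the squared mean. A small Monte Carlo cross-check of these formulas (helper name analytic_moments is hypothetical, not part of the patch):

import numpy as np


def analytic_moments(a):
    """Exact mean and std of prod(3*a*x**2 + 1)/2**d with x ~ U(0, 1)."""
    d = a.size
    mean = np.prod(a + 1) / 2**d
    var = np.prod(9 * a**2 / 5 + 2 * a + 1) / 2**(2 * d) - mean**2
    return mean, np.sqrt(var)


if __name__ == '__main__':
    a = np.array([1.0, 0.01, 0.01])   # same anisotropy as the d = 3 test case
    rng = np.random.default_rng(42)
    theta = rng.uniform(size=(200000, a.size))
    f = np.prod(3 * a * theta**2 + 1, axis=1) / 2**a.size
    print('analytic   :', analytic_moments(a))
    print('monte carlo:', (f.mean(), f.std()))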
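The admissibility rule applied in look_ahead can be isolated in a few lines: a forward neighbour l of the accepted index set is admissible when every backward neighbour l - e_n with l_n > 1 is already accepted, while backward neighbours containing a zero lie outside the grid and are ignored. A sketch with an illustrative helper name (is_admissible), not EasyVVUQ API:

import numpy as np


def is_admissible(l, accepted):
    """l: 1D int array, accepted: 2D int array of accepted multi-indices."""
    N = l.size
    e = np.eye(N, dtype=int)
    for n in range(N):
        backward = l - e[n]
        in_set = np.any((backward == accepted).all(axis=1))
        # a backward neighbour with a zero entry lies outside the grid: ignore it
        if not in_set and backward[n] != 0:
            return False
    return True


if __name__ == '__main__':
    accepted = np.array([[1, 1], [2, 1]])
    print(is_admissible(np.array([3, 1]), accepted))  # True: (2, 1) is accepted
    print(is_admissible(np.array([2, 2]), accepted))  # False: (1, 2) is missing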