From c6767e173d96f44299c8a2c29225c2a1e3db822e Mon Sep 17 00:00:00 2001
From: Vijayan Balasubramanian
Date: Thu, 8 Feb 2024 14:17:51 -0800
Subject: [PATCH] Fixed code review comments

Signed-off-by: Vijayan Balasubramanian
---
Review notes (free-form text in this position is ignored by git am / git apply):
 * Dropped the hunk that added _validate_neighbors_data_set(): nothing in
   this patch calls it, and it re-checked self.data_set_path and
   self.data_set_corpus rather than the neighbors_* attributes. __init__
   already validates the neighbors settings through the static
   _validate_data_set() helper (see the hunk at -1045).
 * The diffstat below was updated for the dropped hunk. The post-image blob id
   on the "index" line is the one from the original patch and no longer
   matches the result.
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch. If context lines fail to match, apply with
   "git apply --ignore-whitespace".
 * NOTE(review): neighbors_data_set_path / neighbors_data_set_corpus are read
   with params.get(), so they may be absent. Confirm that rejecting a
   configuration with neither of them is intended.

 osbenchmark/workload/params.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/osbenchmark/workload/params.py b/osbenchmark/workload/params.py
index a2aaf2f6e..3bacf756a 100644
--- a/osbenchmark/workload/params.py
+++ b/osbenchmark/workload/params.py
@@ -888,6 +888,7 @@ def __init__(self, workload, params, context: Context, **kwargs):
         self.data_set_format = parse_string_parameter("data_set_format", params)
         self.data_set_path = parse_string_parameter("data_set_path", params, "")
         self.data_set_corpus = parse_string_parameter("data_set_corpus", params, "")
+        self._validate_data_set(self.data_set_path, self.data_set_corpus)
         self.total_num_vectors: int = parse_int_parameter("num_vectors", params, -1)
         self.num_vectors = 0
         self.total = 1
@@ -914,10 +915,14 @@ def infinite(self):
     def _is_last_partition(partition_index, total_partitions):
         return partition_index == total_partitions - 1
 
-    def _validate_data_set(self):
-        if not self.data_set_path and not self.data_set_corpus:
+    @staticmethod
+    def _validate_data_set(file_path, corpus):
+        if not file_path and not corpus:
             raise exceptions.ConfigurationError(
                 "Dataset is missing. Provide either dataset file path or valid corpus.")
+        if file_path and corpus:
+            raise exceptions.ConfigurationError(
+                "Provide either dataset file path '%s' or corpus '%s'." % (file_path, corpus))
 
     @staticmethod
     def _validate_data_set_corpus(data_set_path_list):
@@ -939,7 +944,6 @@ def partition(self, partition_index, total_partitions):
         Returns:
             The parameter source for this particular partition
         """
-        self._validate_data_set()
         if self.data_set_corpus and not self.data_set_path:
             data_set_path = self._get_corpora_file_paths(self.data_set_corpus, self.data_set_format)
             self._validate_data_set_corpus(data_set_path)
@@ -1045,6 +1049,7 @@ def __init__(self, workloads, params, query_params, **kwargs):
             self.PARAMS_NAME_NEIGHBORS_DATA_SET_FORMAT, params, self.data_set_format)
         self.neighbors_data_set_path = params.get(self.PARAMS_NAME_NEIGHBORS_DATA_SET_PATH)
         self.neighbors_data_set_corpus = params.get(self.PARAMS_NAME_NEIGHBORS_DATA_SET_CORPUS)
+        self._validate_data_set(self.neighbors_data_set_path, self.neighbors_data_set_corpus)
         self.neighbors_data_set = None
         operation_type = parse_string_parameter(self.PARAMS_NAME_OPERATION_TYPE, params,
                                                 self.PARAMS_VALUE_VECTOR_SEARCH)