From c3ec61bf608ef41abbac0e11e13cf36784d94f16 Mon Sep 17 00:00:00 2001 From: John Readey Date: Tue, 21 Jan 2025 21:11:48 +0800 Subject: [PATCH] support queries with alphanum field names (#415) --- hsds/attr_sn.py | 9 ++++++--- hsds/chunk_crawl.py | 3 +-- hsds/chunk_dn.py | 1 + hsds/chunk_sn.py | 12 ++++++++---- hsds/datanode_lib.py | 1 - hsds/domain_crawl.py | 4 +--- hsds/dset_lib.py | 12 ++++++++++-- hsds/servicenode_lib.py | 5 ++--- hsds/util/chunkUtil.py | 23 ++++++++++++++--------- hsds/util/dsetUtil.py | 4 ++-- tests/unit/bool_parser_test.py | 7 +++++++ tests/unit/chunk_util_test.py | 3 ++- 12 files changed, 54 insertions(+), 30 deletions(-) diff --git a/hsds/attr_sn.py b/hsds/attr_sn.py index 653ce0c6..b7ecdce4 100755 --- a/hsds/attr_sn.py +++ b/hsds/attr_sn.py @@ -433,11 +433,11 @@ def _getValueFromRequest(body, data_type, data_shape): # check to see if this works with our shape and type try: + arr = bytesToArray(data, arr_dtype, np_dims) + except ValueError as e: log.debug(f"data: {data}") log.debug(f"type: {arr_dtype}") log.debug(f"np_dims: {np_dims}") - arr = bytesToArray(data, arr_dtype, np_dims) - except ValueError as e: msg = f"Bad Request: encoded input data doesn't match shape and type: {e}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -1405,7 +1405,10 @@ async def DELETE_Attributes(request): # the query string attr_names = attr_names_query_string.split(separator) log.info(f"delete {len(attr_names)} attributes for {obj_id}") - log.debug(f"attr_names: {attr_names}") + if len(attr_names) < 10: + log.debug(f"attr_names: {attr_names}") + else: + log.debug(f"attr_names: deleting {len(attr_names)} attributes") username, pswd = getUserPasswordFromRequest(request) await validateUserPassword(app, username, pswd) diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index 67ee4b2a..847f0933 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -393,7 +393,6 @@ async def read_chunk_hyperslab( else: # convert binary data to numpy array try: - 
log.debug(f"np_arr.dtype: {np_arr.dtype}") log.debug(f"chunk_shape: {chunk_shape}") chunk_arr = bytesToArray(array_data, np_arr.dtype, chunk_shape) except ValueError as ve: @@ -408,7 +407,7 @@ async def read_chunk_hyperslab( raise HTTPInternalServerError() chunk_arr = chunk_arr.reshape(chunk_shape) - log.debug(f"chunk_arr shape: {chunk_arr.shape}, dtype: {chunk_arr.dtype}") + log.debug(f"chunk_arr shape: {chunk_arr.shape}") log.debug(f"data_sel: {data_sel}") log.debug(f"np_arr shape: {np_arr.shape}") diff --git a/hsds/chunk_dn.py b/hsds/chunk_dn.py index e6384a21..e2671b61 100644 --- a/hsds/chunk_dn.py +++ b/hsds/chunk_dn.py @@ -546,6 +546,7 @@ async def GET_Chunk(request): msg = f"chunk {chunk_id} no results for query: {query}" log.debug(msg) raise HTTPNotFound() + log.debug(f"test - got output_arr: {output_arr}") else: # read selected data from chunk output_arr = chunkReadSelection(chunk_arr, slices=selection, select_dt=select_dt) diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 2d9a295b..68575007 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -263,7 +263,11 @@ def _getSelectDtype(params, dset_dtype, body=None): msg = f"invalid fields selection: {te}" log.warn(msg) raise HTTPBadRequest(reason=msg) - log.debug(f"using select dtype: {select_dtype}") + if select_dtype: + if len(select_dtype) < 10: + log.debug(f"using select dtype: {select_dtype}") + else: + log.debug(f"using select_dtype with {len(select_dtype)} fields") else: select_dtype = dset_dtype # return all fields @@ -873,7 +877,7 @@ async def PUT_Value(request): log.warn(msg) raise HTTPBadRequest(reason=msg) - msg = f"PUT value - numpy array shape: {arr.shape} dtype: {arr.dtype}" + msg = f"PUT value - numpy array shape: {arr.shape}" log.debug(msg) elif request_type == "json": @@ -1012,7 +1016,8 @@ async def GET_Value(request): select_dtype = _getSelectDtype(params, dset_dtype) log.debug(f"GET Value selection: {slices}") - log.debug(f"dset_dtype: {dset_dtype}, select_dtype: {select_dtype}") + 
if len(dset_dtype) < 10: + log.debug(f"dset_dtype: {dset_dtype}, select_dtype: {select_dtype}") limit = _getLimit(params) @@ -1317,7 +1322,6 @@ async def POST_Value(request): slices = _getSelect(params, dset_json, body=body) select_dtype = _getSelectDtype(params, dset_dtype, body=body) - log.debug(f"got select_dtype: {select_dtype}") if request_type == "json": if "points" in body: diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py index 0dc23560..08ecc52a 100644 --- a/hsds/datanode_lib.py +++ b/hsds/datanode_lib.py @@ -1022,7 +1022,6 @@ async def get_chunk( filters = getFilters(dset_json) dset_id = dset_json["id"] filter_ops = getFilterOps(app, dset_id, filters, dtype=dt, chunk_shape=chunk_dims) - log.debug(f"filter_ops: {filter_ops}") if s3path: if s3path.startswith("s3://"): diff --git a/hsds/domain_crawl.py b/hsds/domain_crawl.py index abf459f8..b8e0ba39 100644 --- a/hsds/domain_crawl.py +++ b/hsds/domain_crawl.py @@ -290,7 +290,6 @@ async def get_obj_json(self, obj_id): self._obj_dict[obj_id] = {"status": status} log.debug(f"DomainCrawler - got json for {obj_id}") - log.debug(f"obj_json: {obj_json}") log.debug("store obj json") self._obj_dict[obj_id] = obj_json # store the obj_json @@ -432,7 +431,6 @@ def get_status(self): status = None for obj_id in self._obj_dict: item = self._obj_dict[obj_id] - log.debug(f"item: {item}") if "status" in item: item_status = item["status"] if status is None or item_status > status: @@ -564,4 +562,4 @@ async def fetch(self, obj_id): msg = f"DomainCrawler - fetch complete obj_id: {obj_id}, " msg += f"{len(self._obj_dict)} objects found" log.debug(msg) - log.debug(f"obj_dict: {self._obj_dict}") + log.debug(f"obj_dict: {len(self._obj_dict)} items") diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index 060bb703..1fe89b3e 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -532,7 +532,11 @@ async def doReadSelection( log.debug(f"chunk_ids: {chunk_ids}") else: log.debug(f"chunk_ids: {chunk_ids[:10]} ...") - 
log.debug(f"doReadSelection - select_dtype: {select_dtype}") + if select_dtype: + if len(select_dtype) < 10: + log.debug(f"doReadSelection - select_dtype: {select_dtype}") + else: + log.debug(f"doReadSelection - select_dtype: {len(select_dtype)} fields") type_json = dset_json["type"] item_size = getItemSize(type_json) @@ -545,7 +549,11 @@ async def doReadSelection( else: log.debug(f"query: {query} limit: {limit}") query_dtype = getQueryDtype(select_dtype) - log.debug(f"query_dtype: {query_dtype}") + if query_dtype: + if len(query_dtype) < 10: + log.debug(f"query_dtype: {query_dtype}") + else: + log.debug(f"query_dtype {len(query_dtype)}") # create array to hold response data arr = None diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 23511772..c8c84f75 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -343,7 +343,7 @@ async def getObjectJson(app, continue cache_obj[k] = obj_json[k] meta_cache[obj_id] = cache_obj - log.debug(f"stored {cache_obj} in meta_cache") + log.debug(f"stored {obj_id} in meta_cache") return obj_json @@ -1199,7 +1199,6 @@ async def createObjectByPath(app, # create a link to the new object await putHardLink(app, parent_id, link_title, tgt_id=obj_id, bucket=bucket) parent_id = obj_id # new parent - log.info(f"createObjectByPath {h5path} done") - log.debug(f" returning obj_json: {obj_json}") + log.info(f"createObjectByPath {h5path} done, returning obj_json") return obj_json diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py index 170ea007..dc03cc89 100644 --- a/hsds/util/chunkUtil.py +++ b/hsds/util/chunkUtil.py @@ -891,14 +891,17 @@ def chunkReadSelection(chunk_arr, slices=None, select_dt=None): raise ValueError(msg) dt = chunk_arr.dtype - log.debug(f"dtype: {dt}") # get requested data output_arr = chunk_arr[slices] if len(select_dt) < len(dt): # do a field selection - log.debug(f"select_dtype: {select_dt}") + if select_dt: + if len(select_dt) < 10: + log.debug(f"select_dtype: {select_dt}") + 
else: + log.debug(f"select_dtype: {len(select_dt)} from {len(dt)} fields") # create an array with just the given fields arr = np.zeros(output_arr.shape, select_dt) # slot in each of the given fields @@ -1069,7 +1072,8 @@ def chunkWritePoints(chunk_id=None, dset_dtype = chunk_arr.dtype if select_dt is None: select_dt = dset_dtype # no field selection - log.debug(f"dtype: {dset_dtype}") + else: + log.debug(f"select dtype: {dset_dtype}") # point_arr should have the following type: # (coord1, coord2, ...) | select_dtype @@ -1255,7 +1259,7 @@ def _getEvalStr(query, arr_name, field_names): for item in black_list: if item in field_names: msg = "invalid field name" - log.warn("Bad query: " + msg) + log.warn(f"Bad query: {msg}") raise ValueError(msg) if query.startswith("where "): @@ -1283,7 +1287,6 @@ def _getEvalStr(query, arr_name, field_names): if var_name not in field_names: # invalid msg = f"query variable: {var_name}" - log.debug(f"field_names: {field_names}") log.warn("Bad query: " + msg) raise ValueError(msg) eval_str += arr_name + "['" + var_name + "']" @@ -1298,11 +1301,14 @@ def _getEvalStr(query, arr_name, field_names): elif ch in ("'", '"'): end_quote_char = ch eval_str += ch - elif ch.isalpha() or ch == "_": + elif ch.isalnum() or ch == "_": if ch == "b" and ch_next in ("'", '"'): eval_str += "b" # start of a byte string literal elif var_name is None: - var_name = ch # start of a variable + if ch.isalpha(): + var_name = ch # start of a variable + else: + eval_str += ch # assume a numeric value else: var_name += ch elif ch == "(" and end_quote_char is None: @@ -1366,8 +1372,7 @@ def chunkQuery( """ Run query on chunk and selection """ - msg = f"chunkQuery - chunk_id: {chunk_id} query: [{query}] slices: {slices}, " - msg += f"limit: {limit} select_dt: {select_dt}" + msg = f"chunkQuery - chunk_id: {chunk_id} query: [{query}] slices: {slices}, limit: {limit}" log.debug(msg) if not isinstance(chunk_arr, np.ndarray): diff --git a/hsds/util/dsetUtil.py 
b/hsds/util/dsetUtil.py index bfe24731..044127f0 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -162,7 +162,7 @@ def getFilterOps(app, dset_id, filters, dtype=None, chunk_shape=None): try: if dset_id in filter_map: - log.debug(f"returning filter from filter_map {filter_map[dset_id]}") + log.debug(f"returning filter from filter_map for dset: {dset_id}") return filter_map[dset_id] except TypeError: log.error(f"getFilterOps TypeError - dset_id: {dset_id} filter_map: {filter_map}") @@ -204,7 +204,7 @@ def getFilterOps(app, dset_id, filters, dtype=None, chunk_shape=None): # save the chunk shape and dtype filter_ops["chunk_shape"] = chunk_shape filter_ops["dtype"] = dtype - log.debug(f"save filter ops: {filter_ops} for {dset_id}") + log.debug(f"save filter ops for {dset_id}") filter_map[dset_id] = filter_ops # save return filter_ops diff --git a/tests/unit/bool_parser_test.py b/tests/unit/bool_parser_test.py index 59078e0b..097daed5 100755 --- a/tests/unit/bool_parser_test.py +++ b/tests/unit/bool_parser_test.py @@ -22,6 +22,13 @@ def __init__(self, *args, **kwargs): # main def testExpressions(self): + p = BooleanParser("x1 < 42") + variables = p.getVariables() + self.assertEqual(len(variables), 1) + self.assertTrue("x1" in variables) + self.assertTrue(p.evaluate({"x1": 24})) + eval_str = p.getEvalStr() + self.assertEqual(eval_str, "x1 < 42.0") p = BooleanParser("x1 < 42") variables = p.getVariables() diff --git a/tests/unit/chunk_util_test.py b/tests/unit/chunk_util_test.py index 22954466..37d1e512 100755 --- a/tests/unit/chunk_util_test.py +++ b/tests/unit/chunk_util_test.py @@ -1408,6 +1408,7 @@ def testChunkIterator3d(self): def testGetEvalStr(self): queries = {} queries["date == 23"] = "rows['date'] == 23" + queries["tgt123 == 456"] = "rows['tgt123'] == 456" queries["wind == b'W 5'"] = "rows['wind'] == b'W 5'" queries["temp > 61"] = "rows['temp'] > 61" queries["(date >= 22) & (date <= 24)"] = "(rows['date'] >= 22) & (rows['date'] <= 24)" @@ 
-1419,7 +1420,7 @@ def testGetEvalStr(self): queries["date >= 22 where 'temp' in (61, 68, 72)"] = "rows['date'] >= 22" queries["date >= 22 where 'temp F' in (61, 68, 72)"] = "rows['date'] >= 22" - fields = ["date", "wind", "temp"] + fields = ["date", "wind", "temp", "tgt123"] for query in queries.keys(): eval_str = _getEvalStr(query, "rows", fields)