from jinja2 import Environment, FileSystemLoader
from library.python import resource


class Loader(FileSystemLoader):
    """Jinja2 loader that layers extra template sources over the filesystem.

    ``get_source`` resolves a template name in this order: registered links,
    in-memory sources added with ``add_source``, data returned by
    ``resource.find``, and finally the ``FileSystemLoader`` search path.
    Every resolved template name is recorded in ``self.loaded``.
    """

    def __init__(self, searchpath, encoding='utf-8', followlinks=False):
        super().__init__(searchpath, encoding, followlinks)
        self.static = {}  # template name -> template text
        self.links = {}   # alias name -> target template name
        self.loaded = {}  # resolved template names (keys only; value is always 1)

    def get_source(self, environment, template):
        """Return the ``(source, name, uptodate)`` triple for *template*.

        A link is followed by resolving its target recursively, so the
        returned name is the one reported for the target.  In-memory and
        resource templates report themselves as always up to date.
        Anything else is delegated to ``FileSystemLoader.get_source``.
        """
        if template in self.links:
            # NOTE(review): a cycle in self.links would recurse without bound.
            text, tmpl, flag = self.get_source(environment, self.links[template])
        elif template in self.static:
            text, tmpl, flag = self.static[template], template, lambda: True
        else:
            # Look the resource up once and reuse the result.
            data = resource.find(template)
            if data is not None:
                text, tmpl, flag = data.decode('utf-8'), template, lambda: True
            else:
                text, tmpl, flag = super().get_source(environment, template)

        self.loaded[tmpl] = 1
        return text, tmpl, flag

    def add_source(self, name, content):
        """Register *content* as the in-memory source of template *name*."""
        self.static[name] = content

    def add_link(self, f, to):
        """Make template name *f* resolve to template *to*."""
        self.links[f] = to


class Builder:
    """Collects templates and variables and renders them with Jinja2."""

    def __init__(self, paths=None):
        """Create a builder.

        *paths* is the filesystem search path handed to the loader; when
        omitted an empty search path is used.
        """
        # ``None`` instead of a shared mutable ``[]`` default.
        self.loader = Loader([] if paths is None else paths)
        self.env = Environment(loader=self.loader)
        self.vars = {}

    def add(self, name: str, text: str):
        """Register the template *text* under *name*."""
        self.loader.add_source(name, text)

    def add_link(self, f: str, to: str):
        """Make template name *f* an alias of template *to*."""
        self.loader.add_link(f, to)

    def add_from_file(self, name: str, file: str):
        """Register the contents of *file* as template *name*."""
        # The previous code called a non-existent ``self.template`` here and
        # raised AttributeError; ``add`` is the registration method.
        with open(file, "r", encoding="utf-8") as f:
            self.add(name, f.read())

    def add_vars(self, v):
        """Merge the mapping *v* into the render variables."""
        self.vars.update(v)

    def expose_vars(self, name: str):
        """Copy the public top-level names of template *name* into the variables.

        Names starting with an underscore are skipped.
        """
        module = self.env.get_template(name).module
        for key, value in vars(module).items():
            if not key.startswith("_"):
                self.vars[key] = value

    def replace_vars(self, v):
        """Replace the render variables with the mapping *v*."""
        self.vars = v

    def build(self, name: str, expose_all=False):
        """Render template *name* with the collected variables.

        With *expose_all* set, the template is first rendered with an empty
        context so that the loader records every template it pulls in; the
        public names of each recorded template other than *name* are then
        merged into the variables before the final render.
        """
        t = self.env.get_template(name)
        if expose_all:
            t.render({})
            # Iterate over a snapshot: expose_vars may load further templates,
            # which would resize ``loaded`` during iteration.
            # NOTE: ``loaded`` is never cleared, so names recorded by earlier
            # builds are exposed as well.
            for loaded_name in list(self.loader.loaded):
                if loaded_name != name:
                    self.expose_vars(loaded_name)

        return t.render(self.vars)
class ResultFormatter:
    """Rewrites query result cells in place into a canonical text form.

    Each result is a dict with a ``'Type'`` descriptor of the form
    ``['ListType', ['StructType', [[column, type], ...]]]`` and a ``'Data'``
    list of rows.  Optional cells (lists) are unwrapped, with an empty list
    becoming ``'NULL'``; ``Double`` columns are rendered with ``fmt``;
    ``Date`` columns (days since 1970-01-01) become ``YYYY-MM-DD``.
    """

    def __init__(self, fmt):
        # printf-style format applied to Double values, e.g. "%.2f".
        self.fmt = fmt

    def _is_type(self, t, name):
        """Tell whether descriptor *t* is *name*, possibly wrapped in OptionalType."""
        while True:
            if str(t[1]) == name:
                return True
            if t[0] != 'OptionalType':
                return False
            t = t[1]  # unwrap one OptionalType level and look again

    def _is_double(self, t):
        """Tell whether descriptor *t* denotes a (possibly optional) Double."""
        return self._is_type(t, 'Double')

    def _is_date(self, t):
        """Tell whether descriptor *t* denotes a (possibly optional) Date."""
        return self._is_type(t, 'Date')

    def _format_date(self, d):
        """Convert *d*, a day count since 1970-01-01, to ``YYYY-MM-DD``.

        Pure calendar arithmetic is used so the result does not depend on
        the local time zone of the machine (``datetime.fromtimestamp`` does).
        """
        import datetime
        day = datetime.date(1970, 1, 1) + datetime.timedelta(days=int(d))
        return day.isoformat()

    def _format_double(self, d):
        """Render float *d* with ``self.fmt`` and drop insignificant zeros.

        Trailing zeros are removed only from the fractional part, and never
        from exponent notation; a resulting negative zero becomes ``'0'``.
        """
        t = self.fmt % (d,)
        # Without the '.' guard an integer rendering such as '100' lost its
        # zeros; lower() also protects upper-case exponents ('1.0E+10').
        if '.' in t and 'e' not in t.lower():
            t = t.rstrip('0').rstrip('.')
        if t == '-0':
            t = '0'
        return t

    def _format(self, r):
        """Normalise every row of the single result *r* in place."""
        columns = r["Type"][1][1]
        doubles = [i for i, col in enumerate(columns) if self._is_double(col[1])]
        dates = [i for i, col in enumerate(columns) if self._is_date(col[1])]

        for row in r["Data"]:
            # Optional values arrive as lists: [] is NULL, [v] wraps v.
            for i, cell in enumerate(row):
                if isinstance(cell, list):
                    row[i] = cell[0] if cell else 'NULL'

            for i in doubles:
                if row[i] != 'NULL':
                    row[i] = self._format_double(float(row[i]))

            for i in dates:
                if row[i] != 'NULL':
                    row[i] = self._format_date(float(row[i]))

    def format(self, res):
        """Normalise in place every ``'Write'`` result of every item of *res*.

        Example item:
        ``{'Write': [{'Type': ['ListType', ['StructType', [['cntrycode',
        ['DataType', 'String']], ...]]], 'Data': [...]}]}``
        """
        for item in res:
            for result in item['Write']:
                self._format(result)
class Test(unittest.TestCase):
    """Unit tests for the template Builder and the ResultFormatter."""

    def test_create(self):
        """A Builder can be constructed without arguments."""
        Builder()

    def test_add(self):
        """A template added by name renders to its own text."""
        b = Builder()
        b.add("name", "text")
        text = b.build("name")
        self.assertEqual("text", text)

    def test_include_from_resource(self):
        """An include is resolved from the resource named 'test.txt'."""
        b = Builder()
        b.add("name", """
{% include 'test.txt' %}
Content

""")
        text = b.build("name")
        expected = """
Text
Content
"""
        self.assertEqual(expected, text)

    def test_add_vars(self):
        """Variables added to the builder are visible to the template."""
        b = Builder()
        b.add("name", "{{var}}")
        b.add_vars({"var": "text"})
        text = b.build("name")
        self.assertEqual("text", text)

    def test_include(self):
        """An include is resolved from another in-memory template."""
        b = Builder()
        b.add("include_name", "IncludeText")
        b.add("name", """
{% include 'include_name' %}
Content

""")
        text = b.build("name")
        expected = """
IncludeText
Content
"""
        self.assertEqual(expected, text)

    def test_linked_include(self):
        """An include through a link resolves to the link target."""
        b = Builder()
        b.add("include_name", "IncludeText")
        b.add_link("include_name1", "include_name")
        b.add("name", """
{% include 'include_name1' %}
Content

""")
        text = b.build("name")
        expected = """
IncludeText
Content
"""
        self.assertEqual(expected, text)

    def test_expose_var_from_include(self):
        """With expose_all, a variable set in an included template is rendered."""
        b = Builder()
        b.add("include_name", '{% set var = "VAR" %}')
        b.add("name", "{% include 'include_name' %}{{var}}")
        text = b.build("name", True)
        expected = "VAR"
        self.assertEqual(expected, text)

    def test_result_formatter(self):
        """Double cells are rounded with the format; other fields stay untouched."""
        # NOTE(review): the 'File' value was split by a raw line break in the
        # source of this test; '<main>' is the presumed original -- confirm.
        d = {
            'data': [{
                'Write': [{
                    'Type': [
                        'ListType', [
                            'StructType', [
                                ['cntrycode', ['DataType', 'String']],
                                ['numcust', ['DataType', 'Uint64']],
                                ['totacctbal', ['DataType', 'Double']]]]],
                    'Data': [
                        ['15', '893', '6702431.719999995'],
                        ['25', '877', '6511759.129999992'],
                        ['26', '859', '6394689.130000009'],
                        ['28', '909', '6710689.259999996'],
                        ['29', '948', '7158866.629999996'],
                        ['30', '909', '6808436.1299999915'],
                        ['31', '922', '6806670.179999996']]}],
                'Position': {'Column': '1', 'Row': '55', 'File': '<main>'}}],
            'errors': [],
            'id': '645e199819d7146f01c11bac',
            'issues': [],
            'status': 'COMPLETED',
            'updatedAt': '2023-05-12T10:49:07.967Z',
            'version': 1000000}

        f = ResultFormatter("%.2f")
        f.format(d['data'])

        expected = {
            'data': [{
                'Write': [{
                    'Type': [
                        'ListType', [
                            'StructType', [
                                ['cntrycode', ['DataType', 'String']],
                                ['numcust', ['DataType', 'Uint64']],
                                ['totacctbal', ['DataType', 'Double']]]]],
                    'Data': [
                        ['15', '893', '6702431.72'],
                        ['25', '877', '6511759.13'],
                        ['26', '859', '6394689.13'],
                        ['28', '909', '6710689.26'],
                        ['29', '948', '7158866.63'],
                        ['30', '909', '6808436.13'],
                        ['31', '922', '6806670.18']]}],
                'Position': {'Column': '1', 'Row': '55', 'File': '<main>'}}],
            'errors': [],
            'id': '645e199819d7146f01c11bac',
            'issues': [],
            'status': 'COMPLETED',
            'updatedAt': '2023-05-12T10:49:07.967Z',
            'version': 1000000}

        self.assertEqual(expected, d)

    def test_result_formatter_optional(self):
        """Optional cells are unwrapped and empty optionals become 'NULL'."""
        d = {
            'data': [{
                'Write': [{
                    'Type': [
                        'ListType', [
                            'StructType', [
                                ['cntrycode', ['DataType', 'String']],
                                ['numcust', ['OptionalType', ['DataType', 'Uint64']]],
                                ['totacctbal', ['OptionalType', ['DataType', 'Double']]]]]],
                    'Data': [
                        ['15', ['893'], ['6702431.719999995']],
                        ['16', [], []],
                    ]}]}]}

        expected = {
            'data': [{
                'Write': [{
                    'Type': [
                        'ListType', [
                            'StructType', [
                                ['cntrycode', ['DataType', 'String']],
                                ['numcust', ['OptionalType', ['DataType', 'Uint64']]],
                                ['totacctbal', ['OptionalType', ['DataType', 'Double']]]]]],
                    'Data': [
                        ['15', '893', '6702431.72'],
                        ['16', 'NULL', 'NULL'],
                    ]}]}]}

        f = ResultFormatter("%.2f")
        f.format(d['data'])
        self.assertEqual(expected, d)

    def test_result_formatter_zeros(self):
        """Trailing fractional zeros are dropped and negative zero becomes '0'."""
        d = {
            'data': [{
                'Write': [{
                    'Type': [
                        'ListType', [
                            'StructType', [
                                ['totacctbal', ['DataType', 'Double']]]]],
                    'Data': [
                        ['6702431.719999995'],
                        ['123.101'],
                        ['123.001'],
                        ['-0.001']
                    ]}]}]}

        expected = {
            'data': [{
                'Write': [{
                    'Type': [
                        'ListType', [
                            'StructType', [
                                ['totacctbal', ['DataType', 'Double']]]]],
                    'Data': [
                        ['6702431.72'],
                        ['123.1'],
                        ['123'],
                        ['0']
                    ]}]}]}

        f = ResultFormatter("%.2f")
        f.format(d['data'])
        self.assertEqual(expected, d)

    def test_result_formatter_dates(self):
        """Date cells given as day counts are rendered as YYYY-MM-DD."""
        d = {
            'data': [{
                'Write': [{
                    'Type': [
                        'ListType', [
                            'StructType', [
                                ['totacctbal', ['DataType', 'Date']]]]],
                    'Data': [
                        ['9076'],
                        ['9667']
                    ]}]}]}

        expected = {
            'data': [{
                'Write': [{
                    'Type': [
                        'ListType', [
                            'StructType', [
                                ['totacctbal', ['DataType', 'Date']]]]],
                    'Data': [
                        ['1994-11-07'],
                        ['1996-06-20']
                    ]}]}]}

        f = ResultFormatter("%.2f")
        f.format(d['data'])
        self.assertEqual(expected, d)
self.assertEqual(expected, d) diff --git a/ydb/library/benchmarks/template/ut/test.txt b/ydb/library/benchmarks/template/ut/test.txt new file mode 100644 index 000000000000..3de705a41ff2 --- /dev/null +++ b/ydb/library/benchmarks/template/ut/test.txt @@ -0,0 +1 @@ +Text diff --git a/ydb/library/benchmarks/template/ut/ya.make b/ydb/library/benchmarks/template/ut/ya.make new file mode 100644 index 000000000000..522393fa850e --- /dev/null +++ b/ydb/library/benchmarks/template/ut/ya.make @@ -0,0 +1,13 @@ +PY3TEST() + +OWNER(g:yql) + +TEST_SRCS(test.py) + +RESOURCE(test.txt test.txt) + +PEERDIR( + ydb/library/benchmarks/template +) + +END() diff --git a/ydb/library/benchmarks/template/ya.make b/ydb/library/benchmarks/template/ya.make new file mode 100644 index 000000000000..6c0648f288ac --- /dev/null +++ b/ydb/library/benchmarks/template/ya.make @@ -0,0 +1,14 @@ +PY3_LIBRARY() + +OWNER(g:yql) + +PY_SRCS(__init__.py) + +PEERDIR( + contrib/python/Jinja2 + library/python/resource +) + +END() + +RECURSE_FOR_TESTS(ut) diff --git a/ydb/library/benchmarks/ya.make b/ydb/library/benchmarks/ya.make new file mode 100644 index 000000000000..8859abdb3ca1 --- /dev/null +++ b/ydb/library/benchmarks/ya.make @@ -0,0 +1 @@ +RECURSE(template) diff --git a/ydb/library/ya.make b/ydb/library/ya.make index e101f1de537b..f12231a96242 100644 --- a/ydb/library/ya.make +++ b/ydb/library/ya.make @@ -6,6 +6,7 @@ RECURSE( arrow_kernels arrow_parquet backup + benchmarks binary_json chunks_limiter dynumber