From dbc0f42db1f4039d4b97f7c999cf4aa212f8ffce Mon Sep 17 00:00:00 2001 From: AAfghahi <48933336+AAfghahi@users.noreply.github.com> Date: Mon, 19 Jul 2021 19:34:58 -0400 Subject: [PATCH] feat: adding Progress Bar to Benchmark script (#15719) * rough draft of benchmark script * revisions * revisions * rough draft of benchmark script * revisions * Update requirements/development.in Co-authored-by: Beto Dealmeida * Update superset/utils/mock_data.py Co-authored-by: Beto Dealmeida * more revisions Co-authored-by: Beto Dealmeida --- requirements/development.in | 1 + requirements/development.txt | 114 ++++++++++----------------------- scripts/benchmark_migration.py | 12 +++- setup.cfg | 2 +- superset/utils/mock_data.py | 14 ++-- 5 files changed, 51 insertions(+), 92 deletions(-) diff --git a/requirements/development.in b/requirements/development.in index 5ac06dc543399..efa332051ccf4 100644 --- a/requirements/development.in +++ b/requirements/development.in @@ -25,3 +25,4 @@ psycopg2-binary==2.8.5 tableschema thrift>=0.11.0,<1.0.0 pygithub>=1.54.1,<2.0.0 +progress>=1.5,<2 diff --git a/requirements/development.txt b/requirements/development.txt index c065721174d3b..d6bc03be022bc 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -1,4 +1,4 @@ -# SHA1:1b4d15a41f3498d2eb930ac3d3d4ce5d1f218a2f +# SHA1:c470411e2e9cb04b412a94f80a6a9d870bece74d # # This file is autogenerated by pip-compile-multi # To update, run: @@ -6,85 +6,39 @@ # pip-compile-multi # -r base.txt --e file:. - # via -r requirements/base.in -boto3==1.16.10 - # via tabulator -botocore==1.19.10 - # via - # boto3 - # s3transfer -cached-property==1.5.2 - # via tableschema -certifi==2020.6.20 - # via requests -deprecated==1.2.11 - # via pygithub -et-xmlfile==1.0.1 - # via openpyxl -flask-cors==3.0.9 - # via -r requirements/development.in -future==0.18.2 - # via pyhive -ijson==3.1.2.post0 - # via tabulator -jdcal==1.4.1 - # via openpyxl -jmespath==0.10.0 - # via - # boto3 - # botocore -jsonlines==1.2.0 - # via tabulator -linear-tsv==1.1.0 - # via tabulator -mysqlclient==1.4.2.post1 - # via -r requirements/development.in -openpyxl==3.0.5 - # via tabulator -pillow==7.2.0 - # via -r requirements/development.in -psycopg2-binary==2.8.5 - # via -r requirements/development.in -pydruid==0.6.1 - # via -r requirements/development.in -pygithub==1.54.1 - # via -r requirements/development.in -pyhive[hive]==0.6.3 - # via -r requirements/development.in -requests==2.24.0 - # via - # pydruid - # pygithub - # tableschema - # tabulator -rfc3986==1.4.0 - # via tableschema -s3transfer==0.3.3 - # via boto3 -sasl==0.2.1 - # via - # pyhive - # thrift-sasl -tableschema==1.20.0 - # via -r requirements/development.in -tabulator==1.52.5 - # via tableschema -thrift==0.13.0 - # via - # -r requirements/development.in - # pyhive - # thrift-sasl -thrift-sasl==0.4.2 - # via pyhive -unicodecsv==0.14.1 - # via - # tableschema - # tabulator -wrapt==1.12.1 - # via deprecated -xlrd==1.2.0 - # via tabulator +-e file:. # via -r requirements/base.in +boto3==1.16.10 # via tabulator +botocore==1.19.10 # via boto3, s3transfer +cached-property==1.5.2 # via tableschema +certifi==2020.6.20 # via requests +deprecated==1.2.11 # via pygithub +et-xmlfile==1.0.1 # via openpyxl +flask-cors==3.0.9 # via -r requirements/development.in +future==0.18.2 # via pyhive +ijson==3.1.2.post0 # via tabulator +jdcal==1.4.1 # via openpyxl +jmespath==0.10.0 # via boto3, botocore +jsonlines==1.2.0 # via tabulator +linear-tsv==1.1.0 # via tabulator +mysqlclient==1.4.2.post1 # via -r requirements/development.in +openpyxl==3.0.5 # via tabulator +pillow==7.2.0 # via -r requirements/development.in +progress==1.5 # via -r requirements/development.in +psycopg2-binary==2.8.5 # via -r requirements/development.in +pydruid==0.6.1 # via -r requirements/development.in +pygithub==1.54.1 # via -r requirements/development.in +pyhive[hive]==0.6.3 # via -r requirements/development.in +requests==2.24.0 # via pydruid, pygithub, tableschema, tabulator +rfc3986==1.4.0 # via tableschema +s3transfer==0.3.3 # via boto3 +sasl==0.2.1 # via pyhive, thrift-sasl +tableschema==1.20.0 # via -r requirements/development.in +tabulator==1.52.5 # via tableschema +thrift-sasl==0.4.2 # via pyhive +thrift==0.13.0 # via -r requirements/development.in, pyhive, thrift-sasl +unicodecsv==0.14.1 # via tableschema, tabulator +wrapt==1.12.1 # via deprecated +xlrd==1.2.0 # via tabulator # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/scripts/benchmark_migration.py b/scripts/benchmark_migration.py index d226efbfd3058..d923ff4ab8bff 100644 --- a/scripts/benchmark_migration.py +++ b/scripts/benchmark_migration.py @@ -29,6 +29,7 @@ from flask_appbuilder import Model from flask_migrate import downgrade, upgrade from graphlib import TopologicalSorter # pylint: disable=wrong-import-order +from progress.bar import ChargingBar from sqlalchemy import create_engine, inspect, Table from sqlalchemy.ext.automap import automap_base @@ -177,18 +178,23 @@ def main( for model in models: missing = min_entities - model_rows[model] if missing > 0: + entities: List[Model] = [] print(f"- Adding {missing} entities to the {model.__name__} model") + bar = ChargingBar("Processing", max=missing) try: - added_models = add_sample_rows(session, model, missing) + for entity in add_sample_rows(session, model, missing): + entities.append(entity) + bar.next() except Exception: session.rollback() raise + bar.finish() model_rows[model] = min_entities + session.add_all(entities) session.commit() if auto_cleanup: - new_models[model].extend(added_models) - + new_models[model].extend(entities) start = time.time() upgrade(revision=revision) duration = time.time() - start diff --git a/setup.cfg b/setup.cfg index afb034b3f1295..68dc35424f86e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ combine_as_imports = true include_trailing_comma = true line_length = 88 known_first_party = superset -known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,pkg_resources,polyline,prison,pyarrow,pyhive,pyparsing,pytest,pytz,redis,requests,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,werkzeug,wtforms,wtforms_json,yaml +known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyparsing,pytest,pytz,redis,requests,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,werkzeug,wtforms,wtforms_json,yaml multi_line_output = 3 order_by_type = false diff --git a/superset/utils/mock_data.py b/superset/utils/mock_data.py index bf36e59c1e35c..ce03553715949 100644 --- a/superset/utils/mock_data.py +++ b/superset/utils/mock_data.py @@ -22,7 +22,7 @@ import string import sys from datetime import date, datetime, time, timedelta -from typing import Any, Callable, cast, Dict, List, Optional, Type +from typing import Any, Callable, cast, Dict, Iterator, List, Optional, Type from uuid import uuid4 import sqlalchemy.sql.sqltypes @@ -232,10 +232,11 @@ def generate_column_data(column: ColumnInfo, num_rows: int) -> List[Any]: return [gen() for _ in range(num_rows)] -def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Model]: +def add_sample_rows( + session: Session, model: Type[Model], count: int +) -> Iterator[Model]: """ Add entities of a given model. - :param Model model: a Superset/FAB model :param int count: how many entities to generate and insert """ @@ -245,7 +246,6 @@ def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Mo relationships = inspector.relationships.items() samples = session.query(model).limit(count).all() if relationships else [] - entities: List[Model] = [] max_primary_key: Optional[int] = None for i in range(count): sample = samples[i % len(samples)] if samples else None @@ -276,10 +276,8 @@ def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Mo else: kwargs[column.name] = generate_value(column) - entities.append(model(**kwargs)) - - session.add_all(entities) - return entities + entity = model(**kwargs) + yield entity def get_valid_foreign_key(column: Column) -> Any: