Skip to content

Commit

Permalink
feat: adding Progress Bar to Benchmark script (apache#15719)
Browse files (browse the repository at this point in the history)
* rough draft of benchmark script

* revisions

* revisions

* rough draft of benchmark script

* revisions

* Update requirements/development.in

Co-authored-by: Beto Dealmeida <roberto@dealmeida.net>

* Update superset/utils/mock_data.py

Co-authored-by: Beto Dealmeida <roberto@dealmeida.net>

* more revisions

Co-authored-by: Beto Dealmeida <roberto@dealmeida.net>
  • Loading branch information
2 people authored and cccs-RyanS committed Dec 17, 2021
1 parent f271e85 commit dbc0f42
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 92 deletions.
1 change: 1 addition & 0 deletions requirements/development.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ psycopg2-binary==2.8.5
tableschema
thrift>=0.11.0,<1.0.0
pygithub>=1.54.1,<2.0.0
progress>=1.5,<2
114 changes: 34 additions & 80 deletions requirements/development.txt
Original file line number Diff line number Diff line change
@@ -1,90 +1,44 @@
# SHA1:1b4d15a41f3498d2eb930ac3d3d4ce5d1f218a2f
# SHA1:c470411e2e9cb04b412a94f80a6a9d870bece74d
#
# This file is autogenerated by pip-compile-multi
# To update, run:
#
# pip-compile-multi
#
-r base.txt
-e file:.
# via -r requirements/base.in
boto3==1.16.10
# via tabulator
botocore==1.19.10
# via
# boto3
# s3transfer
cached-property==1.5.2
# via tableschema
certifi==2020.6.20
# via requests
deprecated==1.2.11
# via pygithub
et-xmlfile==1.0.1
# via openpyxl
flask-cors==3.0.9
# via -r requirements/development.in
future==0.18.2
# via pyhive
ijson==3.1.2.post0
# via tabulator
jdcal==1.4.1
# via openpyxl
jmespath==0.10.0
# via
# boto3
# botocore
jsonlines==1.2.0
# via tabulator
linear-tsv==1.1.0
# via tabulator
mysqlclient==1.4.2.post1
# via -r requirements/development.in
openpyxl==3.0.5
# via tabulator
pillow==7.2.0
# via -r requirements/development.in
psycopg2-binary==2.8.5
# via -r requirements/development.in
pydruid==0.6.1
# via -r requirements/development.in
pygithub==1.54.1
# via -r requirements/development.in
pyhive[hive]==0.6.3
# via -r requirements/development.in
requests==2.24.0
# via
# pydruid
# pygithub
# tableschema
# tabulator
rfc3986==1.4.0
# via tableschema
s3transfer==0.3.3
# via boto3
sasl==0.2.1
# via
# pyhive
# thrift-sasl
tableschema==1.20.0
# via -r requirements/development.in
tabulator==1.52.5
# via tableschema
thrift==0.13.0
# via
# -r requirements/development.in
# pyhive
# thrift-sasl
thrift-sasl==0.4.2
# via pyhive
unicodecsv==0.14.1
# via
# tableschema
# tabulator
wrapt==1.12.1
# via deprecated
xlrd==1.2.0
# via tabulator
-e file:. # via -r requirements/base.in
boto3==1.16.10 # via tabulator
botocore==1.19.10 # via boto3, s3transfer
cached-property==1.5.2 # via tableschema
certifi==2020.6.20 # via requests
deprecated==1.2.11 # via pygithub
et-xmlfile==1.0.1 # via openpyxl
flask-cors==3.0.9 # via -r requirements/development.in
future==0.18.2 # via pyhive
ijson==3.1.2.post0 # via tabulator
jdcal==1.4.1 # via openpyxl
jmespath==0.10.0 # via boto3, botocore
jsonlines==1.2.0 # via tabulator
linear-tsv==1.1.0 # via tabulator
mysqlclient==1.4.2.post1 # via -r requirements/development.in
openpyxl==3.0.5 # via tabulator
pillow==7.2.0 # via -r requirements/development.in
progress==1.5 # via -r requirements/development.in
psycopg2-binary==2.8.5 # via -r requirements/development.in
pydruid==0.6.1 # via -r requirements/development.in
pygithub==1.54.1 # via -r requirements/development.in
pyhive[hive]==0.6.3 # via -r requirements/development.in
requests==2.24.0 # via pydruid, pygithub, tableschema, tabulator
rfc3986==1.4.0 # via tableschema
s3transfer==0.3.3 # via boto3
sasl==0.2.1 # via pyhive, thrift-sasl
tableschema==1.20.0 # via -r requirements/development.in
tabulator==1.52.5 # via tableschema
thrift-sasl==0.4.2 # via pyhive
thrift==0.13.0 # via -r requirements/development.in, pyhive, thrift-sasl
unicodecsv==0.14.1 # via tableschema, tabulator
wrapt==1.12.1 # via deprecated
xlrd==1.2.0 # via tabulator

# The following packages are considered to be unsafe in a requirements file:
# setuptools
12 changes: 9 additions & 3 deletions scripts/benchmark_migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from flask_appbuilder import Model
from flask_migrate import downgrade, upgrade
from graphlib import TopologicalSorter # pylint: disable=wrong-import-order
from progress.bar import ChargingBar
from sqlalchemy import create_engine, inspect, Table
from sqlalchemy.ext.automap import automap_base

Expand Down Expand Up @@ -177,18 +178,23 @@ def main(
for model in models:
missing = min_entities - model_rows[model]
if missing > 0:
entities: List[Model] = []
print(f"- Adding {missing} entities to the {model.__name__} model")
bar = ChargingBar("Processing", max=missing)
try:
added_models = add_sample_rows(session, model, missing)
for entity in add_sample_rows(session, model, missing):
entities.append(entity)
bar.next()
except Exception:
session.rollback()
raise
bar.finish()
model_rows[model] = min_entities
session.add_all(entities)
session.commit()

if auto_cleanup:
new_models[model].extend(added_models)

new_models[model].extend(entities)
start = time.time()
upgrade(revision=revision)
duration = time.time() - start
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = superset
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,pkg_resources,polyline,prison,pyarrow,pyhive,pyparsing,pytest,pytz,redis,requests,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,werkzeug,wtforms,wtforms_json,yaml
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyparsing,pytest,pytz,redis,requests,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,werkzeug,wtforms,wtforms_json,yaml
multi_line_output = 3
order_by_type = false

Expand Down
14 changes: 6 additions & 8 deletions superset/utils/mock_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import string
import sys
from datetime import date, datetime, time, timedelta
from typing import Any, Callable, cast, Dict, List, Optional, Type
from typing import Any, Callable, cast, Dict, Iterator, List, Optional, Type
from uuid import uuid4

import sqlalchemy.sql.sqltypes
Expand Down Expand Up @@ -232,10 +232,11 @@ def generate_column_data(column: ColumnInfo, num_rows: int) -> List[Any]:
return [gen() for _ in range(num_rows)]


def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Model]:
def add_sample_rows(
session: Session, model: Type[Model], count: int
) -> Iterator[Model]:
"""
Add entities of a given model.
:param Model model: a Superset/FAB model
:param int count: how many entities to generate and insert
"""
Expand All @@ -245,7 +246,6 @@ def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Mo
relationships = inspector.relationships.items()
samples = session.query(model).limit(count).all() if relationships else []

entities: List[Model] = []
max_primary_key: Optional[int] = None
for i in range(count):
sample = samples[i % len(samples)] if samples else None
Expand Down Expand Up @@ -276,10 +276,8 @@ def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Mo
else:
kwargs[column.name] = generate_value(column)

entities.append(model(**kwargs))

session.add_all(entities)
return entities
entity = model(**kwargs)
yield entity


def get_valid_foreign_key(column: Column) -> Any:
Expand Down

0 comments on commit dbc0f42

Please sign in to comment.