Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding Progress Bar to Benchmark script #15719

Merged
merged 8 commits into from
Jul 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements/development.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ psycopg2-binary==2.8.5
tableschema
thrift>=0.11.0,<1.0.0
pygithub>=1.54.1,<2.0.0
progress>=1.5,<2
114 changes: 34 additions & 80 deletions requirements/development.txt
Original file line number Diff line number Diff line change
@@ -1,90 +1,44 @@
# SHA1:1b4d15a41f3498d2eb930ac3d3d4ce5d1f218a2f
# SHA1:c470411e2e9cb04b412a94f80a6a9d870bece74d
#
# This file is autogenerated by pip-compile-multi
# To update, run:
#
# pip-compile-multi
#
-r base.txt
-e file:.
# via -r requirements/base.in
boto3==1.16.10
# via tabulator
botocore==1.19.10
# via
# boto3
# s3transfer
cached-property==1.5.2
# via tableschema
certifi==2020.6.20
# via requests
deprecated==1.2.11
# via pygithub
et-xmlfile==1.0.1
# via openpyxl
flask-cors==3.0.9
# via -r requirements/development.in
future==0.18.2
# via pyhive
ijson==3.1.2.post0
# via tabulator
jdcal==1.4.1
# via openpyxl
jmespath==0.10.0
# via
# boto3
# botocore
jsonlines==1.2.0
# via tabulator
linear-tsv==1.1.0
# via tabulator
mysqlclient==1.4.2.post1
# via -r requirements/development.in
openpyxl==3.0.5
# via tabulator
pillow==7.2.0
# via -r requirements/development.in
psycopg2-binary==2.8.5
# via -r requirements/development.in
pydruid==0.6.1
# via -r requirements/development.in
pygithub==1.54.1
# via -r requirements/development.in
pyhive[hive]==0.6.3
# via -r requirements/development.in
requests==2.24.0
# via
# pydruid
# pygithub
# tableschema
# tabulator
rfc3986==1.4.0
# via tableschema
s3transfer==0.3.3
# via boto3
sasl==0.2.1
# via
# pyhive
# thrift-sasl
tableschema==1.20.0
# via -r requirements/development.in
tabulator==1.52.5
# via tableschema
thrift==0.13.0
# via
# -r requirements/development.in
# pyhive
# thrift-sasl
thrift-sasl==0.4.2
# via pyhive
unicodecsv==0.14.1
# via
# tableschema
# tabulator
wrapt==1.12.1
# via deprecated
xlrd==1.2.0
# via tabulator
-e file:. # via -r requirements/base.in
boto3==1.16.10 # via tabulator
botocore==1.19.10 # via boto3, s3transfer
cached-property==1.5.2 # via tableschema
certifi==2020.6.20 # via requests
deprecated==1.2.11 # via pygithub
et-xmlfile==1.0.1 # via openpyxl
flask-cors==3.0.9 # via -r requirements/development.in
future==0.18.2 # via pyhive
ijson==3.1.2.post0 # via tabulator
jdcal==1.4.1 # via openpyxl
jmespath==0.10.0 # via boto3, botocore
jsonlines==1.2.0 # via tabulator
linear-tsv==1.1.0 # via tabulator
mysqlclient==1.4.2.post1 # via -r requirements/development.in
openpyxl==3.0.5 # via tabulator
pillow==7.2.0 # via -r requirements/development.in
progress==1.5 # via -r requirements/development.in
psycopg2-binary==2.8.5 # via -r requirements/development.in
pydruid==0.6.1 # via -r requirements/development.in
pygithub==1.54.1 # via -r requirements/development.in
pyhive[hive]==0.6.3 # via -r requirements/development.in
requests==2.24.0 # via pydruid, pygithub, tableschema, tabulator
rfc3986==1.4.0 # via tableschema
s3transfer==0.3.3 # via boto3
sasl==0.2.1 # via pyhive, thrift-sasl
tableschema==1.20.0 # via -r requirements/development.in
tabulator==1.52.5 # via tableschema
thrift-sasl==0.4.2 # via pyhive
thrift==0.13.0 # via -r requirements/development.in, pyhive, thrift-sasl
unicodecsv==0.14.1 # via tableschema, tabulator
wrapt==1.12.1 # via deprecated
xlrd==1.2.0 # via tabulator

# The following packages are considered to be unsafe in a requirements file:
# setuptools
12 changes: 9 additions & 3 deletions scripts/benchmark_migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from flask_appbuilder import Model
from flask_migrate import downgrade, upgrade
from graphlib import TopologicalSorter # pylint: disable=wrong-import-order
from progress.bar import ChargingBar
from sqlalchemy import create_engine, inspect, Table
from sqlalchemy.ext.automap import automap_base

Expand Down Expand Up @@ -177,18 +178,23 @@ def main(
for model in models:
missing = min_entities - model_rows[model]
if missing > 0:
entities: List[Model] = []
print(f"- Adding {missing} entities to the {model.__name__} model")
bar = ChargingBar("Processing", max=missing)
try:
added_models = add_sample_rows(session, model, missing)
for entity in add_sample_rows(session, model, missing):
entities.append(entity)
bar.next()
except Exception:
session.rollback()
raise
bar.finish()
model_rows[model] = min_entities
session.add_all(entities)
session.commit()

if auto_cleanup:
new_models[model].extend(added_models)

new_models[model].extend(entities)
start = time.time()
upgrade(revision=revision)
duration = time.time() - start
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = superset
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,pkg_resources,polyline,prison,pyarrow,pyhive,pyparsing,pytest,pytz,redis,requests,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,werkzeug,wtforms,wtforms_json,yaml
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyparsing,pytest,pytz,redis,requests,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,werkzeug,wtforms,wtforms_json,yaml
multi_line_output = 3
order_by_type = false

Expand Down
14 changes: 6 additions & 8 deletions superset/utils/mock_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import string
import sys
from datetime import date, datetime, time, timedelta
from typing import Any, Callable, cast, Dict, List, Optional, Type
from typing import Any, Callable, cast, Dict, Iterator, List, Optional, Type
from uuid import uuid4

import sqlalchemy.sql.sqltypes
Expand Down Expand Up @@ -232,10 +232,11 @@ def generate_column_data(column: ColumnInfo, num_rows: int) -> List[Any]:
return [gen() for _ in range(num_rows)]


def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Model]:
def add_sample_rows(
session: Session, model: Type[Model], count: int
) -> Iterator[Model]:
"""
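Generate entities of a given model, yielding them one at a time.

The entities are not added to the session; the caller is responsible for
adding and committing them.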

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keep this empty line.

:param Session session: session used to query existing rows as samples
:param Model model: a Superset/FAB model
:param int count: how many entities to generate
"""
Expand All @@ -245,7 +246,6 @@ def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Mo
relationships = inspector.relationships.items()
samples = session.query(model).limit(count).all() if relationships else []

entities: List[Model] = []
max_primary_key: Optional[int] = None
for i in range(count):
sample = samples[i % len(samples)] if samples else None
Expand Down Expand Up @@ -276,10 +276,8 @@ def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Mo
else:
kwargs[column.name] = generate_value(column)

entities.append(model(**kwargs))

session.add_all(entities)
return entities
entity = model(**kwargs)
yield entity


def get_valid_foreign_key(column: Column) -> Any:
Expand Down