diff --git a/.github/workflows/wool.yml b/.github/workflows/wool.yml
new file mode 100644
index 00000000..59e96ae2
--- /dev/null
+++ b/.github/workflows/wool.yml
@@ -0,0 +1,17 @@
+on:
+  pull_request:
+  issue_comment:
+    types: [created, edited]
+
+name: Wool
+
+jobs:
+  runWool:
+    name: Run black
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@master
+
+      - uses: uc-cdis/wool@master
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.travis.yml b/.travis.yml
index 512cf9df..c9f06610 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,7 +3,7 @@ sudo: false
 language: python
 
 python:
-  - "2.7"
+  - "3.6"
 
 cache:
   - pip
diff --git a/Dockerfile b/Dockerfile
index 975343fa..5f9c2252 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,24 +1,39 @@
 # To run: docker run -v /path/to/wsgi.py:/var/www/peregrine/wsgi.py --name=peregrine -p 81:80 peregrine
-# To check running container: docker exec -it peregrine /bin/bash
+# To check running container: docker exec -it peregrine /bin/bash
 
-FROM quay.io/cdis/py27base:pybase2-1.0.2
+FROM quay.io/cdis/python-nginx:pybase3-1.1.0
 
-ENV DEBIAN_FRONTEND=noninteractive
+ENV appname=peregrine
 
-RUN mkdir /var/www/peregrine \
-    && chown www-data /var/www/peregrine
+RUN apk update \
+    && apk add postgresql-libs postgresql-dev libffi-dev libressl-dev \
+    && apk add linux-headers musl-dev gcc libxml2-dev libxslt-dev \
+    && apk add curl bash git vim
 
-COPY . /peregrine
+COPY . /$appname
 COPY ./deployment/uwsgi/uwsgi.ini /etc/uwsgi/uwsgi.ini
-WORKDIR /peregrine
+WORKDIR /$appname
 
-RUN pip install -r requirements.txt \
-    && COMMIT=`git rev-parse HEAD` && echo "COMMIT=\"${COMMIT}\"" >peregrine/version_data.py \
-    && VERSION=`git describe --always --tags` && echo "VERSION=\"${VERSION}\"" >>peregrine/version_data.py
+RUN python -m pip install --upgrade pip \
+    && python -m pip install --upgrade setuptools \
+    && pip --version \
+    && pip install -r requirements.txt
+
+RUN mkdir -p /var/www/$appname \
+    && mkdir -p /var/www/.cache/Python-Eggs/ \
+    && mkdir /run/nginx/ \
+    && ln -sf /dev/stdout /var/log/nginx/access.log \
+    && ln -sf /dev/stderr /var/log/nginx/error.log \
+    && chown nginx -R /var/www/.cache/Python-Eggs/ \
+    && chown nginx /var/www/$appname
 
 EXPOSE 80
 
-WORKDIR /var/www/peregrine
+RUN COMMIT=`git rev-parse HEAD` && echo "COMMIT=\"${COMMIT}\"" >$appname/version_data.py \
+    && VERSION=`git describe --always --tags` && echo "VERSION=\"${VERSION}\"" >>$appname/version_data.py \
+    && python setup.py install
+
+
+WORKDIR /var/www/$appname
 
-ENTRYPOINT [ "/bin/sh", "/dockerrun.sh" ]
-CMD []
+CMD /dockerrun.sh
diff --git a/TestDockerfile b/TestDockerfile
index e2bec02a..f24b3edc 100644
--- a/TestDockerfile
+++ b/TestDockerfile
@@ -18,12 +18,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libxml2-dev \
     libxslt1-dev \
     nginx \
-    python2.7 \
-    python-dev \
-    python-pip \
-    python-setuptools \
+    python3-dev \
+    python3-pip \
+    python3-setuptools \
     sudo \
     vim \
+    && ln -s /usr/bin/python3 /usr/bin/python \
     && python -m pip install --upgrade pip \
     && python -m pip install --upgrade setuptools \
     && python -m pip install --upgrade uwsgi \
diff --git a/bin/setup_notifications.py b/bin/setup_notifications.py
index 2345bdc9..52ecca9c 100644
--- a/bin/setup_notifications.py
+++ b/bin/setup_notifications.py
@@ -10,6 +10,8 @@ def setup(host, user, password, database):
     engine = create_engine(
         "postgres://{user}:{password}@{host}/{database}".format(
-            user=user, host=host, password=password, database=database))
+            user=user, host=host, password=password,
database=database + ) + ) Base.metadata.drop_all(engine) Base.metadata.create_all(engine) diff --git a/bin/setup_psqlgraph.py b/bin/setup_psqlgraph.py index ab831a66..068066ff 100755 --- a/bin/setup_psqlgraph.py +++ b/bin/setup_psqlgraph.py @@ -6,12 +6,13 @@ from psqlgraph import create_all, Node, Edge -def try_drop_test_data(user, database, root_user='postgres', host=''): +def try_drop_test_data(user, database, root_user="postgres", host=""): - print('Dropping old test data') + print("Dropping old test data") - engine = create_engine("postgres://{user}@{host}/postgres".format( - user=root_user, host=host)) + engine = create_engine( + "postgres://{user}@{host}/postgres".format(user=root_user, host=host) + ) conn = engine.connect() conn.execute("commit") @@ -19,44 +20,55 @@ def try_drop_test_data(user, database, root_user='postgres', host=''): try: create_stmt = 'DROP DATABASE "{database}"'.format(database=database) conn.execute(create_stmt) - except Exception, msg: + except Exception as msg: logging.warn("Unable to drop test data:" + str(msg)) conn.close() -def setup_database(user, password, database, root_user='postgres', - host='', no_drop=False, no_user=False): +def setup_database( + user, + password, + database, + root_user="postgres", + host="", + no_drop=False, + no_user=False, +): """ setup the user and database """ - print('Setting up test database') + print("Setting up test database") if not no_drop: try_drop_test_data(user, database) - engine = create_engine("postgres://{user}@{host}/postgres".format( - user=root_user, host=host)) + engine = create_engine( + "postgres://{user}@{host}/postgres".format(user=root_user, host=host) + ) conn = engine.connect() conn.execute("commit") create_stmt = 'CREATE DATABASE "{database}"'.format(database=database) try: conn.execute(create_stmt) - except Exception, msg: - logging.warn('Unable to create database: {}'.format(msg)) + except Exception as msg: + logging.warn("Unable to create database: {}".format(msg)) if not no_user: try: user_stmt = "CREATE USER {user} WITH PASSWORD '{password}'".format( - user=user, password=password) + user=user, password=password + ) conn.execute(user_stmt) - perm_stmt = 'GRANT ALL PRIVILEGES ON DATABASE {database} to {password}'\ - ''.format(database=database, password=password) + perm_stmt = ( + "GRANT ALL PRIVILEGES ON DATABASE {database} to {password}" + "".format(database=database, password=password) + ) conn.execute(perm_stmt) conn.execute("commit") - except Exception, msg: + except Exception as msg: logging.warn("Unable to add user:" + str(msg)) conn.close() @@ -65,57 +77,84 @@ def create_tables(host, user, password, database): """ create a table """ - print('Creating tables in test database') + print("Creating tables in test database") - engine = create_engine("postgres://{user}:{pwd}@{host}/{db}".format( - user=user, host=host, pwd=password, db=database)) + engine = create_engine( + "postgres://{user}:{pwd}@{host}/{db}".format( + user=user, host=host, pwd=password, db=database + ) + ) create_all(engine) versioned_nodes.Base.metadata.create_all(engine) def create_indexes(host, user, password, database): - print('Creating indexes') - engine = create_engine("postgres://{user}:{pwd}@{host}/{db}".format( - user=user, host=host, pwd=password, db=database)) + print("Creating indexes") + engine = create_engine( + "postgres://{user}:{pwd}@{host}/{db}".format( + user=user, host=host, pwd=password, db=database + ) + ) index = lambda t, c: ["CREATE INDEX ON {} ({})".format(t, x) for x in c] for scls in 
Node.get_subclasses(): tablename = scls.__tablename__ - map(engine.execute, index( - tablename, [ - 'node_id', - ])) - map(engine.execute, [ - "CREATE INDEX ON {} USING gin (_sysan)".format(tablename), - "CREATE INDEX ON {} USING gin (_props)".format(tablename), - "CREATE INDEX ON {} USING gin (_sysan, _props)".format(tablename), - ]) + list(map(engine.execute, index(tablename, ["node_id",]))) + list( + map( + engine.execute, + [ + "CREATE INDEX ON {} USING gin (_sysan)".format(tablename), + "CREATE INDEX ON {} USING gin (_props)".format(tablename), + "CREATE INDEX ON {} USING gin (_sysan, _props)".format(tablename), + ], + ) + ) for scls in Edge.get_subclasses(): - map(engine.execute, index( - scls.__tablename__, [ - 'src_id', - 'dst_id', - 'dst_id, src_id', - ])) + list( + map( + engine.execute, + index(scls.__tablename__, ["src_id", "dst_id", "dst_id, src_id",]), + ) + ) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--host", type=str, action="store", - default='localhost', help="psql-server host") - parser.add_argument("--user", type=str, action="store", - default='test', help="psql test user") - parser.add_argument("--password", type=str, action="store", - default='test', help="psql test password") - parser.add_argument("--database", type=str, action="store", - default='automated_test', help="psql test database") - parser.add_argument("--no-drop", action="store_true", - default=False, help="do not drop any data") - parser.add_argument("--no-user", action="store_true", - default=False, help="do not create user") + parser.add_argument( + "--host", type=str, action="store", default="localhost", help="psql-server host" + ) + parser.add_argument( + "--user", type=str, action="store", default="test", help="psql test user" + ) + parser.add_argument( + "--password", + type=str, + action="store", + default="test", + help="psql test password", + ) + parser.add_argument( + "--database", + type=str, + action="store", + default="automated_test", + help="psql test database", + ) + parser.add_argument( + "--no-drop", action="store_true", default=False, help="do not drop any data" + ) + parser.add_argument( + "--no-user", action="store_true", default=False, help="do not create user" + ) args = parser.parse_args() - setup_database(args.user, args.password, args.database, - no_drop=args.no_drop, no_user=args.no_user) + setup_database( + args.user, + args.password, + args.database, + no_drop=args.no_drop, + no_user=args.no_user, + ) create_tables(args.host, args.user, args.password, args.database) create_indexes(args.host, args.user, args.password, args.database) diff --git a/bin/setup_test_database.py b/bin/setup_test_database.py index ccc0a45a..6fed381a 100644 --- a/bin/setup_test_database.py +++ b/bin/setup_test_database.py @@ -18,25 +18,44 @@ ) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--host", type=str, action="store", - default='localhost', help="psql-server host") - parser.add_argument("--user", type=str, action="store", - default='test', help="psql test user") - parser.add_argument("--password", type=str, action="store", - default='test', help="psql test password") - parser.add_argument("--database", type=str, action="store", - default='automated_test', help="psql test database") - parser.add_argument("--no-drop", action="store_true", - default=False, help="do not drop any data") - parser.add_argument("--no-user", action="store_true", - default=False, 
help="do not create user") + parser.add_argument( + "--host", type=str, action="store", default="localhost", help="psql-server host" + ) + parser.add_argument( + "--user", type=str, action="store", default="test", help="psql test user" + ) + parser.add_argument( + "--password", + type=str, + action="store", + default="test", + help="psql test password", + ) + parser.add_argument( + "--database", + type=str, + action="store", + default="automated_test", + help="psql test database", + ) + parser.add_argument( + "--no-drop", action="store_true", default=False, help="do not drop any data" + ) + parser.add_argument( + "--no-user", action="store_true", default=False, help="do not create user" + ) args = parser.parse_args() - setup_database(args.user, args.password, args.database, - no_drop=args.no_drop, no_user=args.no_user) + setup_database( + args.user, + args.password, + args.database, + no_drop=args.no_drop, + no_user=args.no_user, + ) create_tables(args.host, args.user, args.password, args.database) create_indexes(args.host, args.user, args.password, args.database) create_transaction_logs_table(args.host, args.user, args.password, args.database) diff --git a/bin/setup_transactionlogs.py b/bin/setup_transactionlogs.py index a69d4bcb..91b04d04 100644 --- a/bin/setup_transactionlogs.py +++ b/bin/setup_transactionlogs.py @@ -11,22 +11,36 @@ def setup(host, user, password, database): engine = create_engine( "postgres://{user}:{password}@{host}/{database}".format( - user=user, host=host, password=password, database=database)) + user=user, host=host, password=password, database=database + ) + ) Base.metadata.drop_all(engine) Base.metadata.create_all(engine) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--host", type=str, action="store", - default='localhost', help="psql-server host") - parser.add_argument("--user", type=str, action="store", - default='test', help="psql test user") - parser.add_argument("--password", type=str, action="store", - default='test', help="psql test password") - parser.add_argument("--database", type=str, action="store", - default='automated_test', help="psql test database") + parser.add_argument( + "--host", type=str, action="store", default="localhost", help="psql-server host" + ) + parser.add_argument( + "--user", type=str, action="store", default="test", help="psql test user" + ) + parser.add_argument( + "--password", + type=str, + action="store", + default="test", + help="psql test password", + ) + parser.add_argument( + "--database", + type=str, + action="store", + default="automated_test", + help="psql test database", + ) args = parser.parse_args() setup(args.host, args.user, args.password, args.database) diff --git a/deployment/uwsgi/uwsgi.ini b/deployment/uwsgi/uwsgi.ini index d111bed7..4be66856 100644 --- a/deployment/uwsgi/uwsgi.ini +++ b/deployment/uwsgi/uwsgi.ini @@ -2,9 +2,11 @@ protocol = uwsgi socket = /var/run/gen3/uwsgi.sock buffer-size = 32768 +uid = nginx +gid = nginx +chown-socket = nginx:nginx chmod-socket = 666 master = true -processes = 2 harakiri-verbose = true # No global HARAKIRI, using only user HARAKIRI, because export overwrites it # Cannot overwrite global HARAKIRI with user's: https://git.io/fjYuD @@ -15,12 +17,11 @@ worker-reload-mercy = 45 reload-mercy = 45 mule-reload-mercy = 45 wsgi-file=/var/www/peregrine/wsgi.py -plugins = python +plugins = python3 vacuum = true -uid = www-data -gid = www-data pythonpath = /var/www/peregrine/ pythonpath = /peregrine/ +pythonpath = 
/usr/local/lib/python3.6/site-packages/ # Disable noisy uWSGI logs. For debugging purposes, the "disable-logging" # option can be disabled and the "memory-report" option enabled diff --git a/dev-requirements.txt b/dev-requirements.txt index 6e663b89..363bf21d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,7 +1,7 @@ httmock==1.2.3 pytest>=4.6.5,<5.0.0 lockfile==0.10.2 -coverage==3.7.1 +coverage==4.0 mock==1.0.1 pytest-flask==0.15.0 pytest-cov==2.5.1 @@ -9,8 +9,8 @@ codacy-coverage moto==0.4.5 Sphinx==1.3.1 sphinxcontrib-httpdomain==1.3.0 --e git+https://git@github.com/uc-cdis/indexclient.git@1.6.0#egg=indexclient --e git+https://git@github.com/uc-cdis/cdisutils-test.git@0.0.1#egg=cdisutilstest +-e git+https://git@github.com/uc-cdis/indexclient.git@2.0.0#egg=indexclient +-e git+https://git@github.com/uc-cdis/cdisutils-test.git@1.0.0#egg=cdisutilstest # dependency of sheepdog envelopes==0.4 --e git+https://git@github.com/uc-cdis/sheepdog.git@2.0.0#egg=sheepdog +-e git+https://git@github.com/uc-cdis/sheepdog.git@chore/python3#egg=sheepdog diff --git a/peregrine/api.py b/peregrine/api.py index 481ad45d..82fa9cda 100644 --- a/peregrine/api.py +++ b/peregrine/api.py @@ -28,42 +28,43 @@ sys.setrecursionlimit(10000) DEFAULT_ASYNC_WORKERS = 8 + def app_register_blueprints(app): # TODO: (jsm) deprecate the index endpoints on the root path, # these are currently duplicated under /index (the ultimate # path) for migration - v0 = '/v0' + v0 = "/v0" app.url_map.strict_slashes = False - app.register_blueprint(peregrine.blueprints.blueprint, url_prefix=v0+'/submission') - app.register_blueprint(datasets.blueprint, url_prefix=v0+'/datasets') + app.register_blueprint( + peregrine.blueprints.blueprint, url_prefix=v0 + "/submission" + ) + app.register_blueprint(datasets.blueprint, url_prefix=v0 + "/datasets") def app_register_duplicate_blueprints(app): # TODO: (jsm) deprecate this v0 version under root endpoint. 
This # root endpoint duplicates /v0 to allow gradual client migration - app.register_blueprint(peregrine.blueprints.blueprint, url_prefix='/submission') - app.register_blueprint(datasets.blueprint, url_prefix='/datasets') + app.register_blueprint(peregrine.blueprints.blueprint, url_prefix="/submission") + app.register_blueprint(datasets.blueprint, url_prefix="/datasets") def async_pool_init(app): """Create and start an pool of workers for async tasks.""" - n_async_workers = ( - app.config - .get('ASYNC', {}) - .get('N_WORKERS', DEFAULT_ASYNC_WORKERS) + n_async_workers = app.config.get("ASYNC", {}).get( + "N_WORKERS", DEFAULT_ASYNC_WORKERS ) app.async_pool = peregrine.utils.scheduling.AsyncPool() app.async_pool.start(n_async_workers) def db_init(app): - app.logger.info('Initializing PsqlGraph driver') + app.logger.info("Initializing PsqlGraph driver") app.db = PsqlGraphDriver( - host=app.config['PSQLGRAPH']['host'], - user=app.config['PSQLGRAPH']['user'], - password=app.config['PSQLGRAPH']['password'], - database=app.config['PSQLGRAPH']['database'], + host=app.config["PSQLGRAPH"]["host"], + user=app.config["PSQLGRAPH"]["user"], + password=app.config["PSQLGRAPH"]["password"], + database=app.config["PSQLGRAPH"]["database"], set_flush_timestamps=True, ) @@ -71,45 +72,50 @@ def db_init(app): # Set CORS options on app configuration def cors_init(app): accepted_headers = [ - 'Content-Type', - 'X-Requested-With', - 'X-CSRFToken', + "Content-Type", + "X-Requested-With", + "X-CSRFToken", ] - CORS(app, resources={ - r"/*": {"origins": '*'}, - }, headers=accepted_headers, expose_headers=['Content-Disposition']) + CORS( + app, + resources={r"/*": {"origins": "*"},}, + headers=accepted_headers, + expose_headers=["Content-Disposition"], + ) def dictionary_init(app): start = time.time() - if ('DICTIONARY_URL' in app.config): - app.logger.info('Initializing dictionary from url') - url = app.config['DICTIONARY_URL'] + if "DICTIONARY_URL" in app.config: + app.logger.info("Initializing dictionary from url") + url = app.config["DICTIONARY_URL"] d = DataDictionary(url=url) dict_init.init(d) - elif ('PATH_TO_SCHEMA_DIR' in app.config): - app.logger.info('Initializing dictionary from schema dir') - d = DataDictionary(root_dir=app.config['PATH_TO_SCHEMA_DIR']) + elif "PATH_TO_SCHEMA_DIR" in app.config: + app.logger.info("Initializing dictionary from schema dir") + d = DataDictionary(root_dir=app.config["PATH_TO_SCHEMA_DIR"]) dict_init.init(d) else: - app.logger.info('Initializing dictionary from gdcdictionary') + app.logger.info("Initializing dictionary from gdcdictionary") import gdcdictionary + d = gdcdictionary.gdcdictionary dictionary.init(d) from gdcdatamodel import models as md from gdcdatamodel import validators as vd + datamodelutils.validators.init(vd) datamodelutils.models.init(md) end = int(round(time.time() - start)) - app.logger.info('Initialized dictionary in {} sec'.format(end)) + app.logger.info("Initialized dictionary in {} sec".format(end)) def app_init(app): app.logger.setLevel(logging.INFO) # Register duplicates only at runtime - app.logger.info('Initializing app') + app.logger.info("Initializing app") dictionary_init(app) if app.config.get("USE_USER_HARAKIRI", True): @@ -126,11 +132,9 @@ def app_init(app): app.graphql_schema = submission.graphql.get_schema() app.schema_file = submission.generate_schema_file(app.graphql_schema, app.logger) try: - app.secret_key = app.config['FLASK_SECRET_KEY'] + app.secret_key = app.config["FLASK_SECRET_KEY"] except KeyError: - app.logger.error( - 'Secret key 
not set in config! Authentication will not work' - ) + app.logger.error("Secret key not set in config! Authentication will not work") async_pool_init(app) # ARBORIST deprecated, replaced by ARBORIST_URL @@ -141,7 +145,7 @@ def app_init(app): app.logger.info("Using default Arborist base URL") app.auth = ArboristClient() - app.logger.info('Initialization complete.') + app.logger.info("Initialization complete.") app = Flask(__name__) @@ -152,31 +156,33 @@ def app_init(app): setup_default_handlers(app) -@app.route('/_status', methods=['GET']) +@app.route("/_status", methods=["GET"]) def health_check(): with app.db.session_scope() as session: try: - session.execute('SELECT 1') + session.execute("SELECT 1") except Exception: - raise UnhealthyCheck('Unhealthy') + raise UnhealthyCheck("Unhealthy") + + return "Healthy", 200 - return 'Healthy', 200 -@app.route('/_version', methods=['GET']) +@app.route("/_version", methods=["GET"]) def version(): # dictver['commit'] deprecated; see peregrine#130 dictver = { - 'version': pkg_resources.get_distribution("gen3dictionary").version, - 'commit': '', + "version": pkg_resources.get_distribution("gen3dictionary").version, + "commit": "", } base = { - 'version': VERSION, - 'commit': COMMIT, - 'dictionary': dictver, + "version": VERSION, + "commit": COMMIT, + "dictionary": dictver, } return jsonify(base), 200 + @app.errorhandler(404) def page_not_found(e): return jsonify(message=e.description), e.code @@ -196,16 +202,15 @@ def _log_and_jsonify_exception(e): ``OAuth2Errors``. """ app.logger.exception(e) - if hasattr(e, 'json') and e.json: + if hasattr(e, "json") and e.json: return jsonify(**e.json), e.code else: return jsonify(message=e.message), e.code + app.register_error_handler(APIError, _log_and_jsonify_exception) -app.register_error_handler( - peregrine.errors.APIError, _log_and_jsonify_exception -) +app.register_error_handler(peregrine.errors.APIError, _log_and_jsonify_exception) app.register_error_handler(AuthError, _log_and_jsonify_exception) @@ -215,15 +220,13 @@ def run_for_development(**kwargs): for key in ["http_proxy", "https_proxy"]: if os.environ.get(key): del os.environ[key] - app.config.from_object('peregrine.dev_settings') + app.config.from_object("peregrine.dev_settings") - kwargs['port'] = app.config['PEREGRINE_PORT'] - kwargs['host'] = app.config['PEREGRINE_HOST'] + kwargs["port"] = app.config["PEREGRINE_PORT"] + kwargs["host"] = app.config["PEREGRINE_HOST"] try: app_init(app) except: - app.logger.exception( - "Couldn't initialize application, continuing anyway" - ) + app.logger.exception("Couldn't initialize application, continuing anyway") app.run(**kwargs) diff --git a/peregrine/auth/__init__.py b/peregrine/auth/__init__.py index 3c8957a7..98c07a53 100644 --- a/peregrine/auth/__init__.py +++ b/peregrine/auth/__init__.py @@ -14,14 +14,12 @@ from gen3authz.client.arborist.errors import ArboristError import flask -from peregrine.errors import AuthNError - logger = get_logger(__name__) def resource_path_to_project_ids(resource_path): - parts = resource_path.strip('/').split('/') + parts = resource_path.strip("/").split("/") # resource path ignored by peregrine if resource_path != "/" and parts[0] != "programs": @@ -29,19 +27,17 @@ def resource_path_to_project_ids(resource_path): if len(parts) > 4 or (len(parts) > 2 and parts[2] != "projects"): logger.warn( - "ignoring resource path {} because peregrine cannot handle a permission more granular than program/project level".format(resource_path) + "ignoring resource path {} because peregrine 
cannot handle a permission more granular than program/project level".format( + resource_path + ) ) return [] # "/" or "/programs": access to all programs if len(parts) == 1: - programs = ( - flask.current_app.db - .nodes(models.Program) - .all() - ) + programs = flask.current_app.db.nodes(models.Program).all() return [ - program.name + '-' + project.code + program.name + "-" + project.code for program in programs for project in program.projects ] @@ -51,39 +47,31 @@ def resource_path_to_project_ids(resource_path): if len(parts) < 4: program_name = parts[1] program = ( - flask.current_app.db - .nodes(models.Program) - .props(name=program_name) - .first() + flask.current_app.db.nodes(models.Program).props(name=program_name).first() ) if not program: logger.warn( - "program {} in resource path {} does not exist".format(program_name, resource_path) + "program {} in resource path {} does not exist".format( + program_name, resource_path + ) ) return [] - return [ - program.name + '-' + project.code - for project in program.projects - ] + return [program.name + "-" + project.code for project in program.projects] # "/programs/[...]/projects/[...]": access to a specific project # here, len(parts) == 4 and parts[2] == "projects" project_code = parts[3] project = ( - flask.current_app.db - .nodes(models.Project) - .props(code=project_code) - .first() + flask.current_app.db.nodes(models.Project).props(code=project_code).first() ) if not project: logger.warn( - "project {} in resource path {} does not exist".format(project_code, resource_path) + "project {} in resource path {} does not exist".format( + project_code, resource_path + ) ) return [] - return [ - program.name + '-' + project.code - for program in project.programs - ] + return [program.name + "-" + project.code for program in project.programs] def get_read_access_projects(): @@ -95,7 +83,9 @@ def get_read_access_projects(): except ArboristError as e: # Arborist errored, or this user is unknown to Arborist logger.warn( - "Unable to retrieve auth mapping for user `{}`: {}".format(current_user.username, e) + "Unable to retrieve auth mapping for user `{}`: {}".format( + current_user.username, e + ) ) mapping = {} @@ -105,7 +95,11 @@ def get_read_access_projects(): for resource_path, permissions in mapping.items() for project_id in resource_path_to_project_ids(resource_path) # ignore resource if no peregrine read access: - if any(permission.get("service") in ["*", "peregrine"] and permission.get("method") in ["*", "read"] for permission in permissions) + if any( + permission.get("service") in ["*", "peregrine"] + and permission.get("method") in ["*", "read"] + for permission in permissions + ) ] # return unique project_ids diff --git a/peregrine/blueprints/__init__.py b/peregrine/blueprints/__init__.py index 2ede16e1..c818502b 100644 --- a/peregrine/blueprints/__init__.py +++ b/peregrine/blueprints/__init__.py @@ -1,3 +1,3 @@ import flask -blueprint = flask.Blueprint('graphql', 'submission_v0') +blueprint = flask.Blueprint("graphql", "submission_v0") diff --git a/peregrine/blueprints/datasets.py b/peregrine/blueprints/datasets.py index 5a3e2afa..0abad01c 100644 --- a/peregrine/blueprints/datasets.py +++ b/peregrine/blueprints/datasets.py @@ -8,7 +8,7 @@ set_read_access_projects, ) -from cdiserrors import UserError, AuthZError +from cdiserrors import UserError from dictionaryutils import dictionary blueprint = flask.Blueprint("datasets", "datasets") @@ -55,7 +55,7 @@ def get_datasets(): return flask.jsonify({"data": data, "errors": errors}), 400 
result = {project_id: {} for project_id in projects} - for name, value in data.iteritems(): + for name, value in data.items(): match = re.search("^i(\d+)_(.*)", name) index = int(match.group(1)) node = match.group(2) @@ -82,11 +82,11 @@ def get_projects(): # we labeled the count by project index and later parse it # with regex to add structure to response query = "{project (first: 0) { name code dbgap_accession_number " - for field in ['description', 'image_url']: - if dictionary.schema['project']['properties'].get(field): - query += field + ' ' + for field in ["description", "image_url"]: + if dictionary.schema["project"]["properties"].get(field): + query += field + " " - query += '}}' + query += "}}" data, errors = graphql.execute_query(query, variables={}) if errors: return flask.jsonify({"data": data, "errors": errors}), 400 diff --git a/peregrine/config.py b/peregrine/config.py index 7a0bd8ca..c04f5cf7 100644 --- a/peregrine/config.py +++ b/peregrine/config.py @@ -10,14 +10,16 @@ # mode will affect which mapping and index is used is used. -LEGACY_MODE = os.environ.get('PEREGRINE_LEGACY_MODE', '').lower() == 'true' +LEGACY_MODE = os.environ.get("PEREGRINE_LEGACY_MODE", "").lower() == "true" if LEGACY_MODE: logger.info( "Running in LEGACY mode. The Elasticsearch 'GDC_ES_LEGACY_INDEX' " - "environment variable and the legacy mapping will be used. ") + "environment variable and the legacy mapping will be used. " + ) else: logger.info( "Running in ACTIVE mode. The Elasticsearch 'GDC_ES_INDEX' " - "environment variable and the active mapping will be used. ") + "environment variable and the active mapping will be used. " + ) diff --git a/peregrine/dev_settings.example.py b/peregrine/dev_settings.example.py index 7bb4ae52..0c1b46d4 100644 --- a/peregrine/dev_settings.example.py +++ b/peregrine/dev_settings.example.py @@ -3,47 +3,56 @@ from os import environ as env # Auth -AUTH = 'https://gdc-portal.nci.nih.gov/auth/keystone/v3/' -INTERNAL_AUTH = env.get('INTERNAL_AUTH', 'https://gdc-portal.nci.nih.gov/auth/') +AUTH = "https://gdc-portal.nci.nih.gov/auth/keystone/v3/" +INTERNAL_AUTH = env.get("INTERNAL_AUTH", "https://gdc-portal.nci.nih.gov/auth/") AUTH_ADMIN_CREDS = { - 'domain_name': env.get('KEYSTONE_DOMAIN'), - 'username': env.get('KEYSTONE_USER'), - 'password': env.get('KEYSTONE_PASSWORD'), - 'auth_url': env.get('KEYSTONE_AUTH_URL'), - 'user_domain_name': env.get('KEYSTONE_DOMAIN')} + "domain_name": env.get("KEYSTONE_DOMAIN"), + "username": env.get("KEYSTONE_USER"), + "password": env.get("KEYSTONE_PASSWORD"), + "auth_url": env.get("KEYSTONE_AUTH_URL"), + "user_domain_name": env.get("KEYSTONE_DOMAIN"), +} # Storage -CLEVERSAFE_HOST = env.get('CLEVERSAFE_HOST', 'cleversafe.service.consul') +CLEVERSAFE_HOST = env.get("CLEVERSAFE_HOST", "cleversafe.service.consul") -STORAGE = {"s3": { - "keys": { - "cleversafe.service.consul": { - "access_key": os.environ.get('CLEVERSAFE_ACCESS_KEY'), - 'secret_key': os.environ.get('CLEVERSAFE_SECRET_KEY')}, - "localhost": { - "access_key": os.environ.get('CLEVERSAFE_ACCESS_KEY'), - 'secret_key': os.environ.get('CLEVERSAFE_SECRET_KEY')}, - }, "kwargs": { - 'cleversafe.service.consul': { - 'host': 'cleversafe.service.consul', - "is_secure": False, - "calling_format": OrdinaryCallingFormat()}, - 'localhost': { - 'host': 'localhost', - "is_secure": False, - "calling_format": OrdinaryCallingFormat()}, - }}} +STORAGE = { + "s3": { + "keys": { + "cleversafe.service.consul": { + "access_key": os.environ.get("CLEVERSAFE_ACCESS_KEY"), + "secret_key": 
os.environ.get("CLEVERSAFE_SECRET_KEY"), + }, + "localhost": { + "access_key": os.environ.get("CLEVERSAFE_ACCESS_KEY"), + "secret_key": os.environ.get("CLEVERSAFE_SECRET_KEY"), + }, + }, + "kwargs": { + "cleversafe.service.consul": { + "host": "cleversafe.service.consul", + "is_secure": False, + "calling_format": OrdinaryCallingFormat(), + }, + "localhost": { + "host": "localhost", + "is_secure": False, + "calling_format": OrdinaryCallingFormat(), + }, + }, + } +} SUBMISSION = { - "bucket": 'test_submission', + "bucket": "test_submission", "host": CLEVERSAFE_HOST, } # Postgres PSQLGRAPH = { - 'host': os.getenv("GDC_PG_HOST", "localhost"), - 'user': os.getenv("GDC_PG_USER", "test"), - 'password': os.getenv("GDC_PG_PASSWORD", "test"), - 'database': os.getenv("GDC_PG_DBNAME", "automated_test") + "host": os.getenv("GDC_PG_HOST", "localhost"), + "user": os.getenv("GDC_PG_USER", "test"), + "password": os.getenv("GDC_PG_PASSWORD", "test"), + "database": os.getenv("GDC_PG_DBNAME", "automated_test"), } # API server @@ -53,16 +62,20 @@ # FLASK_SECRET_KEY should be set to a secure random string with an appropriate # length; 50 is reasonable. For the random generation to be secure, use # ``random.SystemRandom()`` -FLASK_SECRET_KEY = 'eCKJOOw3uQBR5pVDz3WIvYk3RsjORYoPRdzSUNJIeUEkm1Uvtq' +FLASK_SECRET_KEY = "eCKJOOw3uQBR5pVDz3WIvYk3RsjORYoPRdzSUNJIeUEkm1Uvtq" -DICTIONARY_URL = os.environ.get('DICTIONARY_URL') +DICTIONARY_URL = os.environ.get("DICTIONARY_URL") -HMAC_ENCRYPTION_KEY = os.environ.get('CDIS_HMAC_ENCRYPTION_KEY', '') +HMAC_ENCRYPTION_KEY = os.environ.get("CDIS_HMAC_ENCRYPTION_KEY", "") OAUTH2 = { - "client_id": os.environ.get('CDIS_PEREGRINE_CLIENT_ID'), + "client_id": os.environ.get("CDIS_PEREGRINE_CLIENT_ID"), "client_secret": os.environ.get("CDIS_PEREGRINE_CLIENT_SECRET"), - "oauth_provider": os.environ.get("CDIS_USER_API_OAUTH", 'http://localhost:8000/oauth2/'), - "redirect_uri": os.environ.get("CDIS_PEREGRINE_OAUTH_REDIRECT", 'localhost:5000/v0/oauth2/authorize'), + "oauth_provider": os.environ.get( + "CDIS_USER_API_OAUTH", "http://localhost:8000/oauth2/" + ), + "redirect_uri": os.environ.get( + "CDIS_PEREGRINE_OAUTH_REDIRECT", "localhost:5000/v0/oauth2/authorize" + ), } USER_API = "http://localhost:8000/" @@ -70,7 +83,7 @@ # token when redirecting, used during local docker compose setup when the # services are on different containers but the hostname is still localhost FORCE_ISSUER = False -SESSION_COOKIE_NAME = 'PEREGRINE_session' +SESSION_COOKIE_NAME = "PEREGRINE_session" # verify project existence in dbgap or not VERIFY_PROJECT = False AUTH_SUBMISSION_LIST = False diff --git a/peregrine/dev_settings.py b/peregrine/dev_settings.py index ed77c1d4..a0efa4d8 100644 --- a/peregrine/dev_settings.py +++ b/peregrine/dev_settings.py @@ -4,52 +4,62 @@ # IndexClient INDEX_CLIENT = { - 'host': env.get('INDEX_CLIENT_HOST', 'http://localhost:8888'), - 'version': 'v0', - 'auth': None} + "host": env.get("INDEX_CLIENT_HOST", "http://localhost:8888"), + "version": "v0", + "auth": None, +} # Auth -AUTH = 'https://gdc-portal.nci.nih.gov/auth/keystone/v3/' -INTERNAL_AUTH = env.get('INTERNAL_AUTH', 'https://gdc-portal.nci.nih.gov/auth/') +AUTH = "https://gdc-portal.nci.nih.gov/auth/keystone/v3/" +INTERNAL_AUTH = env.get("INTERNAL_AUTH", "https://gdc-portal.nci.nih.gov/auth/") AUTH_ADMIN_CREDS = { - 'domain_name': env.get('KEYSTONE_DOMAIN'), - 'username': env.get('KEYSTONE_USER'), - 'password': env.get('KEYSTONE_PASSWORD'), - 'auth_url': env.get('KEYSTONE_AUTH_URL'), - 'user_domain_name': 
env.get('KEYSTONE_DOMAIN')} + "domain_name": env.get("KEYSTONE_DOMAIN"), + "username": env.get("KEYSTONE_USER"), + "password": env.get("KEYSTONE_PASSWORD"), + "auth_url": env.get("KEYSTONE_AUTH_URL"), + "user_domain_name": env.get("KEYSTONE_DOMAIN"), +} # Storage -CLEVERSAFE_HOST = env.get('CLEVERSAFE_HOST', 'cleversafe.service.consul') +CLEVERSAFE_HOST = env.get("CLEVERSAFE_HOST", "cleversafe.service.consul") -STORAGE = {"s3": { - "keys": { - "cleversafe.service.consul": { - "access_key": os.environ.get('CLEVERSAFE_ACCESS_KEY'), - 'secret_key': os.environ.get('CLEVERSAFE_SECRET_KEY')}, - "localhost": { - "access_key": os.environ.get('CLEVERSAFE_ACCESS_KEY'), - 'secret_key': os.environ.get('CLEVERSAFE_SECRET_KEY')}, - }, "kwargs": { - 'cleversafe.service.consul': { - 'host': 'cleversafe.service.consul', - "is_secure": False, - "calling_format": OrdinaryCallingFormat()}, - 'localhost': { - 'host': 'localhost', - "is_secure": False, - "calling_format": OrdinaryCallingFormat()}, - }}} +STORAGE = { + "s3": { + "keys": { + "cleversafe.service.consul": { + "access_key": os.environ.get("CLEVERSAFE_ACCESS_KEY"), + "secret_key": os.environ.get("CLEVERSAFE_SECRET_KEY"), + }, + "localhost": { + "access_key": os.environ.get("CLEVERSAFE_ACCESS_KEY"), + "secret_key": os.environ.get("CLEVERSAFE_SECRET_KEY"), + }, + }, + "kwargs": { + "cleversafe.service.consul": { + "host": "cleversafe.service.consul", + "is_secure": False, + "calling_format": OrdinaryCallingFormat(), + }, + "localhost": { + "host": "localhost", + "is_secure": False, + "calling_format": OrdinaryCallingFormat(), + }, + }, + } +} SUBMISSION = { - "bucket": 'test_submission', + "bucket": "test_submission", "host": CLEVERSAFE_HOST, } # Postgres PSQLGRAPH = { - 'host': os.getenv("GDC_PG_HOST", "localhost"), - 'user': os.getenv("GDC_PG_USER", "test"), - 'password': os.getenv("GDC_PG_PASSWORD", "test"), - 'database': os.getenv("GDC_PG_DBNAME", "automated_test") + "host": os.getenv("GDC_PG_HOST", "localhost"), + "user": os.getenv("GDC_PG_USER", "test"), + "password": os.getenv("GDC_PG_PASSWORD", "test"), + "database": os.getenv("GDC_PG_DBNAME", "automated_test"), } # API server @@ -59,20 +69,27 @@ # FLASK_SECRET_KEY should be set to a secure random string with an appropriate # length; 50 is reasonable. 
For the random generation to be secure, use # ``random.SystemRandom()`` -FLASK_SECRET_KEY = 'eCKJOOw3uQBR5pVDz3WIvYk3RsjORYoPRdzSUNJIeUEkm1Uvtq' +FLASK_SECRET_KEY = "eCKJOOw3uQBR5pVDz3WIvYk3RsjORYoPRdzSUNJIeUEkm1Uvtq" -DICTIONARY_URL = os.environ.get('DICTIONARY_URL','https://s3.amazonaws.com/dictionary-artifacts/datadictionary/develop/schema.json') +DICTIONARY_URL = os.environ.get( + "DICTIONARY_URL", + "https://s3.amazonaws.com/dictionary-artifacts/datadictionary/develop/schema.json", +) -HMAC_ENCRYPTION_KEY = os.environ.get('CDIS_HMAC_ENCRYPTION_KEY', '') +HMAC_ENCRYPTION_KEY = os.environ.get("CDIS_HMAC_ENCRYPTION_KEY", "") OAUTH2 = { - "client_id": os.environ.get('CDIS_PEREGRINE_CLIENT_ID'), + "client_id": os.environ.get("CDIS_PEREGRINE_CLIENT_ID"), "client_secret": os.environ.get("CDIS_PEREGRINE_CLIENT_SECRET"), - "oauth_provider": os.environ.get("CDIS_USER_API_OAUTH", 'http://localhost:8000/oauth2/'), - "redirect_uri": os.environ.get("CDIS_PEREGRINE_OAUTH_REDIRECT", 'localhost:5000/v0/oauth2/authorize'), + "oauth_provider": os.environ.get( + "CDIS_USER_API_OAUTH", "http://localhost:8000/oauth2/" + ), + "redirect_uri": os.environ.get( + "CDIS_PEREGRINE_OAUTH_REDIRECT", "localhost:5000/v0/oauth2/authorize" + ), } USER_API = "http://localhost:8000/" -SESSION_COOKIE_NAME = 'PEREGRINE_session' +SESSION_COOKIE_NAME = "PEREGRINE_session" # verify project existence in dbgap or not VERIFY_PROJECT = False AUTH_SUBMISSION_LIST = False diff --git a/peregrine/dictionary.py b/peregrine/dictionary.py index f2767b5f..93c36c42 100644 --- a/peregrine/dictionary.py +++ b/peregrine/dictionary.py @@ -17,18 +17,19 @@ #: The data dictionary must implement these attributes. required_attrs = [ - 'resolvers', - 'schema', + "resolvers", + "schema", ] optional_attrs = [ - 'settings', + "settings", ] resolvers = None schema = None settings = None + def init(dictionary): """ Initialize this file with the same attributes as ``dictionary`` to be @@ -45,17 +46,13 @@ def init(dictionary): for required_attr in required_attrs: try: # Basically do: this_module.required_attr = models.required_attr - setattr( - this_module, required_attr, getattr(dictionary, required_attr) - ) + setattr(this_module, required_attr, getattr(dictionary, required_attr)) except AttributeError: - raise ValueError('given dictionary does not define ' + required_attr) + raise ValueError("given dictionary does not define " + required_attr) for optional_attr in optional_attrs: try: # Basically do: this_module.required_attr = models.required_attr - setattr( - this_module, optional_attr, getattr(dictionary, optional_attr) - ) + setattr(this_module, optional_attr, getattr(dictionary, optional_attr)) except AttributeError: pass diff --git a/peregrine/globals.py b/peregrine/globals.py index d5be63ed..56d21f4c 100644 --- a/peregrine/globals.py +++ b/peregrine/globals.py @@ -3,10 +3,10 @@ Contains values for global constants. """ -FLAG_IS_ASYNC = 'async' +FLAG_IS_ASYNC = "async" # Async scheduling configuration ASYNC_MAX_Q_LEN = 128 ERR_ASYNC_SCHEDULING = ( - 'The API is currently under heavy load and currently has too many' - ' asynchronous tasks. Please try again later.' + "The API is currently under heavy load and currently has too many" + " asynchronous tasks. Please try again later." ) diff --git a/peregrine/models.py b/peregrine/models.py index 6003a506..e2fe56c5 100644 --- a/peregrine/models.py +++ b/peregrine/models.py @@ -25,10 +25,10 @@ #: The data model must implement these attributes. 
required_attrs = [ - 'Program', - 'Project', - 'submission', - 'VersionedNode', + "Program", + "Project", + "submission", + "VersionedNode", ] # These could be assigned programatically, as in: @@ -60,4 +60,4 @@ def init(models): # Basically do: this_module.required_attr = models.required_attr setattr(this_module, required_attr, getattr(models, required_attr)) except AttributeError: - raise ValueError('given models does not define ' + required_attr) + raise ValueError("given models does not define " + required_attr) diff --git a/peregrine/resources/submission/__init__.py b/peregrine/resources/submission/__init__.py index 53148419..9d5065b4 100644 --- a/peregrine/resources/submission/__init__.py +++ b/peregrine/resources/submission/__init__.py @@ -27,22 +27,22 @@ def get_open_project_ids(): list of project ids for open projects and list of error messages generated from running graphql """ - if not hasattr(models.Project, 'availability_type'): + if not hasattr(models.Project, "availability_type"): return [] with flask.current_app.db.session_scope(): projects = ( - flask.current_app.db - .nodes(models.Project) + flask.current_app.db.nodes(models.Project) .filter(models.Project.availability_type.astext == "Open") .all() ) return [ - program['name'] + '-' + project['code'] + program["name"] + "-" + project["code"] for project in projects - for program in project['programs'] + for program in project["programs"] ] + def set_read_access_projects_for_public_endpoint(): """ Set the global user project list to include all projects for endpoint @@ -50,15 +50,11 @@ def set_read_access_projects_for_public_endpoint(): """ with flask.current_app.db.session_scope(): - projects = ( - flask.current_app.db - .nodes(models.Project) - .all() - ) + projects = flask.current_app.db.nodes(models.Project).all() flask.g.read_access_projects = [ - program['name'] + '-' + project['code'] + program["name"] + "-" + project["code"] for project in projects - for program in project['programs'] + for program in project["programs"] ] @@ -82,12 +78,12 @@ def set_read_access_projects(): assigns result from ``get_open_project_ids`` to ``flask.g.read_access_projects``. """ - if not hasattr(flask.g, 'read_access_projects'): + if not hasattr(flask.g, "read_access_projects"): flask.g.read_access_projects = get_read_access_projects() flask.g.read_access_projects.extend(get_open_project_ids()) -@peregrine.blueprints.blueprint.route('/graphql', methods=['POST']) +@peregrine.blueprints.blueprint.route("/graphql", methods=["POST"]) def root_graphql_query(): """ Run a graphql query. 
@@ -97,16 +93,14 @@ def root_graphql_query(): try: set_read_access_projects() except AuthZError: - data = flask.jsonify({'data': {}, 'errors': ['Unauthorized query.']}) + data = flask.jsonify({"data": {}, "errors": ["Unauthorized query."]}) return data, 403 payload = peregrine.utils.parse_request_json() - query = payload.get('query') + query = payload.get("query") variables, errors = peregrine.utils.get_variables(payload) if errors: - return flask.jsonify({'data': None, 'errors': errors}), 400 - return peregrine.utils.jsonify_check_errors( - graphql.execute_query(query, variables) - ) + return flask.jsonify({"data": None, "errors": errors}), 400 + return peregrine.utils.jsonify_check_errors(graphql.execute_query(query, variables)) def generate_schema_file(graphql_schema, app_logger): @@ -123,69 +117,74 @@ def generate_schema_file(graphql_schema, app_logger): """ current_dir = os.path.dirname(os.path.realpath(__file__)) # relative to current running directory - schema_file = 'schema.json' + schema_file = "schema.json" # if the file has already been generated, do not re-generate it if os.path.isfile(schema_file): try: # if we can lock the file, the generation is done -> return # if not, another process is currently generating it -> wait - with open(schema_file, 'r') as f: + with open(schema_file, "r") as f: fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB) fcntl.flock(f, fcntl.LOCK_UN) - app_logger.info('Skipping {} generation (file already exists)'.format(schema_file)) + app_logger.info( + "Skipping {} generation (file already exists)".format(schema_file) + ) return os.path.abspath(schema_file) except IOError: pass - query_file = os.path.join( - current_dir, 'graphql', 'introspection_query.txt') - with open(query_file, 'r') as f: + query_file = os.path.join(current_dir, "graphql", "introspection_query.txt") + with open(query_file, "r") as f: query = f.read() try: - with open(schema_file, 'w') as f: + with open(schema_file, "w") as f: # lock file (prevents several processes from generating the schema at the same time) fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB) - app_logger.info('Generating the graphql schema file {}'.format(schema_file)) + app_logger.info("Generating the graphql schema file {}".format(schema_file)) # generate the schema file start = time.time() result = graphql_schema.execute(query) - data = {'data': result.data} + data = {"data": result.data} if result.errors: - data['errors'] = [err.message for err in result.errors] + data["errors"] = [err.message for err in result.errors] json.dump(data, f) end = int(round(time.time() - start)) - app_logger.info('Generated {} in {} sec'.format(schema_file, end)) - fcntl.flock(f, fcntl.LOCK_UN) # unlock file + app_logger.info("Generated {} in {} sec".format(schema_file, end)) + fcntl.flock(f, fcntl.LOCK_UN) # unlock file except IOError: # wait for file unlock (end of schema generation) before proceeding - timeout_minutes = 5 # 5 minutes from now + timeout_minutes = 5 # 5 minutes from now wait_for_file(schema_file, timeout_minutes, app_logger) return os.path.abspath(schema_file) def wait_for_file(file_name, timeout_minutes, app_logger): - print('A process is waiting for {} generation.'.format(file_name)) + print("A process is waiting for {} generation.".format(file_name)) timeout = time.time() + 60 * timeout_minutes while True: try: - with open(file_name, 'r') as f: # try to access+lock the file + with open(file_name, "r") as f: # try to access+lock the file fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB) fcntl.flock(f, fcntl.LOCK_UN) - break 
# file is available -> schema has been generated -> process can proceed - except IOError: # file is still unavailable -> process waits + break # file is available -> schema has been generated -> process can proceed + except IOError: # file is still unavailable -> process waits pass if time.time() > timeout: - app_logger.warning('A process is proceeding without waiting for end of {} generation ({} minutes timeout)'.format(file_name, timeout_minutes)) + app_logger.warning( + "A process is proceeding without waiting for end of {} generation ({} minutes timeout)".format( + file_name, timeout_minutes + ) + ) break time.sleep(0.5) -@peregrine.blueprints.blueprint.route('/getschema', methods=['GET']) +@peregrine.blueprints.blueprint.route("/getschema", methods=["GET"]) def root_graphql_schema_query(): """ Get the graphql schema. diff --git a/peregrine/resources/submission/constants.py b/peregrine/resources/submission/constants.py index 48f9cf0c..223e481c 100644 --- a/peregrine/resources/submission/constants.py +++ b/peregrine/resources/submission/constants.py @@ -28,10 +28,12 @@ def case_cache_enabled(): still import/run individual modules without raising errors. """ from peregrine import dictionary + try: return ( - True if dictionary.settings == None - else dictionary.settings.get('enable_case_cache', True) + True + if dictionary.settings == None + else dictionary.settings.get("enable_case_cache", True) ) except (AttributeError, KeyError, TypeError): return True @@ -41,7 +43,7 @@ def case_cache_enabled(): # File upload #: State a file should be put in given an error -ERROR_STATE = 'error' +ERROR_STATE = "error" #: Initial file state def submitted_state(): @@ -67,27 +69,28 @@ def submitted_state(): still import/run individual modules without raising errors. 
""" from peregrine import dictionary + try: - return ( - dictionary.resolvers['_definitions.yaml'] - .source['file_state']['default'] - ) + return dictionary.resolvers["_definitions.yaml"].source["file_state"]["default"] except (AttributeError, KeyError, TypeError): return None + #: State file enters when user begins upload -UPLOADING_STATE = 'uploading' +UPLOADING_STATE = "uploading" #: State file enters when user completes upload -SUCCESS_STATE = 'uploaded' +SUCCESS_STATE = "uploaded" # ====================================================================== # Release/Submit workflow #: UUID seeds for program/project uuid5 generation -uuid_regex = re.compile("^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$") -project_seed = uuid.UUID('249b4405-2c69-45d9-96bc-7410333d5d80') -program_seed = uuid.UUID('85b08c6a-56a6-4474-9c30-b65abfd214a8') +uuid_regex = re.compile( + "^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$" +) +project_seed = uuid.UUID("249b4405-2c69-45d9-96bc-7410333d5d80") +program_seed = uuid.UUID("85b08c6a-56a6-4474-9c30-b65abfd214a8") #: This is a list of states that an entity must be in to allow @@ -105,9 +108,9 @@ def submitted_state(): #: These categories should all have a ``state`` associated with each type ENTITY_STATE_CATEGORIES = [ - 'biospecimen', - 'clinical', - 'data_file', + "biospecimen", + "clinical", + "data_file", # 'cases' => cases are currently `admin` but are manually included # in submission # 'annotations' => cases are currently `TBD` but are manually @@ -117,39 +120,39 @@ def submitted_state(): #: Possible entity.state transitions #: { to_state: from_state } ENTITY_STATE_TRANSITIONS = { - 'submitted': ['validated', None], + "submitted": ["validated", None], } #: The key that specifies the high level state that a file is in the #: pipeline -FILE_STATE_KEY = 'file_state' +FILE_STATE_KEY = "file_state" #: Possible data_file.file_state transitions #: { to_state: from_state } FILE_STATE_TRANSITIONS = { - 'submitted': ['validated'], + "submitted": ["validated"], } #: The auth role required to take action actions -ROLE_SUBMIT = 'release' -ROLE_REVIEW = 'release' -ROLE_OPEN = 'release' +ROLE_SUBMIT = "release" +ROLE_REVIEW = "release" +ROLE_OPEN = "release" #: The key that specifies the high level state that an entity is in the #: release process -STATE_KEY = 'state' +STATE_KEY = "state" #: Allow dry_run transactions to be committed (in a new transaction) #: if the TransactionLog.state is in the following -STATES_COMITTABLE_DRY_RUN = {'SUCCEEDED'} +STATES_COMITTABLE_DRY_RUN = {"SUCCEEDED"} # ====================================================================== # Formats -FORMAT_JSON = 'JSON' -FORMAT_XML = 'XML' -FORMAT_TSV = 'TSV' -FORMAT_CSV = 'CSV' +FORMAT_JSON = "JSON" +FORMAT_XML = "XML" +FORMAT_TSV = "TSV" +FORMAT_CSV = "CSV" # ====================================================================== # Transaction Logs @@ -157,33 +160,33 @@ def submitted_state(): #: The transaction succeeded without user or system error. 
If the #: transaction was a non-dry_run mutation, then the result should be #: represented in the database -TX_LOG_STATE_SUCCEEDED = 'SUCCEEDED' +TX_LOG_STATE_SUCCEEDED = "SUCCEEDED" #: The transaction failed due to user error -TX_LOG_STATE_FAILED = 'FAILED' +TX_LOG_STATE_FAILED = "FAILED" #: The transaction failed due to system error -TX_LOG_STATE_ERRORED = 'ERRORED' +TX_LOG_STATE_ERRORED = "ERRORED" #: The transaction is sill pending or a fatal event ended the job #: before it could report an ERROR status -TX_LOG_STATE_PENDING = 'PENDING' +TX_LOG_STATE_PENDING = "PENDING" # ====================================================================== # Requests #: Query param flag for performing transaction in background with #: early return -FLAG_IS_ASYNC = 'async' -FLAG_IS_DRY_RUN = 'dry_run' +FLAG_IS_ASYNC = "async" +FLAG_IS_DRY_RUN = "dry_run" # ====================================================================== # Error messages -ERR_ASYNC_SCHEDULING = 'The API is currently under heavy load and currently has too many asynchronous tasks. Please try again later.' +ERR_ASYNC_SCHEDULING = "The API is currently under heavy load and currently has too many asynchronous tasks. Please try again later." #: Things go wrong, let's make a message for when they do -MESSAGE_500 = 'Internal server error. Sorry, something unexpected went wrong!' +MESSAGE_500 = "Internal server error. Sorry, something unexpected went wrong!" # ====================================================================== diff --git a/peregrine/resources/submission/graphql/__init__.py b/peregrine/resources/submission/graphql/__init__.py index fb7dc5d6..1dbe5997 100644 --- a/peregrine/resources/submission/graphql/__init__.py +++ b/peregrine/resources/submission/graphql/__init__.py @@ -5,26 +5,22 @@ import graphql from peregrine import dictionary -from peregrine.utils.pyutils import ( - log_duration, -) +from peregrine.utils.pyutils import log_duration from .node import ( NodeField, create_root_fields, resolve_node, NodeCounter, - DataNode, get_datanode_fields_dict, get_datanode_interface_args, resolve_datanode, - NodeType, get_nodetype_fields_dict, get_nodetype_interface_args, resolve_nodetype, ) -#from .node import __fields as ns_fields + from .node import get_fields from .transaction import ( TransactionLogCountField, @@ -33,12 +29,10 @@ resolve_transaction_log_count, ) from .traversal import make_graph_traversal_dict -from .util import ( - set_session_timeout, -) +from .util import set_session_timeout -GRAPHQL_TIMEOUT = float(os.environ.get('GRAPHQL_TIMEOUT', 20)) # seconds +GRAPHQL_TIMEOUT = float(os.environ.get("GRAPHQL_TIMEOUT", 20)) # seconds TIMEOUT_MESSAGE = """ Query exceeded {} second timeout. Please reduce query complexity and @@ -47,7 +41,9 @@ (e.g. with_path_to), or limiting extensive path traversal field inclusion (e.g. _related_cases). 
-""".replace('\n', ' ').strip() +""".replace( + "\n", " " +).strip() def get_schema(): @@ -57,31 +53,35 @@ def get_schema(): ns_fields = get_fields() root_fields.update(create_root_fields(ns_fields)) - root_fields['node'] = NodeField - root_fields['resolve_node'] = resolve_node + root_fields["node"] = NodeField + root_fields["resolve_node"] = resolve_node - DataNode = type('DataNode', (graphene.ObjectType,), get_datanode_fields_dict()) # init DataNode fields + DataNode = type( + "DataNode", (graphene.ObjectType,), get_datanode_fields_dict() + ) # init DataNode fields DataNodeField = graphene.List(DataNode, args=get_datanode_interface_args()) - root_fields['datanode'] = DataNodeField - root_fields['resolve_datanode'] = resolve_datanode + root_fields["datanode"] = DataNodeField + root_fields["resolve_datanode"] = resolve_datanode - NodeType = type('NodeType', (graphene.ObjectType,), get_nodetype_fields_dict()) # init NodeType fields + NodeType = type( + "NodeType", (graphene.ObjectType,), get_nodetype_fields_dict() + ) # init NodeType fields NodeTypeField = graphene.List(NodeType, args=get_nodetype_interface_args()) - root_fields['_node_type'] = NodeTypeField - root_fields['resolve__node_type'] = resolve_nodetype + root_fields["_node_type"] = NodeTypeField + root_fields["resolve__node_type"] = resolve_nodetype - Viewer = type('viewer', (graphene.ObjectType,), root_fields) + Viewer = type("viewer", (graphene.ObjectType,), root_fields) - root_fields['viewer'] = graphene.Field(Viewer) - root_fields['resolve_viewer'] = lambda *_: Viewer() + root_fields["viewer"] = graphene.Field(Viewer) + root_fields["resolve_viewer"] = lambda *_: Viewer() - root_fields['transaction_log'] = TransactionLogField - root_fields['resolve_transaction_log'] = resolve_transaction_log + root_fields["transaction_log"] = TransactionLogField + root_fields["resolve_transaction_log"] = resolve_transaction_log - root_fields['_transaction_log_count'] = TransactionLogCountField - root_fields['resolve__transaction_log_count'] = resolve_transaction_log_count + root_fields["_transaction_log_count"] = TransactionLogCountField + root_fields["resolve__transaction_log_count"] = resolve_transaction_log_count - Root = type('Root', (graphene.ObjectType,), root_fields) + Root = type("Root", (graphene.ObjectType,), root_fields) Schema = graphene.Schema(query=Root, auto_camelcase=False) @@ -107,8 +107,9 @@ def execute_query(query, variables=None, app=None): with timer: set_session_timeout(session, GRAPHQL_TIMEOUT) # result = Schema.execute(query, variable_values=variables) - result = app.graphql_schema.execute(query, variable_values=variables, - return_promise=True) + result = app.graphql_schema.execute( + query, variable_values=variables, return_promise=True + ) NodeCounter.current().run(session) result = result.get() except graphql.error.GraphQLError as e: diff --git a/peregrine/resources/submission/graphql/counts.py b/peregrine/resources/submission/graphql/counts.py index 33a25f57..29fb1f8a 100644 --- a/peregrine/resources/submission/graphql/counts.py +++ b/peregrine/resources/submission/graphql/counts.py @@ -5,20 +5,17 @@ # Brittle, changinge this may result in circular dependencies import node_subclass as ns -import transaction +from . 
import transaction from .base import ( assert_type, munge, ) -from .util import ( - clean_count, -) +from .util import clean_count + +from gdcgraphql import Query -from gdcgraphql import ( - Query, -) class NodeCountQuery(base.GraphQLQuery): @@ -38,15 +35,14 @@ def get_transaction_log_count_result(self, field): self.result = {self.top.key: q.count()} def get_node_count_result(self, field): - label = '_'.join(self.top.name.split('_')[1:-1]) + label = "_".join(self.top.name.split("_")[1:-1]) cls = Node.get_subclass(label) if not cls: - self.errors.append('Unable to execute {} count'.format(label)) + self.errors.append("Unable to execute {} count".format(label)) return None - node_query = ns.NodeSubclassQuery( - self.g, None, self.fragments) + node_query = ns.NodeSubclassQuery(self.g, None, self.fragments) q = self.get_authorized_query(cls) for arg in self.top.arguments: @@ -76,4 +72,4 @@ def _types(): @staticmethod def _query_name(cls): - return '_{}_count'.format(cls.label) + return "_{}_count".format(cls.label) diff --git a/peregrine/resources/submission/graphql/node.py b/peregrine/resources/submission/graphql/node.py index 6e93b9fd..83dc7ca7 100644 --- a/peregrine/resources/submission/graphql/node.py +++ b/peregrine/resources/submission/graphql/node.py @@ -28,21 +28,17 @@ get_authorized_query, get_fields as util_get_fields, filtered_column_dict, - DEFAULT_LIMIT + DEFAULT_LIMIT, ) from . import transaction -from .traversal import ( - subq_paths, -) +from .traversal import subq_paths -from peregrine.resources.submission.constants import ( - case_cache_enabled, -) +from peregrine.resources.submission.constants import case_cache_enabled logging.root.setLevel(level=logging.ERROR) -COUNT_NAME = '_{}_count' +COUNT_NAME = "_{}_count" __gql_object_classes = {} @@ -72,24 +68,22 @@ def filter_project_project_id(q, value, info): subqs = [] for project_id in project_ids: - split = project_id.split('-', 1) + split = project_id.split("-", 1) if len(split) == 2: program_name, project_code = split - subq = q.props(code=project_code)\ - .path('programs')\ - .props(name=program_name) + subq = q.props(code=project_code).path("programs").props(name=program_name) subqs.append(subq) if not subqs: q = q.filter(sa.sql.false()) else: - q = capp.db.nodes(q.entity()).select_entity_from(sa.union_all(*[ - sq.subquery().select() for sq in subqs - ])) + q = capp.db.nodes(q.entity()).select_entity_from( + sa.union_all(*[sq.subquery().select() for sq in subqs]) + ) return q -def with_path_to(q, value, info, union=False, name='with_path_to'): +def with_path_to(q, value, info, union=False, name="with_path_to"): """This will traverse any (any meaning any paths specified in the path generation heuristic which prunes some redundant/wandering paths) from the source entity to the given target type where it will @@ -108,41 +102,41 @@ def with_path_to(q, value, info, union=False, name='with_path_to'): entry = dict(entry) # Check target type - dst_type = entry.pop('type', None) + dst_type = entry.pop("type", None) if not dst_type: raise RuntimeError( - 'Please specify a {{type: }} in the {} filter.' 
- .format(name)) + "Please specify a {{type: }} in the {} filter.".format(name) + ) # Prevent traversal to Node interface if q.entity() is Node: raise RuntimeError( - '{} filter cannot be used with "node" interface' - .format(name)) + '{} filter cannot be used with "node" interface'.format(name) + ) # Define end of traversal filter def end_of_traversal_filter(q, entry=entry): if not entry: return q - for key, val in entry.iteritems(): - if key == 'id': + for key, val in entry.items(): + if key == "id": q = q.ids(val) else: q = q.filter(q.entity()._props.contains({key: val})) return q # Special case for traversing TO case - if case_cache_enabled() and dst_type == 'case': + if case_cache_enabled() and dst_type == "case": # Rely on shortcut link to case, if it doesn't exist, then # this entity does not relate to any cases - if hasattr(q.entity(), '_related_cases'): - subq = q.subq_path('_related_cases', end_of_traversal_filter) + if hasattr(q.entity(), "_related_cases"): + subq = q.subq_path("_related_cases", end_of_traversal_filter) else: subq = q.filter(sa.sql.false()) # Special case for traversing FROM case - elif case_cache_enabled() and q.entity().label == 'case': - link = '_related_{}'.format(dst_type) + elif case_cache_enabled() and q.entity().label == "case": + link = "_related_{}".format(dst_type) q = q.limit(None) if hasattr(q.entity(), link): subq = q.subq_path(link, end_of_traversal_filter) @@ -193,7 +187,7 @@ def apply_arg_quicksearch(q, args, info): """ - search_phrase = args.get('quick_search', None) + search_phrase = args.get("quick_search", None) if not search_phrase: # Safety check to make sure that the quicksearch filter is # actually being used @@ -205,7 +199,7 @@ def apply_arg_quicksearch(q, args, info): cls = q.entity() node_id_attr = sa.func.lower(cls.node_id) - sub_id_attr = cls._props['submitter_id'].astext + sub_id_attr = cls._props["submitter_id"].astext # Search for ids that contain the search_phrase node_id_query = q.filter(node_id_attr.contains(search_phrase)) @@ -227,7 +221,7 @@ def apply_query_args(q, args, info): info: graphene object that holds the query's arguments, models and requested fields. """ - pg_props = set(getattr(q.entity(), '__pg_properties__', {}).keys()) + pg_props = set(getattr(q.entity(), "__pg_properties__", {}).keys()) # *: filter for those with matching dictionary properties for key in set(args.keys()).intersection(pg_props): @@ -243,129 +237,138 @@ def apply_query_args(q, args, info): field_type = q.entity().__pg_properties__[key][0] if field_type == list: # This field has type list. Return supersets of input (i.e. do AND filter) - q = q.filter(*[q.entity()._props[key].astext.like('%"'+v+'"%') for v in val]) + q = q.filter( + *[q.entity()._props[key].astext.like('%"' + v + '"%') for v in val] + ) else: # This field has scalar type. Treat input as several queries (i.e. 
do OR filter) if field_type == bool: # convert True to "true"; False to "false" val = [str(v).lower() for v in val] - q = q.filter(q.entity()._props[key].astext.in_([ - str(v) for v in val])) + q = q.filter(q.entity()._props[key].astext.in_([str(v) for v in val])) # not: nest a NOT filter for props, filters out matches - not_props = args.get('not', {}) - not_props = {item.keys()[0]: item.values()[0] for item in not_props} + not_props = args.get("not", {}) + not_props = {list(item.keys())[0]: list(item.values())[0] for item in not_props} for key in set(not_props.keys()).intersection(pg_props): val = not_props[key] val = val if isinstance(val, list) else [val] - q = q.filter(sa.not_(q.entity()._props[key].astext.in_([ - str(v) for v in val]))) + q = q.filter(sa.not_(q.entity()._props[key].astext.in_([str(v) for v in val]))) # ids: filter for those with ids in a given list - if 'id' in args: - q = q.ids(args.get('id')) + if "id" in args: + q = q.ids(args.get("id")) # ids: filter for those with ids in a given list (alias of `id` filter) - if 'ids' in args: - q = q.ids(args.get('ids')) + if "ids" in args: + q = q.ids(args.get("ids")) # submitter_id: filter for those with submitter_ids in a given list - if q.entity().label == 'node' and 'submitter_id' in args: - val = args['submitter_id'] + if q.entity().label == "node" and "submitter_id" in args: + val = args["submitter_id"] val = val if isinstance(val, list) else [val] - q = q.filter(q.entity()._props['submitter_id'].astext.in_([str(v) for v in val])) + q = q.filter( + q.entity()._props["submitter_id"].astext.in_([str(v) for v in val]) + ) # quick_search: see ``apply_arg_quicksearch`` - if 'quick_search' in args: + if "quick_search" in args: q = apply_arg_quicksearch(q, args, info) # created_after: filter by created datetime - if 'created_after' in args: - q = q.filter(q.entity()._props['created_datetime'].cast(sa.String).cast(sa.DateTime) - > parse(args['created_after'])) + if "created_after" in args: + q = q.filter( + q.entity()._props["created_datetime"].cast(sa.String).cast(sa.DateTime) + > parse(args["created_after"]) + ) # created_before: filter by created datetime - if 'created_before' in args: - q = q.filter(q.entity()._props['created_datetime'].cast(sa.String).cast(sa.DateTime) - < parse(args['created_before'])) + if "created_before" in args: + q = q.filter( + q.entity()._props["created_datetime"].cast(sa.String).cast(sa.DateTime) + < parse(args["created_before"]) + ) # updated_after: filter by update datetime - if 'updated_after' in args: - q = q.filter(q.entity()._props['updated_datetime'].cast(sa.String).cast(sa.DateTime) - > parse(args['updated_after'])) + if "updated_after" in args: + q = q.filter( + q.entity()._props["updated_datetime"].cast(sa.String).cast(sa.DateTime) + > parse(args["updated_after"]) + ) # updated_before: filter by update datetime - if 'updated_before' in args: - q = q.filter(q.entity()._props['updated_datetime'].cast(sa.String).cast(sa.DateTime) - < parse(args['updated_before'])) + if "updated_before" in args: + q = q.filter( + q.entity()._props["updated_datetime"].cast(sa.String).cast(sa.DateTime) + < parse(args["updated_before"]) + ) # with_links: (AND) (filter for those with given links) - if 'with_links' in args: - for link in set(args['with_links']): + if "with_links" in args: + for link in set(args["with_links"]): q = q.filter(get_link_attr(q.entity(), link).any()) # with_links_any: (OR) (filter for those with given links) - if 'with_links_any' in args: - links = set(args['with_links_any']) + if 
"with_links_any" in args: + links = set(args["with_links_any"]) if links: subqs = [] for link in links: subqs.append(q.filter(get_link_attr(q.entity(), link).any())) - q = capp.db.nodes(q.entity()).select_entity_from(sa.union_all(*[ - subq.subquery().select() for subq in subqs - ])) + q = capp.db.nodes(q.entity()).select_entity_from( + sa.union_all(*[subq.subquery().select() for subq in subqs]) + ) # without_links (AND) (filter for those missing given links) - if 'without_links' in args: - for link in args['without_links']: + if "without_links" in args: + for link in args["without_links"]: q = q.filter(sa.not_(get_link_attr(q.entity(), link).any())) # with_path_to: (filter for those with a given traversal) - if 'with_path_to' in args: - q = with_path_to(q, args['with_path_to'], info, union=False) + if "with_path_to" in args: + q = with_path_to(q, args["with_path_to"], info, union=False) - if 'with_path_to_any' in args: - q = with_path_to(q, args['with_path_to_any'], info, union=True) + if "with_path_to_any" in args: + q = with_path_to(q, args["with_path_to_any"], info, union=True) # without_path_to: (filter for those missing a given traversal) - if 'without_path_to' in args: - q = q.except_(with_path_to( - q, args['without_path_to'], info, name='without_path_to')) + if "without_path_to" in args: + q = q.except_( + with_path_to(q, args["without_path_to"], info, name="without_path_to") + ) # project.project_id: Filter projects by logical project_id - if 'project_id' in args and q.entity().label == 'project': + if "project_id" in args and q.entity().label == "project": # Special case for filtering project by project_id - q = filter_project_project_id(q, args['project_id'], info) + q = filter_project_project_id(q, args["project_id"], info) # order_by_asc: Apply an ordering to the results # (ascending). NOTE: should be after all other non-ordering, # before limit, offset queries - if 'order_by_asc' in args: - key = args['order_by_asc'] - if key == 'id': + if "order_by_asc" in args: + key = args["order_by_asc"] + if key == "id": q = q.order_by(q.entity().node_id) - elif key in ['type']: + elif key in ["type"]: pass elif key in q.entity().__pg_properties__: q = q.order_by(q.entity()._props[key]) else: - raise RuntimeError('Cannot order by {} on {}'.format( - key, q.entity().label)) + raise RuntimeError("Cannot order by {} on {}".format(key, q.entity().label)) # order_by_desc: Apply an ordering to the results (descending) # NOTE: should be after all other non-ordering, before limit, # offset queries - if 'order_by_desc' in args: - key = args['order_by_desc'] - if key == 'id': + if "order_by_desc" in args: + key = args["order_by_desc"] + if key == "id": q = q.order_by(q.entity().node_id.desc()) - elif key in ['type']: + elif key in ["type"]: pass elif key in q.entity().__pg_properties__: q = q.order_by(q.entity()._props[key].desc()) else: - raise RuntimeError('Cannot order by {} on {}'.format( - key, q.entity().label)) + raise RuntimeError("Cannot order by {} on {}".format(key, q.entity().label)) # first: truncate result list q = apply_arg_limit(q.from_self(), args, info) @@ -379,6 +382,7 @@ def apply_query_args(q, args, info): # ====================================================================== # Node interface + def load_node(n, info, fields_depend_on_columns=None): """Turns a node into a dictionary (including ``type, id``). This dictionary will prune any unexpected properties from the JSONB. 
@@ -400,6 +404,7 @@ def load_node(n, info, fields_depend_on_columns=None): type=n.label, ) + class Node(graphene.Interface): """The query object that represents the psqlgraph.Node base""" @@ -426,11 +431,11 @@ def resolve_node(self, info, **args): """ # get the list of categories queried by the user - if args.get('category'): + if args.get("category"): subclasses_labels = [ node for node in dictionary.schema - if dictionary.schema[node]['category'] in args['category'] + if dictionary.schema[node]["category"] in args["category"] ] subclasses = [ node @@ -441,7 +446,12 @@ def resolve_node(self, info, **args): else: q_all = query_node_with_args(args, info) - return [__gql_object_classes[n.label](**load_node(n, info, Node.fields_depend_on_columns)) for n in q_all] + return [ + __gql_object_classes[n.label]( + **load_node(n, info, Node.fields_depend_on_columns) + ) + for n in q_all + ] def query_with_args(classes, args, info): @@ -453,20 +463,22 @@ def query_with_args(classes, args, info): args: dictionary of the arguments passed to the query. info: graphene object that holds the query's arguments, models and requested fields. """ - of_types = [psqlgraph.Node.get_subclass(label) - for label in set(args.get('of_type', []))] + of_types = [ + psqlgraph.Node.get_subclass(label) for label in set(args.get("of_type", [])) + ] rv = [] for cls in classes: if not of_types or cls in of_types: q = get_authorized_query(cls) - if 'project_id' in args: - q = q.filter(q.entity()._props['project_id'].astext - == args['project_id']) + if "project_id" in args: + q = q.filter( + q.entity()._props["project_id"].astext == args["project_id"] + ) rv.extend(apply_query_args(q, args, info).all()) # apply_arg_limit() applied the limit to individual query results, but we # are concatenating several query results so we need to apply it again - limit = args.get('first', DEFAULT_LIMIT) + limit = args.get("first", DEFAULT_LIMIT) if limit > 0: return rv[:limit] else: @@ -482,14 +494,14 @@ def query_node_with_args(args, info): XXX: These two methods may be rewritten in a more efficient and consistent way. """ - if 'of_type' in args: + if "of_type" in args: # TODO: (jsm) find a better solution. currently this filter # will do a subquery for each type AND LOAD THE IDS of all the # nodes, then perform a second query given those ids. 
We # cannot do a ``select_from`` because it does not work # properly for the abstract base class with concrete table # inheritance (a.k.a it can't find the colums for Node) - of_types = set(args['of_type']) + of_types = set(args["of_type"]) ids = [] for label in of_types: entity = psqlgraph.Node.get_subclass(label) @@ -515,7 +527,6 @@ def lookup_graphql_type(T): return { bool: graphene.Boolean, float: graphene.Float, - long: graphene.Float, int: graphene.Int, list: graphene.List(graphene.String), }.get(T, graphene.String) @@ -528,22 +539,22 @@ def lookup_graphql_type(T): def get_node_class_property_args(cls, not_props_io={}): args = { name: lookup_graphql_type(types[0]) - for name, types in cls.__pg_properties__.iteritems() + for name, types in cls.__pg_properties__.items() } - if cls.label == 'project': - args['project_id'] = graphene.List(graphene.String) + if cls.label == "project": + args["project_id"] = graphene.List(graphene.String) - not_props_io_name = 'NotPropertiesInput_{}'.format(cls.label) + not_props_io_name = "NotPropertiesInput_{}".format(cls.label) if not_props_io_name not in not_props_io: args_not = {} args_not.update(get_node_class_property_attrs(cls)) not_props_io[not_props_io_name] = type( - not_props_io_name, - (graphene.InputObjectType,), - args_not, + not_props_io_name, (graphene.InputObjectType,), args_not, ) - globals()[not_props_io[not_props_io_name].__name__] = not_props_io[not_props_io_name] - args['not'] = graphene.List(__name__ + '.' + not_props_io_name) + globals()[not_props_io[not_props_io_name].__name__] = not_props_io[ + not_props_io_name + ] + args["not"] = graphene.List(__name__ + "." + not_props_io_name) return args @@ -565,31 +576,36 @@ def get_base_node_args(): def get_node_interface_args(): - return dict(get_base_node_args(), **dict( - of_type=graphene.List(graphene.String), - project_id=graphene.String(), - category=graphene.String(), - )) + return dict( + get_base_node_args(), + **dict( + of_type=graphene.List(graphene.String), + project_id=graphene.String(), + category=graphene.String(), + ) + ) def get_node_class_args(cls, _cache={}, _type_cache={}): - if 'WithPathToInput' not in _type_cache: + if "WithPathToInput" not in _type_cache: WithPathToInput = get_withpathto_type() - _type_cache['WithPathToInput'] = WithPathToInput + _type_cache["WithPathToInput"] = WithPathToInput else: - WithPathToInput = _type_cache['WithPathToInput'] + WithPathToInput = _type_cache["WithPathToInput"] if cls in _cache: return _cache[cls] args = get_base_node_args() - args.update(dict( - with_links=graphene.List(graphene.String), - with_links_any=graphene.List(graphene.String), - without_links=graphene.List(graphene.String), - with_path_to=graphene.List(WithPathToInput), - with_path_to_any=graphene.List(WithPathToInput), - without_path_to=graphene.List(WithPathToInput), - )) + args.update( + dict( + with_links=graphene.List(graphene.String), + with_links_any=graphene.List(graphene.String), + without_links=graphene.List(graphene.String), + with_path_to=graphene.List(WithPathToInput), + with_path_to_any=graphene.List(WithPathToInput), + without_path_to=graphene.List(WithPathToInput), + ) + ) # For dictionary fields with scalar types, e.g. submitter_id, we accept from the user # either a single scalar arg or a list of scalar args. The latter is treated as a bulk query @@ -606,9 +622,7 @@ def get_node_class_args(cls, _cache={}, _type_cache={}): # See comments at def apply_query_args(). 
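
The lookup_graphql_type hunk above drops the long: graphene.Float entry because Python 3 has no separate long type; integers are arbitrary precision ints. A minimal sketch of the resulting mapping, assuming only that graphene is installed, mirroring the function as it appears in the diff; any unmapped Python type still falls back to graphene.String:

    import graphene

    def lookup_graphql_type(T):
        # Python 3 mapping: `long` is gone because ints are arbitrary precision.
        return {
            bool: graphene.Boolean,
            float: graphene.Float,
            int: graphene.Int,
            list: graphene.List(graphene.String),
        }.get(T, graphene.String)

    assert lookup_graphql_type(int) is graphene.Int
    assert lookup_graphql_type(dict) is graphene.String  # unmapped types fall back to String
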
property_args = { - name: graphene.List(val) - if not isinstance(val, graphene.List) - else val + name: graphene.List(val) if not isinstance(val, graphene.List) else val for name, val in get_node_class_property_args(cls).items() } args.update(property_args) @@ -634,17 +648,22 @@ def resolve_type(self, info, *args): attrs = { name: graphene.Field(lookup_graphql_type(types[0])) - for name, types in cls.__pg_properties__.iteritems() + for name, types in cls.__pg_properties__.items() } - attrs['resolve_type'] = resolve_type + attrs["resolve_type"] = resolve_type + + if cls.label == "project": - if cls.label == 'project': def resolve_project_id(self, info, *args): - program = get_authorized_query(md.Program).subq_path( - 'projects', lambda q: q.ids(self.id)).one() - return '{}-{}'.format(program.name, self.code) - attrs['project_id'] = graphene.String() - attrs['resolve_project_id'] = resolve_project_id + program = ( + get_authorized_query(md.Program) + .subq_path("projects", lambda q: q.ids(self.id)) + .one() + ) + return "{}-{}".format(program.name, self.code) + + attrs["project_id"] = graphene.String() + attrs["resolve_project_id"] = resolve_project_id attrs.update(get_node_class_special_attrs(cls)) @@ -668,82 +687,87 @@ def get_node_class_special_attrs(cls): def get_node_class_link_attrs(cls): - attrs = {name: graphene.List( - __name__ + '.' + link['type'].label, - args=get_node_class_args(link['type']), - ) for name, link in cls._pg_edges.iteritems()} + attrs = { + name: graphene.List( + __name__ + "." + link["type"].label, args=get_node_class_args(link["type"]), + ) + for name, link in cls._pg_edges.items() + } def resolve__related_cases(self, info, args): if not case_cache_enabled(): - return [] + return [] # Don't resolve related cases for cases - if cls.label == 'case': + if cls.label == "case": return [] - q = with_path_to(get_authorized_query(md.Case), { - 'type': cls.label, - 'id': self.id, - }, info, name='related_cases') - qcls = __gql_object_classes['case'] + q = with_path_to( + get_authorized_query(md.Case), + {"type": cls.label, "id": self.id,}, + info, + name="related_cases", + ) + qcls = __gql_object_classes["case"] try: - return [qcls(**load_node(n, info, Node.fields_depend_on_columns)) for n in q.all()] + return [ + qcls(**load_node(n, info, Node.fields_depend_on_columns)) + for n in q.all() + ] except Exception as e: capp.logger.exception(e) raise if case_cache_enabled(): - attrs['resolve__related_cases'] = resolve__related_cases - attrs['_related_cases'] = graphene.List( - 'peregrine.resources.submission.graphql.node.case', - args=get_node_class_args(md.Case) + attrs["resolve__related_cases"] = resolve__related_cases + attrs["_related_cases"] = graphene.List( + "peregrine.resources.submission.graphql.node.case", + args=get_node_class_args(md.Case), ) for link in cls._pg_edges: name = COUNT_NAME.format(link) - attrs[name] = graphene.Field( - graphene.Int, args=get_node_class_args(cls)) + attrs[name] = graphene.Field(graphene.Int, args=get_node_class_args(cls)) # transaction logs that affected this node def resolve_transaction_logs_count(self, info, **args): - args = dict(args, **{'entities': [self.id]}) + args = dict(args, **{"entities": [self.id]}) return transaction.resolve_transaction_log_count(self, info, **args) - attrs['resolve__transaction_logs_count'] = resolve_transaction_logs_count - attrs['_transaction_logs_count'] = graphene.Field( - graphene.Int, - args=transaction.get_transaction_log_args(), + attrs["resolve__transaction_logs_count"] = 
resolve_transaction_logs_count + attrs["_transaction_logs_count"] = graphene.Field( + graphene.Int, args=transaction.get_transaction_log_args(), ) def resolve_transaction_logs(self, info, **args): - args = dict(args, **{'entities': [self.id]}) + args = dict(args, **{"entities": [self.id]}) return transaction.resolve_transaction_log(self, info, **args) - attrs['resolve__transaction_logs'] = resolve_transaction_logs - attrs['_transaction_logs'] = graphene.List( - transaction.TransactionLog, - args=transaction.get_transaction_log_args(), + attrs["resolve__transaction_logs"] = resolve_transaction_logs + attrs["_transaction_logs"] = graphene.List( + transaction.TransactionLog, args=transaction.get_transaction_log_args(), ) _links_args = get_node_interface_args() - _links_args.pop('of_type', None) - attrs['_links'] = graphene.List(Node, args=_links_args) + _links_args.pop("of_type", None) + attrs["_links"] = graphene.List(Node, args=_links_args) return attrs def get_node_class_link_resolver_attrs(cls): link_resolver_attrs = {} - for link_name, link in cls._pg_edges.iteritems(): + for link_name, link in cls._pg_edges.items(): def link_query(self, info, cls=cls, link=link, **args): try: - target, backref = link['type'], link['backref'] + target, backref = link["type"], link["backref"] # Subquery for neighor connected to node - sq = get_authorized_query(target).filter( - getattr(target, backref) - .any(node_id=self.id)).subquery() - q = get_authorized_query(target).filter( - target.node_id == sq.c.node_id) + sq = ( + get_authorized_query(target) + .filter(getattr(target, backref).any(node_id=self.id)) + .subquery() + ) + q = get_authorized_query(target).filter(target.node_id == sq.c.node_id) q = apply_query_args(q, args, info) return q except Exception as e: @@ -754,12 +778,16 @@ def link_query(self, info, cls=cls, link=link, **args): def resolve_link(self, info, cls=cls, link=link, **args): try: q = link_query(self, info, cls=cls, link=link, **args) - qcls = __gql_object_classes[link['type'].label] - return [qcls(**load_node(n, info, Node.fields_depend_on_columns)) for n in q.all()] + qcls = __gql_object_classes[link["type"].label] + return [ + qcls(**load_node(n, info, Node.fields_depend_on_columns)) + for n in q.all() + ] except Exception as e: capp.logger.exception(e) raise - lr_name = 'resolve_{}'.format(link_name) + + lr_name = "resolve_{}".format(link_name) resolve_link.__name__ = lr_name link_resolver_attrs[lr_name] = resolve_link @@ -767,40 +795,47 @@ def resolve_link(self, info, cls=cls, link=link, **args): def resolve_link_count(self, info, cls=cls, link=link, **args): try: q = link_query(self, info, cls=cls, link=link, **args) - q = q.with_entities(sa.distinct(link['type'].node_id)) + q = q.with_entities(sa.distinct(link["type"].node_id)) q = q.limit(None) return clean_count(q) except Exception as e: capp.logger.exception(e) raise - lr_count_name = 'resolve_{}'.format(COUNT_NAME.format(link_name)) + + lr_count_name = "resolve_{}".format(COUNT_NAME.format(link_name)) resolve_link_count.__name__ = lr_count_name link_resolver_attrs[lr_count_name] = resolve_link_count # Arbitrary link def resolve_links(self, info, cls=cls, **args): try: - edge_out_sq = capp.db.edges().filter( - psqlgraph.Edge.src_id == self.id).subquery() - edge_in_sq = capp.db.edges().filter( - psqlgraph.Edge.dst_id == self.id).subquery() + edge_out_sq = ( + capp.db.edges().filter(psqlgraph.Edge.src_id == self.id).subquery() + ) + edge_in_sq = ( + capp.db.edges().filter(psqlgraph.Edge.dst_id == self.id).subquery() + ) q1 = 
get_authorized_query(psqlgraph.Node).filter( - psqlgraph.Node.node_id == edge_in_sq.c.src_id) + psqlgraph.Node.node_id == edge_in_sq.c.src_id + ) q2 = get_authorized_query(psqlgraph.Node).filter( - psqlgraph.Node.node_id == edge_out_sq.c.dst_id) + psqlgraph.Node.node_id == edge_out_sq.c.dst_id + ) q1 = apply_query_args(q1, args, info).limit(None) q2 = apply_query_args(q2, args, info).limit(None) q = q1.union(q2) apply_arg_limit(q, args, info) return [ - __gql_object_classes[n.label](**load_node(n, info, Node.fields_depend_on_columns)) + __gql_object_classes[n.label]( + **load_node(n, info, Node.fields_depend_on_columns) + ) for n in q.all() ] except Exception as e: capp.logger.exception(e) raise - lr_links_name = 'resolve__links' + lr_links_name = "resolve__links" resolve_link_count.__name__ = lr_links_name link_resolver_attrs[lr_links_name] = resolve_links @@ -809,14 +844,15 @@ def resolve_links(self, info, cls=cls, **args): def create_node_class_gql_object(cls): def _make_inner_meta_type(): - return type('Meta', (), {'interfaces': (Node, )}) + return type("Meta", (), {"interfaces": (Node,)}) + attrs = {} attrs.update(get_node_class_property_attrs(cls)) attrs.update(get_node_class_link_attrs(cls)) attrs.update(get_node_class_link_resolver_attrs(cls)) - attrs['Meta'] = _make_inner_meta_type() + attrs["Meta"] = _make_inner_meta_type() - gql_object = type(cls.label, (graphene.ObjectType, ), attrs) + gql_object = type(cls.label, (graphene.ObjectType,), attrs) # Add this class to the global namespace to graphene can load it globals()[gql_object.__name__] = gql_object @@ -839,25 +875,25 @@ def __init__(self): @classmethod def current(cls): - if not hasattr(flask.g, 'node_counter'): + if not hasattr(flask.g, "node_counter"): flask.g.node_counter = cls() return flask.g.node_counter def add_count(self, cls, args): # escape non-trivial cases defined in `authorization_filter` - if cls != psqlgraph.Node and not hasattr(cls, 'project_id'): + if cls != psqlgraph.Node and not hasattr(cls, "project_id"): return None - if cls.label == 'project': + if cls.label == "project": return None # escape if project_id is not the only args - if list(args.keys()) != ['project_id']: + if list(args.keys()) != ["project_id"]: return None # extract project_id and guarantee permission - project_id = args['project_id'] + project_id = args["project_id"] if isinstance(project_id, (list, tuple)) and len(project_id) == 1: project_id = project_id[0] - if not isinstance(project_id, (str, unicode)): + if not isinstance(project_id, str): # escape if multiple project_ids are given return None if project_id not in flask.g.read_access_projects: @@ -866,34 +902,37 @@ def add_count(self, cls, args): # group project_id and name them project_id_name = self._project_ids.get(project_id, None) if project_id_name is None: - project_id_name = 'p_%s' % len(self._project_ids) + project_id_name = "p_%s" % len(self._project_ids) self._project_ids[project_id] = project_id_name # prepare the subquery and promise - key = 'c_%s' % len(self._queries) + key = "c_%s" % len(self._queries) p = Promise() - self._queries.append(( - key, - p, - "(SELECT count(*) FROM %s WHERE _props->>'project_id' = :%s) AS %s" - % (cls.__tablename__, project_id_name, key), - )) + self._queries.append( + ( + key, + p, + "(SELECT count(*) FROM %s WHERE _props->>'project_id' = :%s) AS %s" + % (cls.__tablename__, project_id_name, key), + ) + ) return p def run(self, session): if not self._queries: return - sql = 'SELECT %s;' % ', '.join(count for _, _, count in self._queries) + 
sql = "SELECT %s;" % ", ".join(count for _, _, count in self._queries) results = session.execute( - sql, dict((v, k) for k, v in self._project_ids.iteritems())).fetchone() + sql, dict((v, k) for k, v in self._project_ids.items()) + ).fetchone() for key, promise, _ in self._queries: promise.fulfill(results[key]) def create_root_fields(fields): attrs = {} - for cls, gql_object in fields.iteritems(): + for cls, gql_object in fields.items(): name = cls.label # Object resolver @@ -901,17 +940,19 @@ def resolver(self, info, cls=cls, gql_object=gql_object, **args): q = get_authorized_query(cls) q = apply_query_args(q, args, info) try: - return [gql_object(**load_node(n, info, Node.fields_depend_on_columns)) for n in q.all()] + return [ + gql_object(**load_node(n, info, Node.fields_depend_on_columns)) + for n in q.all() + ] except Exception as e: capp.logger.exception(e) raise field = graphene.Field( - graphene.List(gql_object), - args=get_node_class_args(cls), + graphene.List(gql_object), args=get_node_class_args(cls), ) - res_name = 'resolve_{}'.format(name) + res_name = "resolve_{}".format(name) resolver.__name__ = res_name attrs[name] = field attrs[res_name] = resolver @@ -924,30 +965,39 @@ def count_resolver(self, info, cls=cls, gql_object=gql_object, **args): q = get_authorized_query(cls) q = apply_query_args(q, args, info) - if 'with_path_to' in args or 'with_path_to_any' in args: + if "with_path_to" in args or "with_path_to_any" in args: q = q.with_entities(sa.distinct(cls.node_id)) - q = q.limit(args.get('first', None)) + q = q.limit(args.get("first", None)) return clean_count(q) - count_field = graphene.Field( - graphene.Int, args=get_node_class_args(cls)) + count_field = graphene.Field(graphene.Int, args=get_node_class_args(cls)) count_name = COUNT_NAME.format(name) - count_res_name = 'resolve_{}'.format(count_name) + count_res_name = "resolve_{}".format(count_name) count_resolver.__name__ = count_res_name attrs[count_name] = count_field attrs[count_res_name] = count_resolver return attrs + def get_withpathto_type(): - return type('WithPathToInput', (graphene.InputObjectType,), dict( - id=graphene.String(), - type=graphene.String(required=True), - **{k: graphene.Field(v) for cls_attrs in [ - get_node_class_property_args(cls) - for cls in psqlgraph.Node.get_subclasses() - ] for k, v in cls_attrs.iteritems()} - )) + return type( + "WithPathToInput", + (graphene.InputObjectType,), + dict( + id=graphene.String(), + type=graphene.String(required=True), + **{ + k: graphene.Field(v) + for cls_attrs in [ + get_node_class_property_args(cls) + for cls in psqlgraph.Node.get_subclasses() + ] + for k, v in cls_attrs.items() + } + ), + ) + def get_fields(): __fields = { @@ -955,11 +1005,12 @@ def get_fields(): for cls in psqlgraph.Node.get_subclasses() } - for cls, gql_object in __fields.iteritems(): + for cls, gql_object in __fields.items(): __gql_object_classes[cls.label] = gql_object return __fields + NodeField = graphene.List(Node, args=get_node_interface_args()) @@ -970,7 +1021,7 @@ def get_fields(): class DataNode(graphene.Interface): id = graphene.ID() data_subclasses = None - shared_fields = None # fields shared by all data nodes in the dictionary + shared_fields = None # fields shared by all data nodes in the dictionary def get_data_subclasses(): @@ -981,7 +1032,7 @@ def get_data_subclasses(): data_subclasses_labels = set( node for node in dictionary.schema - if dictionary.schema[node]['category'].endswith('_file') + if dictionary.schema[node]["category"].endswith("_file") ) # get the 
subclasses for the data categories DataNode.data_subclasses = set( @@ -997,6 +1048,7 @@ def get_datanode_fields_dict(): """Return a dictionary containing the fields shared by all data nodes.""" if not DataNode.shared_fields: + def instantiate_graphene(t): return t if isinstance(t, graphene.List) else t() @@ -1004,15 +1056,14 @@ def instantiate_graphene(t): DataNode.shared_fields = { field: instantiate_graphene(lookup_graphql_type(types[0])) for subclass in get_data_subclasses() - for field, types in subclass.__pg_properties__.iteritems() - if field not in subclass._pg_edges.keys() # don't include the links + for field, types in subclass.__pg_properties__.items() + if field not in subclass._pg_edges.keys() # don't include the links } # add required node fields - DataNode.shared_fields.update({ - 'id': graphene.String(), - 'type': graphene.String(), - }) + DataNode.shared_fields.update( + {"id": graphene.String(), "type": graphene.String(),} + ) return DataNode.shared_fields @@ -1024,17 +1075,18 @@ def resolve_datanode(self, info, **args): A list of graphene object classes. """ - return [__gql_object_classes[n.label](**load_node(n, info)) - for n in query_with_args(get_data_subclasses(), args, info)] + return [ + __gql_object_classes[n.label](**load_node(n, info)) + for n in query_with_args(get_data_subclasses(), args, info) + ] def get_datanode_interface_args(): args = get_base_node_args() args.update(get_datanode_fields_dict()) - args.update({ - 'of_type': graphene.List(graphene.String), - 'project_id': graphene.String(), - }) + args.update( + {"of_type": graphene.List(graphene.String), "project_id": graphene.String(),} + ) return args @@ -1044,7 +1096,7 @@ def get_datanode_interface_args(): class NodeType(graphene.Interface): id = graphene.ID() - dictionary_fields = None # all the fields in the dictionary + dictionary_fields = None # all the fields in the dictionary def get_nodetype_fields_dict(): @@ -1052,14 +1104,18 @@ def get_nodetype_fields_dict(): if not NodeType.dictionary_fields: - all_dictionary_fields = set(key for node in dictionary.schema.values() for key in node.keys()) + all_dictionary_fields = set( + key + for node in list(dictionary.schema.values()) + for key in list(node.keys()) + ) # convert to graphene types dictionary_fields_dict = { field: graphene.String() for field in all_dictionary_fields # regex for field names accepted by graphql -> remove '$schema' - if re.match('^[_a-zA-Z][_a-zA-Z0-9]*$', field) + if re.match("^[_a-zA-Z][_a-zA-Z0-9]*$", field) } NodeType.dictionary_fields = dictionary_fields_dict @@ -1093,18 +1149,15 @@ def resolve_nodetype(self, info, **args): all_data = apply_nodetype_args(all_data, args) # convert to graphene objects - gql_objects = [ - type(node, (graphene.ObjectType, ), data) - for data in all_data - ] + gql_objects = [type(node, (graphene.ObjectType,), data) for data in all_data] return gql_objects def get_nodetype_interface_args(): args = { - 'first': graphene.Int(default_value=DEFAULT_LIMIT), - 'order_by_asc': graphene.String(), - 'order_by_desc': graphene.String() + "first": graphene.Int(default_value=DEFAULT_LIMIT), + "order_by_asc": graphene.String(), + "order_by_desc": graphene.String(), } args.update(get_nodetype_fields_dict()) return args @@ -1128,15 +1181,15 @@ def apply_nodetype_args(data, args): l = list(data) - if 'order_by_asc' in args: - l = sorted(l, key=lambda d: d[args['order_by_asc']]) + if "order_by_asc" in args: + l = sorted(l, key=lambda d: d[args["order_by_asc"]]) - if 'order_by_desc' in args: - l = sorted(l, 
key=lambda d: d[args['order_by_desc']], reverse=True) + if "order_by_desc" in args: + l = sorted(l, key=lambda d: d[args["order_by_desc"]], reverse=True) # apply_arg_limit() applied the limit to individual query results, but we # are concatenating several query results so we need to apply it again - limit = args.get('first', DEFAULT_LIMIT) + limit = args.get("first", DEFAULT_LIMIT) limit = limit if limit > 0 else None l = l[:limit] diff --git a/peregrine/resources/submission/graphql/root.py b/peregrine/resources/submission/graphql/root.py index 0adcc9ab..a0dc2817 100644 --- a/peregrine/resources/submission/graphql/root.py +++ b/peregrine/resources/submission/graphql/root.py @@ -9,7 +9,6 @@ class RootQuery(GraphQLQuery): - def parse(self): """To allow 'arbitrary' complexity but not denial of service, set both database and application level timeouts @@ -33,18 +32,16 @@ def parse_field(self, field, query_class): """ - if query_class and hasattr(query_class, 'parse'): + if query_class and hasattr(query_class, "parse"): pass elif isinstance(field, FragmentSpread): query_class = FragmentQuery elif Node.get_subclass(field.name): query_class = NodeSubclassQuery - elif field.name.endswith('_count'): + elif field.name.endswith("_count"): query_class = NodeCountQuery if query_class: self.subquery(query_class, field, self.result) else: - self.errors.append( - "Cannot query field '{}' on 'Root'" - .format(field.name)) + self.errors.append("Cannot query field '{}' on 'Root'".format(field.name)) diff --git a/peregrine/resources/submission/graphql/transaction.py b/peregrine/resources/submission/graphql/transaction.py index 7c8e4c9f..8a0dc21b 100644 --- a/peregrine/resources/submission/graphql/transaction.py +++ b/peregrine/resources/submission/graphql/transaction.py @@ -19,9 +19,7 @@ logger = get_logger(__name__) -from ..constants import ( - TX_LOG_STATE_SUCCEEDED, -) +from ..constants import TX_LOG_STATE_SUCCEEDED from .util import ( apply_arg_limit, @@ -31,17 +29,12 @@ apply_load_only, ) -from peregrine.resources.submission.constants import ( - case_cache_enabled, -) +from peregrine.resources.submission.constants import case_cache_enabled + def filter_to_cls_fields(cls, doc): fields = set(cls._meta.fields.keys()) - doc = { - key: val - for key, val in doc.iteritems() - if key in fields - } + doc = {key: val for key, val in doc.items() if key in fields} dropped = set(doc.keys()) - fields if dropped: logger.warn("Dropping keys %s", dropped) @@ -65,7 +58,10 @@ def resolve_type(self, info, **args): class TransactionResponseError(graphene.ObjectType): keys = graphene.List(graphene.String) - dependents = graphene.List(GenericEntity, description='List of entities that depend on this entity such that the transaction failed.') + dependents = graphene.List( + GenericEntity, + description="List of entities that depend on this entity such that the transaction failed.", + ) message = graphene.String() type = graphene.String() @@ -74,10 +70,7 @@ def resolve_type(self, info, **args): def resolve_dependents(self, info, **args): try: - return [ - GenericEntity(**dependent) - for dependent in self.dependents or [] - ] + return [GenericEntity(**dependent) for dependent in self.dependents or []] except AttributeError: # graphene does unsightly things, if there are no # dependents passed to init, then it looks for dependents @@ -109,10 +102,7 @@ class TransactionResponseEntity(graphene.ObjectType): warnings = graphene.String() def resolve_errors(self, info, **args): - return [ - TransactionResponseError(**error) - for error 
in self.errors - ] + return [TransactionResponseError(**error) for error in self.errors] def resolve_unique_keys(self, info, **args): """Return a string dump of the unique keys. This is a string because @@ -213,6 +203,7 @@ def resolve_response_json(cls, document, *args, **kwargs): except Exception as exc: logger.exception(exc) + class TransactionLog(graphene.ObjectType): id = graphene.ID() is_dry_run = graphene.Boolean() @@ -245,18 +236,24 @@ class TransactionLog(graphene.ObjectType): } def resolve_project_id(self, info, **args): - return '{}-{}'.format(self.program, self.project) + return "{}-{}".format(self.program, self.project) def resolve_documents(self, info, **args): - return [TransactionDocument(**dict( - filtered_column_dict(r, info, TransactionDocument.fields_depend_on_columns), - **{'response_json': json.dumps(r.response_json)} - )) for r in self.documents] + return [ + TransactionDocument( + **dict( + filtered_column_dict( + r, info, TransactionDocument.fields_depend_on_columns + ), + **{"response_json": json.dumps(r.response_json)} + ) + ) + for r in self.documents + ] def resolve_snapshots(self, info, **args): return [ - TransactionSnapshot(**filtered_column_dict(r, info)) - for r in self.snapshots + TransactionSnapshot(**filtered_column_dict(r, info)) for r in self.snapshots ] def resolve_type(self, info, **args): @@ -264,16 +261,16 @@ def resolve_type(self, info, **args): return self.TYPE_MAP.get(self.role.lower(), self.role.lower()) def resolve_related_cases(self, info, **args): - if not case_cache_enabled(): + if not case_cache_enabled(): return [] related_cases = {} for document in self.documents: - entities = document.response_json.get('entities', []) + entities = document.response_json.get("entities", []) for entity in entities: - for related_case in entity.get('related_cases', []): - related_cases['id'] = { - 'id': related_case.get('id', None), - 'submitter_id': related_case.get('submitter_id', None), + for related_case in entity.get("related_cases", []): + related_cases["id"] = { + "id": related_case.get("id", None), + "submitter_id": related_case.get("submitter_id", None), } return [ @@ -299,23 +296,33 @@ def get_transaction_log_args(): entities=graphene.List(graphene.String), is_dry_run=graphene.Boolean(), closed=graphene.Boolean(), - committable=graphene.Boolean(description='(committable: true) means (is_dry_run: true) AND (closed: false) AND (state: "SUCCEEDED") AND (committed_by is None). Note: committed_by is None cannot be represented in GraphQL, hence this argument.'), + committable=graphene.Boolean( + description='(committable: true) means (is_dry_run: true) AND (closed: false) AND (state: "SUCCEEDED") AND (committed_by is None). Note: committed_by is None cannot be represented in GraphQL, hence this argument.' 
+ ), state=graphene.String(), committed_by=graphene.ID(), ) def resolve_transaction_log_query(self, info, **args): - sortable = ['id', 'submitter', 'role', 'program', 'project', - 'created_datetime', 'canonical_json', 'project_id'] + sortable = [ + "id", + "submitter", + "role", + "program", + "project", + "created_datetime", + "canonical_json", + "project_id", + ] q = flask.current_app.db.nodes(models.submission.TransactionLog).filter( models.submission.TransactionLog.project_id.in_(flask.g.read_access_projects) ) - if 'quick_search' in args: + if "quick_search" in args: try: - id_ = int(args['quick_search']) + id_ = int(args["quick_search"]) except ValueError: # Because id is an int, if we couldn't parse it to an int, # filter should return 0 results. @@ -323,68 +330,91 @@ def resolve_transaction_log_query(self, info, **args): else: q = q.filter(models.submission.TransactionLog.id == id_) - if 'id' in args: - q = q.filter(models.submission.TransactionLog.id == args['id']) - if 'is_dry_run' in args: - q = q.filter(models.submission.TransactionLog.is_dry_run == args['is_dry_run']) - if 'state' in args: - q = q.filter(models.submission.TransactionLog.state == args['state']) - if 'committed_by' in args: - q = q.filter(models.submission.TransactionLog.committed_by == args['committed_by']) - if 'closed' in args: - q = q.filter(models.submission.TransactionLog.closed == args['closed']) - if 'committable' in args: - if args['committable']: + if "id" in args: + q = q.filter(models.submission.TransactionLog.id == args["id"]) + if "is_dry_run" in args: + q = q.filter(models.submission.TransactionLog.is_dry_run == args["is_dry_run"]) + if "state" in args: + q = q.filter(models.submission.TransactionLog.state == args["state"]) + if "committed_by" in args: + q = q.filter( + models.submission.TransactionLog.committed_by == args["committed_by"] + ) + if "closed" in args: + q = q.filter(models.submission.TransactionLog.closed == args["closed"]) + if "committable" in args: + if args["committable"]: # is committable - q = q.filter(sa.and_( - models.submission.TransactionLog.is_dry_run == True, - models.submission.TransactionLog.state == TX_LOG_STATE_SUCCEEDED, - models.submission.TransactionLog.closed == False, - models.submission.TransactionLog.committed_by == None)) + q = q.filter( + sa.and_( + models.submission.TransactionLog.is_dry_run == True, + models.submission.TransactionLog.state == TX_LOG_STATE_SUCCEEDED, + models.submission.TransactionLog.closed == False, + models.submission.TransactionLog.committed_by == None, + ) + ) else: # is not committable - q = q.filter(sa.or_( - models.submission.TransactionLog.is_dry_run == False, - models.submission.TransactionLog.state != TX_LOG_STATE_SUCCEEDED, - models.submission.TransactionLog.closed == True, - models.submission.TransactionLog.committed_by != None)) - if 'project_id' in args: - q = q.filter(models.submission.TransactionLog.project_id.in_(args['project_id'])) - if 'project' in args: - q = q.filter(models.submission.TransactionLog.project == args['project']) - if 'program' in args: - q = q.filter(models.submission.TransactionLog.program == args['program']) - if 'entities' in args: - q = q.join(models.submission.TransactionLog.entities)\ - .filter(models.submission.TransactionSnapshot.id.in_(args['entities']))\ - .reset_joinpoint() - if 'related_cases' in args: - q = q.join(models.submission.TransactionLog.documents)\ - .filter(sa.or_(models.submission.TransactionDocument.response_json.contains({ - 'entities': [{'related_cases': [ - {'id': r_id}]}]}) 
for r_id in args['related_cases']))\ - .reset_joinpoint() - if 'type' in args: + q = q.filter( + sa.or_( + models.submission.TransactionLog.is_dry_run == False, + models.submission.TransactionLog.state != TX_LOG_STATE_SUCCEEDED, + models.submission.TransactionLog.closed == True, + models.submission.TransactionLog.committed_by != None, + ) + ) + if "project_id" in args: + q = q.filter( + models.submission.TransactionLog.project_id.in_(args["project_id"]) + ) + if "project" in args: + q = q.filter(models.submission.TransactionLog.project == args["project"]) + if "program" in args: + q = q.filter(models.submission.TransactionLog.program == args["program"]) + if "entities" in args: + q = ( + q.join(models.submission.TransactionLog.entities) + .filter(models.submission.TransactionSnapshot.id.in_(args["entities"])) + .reset_joinpoint() + ) + if "related_cases" in args: + q = ( + q.join(models.submission.TransactionLog.documents) + .filter( + sa.or_( + models.submission.TransactionDocument.response_json.contains( + {"entities": [{"related_cases": [{"id": r_id}]}]} + ) + for r_id in args["related_cases"] + ) + ) + .reset_joinpoint() + ) + if "type" in args: inv_map = defaultdict(list) - for k, v in TransactionLog.TYPE_MAP.iteritems(): + for k, v in TransactionLog.TYPE_MAP.items(): inv_map[v].append(k) - q = q.filter(models.submission.TransactionLog.role.in_( - inv_map.get(args['type'], [args['type']]))) + q = q.filter( + models.submission.TransactionLog.role.in_( + inv_map.get(args["type"], [args["type"]]) + ) + ) - if args.get('order_by_asc') in sortable: - q = q.order_by(getattr(q.entity(), args['order_by_asc'])) - if args.get('order_by_desc') in sortable: - q = q.order_by(getattr(q.entity(), args['order_by_desc']).desc()) + if args.get("order_by_asc") in sortable: + q = q.order_by(getattr(q.entity(), args["order_by_asc"])) + if args.get("order_by_desc") in sortable: + q = q.order_by(getattr(q.entity(), args["order_by_desc"]).desc()) q = apply_arg_limit(q, args, info) - if 'last' in args: + if "last" in args: q = q.limit(None) - q = q.order_by(q.entity().id.desc()).limit(args['last']) + q = q.order_by(q.entity().id.desc()).limit(args["last"]) q = apply_arg_offset(q, args, info) return q + def apply_transaction_log_eagerload(q, info): """Optimize the transaction_log query to prevent an N+1 query problem by eagerly loading relationships from the database @@ -392,14 +422,15 @@ def apply_transaction_log_eagerload(q, info): fields = get_fields(info) - if 'documents' in fields: + if "documents" in fields: q = q.options(subqueryload(models.submission.TransactionLog.documents)) - if 'snapshots' in fields: + if "snapshots" in fields: q = q.options(subqueryload(models.submission.TransactionLog.entities)) return q + def resolve_transaction_log(self, info, **args): fields_depend_on_columns = TransactionLog.fields_depend_on_columns requested_fields = get_fields(info) @@ -412,26 +443,22 @@ def resolve_transaction_log(self, info, **args): for tx_log in q.all(): fields = filtered_column_dict(tx_log, info, fields_depend_on_columns) - if 'documents' in requested_fields: - fields['documents'] = tx_log.documents - if 'snapshots' in requested_fields: - fields['snapshots'] = tx_log.entities + if "documents" in requested_fields: + fields["documents"] = tx_log.documents + if "snapshots" in requested_fields: + fields["snapshots"] = tx_log.entities results.append(TransactionLog(**fields)) return results def resolve_transaction_log_count(self, info, **args): q = resolve_transaction_log_query(self, info, **args) - q = 
q.limit(args.get('first', None)) + q = q.limit(args.get("first", None)) return q.count() -TransactionLogField = graphene.List( - TransactionLog, - args=get_transaction_log_args(), -) +TransactionLogField = graphene.List(TransactionLog, args=get_transaction_log_args(),) TransactionLogCountField = graphene.Field( - graphene.Int, - args=get_transaction_log_args(), + graphene.Int, args=get_transaction_log_args(), ) diff --git a/peregrine/resources/submission/graphql/traversal.py b/peregrine/resources/submission/graphql/traversal.py index 5894c683..52ed925b 100644 --- a/peregrine/resources/submission/graphql/traversal.py +++ b/peregrine/resources/submission/graphql/traversal.py @@ -13,13 +13,13 @@ import time terminal_nodes = [ - 'annotations', - 'centers', - 'archives', - 'tissue_source_sites', - 'files', - 'related_files', - 'describing_files' + "annotations", + "centers", + "archives", + "tissue_source_sites", + "files", + "related_files", + "describing_files", ] # Assign categories levels @@ -35,10 +35,10 @@ # # See :func:`is_valid_direction` for more details. CATEGORY_LEVEL = { - 'administrative': 0, - 'biospecimen': 1, - 'clinical': 1, - 'data_file': 3, + "administrative": 0, + "biospecimen": 1, + "clinical": 1, + "data_file": 3, } @@ -60,18 +60,9 @@ def is_valid_direction(node, visited): is valid. """ max_level = max(CATEGORY_LEVEL.values()) + 1 - first_level = CATEGORY_LEVEL.get( - visited[0]._dictionary['category'], - max_level - ) - last_level = CATEGORY_LEVEL.get( - visited[-1]._dictionary['category'], - max_level - ) - this_level = CATEGORY_LEVEL.get( - node._dictionary['category'], - max_level - ) + first_level = CATEGORY_LEVEL.get(visited[0]._dictionary["category"], max_level) + last_level = CATEGORY_LEVEL.get(visited[-1]._dictionary["category"], max_level) + this_level = CATEGORY_LEVEL.get(node._dictionary["category"], max_level) if first_level > last_level: # If we are traveling from case out return this_level <= last_level @@ -87,7 +78,7 @@ def construct_traversals_from_node(root_node, app): while to_visit: node, path, visited = to_visit.pop() if path: - path_string = '.'.join(path) + path_string = ".".join(path) if path_string in traversals[node.label]: continue traversals[node.label].add(path_string) @@ -97,11 +88,10 @@ def construct_traversals_from_node(root_node, app): # Don't walk back up the tree if not is_valid_direction(node, visited or [root_node]): continue - name_to_subclass = getattr(app, 'name_to_subclass', None) + name_to_subclass = getattr(app, "name_to_subclass", None) if name_to_subclass is None: name_to_subclass = app.name_to_subclass = { - n.__name__: n - for n in Node.get_subclasses() + n.__name__: n for n in Node.get_subclasses() } neighbors_dst = { (name_to_subclass[edge.__dst_class__], edge.__src_dst_assoc__) @@ -113,12 +103,14 @@ def construct_traversals_from_node(root_node, app): for edge in Edge._get_edges_with_dst(node.__name__) if name_to_subclass[edge.__src_class__] } - to_visit.extend([ - (neighbor, path + [edge], visited + [node]) - for neighbor, edge in neighbors_dst.union(neighbors_src) - if neighbor not in visited - ]) - return {label: list(paths) for label, paths in traversals.iteritems() if paths} + to_visit.extend( + [ + (neighbor, path + [edge], visited + [node]) + for neighbor, edge in neighbors_dst.union(neighbors_src) + if neighbor not in visited + ] + ) + return {label: list(paths) for label, paths in traversals.items() if paths} def make_graph_traversal_dict(app, preload=False): @@ -129,8 +121,8 @@ def make_graph_traversal_dict(app, 
preload=False): You may call this method with `preload=True` to manually preload the full dict. """ - app.graph_traversals = getattr(app, 'graph_traversals', {}) - if preload or not app.config.get('USE_LAZY_TRAVERSE', True): + app.graph_traversals = getattr(app, "graph_traversals", {}) + if preload or not app.config.get("USE_LAZY_TRAVERSE", True): for node in Node.get_subclasses(): _get_paths_from(node, app) @@ -148,8 +140,11 @@ def _get_paths_from(src, app): app.graph_traversals[src_label] = construct_traversals_from_node(src, app) time_taken = int(round(time.time() - start)) if time_taken > 0.5: - app.logger.info('Traversed the graph starting from "%s" in %.2f sec', - src_label, time_taken) + app.logger.info( + 'Traversed the graph starting from "%s" in %.2f sec', + src_label, + time_taken, + ) return app.graph_traversals[src_label] @@ -215,7 +210,6 @@ def subq_paths(q, dst_label, post_filters=None): nodes = flask.current_app.db.nodes(q.entity()) subquery_paths = [ - q.subq_path(path, post_filters).subquery().select() - for path in paths + q.subq_path(path, post_filters).subquery().select() for path in paths ] return nodes.select_entity_from(sa.union_all(*subquery_paths)) diff --git a/peregrine/resources/submission/graphql/util.py b/peregrine/resources/submission/graphql/util.py index df0ac60b..c9b03ef8 100644 --- a/peregrine/resources/submission/graphql/util.py +++ b/peregrine/resources/submission/graphql/util.py @@ -9,10 +9,8 @@ from flask import current_app as capp from flask import g as fg -from peregrine.errors import AuthError, UserError -import node +from . import node from datamodelutils import models -from graphql import GraphQLError from graphql.utils.ast_to_dict import ast_to_dict import sqlalchemy as sa @@ -29,14 +27,13 @@ def set_session_timeout(session, timeout): session.execute( - 'SET LOCAL statement_timeout = {}' - .format(int(float(timeout)*1000)) + "SET LOCAL statement_timeout = {}".format(int(float(timeout) * 1000)) ) def get_column_names(entity): """Returns an iterable of column names the entity has""" - if hasattr(entity, '__pg_properties__'): + if hasattr(entity, "__pg_properties__"): return (k for k in entity.__pg_properties__) return (c.name for c in entity.__table__.columns) @@ -57,18 +54,15 @@ def filtered_column_dict(row, info, fields_depend_on_columns=None): columns = get_loaded_columns(row, info, fields_depend_on_columns) - return { - column: getattr(row, column) - for column in columns - } + return {column: getattr(row, column) for column in columns} def get_active_project_ids(): return [ - '{}-{}'.format(project.programs[0].name, project.code) + "{}-{}".format(project.programs[0].name, project.code) for project in capp.db.nodes(Project) - .filter(models.Project._props['state'].astext != 'closed') - .filter(models.Project._props['state'].astext != 'legacy') + .filter(models.Project._props["state"].astext != "closed") + .filter(models.Project._props["state"].astext != "legacy") .all() ] @@ -93,13 +87,14 @@ def active_project_filter(q): cls = q.entity() - if cls.label == 'project': - return (q.filter(models.Project._props['state'].astext != 'closed') - .filter(models.Project._props['state'].astext != 'legacy')) + if cls.label == "project": + return q.filter(models.Project._props["state"].astext != "closed").filter( + models.Project._props["state"].astext != "legacy" + ) - fg.active_project_ids = fg.get('active_project_ids') or get_active_project_ids() - if cls == psqlgraph.Node or hasattr(cls, 'project_id'): - project_id_attr = cls._props['project_id'].astext 
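
get_active_project_ids above and filter_project_project_id in node.py rely on the same "<program>-<code>" convention for logical project IDs. Because the parsing side uses split("-", 1), only the first hyphen separates program from code, so project codes may contain hyphens while program names may not. A minimal round-trip sketch with hypothetical program and project names:

    program_name, project_code = "TCGA", "BRCA-legacy"  # hypothetical names
    project_id = "{}-{}".format(program_name, project_code)

    # filter_project_project_id() splits on the first hyphen only, so a
    # hyphenated project code survives the round trip intact.
    assert project_id.split("-", 1) == ["TCGA", "BRCA-legacy"]
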
+ fg.active_project_ids = fg.get("active_project_ids") or get_active_project_ids() + if cls == psqlgraph.Node or hasattr(cls, "project_id"): + project_id_attr = cls._props["project_id"].astext q = q.filter(project_id_attr.in_(fg.active_project_ids)) return q @@ -124,10 +119,10 @@ def authorization_filter(q): cls = q.entity() - if cls == psqlgraph.Node or hasattr(cls, 'project_id'): - q = q.filter(cls._props['project_id'].astext.in_(fg.read_access_projects)) + if cls == psqlgraph.Node or hasattr(cls, "project_id"): + q = q.filter(cls._props["project_id"].astext.in_(fg.read_access_projects)) - if cls.label == 'project': + if cls.label == "project": # do not return unauthorized projects q = node.filter_project_project_id(q, fg.read_access_projects, None) @@ -142,14 +137,14 @@ def get_authorized_query(cls): def apply_arg_limit(q, args, info): - limit = args.get('first', DEFAULT_LIMIT) + limit = args.get("first", DEFAULT_LIMIT) if limit > 0: q = q.limit(limit) return q def apply_arg_offset(q, args, info): - offset = args.get('offset', 0) + offset = args.get("offset", 0) if offset > 0: q = q.offset(offset) return q @@ -164,11 +159,13 @@ def get_loaded_columns(entity, info, fields_depend_on_columns=None): fields = set(get_fields(info)) if fields_depend_on_columns: - fields.update({ - column - for field in fields - for column in fields_depend_on_columns.get(field, {}) - }) + fields.update( + { + column + for field in fields + for column in fields_depend_on_columns.get(field, {}) + } + ) all_columns = set(get_column_names(entity)) used_columns = fields.intersection(all_columns) @@ -181,7 +178,7 @@ def apply_load_only(query, info, fields_depend_on_columns=None): # if the entity doesn't have a backing table then don't do this # this happens when using the generic node property - if not hasattr(query.entity(), '__table__'): + if not hasattr(query.entity(), "__table__"): return query columns = get_loaded_columns(query.entity(), info, fields_depend_on_columns) @@ -192,6 +189,7 @@ def apply_load_only(query, info, fields_depend_on_columns=None): # The below is lifted from # https://gist.github.com/mixxorz/dc36e180d1888629cf33 + def collect_fields(node, fragments): """Recursively collects fields from the AST Args: @@ -208,15 +206,14 @@ def collect_fields(node, fragments): field = {} - if node.get('selection_set'): - for leaf in node['selection_set']['selections']: - if leaf['kind'] == 'Field': - field.update({ - leaf['name']['value']: collect_fields(leaf, fragments) - }) - elif leaf['kind'] == 'FragmentSpread': - field.update(collect_fields(fragments[leaf['name']['value']], - fragments)) + if node.get("selection_set"): + for leaf in node["selection_set"]["selections"]: + if leaf["kind"] == "Field": + field.update({leaf["name"]["value"]: collect_fields(leaf, fragments)}) + elif leaf["kind"] == "FragmentSpread": + field.update( + collect_fields(fragments[leaf["name"]["value"]], fragments) + ) return field @@ -237,6 +234,7 @@ def get_fields(info): return collect_fields(node, fragments) + def clean_count(q): """Returns the count from this query without pulling all the columns @@ -248,5 +246,9 @@ def clean_count(q): q (psqlgraph.query.GraphQuery): The current query object. 
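
The clean_count() change that follows builds a count statement from the existing query instead of calling q.count(), so the database never materializes the JSONB _props column just to count rows. A minimal sketch in SQLAlchemy 1.x style (matching the list form used in the diff), with a stand-in table rather than psqlgraph's models:

    import sqlalchemy as sa

    metadata = sa.MetaData()
    # Hypothetical table standing in for a psqlgraph node table.
    node_table = sa.Table(
        "node_example",
        metadata,
        sa.Column("node_id", sa.String, primary_key=True),
        sa.Column("_props", sa.String),
    )

    stmt = sa.select([node_table]).where(node_table.c.node_id.isnot(None))
    # Same trick as clean_count(): keep the FROM/WHERE, select only count(*),
    # and clear any ORDER BY since ordering is irrelevant to a count.
    count_stmt = stmt.with_only_columns([sa.func.count()]).order_by(None)
    print(count_stmt)  # roughly: SELECT count(*) FROM node_example WHERE node_id IS NOT NULL
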
""" - query_count = q.options(sa.orm.lazyload('*')).statement.with_only_columns([sa.func.count()]).order_by(None) + query_count = ( + q.options(sa.orm.lazyload("*")) + .statement.with_only_columns([sa.func.count()]) + .order_by(None) + ) return q.session.execute(query_count).scalar() diff --git a/peregrine/resources/submission/util.py b/peregrine/resources/submission/util.py index 504383ce..48b8640d 100644 --- a/peregrine/resources/submission/util.py +++ b/peregrine/resources/submission/util.py @@ -56,7 +56,7 @@ def get_external_proxies(): """ - return capp.config.get('EXTERNAL_PROXIES', {}) + return capp.config.get("EXTERNAL_PROXIES", {}) def oph_raise_for_duplicates(object_pairs): @@ -67,17 +67,16 @@ def oph_raise_for_duplicates(object_pairs): """ counter = Counter(p[0] for p in object_pairs) - duplicates = filter(lambda p: p[1] > 1, counter.iteritems()) + duplicates = [p for p in iter(counter.items()) if p[1] > 1] if duplicates: raise ValueError( - 'The document contains duplicate keys: {}' - .format(','.join(d[0] for d in duplicates))) + "The document contains duplicate keys: {}".format( + ",".join(d[0] for d in duplicates) + ) + ) - return { - pair[0]: pair[1] - for pair in object_pairs - } + return {pair[0]: pair[1] for pair in object_pairs} def parse_json(raw): @@ -91,10 +90,9 @@ def parse_json(raw): """ try: - return simplejson.loads( - raw, object_pairs_hook=oph_raise_for_duplicates) + return simplejson.loads(raw, object_pairs_hook=oph_raise_for_duplicates) except Exception as e: - raise UserError('Unable to parse json: {}'.format(e)) + raise UserError("Unable to parse json: {}".format(e)) def parse_request_json(expected_types=(dict, list)): @@ -110,8 +108,11 @@ def parse_request_json(expected_types=(dict, list)): parsed = parse_json(request.get_data()) if not isinstance(parsed, expected_types): - raise UserError('JSON parsed from request is an invalid type: {}' - .format(parsed.__class__.__name__)) + raise UserError( + "JSON parsed from request is an invalid type: {}".format( + parsed.__class__.__name__ + ) + ) return parsed @@ -126,7 +127,7 @@ def parse_request_yaml(): try: return yaml.safe_load(raw) except Exception as e: - raise UserError('Unable to parse yaml: {}'.format(e)) + raise UserError("Unable to parse yaml: {}".format(e)) def lookup_node(psql_driver, label, node_id=None, secondary_keys=None): @@ -150,10 +151,13 @@ def lookup_node(psql_driver, label, node_id=None, secondary_keys=None): def lookup_project(psql_driver, program, project): """Return a project by Project.code if attached to Program.name""" - return (psql_driver.nodes(models.Project).props(code=project) - .path('programs') - .props(name=program) - .scalar()) + return ( + psql_driver.nodes(models.Project) + .props(code=project) + .path("programs") + .props(name=program) + .scalar() + ) def lookup_program(psql_driver, program): @@ -177,12 +181,12 @@ def parse_boolean(value): if isinstance(value, bool): return value - elif value.lower() == 'true': + elif value.lower() == "true": return True - elif value.lower() == 'false': + elif value.lower() == "false": return False else: - raise UserError('Boolean value not one of [true, false]') + raise UserError("Boolean value not one of [true, false]") def is_flag_set(flag, default=False): @@ -215,12 +219,12 @@ def wrapper(*args, **kwargs): def get_introspection_query(): cur_dir = os.path.dirname(os.path.realpath(__file__)) - f = open(os.path.join(cur_dir, 'graphql', 'introspection_query.txt'), 'r') + f = open(os.path.join(cur_dir, "graphql", "introspection_query.txt"), 
"r") return f.read() def get_variables(payload): - var_payload = payload.get('variables') + var_payload = payload.get("variables") variables = None errors = None if isinstance(var_payload, dict): @@ -229,5 +233,5 @@ def get_variables(payload): try: variables = json.loads(var_payload) if var_payload else {} except Exception as e: - errors = ['Unable to parse variables', str(e)] + errors = ["Unable to parse variables", str(e)] return variables, errors diff --git a/peregrine/test_settings.py b/peregrine/test_settings.py index ff0e1198..e4e0fe66 100644 --- a/peregrine/test_settings.py +++ b/peregrine/test_settings.py @@ -2,47 +2,44 @@ from collections import OrderedDict -INDEX_CLIENT = { - "host": "http://localhost:8000/", 'version': 'v0', - "auth": None} -AUTH = 'https://fake_auth_url' -INTERNAL_AUTH = 'https://fake_auth_url' +INDEX_CLIENT = {"host": "http://localhost:8000/", "version": "v0", "auth": None} +AUTH = "https://fake_auth_url" +INTERNAL_AUTH = "https://fake_auth_url" AUTH_ADMIN_CREDS = { - 'domain_name': 'some_domain', - 'username': 'iama_username', - 'password': 'iama_password', - 'auth_url': 'https://fake_auth_url', - 'user_domain_name': 'some_domain', + "domain_name": "some_domain", + "username": "iama_username", + "password": "iama_password", + "auth_url": "https://fake_auth_url", + "user_domain_name": "some_domain", } SUBMISSION = { - "bucket": 'test_submission', - "host": 'host', + "bucket": "test_submission", + "host": "host", } STORAGE = {"s3": {"keys": {}, "kwargs": {}}} -STORAGE["s3"]["keys"]["host"] = {"access_key": "fake", - "secret_key": "sooper_sekrit"} +STORAGE["s3"]["keys"]["host"] = {"access_key": "fake", "secret_key": "sooper_sekrit"} STORAGE["s3"]["kwargs"]["host"] = {} PSQLGRAPH = { - 'host': "localhost", - 'user': "test", - 'password': "test", - 'database': "automated_test", + "host": "localhost", + "user": "test", + "password": "test", + "database": "automated_test", } -GDC_PORTAL_ENDPOINT = 'http://fake_portal_endpoint_for_tests' +GDC_PORTAL_ENDPOINT = "http://fake_portal_endpoint_for_tests" PEREGRINE_HOST = "localhost" PEREGRINE_PORT = "443" # Slicing settings SLICING = { - 'host': 'localhost', - 'gencode': 'REPLACEME', + "host": "localhost", + "gencode": "REPLACEME", } -FLASK_SECRET_KEY = 'flask_test_key' +FLASK_SECRET_KEY = "flask_test_key" from cryptography.fernet import Fernet @@ -54,16 +51,24 @@ "redirect_uri": "", } -DICTIONARY_URL = os.environ.get('DICTIONARY_URL','https://s3.amazonaws.com/dictionary-artifacts/datadictionary/develop/schema.json') +DICTIONARY_URL = os.environ.get( + "DICTIONARY_URL", + "https://s3.amazonaws.com/dictionary-artifacts/datadictionary/develop/schema.json", +) USER_API = "http://localhost" VERIFY_PROJECT = False AUTH_SUBMISSION_LIST = False -JWT_KEYPAIR_FILES = OrderedDict([ - ( - 'key-test', - ('resources/keys/test_public_key.pem', 'resources/keys/test_private_key.pem'), - ) -]) +JWT_KEYPAIR_FILES = OrderedDict( + [ + ( + "key-test", + ( + "resources/keys/test_public_key.pem", + "resources/keys/test_private_key.pem", + ), + ) + ] +) diff --git a/peregrine/utils/__init__.py b/peregrine/utils/__init__.py index ced77ead..83345bc7 100644 --- a/peregrine/utils/__init__.py +++ b/peregrine/utils/__init__.py @@ -1,2 +1,2 @@ -from .payload import get_variables,jsonify_check_errors,parse_request_json +from .payload import get_variables, jsonify_check_errors, parse_request_json from .scheduling import AsyncPool diff --git a/peregrine/utils/json2csv.py b/peregrine/utils/json2csv.py index 0d8ae499..1065cee5 100644 --- 
a/peregrine/utils/json2csv.py +++ b/peregrine/utils/json2csv.py @@ -1,18 +1,18 @@ import csv -from cStringIO import StringIO +from io import StringIO from functools import reduce, partial def list_to_obj(k, v): - return {'{}_{}'.format(k, i): x for i, x in enumerate(v)} + return {"{}_{}".format(k, i): x for i, x in enumerate(v)} def flatten_nested_obj(k, v): - return {'{}_{}'.format(k, k2): v2 for (k2, v2) in v.iteritems()} + return {"{}_{}".format(k, k2): v2 for (k2, v2) in v.items()} def pair_to_obj(acc, (k, v), parent=None): - p = '{}_{}'.format(parent, k) if parent else k + p = "{}_{}".format(parent, k) if parent else k if isinstance(v, list): acc.update(flatten_obj(list_to_obj(p, v))) elif isinstance(v, dict): @@ -25,21 +25,21 @@ def pair_to_obj(acc, (k, v), parent=None): def flatten_obj(json, parent=None): p_pair_to_json = partial(pair_to_obj, parent=parent) - return reduce(p_pair_to_json, json.iteritems(), {}) + return reduce(p_pair_to_json, iter(json.items()), {}) def row_with_headers((rows, header), hit): f_o = flatten_obj(hit) rows.append(f_o) - return rows, header.union(f_o.keys()) + return rows, header.union(list(f_o.keys())) def rows_with_headers(hits): return reduce(row_with_headers, hits, ([], set())) -def to_csv(hits, dialect='excel'): +def to_csv(hits, dialect="excel"): s = StringIO() rows, headers = rows_with_headers(hits) writer = csv.DictWriter(s, fieldnames=headers, dialect=dialect) diff --git a/peregrine/utils/payload.py b/peregrine/utils/payload.py index 39cfad41..abcaff1f 100644 --- a/peregrine/utils/payload.py +++ b/peregrine/utils/payload.py @@ -18,15 +18,6 @@ from threading import Thread from peregrine.errors import UserError -from peregrine.resources.submission.constants import ( - project_seed, - program_seed, - ERROR_STATE, - FLAG_IS_ASYNC, - submitted_state, - UPLOADING_STATE, - SUCCESS_STATE, -) def get_external_proxies(): @@ -53,7 +44,7 @@ def get_external_proxies(): """ - return capp.config.get('EXTERNAL_PROXIES', {}) + return capp.config.get("EXTERNAL_PROXIES", {}) def oph_raise_for_duplicates(object_pairs): @@ -64,17 +55,16 @@ def oph_raise_for_duplicates(object_pairs): """ counter = Counter(p[0] for p in object_pairs) - duplicates = filter(lambda p: p[1] > 1, counter.iteritems()) + duplicates = [p for p in iter(counter.items()) if p[1] > 1] if duplicates: raise ValueError( - 'The document contains duplicate keys: {}' - .format(','.join(d[0] for d in duplicates))) + "The document contains duplicate keys: {}".format( + ",".join(d[0] for d in duplicates) + ) + ) - return { - pair[0]: pair[1] - for pair in object_pairs - } + return {pair[0]: pair[1] for pair in object_pairs} def parse_json(raw): @@ -88,10 +78,9 @@ def parse_json(raw): """ try: - return simplejson.loads( - raw, object_pairs_hook=oph_raise_for_duplicates) + return simplejson.loads(raw, object_pairs_hook=oph_raise_for_duplicates) except Exception as e: - raise UserError('Unable to parse json: {}'.format(e)) + raise UserError("Unable to parse json: {}".format(e)) def parse_request_json(expected_types=(dict, list)): @@ -107,8 +96,11 @@ def parse_request_json(expected_types=(dict, list)): parsed = parse_json(request.get_data()) if not isinstance(parsed, expected_types): - raise UserError('JSON parsed from request is an invalid type: {}' - .format(parsed.__class__.__name__)) + raise UserError( + "JSON parsed from request is an invalid type: {}".format( + parsed.__class__.__name__ + ) + ) return parsed @@ -174,12 +166,12 @@ def parse_boolean(value): if isinstance(value, bool): return value - elif 
value.lower() == 'true': + elif value.lower() == "true": return True - elif value.lower() == 'false': + elif value.lower() == "false": return False else: - raise UserError('Boolean value not one of [true, false]') + raise UserError("Boolean value not one of [true, false]") def is_flag_set(flag, default=False): @@ -196,7 +188,7 @@ def is_flag_set(flag, default=False): return parse_boolean(request.args.get(flag, default)) -def async(f): +def async_decorator(f): """Decorator to run function in background""" @wraps(f) @@ -212,15 +204,15 @@ def wrapper(*args, **kwargs): def get_introspection_query(): cur_dir = os.path.dirname(os.path.realpath(__file__)) - f = open(os.path.join(cur_dir, 'graphql', 'introspection_query.txt'), 'r') + f = open(os.path.join(cur_dir, "graphql", "introspection_query.txt"), "r") return f.read() + def json_dumps_formatted(data): """Return json string with standard format.""" - dump = json.dumps( - data, indent=2, separators=(', ', ': '), ensure_ascii=False - ) - return dump.encode('utf-8') + dump = json.dumps(data, indent=2, separators=(", ", ": "), ensure_ascii=False) + return dump.encode("utf-8") + def jsonify_check_errors(data_and_errors, error_code=400): """ @@ -228,13 +220,13 @@ def jsonify_check_errors(data_and_errors, error_code=400): """ data, errors = data_and_errors if errors: - return flask.jsonify({'data': data, 'errors': errors}), error_code + return flask.jsonify({"data": data, "errors": errors}), error_code else: - return flask.jsonify({'data': data}), 200 + return flask.jsonify({"data": data}), 200 def get_variables(payload): - var_payload = payload.get('variables') + var_payload = payload.get("variables") variables = None errors = None if isinstance(var_payload, dict): @@ -243,5 +235,5 @@ def get_variables(payload): try: variables = json.loads(var_payload) if var_payload else {} except Exception as e: - errors = ['Unable to parse variables', str(e)] + errors = ["Unable to parse variables", str(e)] return variables, errors diff --git a/peregrine/utils/pyutils.py b/peregrine/utils/pyutils.py index 21fb9521..744fe4a4 100644 --- a/peregrine/utils/pyutils.py +++ b/peregrine/utils/pyutils.py @@ -12,7 +12,7 @@ def log_duration(name="Unnamed action"): start_t = time.time() yield end_t = time.time() - msg = "Executed [{}] in {:.2f} ms".format(name, (end_t-start_t)*1000) + msg = "Executed [{}] in {:.2f} ms".format(name, (end_t - start_t) * 1000) current_app.logger.info(msg) @@ -21,6 +21,8 @@ def get_s3_conn(host): current app context. """ config = current_app.config["STORAGE"]["s3"] - return connect_s3(config["keys"][host]["access_key"], - config["keys"][host]["secret_key"], - **config["kwargs"][host]) + return connect_s3( + config["keys"][host]["access_key"], + config["keys"][host]["secret_key"], + **config["kwargs"][host] + ) diff --git a/peregrine/utils/request.py b/peregrine/utils/request.py index 75d4c0e0..3d44c526 100644 --- a/peregrine/utils/request.py +++ b/peregrine/utils/request.py @@ -1,4 +1,3 @@ -import json from peregrine.errors import UserError # Need this wrapper when `raise` is used in a lambda @@ -20,7 +19,7 @@ def parse_request(request): """ def handlers(ct): - if 'application/x-www-form-urlencoded' in ct: + if "application/x-www-form-urlencoded" in ct: # Converts the immutable multi-dict (class type of request.form) into a regular dict, # because somewhere downstream this parsed options is checked and sanitized, where # mutation occurs which throws an exception (for modifying an immutable). 
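# --- Editor's aside (illustrative sketch, not part of this patch) -----------------
# The comments above explain why request.form (an immutable multi-dict) is copied
# into a plain, mutable dict and why single-element value lists are unboxed. A
# standalone sketch of that unboxing, assuming a Werkzeug-style MultiDict as input
# (the helper name is hypothetical):
from werkzeug.datastructures import MultiDict


def unbox_form(form):
    """Return a plain dict, keeping a list only for fields that were repeated."""
    as_lists = form.to_dict(flat=False)  # e.g. {"a": ["1"], "b": ["2", "3"]}
    return {k: v if len(v) > 1 else v[0] for k, v in as_lists.items()}


# unbox_form(MultiDict([("a", "1"), ("b", "2"), ("b", "3")]))
# -> {"a": "1", "b": ["2", "3"]}
# -----------------------------------------------------------------------------------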
@@ -28,17 +27,27 @@ def handlers(ct): # all values are converted into a list (because form fields can be repeated for # multi-value fields). Here we unbox the value for lists of one single element and # let the ones with multiple values remain as lists. - return {key: value if len(value) > 1 else value[0] for key, value in dict(request.form).items()} - elif 'application/json' in ct: - return request.get_json() if request.data != '' else {} + return { + key: value if len(value) > 1 else value[0] + for key, value in dict(request.form).items() + } + elif "application/json" in ct: + return request.get_json() if request.data != "" else {} else: - error_out(UserError( - "Content-Type header for POST must be 'application/json' or 'application/x-www-form-urlencoded'" - )) + error_out( + UserError( + "Content-Type header for POST must be 'application/json' or 'application/x-www-form-urlencoded'" + ) + ) - all_args = [request.args.to_dict(), {} if request.method == 'GET' else handlers(request.headers.get('Content-Type', '').lower())] + all_args = [ + request.args.to_dict(), + {} + if request.method == "GET" + else handlers(request.headers.get("Content-Type", "").lower()), + ] # Merges two dictionaries in all_args - options = { k: v for d in all_args for k, v in d.items() } + options = {k: v for d in all_args for k, v in d.items()} mimetype, is_csv = select_mimetype(request.headers, options) return options, mimetype, is_csv @@ -54,18 +63,18 @@ def select_mimetype(request_headers, request_options): The mimetype as a string is_csv (boolean): whether the requested format is CSV or not """ - mimetype = request_headers.get('Accept', 'application/json') - if 'format' in request_options: - req_format = request_options['format'].lower() - if req_format == 'xml': + mimetype = request_headers.get("Accept", "application/json") + if "format" in request_options: + req_format = request_options["format"].lower() + if req_format == "xml": mimetype = "text/xml" - elif req_format == 'csv': + elif req_format == "csv": mimetype = "text/csv" elif req_format == "tsv": mimetype = "text/tab-separated-values" else: mimetype = "application/json" - if 'text/csv' in mimetype or 'text/tab-separated-values' in mimetype: + if "text/csv" in mimetype or "text/tab-separated-values" in mimetype: is_csv = True else: is_csv = False diff --git a/peregrine/utils/response.py b/peregrine/utils/response.py index ad468274..cfef8d40 100644 --- a/peregrine/utils/response.py +++ b/peregrine/utils/response.py @@ -19,7 +19,7 @@ repo_subdir = os.path.dirname(os.path.realpath(__file__)) commit_cmd = "cd {}; git rev-parse HEAD".format(repo_subdir) COMMIT = subprocess.check_output(commit_cmd, shell=True).strip() - logger.info('API from commit {}'.format(COMMIT)) + logger.info("API from commit {}".format(COMMIT)) except Exception as e: logger.warning(str(e)) COMMIT = None @@ -27,11 +27,16 @@ def get_data_release(): """TODO: Unhard code this""" - return 'Data Release 3.0 - September 21, 2016' + return "Data Release 3.0 - September 21, 2016" def get_status(): - status = {'status': 'OK', 'version': 1, 'tag': VERSION, 'data_release': get_data_release()} + status = { + "status": "OK", + "version": 1, + "tag": VERSION, + "data_release": get_data_release(), + } if COMMIT: status["commit"] = COMMIT return status @@ -52,7 +57,7 @@ def striptags_from_dict(data): """ if isinstance(data, dict): new_dict = {} - for k, v in data.iteritems(): + for k, v in data.items(): cleanedK = tryToInt(Markup(k).striptags()) if isinstance(v, dict): new_dict[cleanedK] = 
striptags_from_dict(v) @@ -62,7 +67,9 @@ def striptags_from_dict(data): return new_dict -def add_content_disposition(request_headers, request_options, response, file_name="file"): +def add_content_disposition( + request_headers, request_options, response, file_name="file" +): """ Returns response as a file if attachment parameter in request is true @@ -75,24 +82,35 @@ def add_content_disposition(request_headers, request_options, response, file_nam Returns: A Flask Response object, with Content-Disposition set if attachment was true. Unmodified otherwise. """ - if 'attachment' in request_options.keys(): - if (isinstance(request_options['attachment'], bool) and request_options['attachment']) or request_options[ - 'attachment'].lower() == 'true': - file_extension = request_options.get('format', 'json').lower() - response.headers.add('Content-Disposition', 'attachment', - filename='{}.{}.{}'.format(request_options.get('filename', file_name), - datetime.now().isoformat(), file_extension)) + if "attachment" in request_options.keys(): + if ( + isinstance(request_options["attachment"], bool) + and request_options["attachment"] + ) or request_options["attachment"].lower() == "true": + file_extension = request_options.get("format", "json").lower() + response.headers.add( + "Content-Disposition", + "attachment", + filename="{}.{}.{}".format( + request_options.get("filename", file_name), + datetime.now().isoformat(), + file_extension, + ), + ) response = remove_download_token_from_cookie(request_options, response) return response def is_pretty(options): - return options.get('pretty', 'false').lower() == 'true' + return options.get("pretty", "false").lower() == "true" def to_json(options, data): - return (json.dumps(data, indent=2, separators=(', ', ': ')) if is_pretty(options) - else json.dumps(data)) + return ( + json.dumps(data, indent=2, separators=(", ", ": ")) + if is_pretty(options) + else json.dumps(data) + ) def to_xml(options, data): @@ -125,35 +143,37 @@ def format_response(request_options, data, mimetype): Returns: A Flask Response object, with the data formatted as specified and the Content-Type set """ - if (request_options.get('attachment', '').lower() == 'true' or - "text/csv" in mimetype or - "text/tab-separated-values" in mimetype): - if 'hits' in data['data']: - data = data['data']['hits'] + if ( + request_options.get("attachment", "").lower() == "true" + or "text/csv" in mimetype + or "text/tab-separated-values" in mimetype + ): + if "hits" in data["data"]: + data = data["data"]["hits"] else: - data = [data['data']] + data = [data["data"]] if isinstance(data, dict): - pagination = data.get('data', {}).get('pagination', None) + pagination = data.get("data", {}).get("pagination", None) if pagination: - data['data']['pagination'] = striptags_from_dict(pagination) - warnings = data.get('warnings', None) + data["data"]["pagination"] = striptags_from_dict(pagination) + warnings = data.get("warnings", None) if warnings: - data['warnings'] = striptags_from_dict(warnings) + data["warnings"] = striptags_from_dict(warnings) if "text/xml" in mimetype: data = to_xml(request_options, data) elif "text/csv" in mimetype: - data = to_csv(data, dialect='excel') + data = to_csv(data, dialect="excel") elif "text/tab-separated-values" in mimetype: - data = to_csv(data, dialect='excel-tab') + data = to_csv(data, dialect="excel-tab") else: mimetype = "application/json" data = to_json(request_options, data) response = Response(data, mimetype=mimetype) - for key, value in get_status().iteritems(): - 
response.headers.extend({'X-GDC-{}'.format(key): value}) + for key, value in get_status().items(): + response.headers.extend({"X-GDC-{}".format(key): value}) return response @@ -169,10 +189,10 @@ def remove_download_token_from_cookie(options, response): Returns: The response object that is passed in """ - cookie_key = options.get('downloadCookieKey', '') - cookie_path = options.get('downloadCookiePath', '/') + cookie_key = options.get("downloadCookieKey", "") + cookie_path = options.get("downloadCookiePath", "/") - if cookie_key != '': + if cookie_key != "": response.set_cookie(cookie_key, expires=0, path=cookie_path) return response diff --git a/peregrine/utils/scheduling.py b/peregrine/utils/scheduling.py index 1ead9f11..0b4a4f2d 100644 --- a/peregrine/utils/scheduling.py +++ b/peregrine/utils/scheduling.py @@ -1,11 +1,9 @@ -from Queue import Queue, Full +from queue import Queue, Full from threading import Thread import cdispyutils -from peregrine.errors import ( - InternalError, -) +from peregrine.errors import InternalError from peregrine.globals import ( ASYNC_MAX_Q_LEN, ERR_ASYNC_SCHEDULING, @@ -55,11 +53,7 @@ def start(self, n_workers): def schedule(self, function, *args, **kwargs): """Add a task to the queue""" try: - self.task_queue.put_nowait(AsyncPoolTask( - function, - *args, - **kwargs - )) + self.task_queue.put_nowait(AsyncPoolTask(function, *args, **kwargs)) except Full: raise InternalError(ERR_ASYNC_SCHEDULING) @@ -73,10 +67,7 @@ def grow(self, n_workers): started immediately. """ workers = [ - self.worker_class( - target=async_pool_consumer, - args=(self.task_queue,), - ) + self.worker_class(target=async_pool_consumer, args=(self.task_queue,),) for _ in range(n_workers) ] diff --git a/peregrine/version_data.py b/peregrine/version_data.py index c7d880b9..0ff58453 100644 --- a/peregrine/version_data.py +++ b/peregrine/version_data.py @@ -1,4 +1,4 @@ -VERSION="" -COMMIT="" -DICTVERSION="" -DICTCOMMIT="" +VERSION = "" +COMMIT = "" +DICTVERSION = "" +DICTCOMMIT = "" diff --git a/requirements.txt b/requirements.txt index c2f02ff0..18fa2ec6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,11 @@ authutils==3.1.1 -datamodelutils==0.4.7 +datamodelutils==1.0.0 defusedxml==0.5.0 -dictionaryutils==2.0.7 -gen3dictionary==2.0.0 +dictionaryutils==3.0.0 +gen3dictionary==2.0.1 SurvivalPy==1.0.2 simplejson==3.8.1 stopit==1.1.1 -functools32==3.2.3-2 -lxml==3.4.1 python-Levenshtein==0.12.0 python-keystoneclient==1.6.0 fuzzywuzzy==0.6.1 @@ -15,7 +13,8 @@ addict==0.2.7 Flask==1.1.1 Flask-Cors==1.9.0 gen3authz==0.2.1 -Jinja2==2.7.3 +Jinja2==2.10.1 +lxml==4.4.1 MarkupSafe==0.23 PyYAML==5.1 Werkzeug==0.16.0 @@ -23,23 +22,21 @@ boto==2.36.0 elasticsearch==1.2.0 itsdangerous==0.24 requests== 2.22.0 -six==1.10.0 -urllib3==1.24.1 -wsgiref==0.1.2 +six==1.12.0 +urllib3==1.24.2 dicttoxml==1.5.8 sqlalchemy==1.3.5 python-dateutil==2.4.2 graphene==2.0.1 graphql-relay==0.4.5 promise==2.2.1 -cyordereddict==1.0.0 Flask-SQLAlchemy-Session==1.1 -psqlgraph==2.0.2 -gen3datamodel==2.0.2 --e git+https://git@github.com/uc-cdis/cdis_oauth2client.git@0.1.3#egg=cdis_oauth2client --e git+https://git@github.com/NCI-GDC/cdisutils.git@8a8e599fdab5ade9bd8c586132d974a102e5d72d#egg=cdisutils -cdispyutils==0.2.13 --e git+https://git@github.com/uc-cdis/storage-client.git@0.1.1#egg=storageclient --e git+https://git@github.com/uc-cdis/graphql-core.git@cdis2.0.0#egg=graphql-core --e git+https://git@github.com/uc-cdis/cdiserrors.git@0.1.1#egg=cdiserrors --e 
git+https://git@github.com/uc-cdis/cdislogging.git@master#egg=cdislogging +psqlgraph==3.0.0 +gen3datamodel==3.0.1 +cdis_oauth2client==1.0.0 +git+https://git@github.com/NCI-GDC/cdisutils.git@8a8e599fdab5ade9bd8c586132d974a102e5d72d#egg=cdisutils +cdispyutils==1.0.3 +git+https://git@github.com/uc-cdis/storage-client.git@1.0.0#egg=storageclient +git+https://git@github.com/uc-cdis/graphql-core.git@cdis2.0.0#egg=graphql-core +cdiserrors==0.1.2 +cdislogging==1.0.0 diff --git a/run.py b/run.py index 409e4fe7..3bdb6598 100755 --- a/run.py +++ b/run.py @@ -2,7 +2,6 @@ from authutils import ROLES as all_roles from collections import defaultdict -from flask import current_app from mock import patch, PropertyMock import os from peregrine.api import run_for_development @@ -11,19 +10,18 @@ requests.packages.urllib3.disable_warnings() -all_role_values = all_roles.values() +all_role_values = list(all_roles.values()) roles = defaultdict(lambda: all_role_values) class FakeBotoKey(object): - def __init__(self, name): self.name = name def close(self): pass - def open_read(self,*args, **kwargs): + def open_read(self, *args, **kwargs): pass @property @@ -42,17 +40,19 @@ def fake_get_nodes(dids): file_name = files.get(did, {})["data"]["file_name"] except ValueError: file_name = did - nodes.append(Node( - node_id=did, - label="file", - acl=["open"], - properties={ - "file_name": file_name, - "file_size": len("fake data for {}".format(did)), - "md5sum": "fake_md5sum", - "state": "live", - }, - )) + nodes.append( + Node( + node_id=did, + label="file", + acl=["open"], + properties={ + "file_name": file_name, + "file_size": len("fake data for {}".format(did)), + "md5sum": "fake_md5sum", + "state": "live", + }, + ) + ) return nodes @@ -69,27 +69,27 @@ def fake_key_for_node(node): class FakeUser(object): - username = 'test' - roles = roles + username = "test" + roles = roles def set_user(*args, **kwargs): from flask import g + g.user = FakeUser() def run_with_fake_auth(): with patch( - 'peregrine.auth.CurrentUser.roles', + "peregrine.auth.CurrentUser.roles", new_callable=PropertyMock, return_value=roles, ), patch( - 'peregrine.auth.CurrentUser.logged_in', + "peregrine.auth.CurrentUser.logged_in", new_callable=PropertyMock, return_value=lambda: True, ), patch( - 'peregrine.auth.verify_hmac', - new=set_user, + "peregrine.auth.verify_hmac", new=set_user, ): run_for_development(debug=debug, threaded=True) @@ -100,7 +100,7 @@ def run_with_fake_authz(): """ auth_mapping = {} # modify this to mock specific access with patch( - 'gen3authz.client.arborist.client.ArboristClient.auth_mapping', + "gen3authz.client.arborist.client.ArboristClient.auth_mapping", new_callable=PropertyMock, return_value=lambda x: auth_mapping, ): @@ -109,22 +109,24 @@ def run_with_fake_authz(): def run_with_fake_download(): with patch("peregrine.download.get_nodes", fake_get_nodes): - with patch.multiple("peregrine.download", - key_for=fake_key_for, - key_for_node=fake_key_for_node, - urls_from_index_client=fake_urls_from_index_client): + with patch.multiple( + "peregrine.download", + key_for=fake_key_for, + key_for_node=fake_key_for_node, + urls_from_index_client=fake_urls_from_index_client, + ): if os.environ.get("GDC_FAKE_AUTH"): run_with_fake_auth() else: run_for_development(debug=debug, threaded=True) -if __name__ == '__main__': - debug = bool(os.environ.get('PEREGRINE_DEBUG', True)) - if os.environ.get("GDC_FAKE_DOWNLOAD") == 'True': +if __name__ == "__main__": + debug = bool(os.environ.get("PEREGRINE_DEBUG", True)) + if 
os.environ.get("GDC_FAKE_DOWNLOAD") == "True": run_with_fake_download() else: - if os.environ.get("GDC_FAKE_AUTH") == 'True': + if os.environ.get("GDC_FAKE_AUTH") == "True": run_with_fake_auth() else: run_with_fake_authz() diff --git a/scripts/git_hooks/commit-msg.py b/scripts/git_hooks/commit-msg.py index 77c15dca..73285805 100755 --- a/scripts/git_hooks/commit-msg.py +++ b/scripts/git_hooks/commit-msg.py @@ -10,32 +10,41 @@ import sys import re -import sys, os +import os from subprocess import call -valid_commit_types = ['feat', 'fix', 'docs', 'style', 'refactor', 'test', 'chore'] -help_address = 'https://docs.google.com/document/d/1QrDFcIiPjSLDn3EL15IJygNPiHORgU1_OOAqWjiDU5Y/edit#' +valid_commit_types = ["feat", "fix", "docs", "style", "refactor", "test", "chore"] +help_address = "https://docs.google.com/document/d/1QrDFcIiPjSLDn3EL15IJygNPiHORgU1_OOAqWjiDU5Y/edit#" -if os.environ.get('EDITOR') is not None: - editor = os.environ['EDITOR'] +if os.environ.get("EDITOR") is not None: + editor = os.environ["EDITOR"] else: editor = "vim" message_file = sys.argv[1] + def header_check(real_lineno, line): - m = re.search('^(.*)\((.*)\): (.*)$', line) + m = re.search("^(.*)\((.*)\): (.*)$", line) if not m or len(m.groups()) != 3: - return "Line %d: Header does not follow format: type(scope): message" % (real_lineno,) + return "Line %d: Header does not follow format: type(scope): message" % ( + real_lineno, + ) commit_type, commit_scope, commit_message = m.groups() if commit_type not in valid_commit_types: - return "Line %d: Commit type not valid. Must be one of:\n#! %s" % (real_lineno,", ".join(valid_commit_types)) + return "Line %d: Commit type not valid. Must be one of:\n#! %s" % ( + real_lineno, + ", ".join(valid_commit_types), + ) if len(line) > 50: - return "Line %d: First line should be less than 50 characters in length. (is %d)" % (real_lineno, len(line)) + return ( + "Line %d: First line should be less than 50 characters in length. (is %d)" + % (real_lineno, len(line)) + ) def check_format_rules(lineno, line): @@ -47,8 +56,11 @@ def check_format_rules(lineno, line): if lineno == 1 and line: return "Line %d: Second line should be empty." % (real_lineno,) - if not line.startswith('#') and len(line) > 72: - return "Line %d: No line should be over 72 characters long. (is %d)" % (real_lineno,len(line)) + if not line.startswith("#") and len(line) > 72: + return "Line %d: No line should be over 72 characters long. (is %d)" % ( + real_lineno, + len(line), + ) return False @@ -60,27 +72,31 @@ def check_format_rules(lineno, line): with open(message_file) as commit_fd: for lineno, line in enumerate(commit_fd): stripped_line = line.strip() - if not line.startswith('#!'): + if not line.startswith("#!"): commit_msg.append(line) e = check_format_rules(lineno, stripped_line) if e: errors.append(e) - with open(message_file, 'w') as commit_fd: + with open(message_file, "w") as commit_fd: for line in commit_msg: commit_fd.write(line) if errors: if commit_msg[-1] != "\n": - commit_fd.write('\n') - commit_fd.write('%s\n#! %s\n' % ('#! GIT COMMIT MESSAGE FORMAT ERRORS:', help_address)) + commit_fd.write("\n") + commit_fd.write( + "%s\n#! %s\n" % ("#! GIT COMMIT MESSAGE FORMAT ERRORS:", help_address) + ) for error in errors: - commit_fd.write('#! %s\n' % (error,)) + commit_fd.write("#! %s\n" % (error,)) if errors: - re_edit = raw_input('Invalid git commit message format. Press y to edit and n to cancel the commit. 
[Y/n]: ') - if re_edit.lower() in ('n','no'): + re_edit = input( + "Invalid git commit message format. Press y to edit and n to cancel the commit. [Y/n]: " + ) + if re_edit.lower() in ("n", "no"): sys.exit(1) - call('%s %s' % (editor, message_file), shell=True) + call("%s %s" % (editor, message_file), shell=True) continue break diff --git a/setup.py b/setup.py index b1ee9e31..99064a70 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,5 @@ "peregrine.resources.submission", "peregrine.resources.submission.graphql", ], - entry_points={ - 'console_scripts': ['peregrine=peregrine.api:main'] - }, + entry_points={"console_scripts": ["peregrine=peregrine.api:main"]}, ) diff --git a/tests/conftest.py b/tests/conftest.py index d7b4a816..88bcf167 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,6 @@ from gen3authz.client.arborist.errors import ArboristError from indexclient.client import IndexClient -from multiprocessing import Process from psqlgraph import PsqlGraphDriver import json import pytest @@ -12,8 +11,7 @@ import peregrine from peregrine.api import app as _app, app_init -from peregrine.errors import AuthZError -import utils +from . import utils # Python 2 and 3 compatible try: @@ -30,37 +28,36 @@ def pg_config(): return dict( - host='localhost', - user='test', - password='test', - database='automated_test', + host="localhost", user="test", password="test", database="automated_test", ) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def app(request): _app.config.from_object("peregrine.test_settings") app_init(_app) - - sheepdog_blueprint = sheepdog.blueprint.create_blueprint('submission') - _app.register_blueprint(sheepdog_blueprint, url_prefix='/v0/submission') - _app.logger.info('Initializing IndexClient') + sheepdog_blueprint = sheepdog.blueprint.create_blueprint("submission") + _app.register_blueprint(sheepdog_blueprint, url_prefix="/v0/submission") + + _app.logger.info("Initializing IndexClient") _app.index_client = IndexClient( - _app.config['INDEX_CLIENT']['host'], - version=_app.config['INDEX_CLIENT']['version'], - auth=_app.config['INDEX_CLIENT']['auth']) + _app.config["INDEX_CLIENT"]["host"], + version=_app.config["INDEX_CLIENT"]["version"], + auth=_app.config["INDEX_CLIENT"]["auth"], + ) try: - _app.logger.info('Initializing Auth driver') + _app.logger.info("Initializing Auth driver") except Exception: _app.logger.exception("Couldn't initialize auth, continuing anyway") - _app.logger.setLevel(os.environ.get("GDC_LOG_LEVEL", "WARNING")) - _app.jwt_public_keys = {_app.config['USER_API']: { - 'key-test': utils.read_file('resources/keys/test_public_key.pem') - }} + _app.jwt_public_keys = { + _app.config["USER_API"]: { + "key-test": utils.read_file("resources/keys/test_public_key.pem") + } + } return _app @@ -72,18 +69,18 @@ def tearDown(): with pg_driver.engine.begin() as conn: for table in Node().get_subclass_table_names(): if table != Node.__tablename__: - conn.execute('delete from {}'.format(table)) + conn.execute("delete from {}".format(table)) for table in Edge().get_subclass_table_names(): if table != Edge.__tablename__: - conn.execute('delete from {}'.format(table)) - conn.execute('delete from versioned_nodes') - conn.execute('delete from _voided_nodes') - conn.execute('delete from _voided_edges') - conn.execute('delete from transaction_snapshots') - conn.execute('delete from transaction_documents') - conn.execute('delete from transaction_logs') - - tearDown() #cleanup potential last test data + conn.execute("delete from {}".format(table)) + 
conn.execute("delete from versioned_nodes") + conn.execute("delete from _voided_nodes") + conn.execute("delete from _voided_edges") + conn.execute("delete from transaction_snapshots") + conn.execute("delete from transaction_documents") + conn.execute("delete from transaction_logs") + + tearDown() # cleanup potential last test data request.addfinalizer(tearDown) return pg_driver @@ -99,9 +96,8 @@ def closeConnection(): return pg_driver -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def encoded_jwt(app): - def encoded_jwt_function(private_key, user): """ Return an example JWT containing the claims and encoded with the private @@ -114,58 +110,63 @@ def encoded_jwt_function(private_key, user): Return: str: JWT containing claims encoded with private key """ - kid = peregrine.test_settings.JWT_KEYPAIR_FILES.keys()[0] - scopes = ['openid'] + kid = list(peregrine.test_settings.JWT_KEYPAIR_FILES.keys())[0] + scopes = ["openid"] token = utils.generate_signed_access_token( - kid, private_key, user, 3600, scopes, forced_exp_time=None, - iss=app.config['USER_API'], + kid, + private_key, + user, + 3600, + scopes, + forced_exp_time=None, + iss=app.config["USER_API"], ) return token.token return encoded_jwt_function -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def submitter(encoded_jwt): - private_key = utils.read_file('resources/keys/test_private_key.pem') + private_key = utils.read_file("resources/keys/test_private_key.pem") # set up a fake User object which has all the attributes needed # to generate a token user_properties = { - 'id': 1, - 'username': 'submitter', - 'is_admin': False, - 'policies': [], - 'google_proxy_group_id': None, + "id": 1, + "username": "submitter", + "is_admin": False, + "policies": [], + "google_proxy_group_id": None, } - user = type('User', (object,), user_properties) + user = type("User", (object,), user_properties) token = encoded_jwt(private_key, user) - return {'Authorization': 'bearer ' + token} + return {"Authorization": "bearer " + token} -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def admin(encoded_jwt): - private_key = utils.read_file('resources/keys/test_private_key.pem') + private_key = utils.read_file("resources/keys/test_private_key.pem") user_properties = { - 'id': 2, - 'username': 'admin', - 'is_admin': True, - 'policies': [], - 'google_proxy_group_id': None, + "id": 2, + "username": "admin", + "is_admin": True, + "policies": [], + "google_proxy_group_id": None, } - user = type('User', (object,), user_properties) + user = type("User", (object,), user_properties) token = encoded_jwt(private_key, user) - return {'Authorization': 'bearer ' + token} + return {"Authorization": "bearer " + token} -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def es_setup(request): es = Elasticsearch(["localhost"], port=9200) def es_teardown(): es.indices.delete( - index=INDEX, - ignore=404, # ignores error if index doesn't exists + index=INDEX, ignore=404, # ignores error if index doesn't exists ) + request.addfinalizer(es_teardown) es.indices.create( @@ -174,27 +175,35 @@ def es_teardown(): ignore=400, # ignores error if index already exists ) - es.indices.put_mapping(index=INDEX, doc_type="file", body=mappings.get_file_es_mapping()) - es.indices.put_mapping(index=INDEX, doc_type="project", body=mappings.get_project_es_mapping()) - es.indices.put_mapping(index=INDEX, doc_type="case", body=mappings.get_case_es_mapping()) - es.indices.put_mapping(index=INDEX, doc_type="annotation", 
body=mappings.get_annotation_es_mapping()) + es.indices.put_mapping( + index=INDEX, doc_type="file", body=mappings.get_file_es_mapping() + ) + es.indices.put_mapping( + index=INDEX, doc_type="project", body=mappings.get_project_es_mapping() + ) + es.indices.put_mapping( + index=INDEX, doc_type="case", body=mappings.get_case_es_mapping() + ) + es.indices.put_mapping( + index=INDEX, doc_type="annotation", body=mappings.get_annotation_es_mapping() + ) - json_data = open(os.path.join(os.path.dirname(__file__), 'data/projects.json')) + json_data = open(os.path.join(os.path.dirname(__file__), "data/projects.json")) data = json.load(json_data) for p in data["data"]["hits"]: es.index(index=INDEX, doc_type=project_model.doc_type, body=p) - json_data = open(os.path.join(os.path.dirname(__file__), 'data/files.json')) + json_data = open(os.path.join(os.path.dirname(__file__), "data/files.json")) data = json.load(json_data) for f in data["data"]["hits"]: es.index(index=INDEX, doc_type=f_model.doc_type, body=f) - json_data = open(os.path.join(os.path.dirname(__file__), 'data/cases.json')) + json_data = open(os.path.join(os.path.dirname(__file__), "data/cases.json")) data = json.load(json_data) for pa in data["data"]["hits"]: es.index(index=INDEX, doc_type=case_model.doc_type, body=pa) - json_data = open(os.path.join(os.path.dirname(__file__), 'data/annotations.json')) + json_data = open(os.path.join(os.path.dirname(__file__), "data/annotations.json")) data = json.load(json_data) for a in data["data"]["hits"]: es.index(index=INDEX, doc_type=annotation_model.doc_type, body=a) @@ -207,8 +216,10 @@ def es_teardown(): @pytest.fixture def public_dataset_api(request): os.environ["PUBLIC_DATASETS"] = "true" + def tearDown(): os.environ["PUBLIC_DATASETS"] = "false" + request.addfinalizer(tearDown) @@ -244,15 +255,27 @@ def response(*args, **kwargs): mocked_response.get = lambda *args, **kwargs: None return mocked_response + return response mocked_auth_mapping = MagicMock(side_effect=make_mock_response("auth_mapping")) mocked_auth_request = MagicMock(side_effect=make_mock_response("auth_request")) - mocked_create_resource = MagicMock(side_effect=make_mock_response("create_resource")) + mocked_create_resource = MagicMock( + side_effect=make_mock_response("create_resource") + ) - patch_auth_mapping = patch("gen3authz.client.arborist.client.ArboristClient.auth_mapping", mocked_auth_mapping) - patch_auth_request = patch("gen3authz.client.arborist.client.ArboristClient.auth_request", mocked_auth_request) - patch_create_resource = patch("gen3authz.client.arborist.client.ArboristClient.create_resource", mocked_create_resource) + patch_auth_mapping = patch( + "gen3authz.client.arborist.client.ArboristClient.auth_mapping", + mocked_auth_mapping, + ) + patch_auth_request = patch( + "gen3authz.client.arborist.client.ArboristClient.auth_request", + mocked_auth_request, + ) + patch_create_resource = patch( + "gen3authz.client.arborist.client.ArboristClient.create_resource", + mocked_create_resource, + ) patch_auth_mapping.start() patch_auth_request.start() @@ -272,10 +295,10 @@ def arborist_authorized(mock_arborist_requests): To mock a different response, use the fixture in the test itself: "mock_arborist_requests(auth_mapping={...}, known_user=True/False)" """ - mock_arborist_requests(auth_mapping={ - "/programs/CGCI/projects/BLGSP": [ - { - "service": "peregrine", "method": "read" - } - ] - }) + mock_arborist_requests( + auth_mapping={ + "/programs/CGCI/projects/BLGSP": [ + {"service": "peregrine", "method": "read"} + ] + } 
+ ) diff --git a/tests/graphql/conftest.py b/tests/graphql/conftest.py index 3b0fa93c..3086edab 100644 --- a/tests/graphql/conftest.py +++ b/tests/graphql/conftest.py @@ -3,7 +3,7 @@ import flask import pytest from datamodelutils import models -#from datamodelutils.models.submission import TransactionLog + from tests.graphql import utils from tests.graphql.test_graphql import ( @@ -11,15 +11,18 @@ post_example_entities_together, ) -path = '/v0/submission/graphql' +path = "/v0/submission/graphql" + @pytest.fixture def graphql_client(client, submitter): def execute(query, variables={}): - return client.post(path, headers=submitter, data=json.dumps({ - 'query': query, - 'variables': variables, - })) + return client.post( + path, + headers=submitter, + data=json.dumps({"query": query, "variables": variables,}), + ) + return execute @@ -29,14 +32,16 @@ def cgci_blgsp(client, admin): TODO: Docstring for put_cgci_blgsp. """ put_cgci(client, auth=admin) - path = '/v0/submission/CGCI/' - data = json.dumps({ - "type": "project", - "code": "BLGSP", - "dbgap_accession_number": 'phs000527', - "name": "Burkitt Lymphoma Genome Sequencing Project", - "state": "open" - }) + path = "/v0/submission/CGCI/" + data = json.dumps( + { + "type": "project", + "code": "BLGSP", + "dbgap_accession_number": "phs000527", + "name": "Burkitt Lymphoma Genome Sequencing Project", + "state": "open", + } + ) r = client.put(path, headers=admin, data=data) assert r.status_code == 200, r.data del flask.g.user @@ -45,20 +50,21 @@ def cgci_blgsp(client, admin): @pytest.fixture def put_tcga_brca(admin, client): - data = json.dumps({ - 'name': 'TCGA', 'type': 'program', - 'dbgap_accession_number': 'phs000178' - }) - r = client.put('/v0/submission/', headers=admin, data=data) + data = json.dumps( + {"name": "TCGA", "type": "program", "dbgap_accession_number": "phs000178"} + ) + r = client.put("/v0/submission/", headers=admin, data=data) assert r.status_code == 200, r.data - data = json.dumps({ - "type": "project", - "code": "BRCA", - "name": "TEST", - "dbgap_accession_number": "phs000178", - "state": "open" - }) - r = client.put('/v0/submission/TCGA/', headers=admin, data=data) + data = json.dumps( + { + "type": "project", + "code": "BRCA", + "name": "TEST", + "dbgap_accession_number": "phs000178", + "state": "open", + } + ) + r = client.put("/v0/submission/TCGA/", headers=admin, data=data) assert r.status_code == 200, r.data return r @@ -67,15 +73,17 @@ def put_tcga_brca(admin, client): def mock_tx_log(pg_driver_clean): utils.reset_transactions(pg_driver_clean) with pg_driver_clean.session_scope() as session: - return session.merge(models.submission.TransactionLog( - is_dry_run=True, - program='CGCI', - project='BLGSP', - role='create', - state='SUCCEEDED', - committed_by=12345, - closed=False, - )) + return session.merge( + models.submission.TransactionLog( + is_dry_run=True, + program="CGCI", + project="BLGSP", + role="create", + state="SUCCEEDED", + committed_by=12345, + closed=False, + ) + ) @pytest.fixture @@ -88,26 +96,26 @@ def populated_blgsp(client, submitter, pg_driver_clean): def failed_deletion_transaction(client, submitter, pg_driver_clean, populated_blgsp): with pg_driver_clean.session_scope(): node_id = pg_driver_clean.nodes(models.Sample).first().node_id - delete_path = '/v0/submission/CGCI/BLGSP/entities/{}'.format(node_id) - r = client.delete( - delete_path, - headers=submitter) + delete_path = "/v0/submission/CGCI/BLGSP/entities/{}".format(node_id) + r = client.delete(delete_path, headers=submitter) assert 
r.status_code == 400, r.data - return str(r.json['transaction_id']) + return str(r.json["transaction_id"]) @pytest.fixture def failed_upload_transaction(client, submitter, pg_driver_clean): - put_path = '/v0/submission/CGCI/BLGSP/' + put_path = "/v0/submission/CGCI/BLGSP/" r = client.put( put_path, - data=json.dumps({ - 'type': 'sample', - 'cases': [{'id': 'no idea'}], - 'sample_type': 'teapot', - 'how_heavy': 'no', - }), - headers=submitter) + data=json.dumps( + { + "type": "sample", + "cases": [{"id": "no idea"}], + "sample_type": "teapot", + "how_heavy": "no", + } + ), + headers=submitter, + ) assert r.status_code == 400, r.data - return str(r.json['transaction_id']) - + return str(r.json["transaction_id"]) diff --git a/tests/graphql/test_datasets.py b/tests/graphql/test_datasets.py index ec0188e9..4763a418 100644 --- a/tests/graphql/test_datasets.py +++ b/tests/graphql/test_datasets.py @@ -1,6 +1,5 @@ -from test_graphql import post_example_entities_together +from .test_graphql import post_example_entities_together from datamodelutils import models -import os def test_authorized_call_with_protected_config( @@ -15,7 +14,7 @@ def test_authorized_call_with_protected_config( case.project_id = "OTHER-OTHER" s.merge(case) r = client.get("/datasets?nodes=case,aliquot", headers=submitter) - assert r.json.keys() == ["CGCI-BLGSP"] + assert list(r.json.keys()) == ["CGCI-BLGSP"] assert r.json["CGCI-BLGSP"]["case"] == case_count - 2 r = client.get("/datasets/projects", headers=submitter) @@ -23,8 +22,8 @@ def test_authorized_call_with_protected_config( def test_unauthorized_call_with_protected_config( - client, submitter, pg_driver_clean, cgci_blgsp, mock_arborist_requests - ): + client, submitter, pg_driver_clean, cgci_blgsp, mock_arborist_requests +): post_example_entities_together(client, pg_driver_clean, submitter) mock_arborist_requests(auth_mapping={}) @@ -44,7 +43,9 @@ def test_anonymous_call_with_protected_config(client, pg_driver_clean, cgci_blgs assert r.status_code == 401 -def test_anonymous_projects_call_with_protected_config(client, pg_driver_clean, cgci_blgsp): +def test_anonymous_projects_call_with_protected_config( + client, pg_driver_clean, cgci_blgsp +): r = client.get("/datasets/projects") assert r.status_code == 401 @@ -71,15 +72,15 @@ def test_anonymous_call_with_public_config( assert r.json["CGCI-OTHER"]["aliquot"] == 0 assert r.json["CGCI-OTHER"]["case"] == 2 + def test_get_projects_anonymous( - client, submitter, pg_driver_clean, cgci_blgsp, public_dataset_api - ): + client, submitter, pg_driver_clean, cgci_blgsp, public_dataset_api +): post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope() as s: project = models.Project( - "other", name="name", - code="OTHER", dbgap_accession_number="phsid" + "other", name="name", code="OTHER", dbgap_accession_number="phsid" ) program = pg_driver_clean.nodes(models.Program).props(name="CGCI").first() project.programs = [program] @@ -87,9 +88,11 @@ def test_get_projects_anonymous( r = client.get("/datasets/projects") assert r.json == { "projects": [ - {"dbgap_accession_number": "phs000527", - "code": "BLGSP", - "name": "Burkitt Lymphoma Genome Sequencing Project"}, - {"dbgap_accession_number": "phsid", - "code": "OTHER", "name": "name"}] - } + { + "dbgap_accession_number": "phs000527", + "code": "BLGSP", + "name": "Burkitt Lymphoma Genome Sequencing Project", + }, + {"dbgap_accession_number": "phsid", "code": "OTHER", "name": "name"}, + ] + } diff --git a/tests/graphql/test_graphql.py 
b/tests/graphql/test_graphql.py index 29c30ce3..d4e07109 100644 --- a/tests/graphql/test_graphql.py +++ b/tests/graphql/test_graphql.py @@ -12,20 +12,21 @@ from tests.graphql import utils from tests.graphql.utils import data_fnames -BLGSP_PATH = '/v0/submission/CGCI/BLGSP/' -BRCA_PATH = '/v0/submission/TCGA/BRCA/' +BLGSP_PATH = "/v0/submission/CGCI/BLGSP/" +BRCA_PATH = "/v0/submission/TCGA/BRCA/" -DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data') +DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data") -path = '/v0/submission/graphql' +path = "/v0/submission/graphql" def post_example_entities_together( - client, pg_driver_clean, submitter, data_fnames=data_fnames): + client, pg_driver_clean, submitter, data_fnames=data_fnames +): path = BLGSP_PATH data = [] for fname in data_fnames: - with open(os.path.join(DATA_DIR, fname), 'r') as f: + with open(os.path.join(DATA_DIR, fname), "r") as f: data.append(json.loads(f.read())) return client.post(path, headers=submitter, data=json.dumps(data)) @@ -34,29 +35,32 @@ def put_example_entities_together(client, pg_driver_clean, submitter): path = BLGSP_PATH data = [] for fname in data_fnames: - with open(os.path.join(DATA_DIR, fname), 'r') as f: + with open(os.path.join(DATA_DIR, fname), "r") as f: data.append(json.loads(f.read())) return client.put(path, headers=submitter, data=json.dumps(data)) + def put_cgci(client, auth=None): - path = '/v0/submission' - data = json.dumps({ - 'name': 'CGCI', 'type': 'program', - 'dbgap_accession_number': 'phs000235' - }) + path = "/v0/submission" + data = json.dumps( + {"name": "CGCI", "type": "program", "dbgap_accession_number": "phs000235"} + ) r = client.put(path, headers=auth, data=data) return r + def put_cgci_blgsp(client, auth=None): put_cgci(client, auth=auth) - path = '/v0/submission/CGCI/' - data = json.dumps({ - "type": "project", - "code": "BLGSP", - "dbgap_accession_number": 'phs000527', - "name": "Burkitt Lymphoma Genome Sequencing Project", - "state": "open" - }) + path = "/v0/submission/CGCI/" + data = json.dumps( + { + "type": "project", + "code": "BLGSP", + "dbgap_accession_number": "phs000527", + "name": "Burkitt Lymphoma Genome Sequencing Project", + "state": "open", + } + ) r = client.put(path, headers=auth, data=data) assert r.status_code == 200, r.data del g.user @@ -66,117 +70,153 @@ def put_cgci_blgsp(client, auth=None): def test_node_subclasses(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) for cls in Node.get_subclasses(): - print cls - data = json.dumps({ - 'query': """query Test {{ {} {{ id }}}}""".format(cls.label) - }) + print(cls) + data = json.dumps( + {"query": """query Test {{ {} {{ id }}}}""".format(cls.label)} + ) r = client.post(path, headers=submitter, data=data) - print r.data - assert cls.label in r.json['data'], r.data + print(r.data) + assert cls.label in r.json["data"], r.data def test_alias(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - data = json.dumps({ - 'query': """query Test { alias1: case { id } }""" - }) + data = json.dumps({"query": """query Test { alias1: case { id } }"""}) r = client.post(path, headers=submitter, data=data) - assert 'alias1' in r.json.get('data', {}), r.data + assert "alias1" in r.json.get("data", {}), r.data def test_types(client, submitter, pg_driver_clean, cgci_blgsp): post = post_example_entities_together(client, pg_driver_clean, submitter) assert 
post.status_code == 201 - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """query Test { boolean: sample (first: 1) { is_ffpe } float : aliquot(first: 1) { concentration } }""" - })) + } + ), + ) - print("types data is " + str(r.json)) - assert isinstance(r.json['data']['boolean'][0]['is_ffpe'], bool) - assert isinstance(r.json['data']['float'][0]['concentration'], float) + print(("types data is " + str(r.json))) + assert isinstance(r.json["data"]["boolean"][0]["is_ffpe"], bool) + assert isinstance(r.json["data"]["float"][0]["concentration"], float) -def test_unathenticated_graphql_query( - client, submitter, pg_driver_clean, cgci_blgsp): +def test_unathenticated_graphql_query(client, submitter, pg_driver_clean, cgci_blgsp): """ Test that sending a query with no auth header returns a 401. """ post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers={}, data=json.dumps({ - 'query': """query Test { alias1: case { id } }""" - })) + r = client.post( + path, + headers={}, + data=json.dumps({"query": """query Test { alias1: case { id } }"""}), + ) assert r.status_code == 401, r.data def test_fragment(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { case { ... caseFragment } } fragment caseFragment on case { id type } - """})) - assert r.json.get('data', {}).get('case'), r.data - for case in r.json.get('data', {}).get('case'): - assert case.get('type') == 'case', case - assert 'amount' not in case + """ + } + ), + ) + assert r.json.get("data", {}).get("case"), r.data + for case in r.json.get("data", {}).get("case"): + assert case.get("type") == "case", case + assert "amount" not in case def test_viewer(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { viewer { case { id type } } } - """})) - assert r.json.get('data', {}).get('viewer', {}).get('case'), r.data - for case in r.json.get('data', {}).get('viewer', {}).get('case'): - assert 'type' in case + """ + } + ), + ) + assert r.json.get("data", {}).get("viewer", {}).get("case"), r.data + for case in r.json.get("data", {}).get("viewer", {}).get("case"): + assert "type" in case def test_node_interface(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { node (first: 100) { + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """query Test { node (first: 100) { # ^ Default limit is 10, but we have more than 10 nodes, so override id type project_id created_datetime - }}"""})) - results = r.json.get('data', {}).get('node', {}) + }}""" + } + ), + ) + results = r.json.get("data", {}).get("node", {}) assert len(results) == len(utils.data_fnames) for node in results: - assert 'type' in node - assert 'id' in node - assert 'project_id' in node - assert 'created_datetime' in node + assert "type" in node + assert "id" in node + assert "project_id" 
in node + assert "created_datetime" in node def test_quicksearch(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope(): aliquot = pg_driver_clean.nodes(models.Aliquot).first() - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """query Test { aliquot(quick_search: "%s") { id type project_id submitter_id }} - """ % aliquot.submitter_id[15:] - })) + """ + % aliquot.submitter_id[15:] + } + ), + ) assert r.json == { "data": { - "aliquot": [{ - "id": aliquot.node_id, - "submitter_id": aliquot.submitter_id, - "project_id": "CGCI-BLGSP", - "type": "aliquot" - }] + "aliquot": [ + { + "id": aliquot.node_id, + "submitter_id": aliquot.submitter_id, + "project_id": "CGCI-BLGSP", + "type": "aliquot", + } + ] } } def test_quicksearch_skip_empty(client, submitter, pg_driver_clean, cgci_blgsp): from peregrine.resources.submission.graphql import node + orig = node.apply_arg_quicksearch try: queries = [] @@ -188,12 +228,18 @@ def apply_arg_quicksearch(q, *args): return rv node.apply_arg_quicksearch = apply_arg_quicksearch - client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { + client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """query Test { aliquot(quick_search: "") { id type project_id submitter_id }} """ - })) - assert queries[0] is queries[1], 'should not apply empty quick_search' + } + ), + ) + assert queries[0] is queries[1], "should not apply empty quick_search" finally: node.apply_arg_quicksearch = orig @@ -202,19 +248,27 @@ def test_node_interface_project_id(client, admin, submitter, pg_driver_clean): assert put_cgci_blgsp(client, auth=admin).status_code == 200 post = post_example_entities_together(client, pg_driver_clean, submitter) assert post.status_code == 201 - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """query Test { a: node(project_id: "CGCI-BLGSP" ) { id } b: node(project_id: "FAKE-PROJECT") { id } - }"""})) - assert r.json['data']['a'] - assert not r.json['data']['b'] + }""" + } + ), + ) + assert r.json["data"]["a"] + assert not r.json["data"]["b"] def test_node_interface_of_type(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - data = json.dumps({ - 'query': """ + data = json.dumps( + { + "query": """ query Test { node (of_type: ["case"]) { id @@ -222,55 +276,72 @@ def test_node_interface_of_type(client, submitter, pg_driver_clean, cgci_blgsp): } } """ - }) + } + ) r = client.post(path, headers=submitter, data=data) - print r.data - types = {d['type'] for d in r.json['data']['node']} - assert not {'case'}.symmetric_difference(types) + print(r.data) + types = {d["type"] for d in r.json["data"]["node"]} + assert not {"case"}.symmetric_difference(types) def test_node_interface_category(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - category = dictionary.schema.values()[0]['category'] + category = list(dictionary.schema.values())[0]["category"] accepted_types = [ node for node in dictionary.schema - if dictionary.schema[node]['category'] == category + if dictionary.schema[node]["category"] == category ] - r = client.post(path, headers=submitter, data=json.dumps({ 
- 'query': """query Test {{ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """query Test {{ node (category: "{}") {{ id project_id type }} - }}""".format(category)})) + }}""".format( + category + ) + } + ), + ) assert r.status_code == 200, r.data - results = r.json.get('data', {}).get('node', {}) + results = r.json.get("data", {}).get("node", {}) for node in results: - assert 'id' in node - assert 'project_id' in node - assert 'type' in node - assert node['type'] in accepted_types + assert "id" in node + assert "project_id" in node + assert "type" in node + assert node["type"] in accepted_types def test_node_interface_program_project(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """query Test { node (category: "administrative") { type } - }"""})) + }""" + } + ), + ) assert r.status_code == 200, r.data - results = r.json.get('data', {}).get('node', {}) + results = r.json.get("data", {}).get("node", {}) if results: programs = 0 projects = 0 for node in results: - assert 'type' in node - if node['type'] == 'program': + assert "type" in node + if node["type"] == "program": programs += 1 - elif node['type'] == 'project': + elif node["type"] == "project": projects += 1 assert programs > 0 assert projects > 0 @@ -278,27 +349,45 @@ def test_node_interface_program_project(client, submitter, pg_driver_clean, cgci def test_arg_props(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { sample (project_id: "CGCI-BLGSP") { project_id }} - """})) - data = r.json.get('data') + """ + } + ), + ) + data = r.json.get("data") assert data, r.data - assert data['sample'][0]['project_id'] == "CGCI-BLGSP" + assert data["sample"][0]["project_id"] == "CGCI-BLGSP" - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { sample (project_id: "fake-project") { project_id }} - """})) - data = r.json.get('data') + """ + } + ), + ) + data = r.json.get("data") assert data, r.data - assert not data['sample'] + assert not data["sample"] def test_project_project_id_filter(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { a: project (project_id: "CGCI-BLGSP") { project_id } b: project (project_id: "FAKE") { project_id } @@ -307,52 +396,63 @@ def test_project_project_id_filter(client, submitter, pg_driver_clean, cgci_blgs project_id } } - """})) + """ + } + ), + ) assert r.json == { "data": { "a": [{"project_id": "CGCI-BLGSP"}], "b": [], "c": [], - "d": [{"project_id": "CGCI-BLGSP"}] + "d": [{"project_id": "CGCI-BLGSP"}], } } def test_arg_first(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ - query Test { + r = client.post( + path, 
+ headers=submitter, + data=json.dumps( + { + "query": """ + query Test { case (first: 1, order_by_asc: "submitter_id") { submitter_id } - } - """})) - assert r.json == { - 'data': { - 'case': [{ - 'submitter_id': 'BLGSP-71-06-00019' - }] - } - }, r.data + } + """ + } + ), + ) + assert r.json == {"data": {"case": [{"submitter_id": "BLGSP-71-06-00019"}]}}, r.data def test_arg_offset(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ query Test { case (first: 5) { id }} """})) - first = {c['id'] for c in r.json['data']['case']} - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ query Test { case (offset: 5) { id }} """})) - data = r.json.get('data') + r = client.post( + path, + headers=submitter, + data=json.dumps({"query": """ query Test { case (first: 5) { id }} """}), + ) + first = {c["id"] for c in r.json["data"]["case"]} + r = client.post( + path, + headers=submitter, + data=json.dumps({"query": """ query Test { case (offset: 5) { id }} """}), + ) + data = r.json.get("data") assert data, r.data - offset = {c['id'] for c in r.json['data']['case']} + offset = {c["id"] for c in r.json["data"]["case"]} assert not offset.intersection(first) -@pytest.mark.skip(reason='must rewrite query') +@pytest.mark.skip(reason="must rewrite query") def test_with_path(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - data = json.dumps({ - 'query': """ + data = json.dumps( + { + "query": """ query Test { case ( order_by_desc: "created_datetime", @@ -364,29 +464,33 @@ def test_with_path(client, submitter, pg_driver_clean, cgci_blgsp): } } """ - }) + } + ) r = client.post(path, headers=submitter, data=data) - print r.data - assert len(r.json['data']['case']) == 1 - assert r.json['data']['case'][0]['submitter_id'] == "BLGSP-71-06-00019",\ - r.data + print(r.data) + assert len(r.json["data"]["case"]) == 1 + assert r.json["data"]["case"][0]["submitter_id"] == "BLGSP-71-06-00019", r.data def test_with_path_to_any(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope() as s: - props = dict(project_id='CGCI-BLGSP', state='validated') - case1 = models.Case('case1', submitter_id='case1', **props) - case2 = models.Case('case2', submitter_id='case2', **props) - sample1 = models.Sample('sample1', submitter_id='sample1', **props) - sample2 = models.Sample('sample2', submitter_id='sample2', **props) + props = dict(project_id="CGCI-BLGSP", state="validated") + case1 = models.Case("case1", submitter_id="case1", **props) + case2 = models.Case("case2", submitter_id="case2", **props) + sample1 = models.Sample("sample1", submitter_id="sample1", **props) + sample2 = models.Sample("sample2", submitter_id="sample2", **props) case1.samples = [sample1] case2.samples = [sample2] s.add_all((case1, case2)) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test($sampleId1: String, $sampleId2: String) { + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """query Test($sampleId1: String, $sampleId2: String) { a: _case_count (with_path_to_any: [ {type: "sample", submitter_id: $sampleId1} {type: "sample", submitter_id: $sampleId2} @@ -408,64 +512,67 @@ def test_with_path_to_any(client, submitter, pg_driver_clean, cgci_blgsp): {type: "sample", 
submitter_id: $sampleId2} ]) }""", - 'variables': { - "sampleId1": sample1.submitter_id, - "sampleId2": sample2.submitter_id, - } - })) + "variables": { + "sampleId1": sample1.submitter_id, + "sampleId2": sample2.submitter_id, + }, + } + ), + ) assert r.status_code == 200, r.data - assert r.json == { - 'data': { - 'a': 2, - 'b': 1, - 'c': 1, - 'd': 1, - 'e': 1, - 'f': 0, - } - }, r.data + assert r.json == {"data": {"a": 2, "b": 1, "c": 1, "d": 1, "e": 1, "f": 0,}}, r.data def test_with_path_to_invalid_type(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { case (order_by_desc: "created_datetime", with_path_to: {type: "BAD_TYPE"}) { submitter_id } } - """})) - print r.data - assert len(r.json['data']['case']) == 0 + """ + } + ), + ) + print(r.data) + assert len(r.json["data"]["case"]) == 0 -@pytest.mark.skip(reason='test is wrong') +@pytest.mark.skip(reason="test is wrong") def test_without_path(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope(): - blgsp = pg_driver_clean.nodes(models.Project).props(code='BLGSP').one() - blgsp.cases += [models.Case('id1', project_id='CGCI-BLGSP')] - data = json.dumps({ - 'query': """ + blgsp = pg_driver_clean.nodes(models.Project).props(code="BLGSP").one() + blgsp.cases += [models.Case("id1", project_id="CGCI-BLGSP")] + data = json.dumps( + { + "query": """ query Test { with : _case_count(with_path_to : {type: "aliquot"}) without: _case_count(without_path_to: {type: "aliquot"}) total : _case_count } """ - }) + } + ) r = client.post(path, headers=submitter, data=data) - print r.data - data = r.json['data'] - assert data['with'] - assert data['without'] - assert data['with'] + data['without'] == data['total'] + print(r.data) + data = r.json["data"] + assert data["with"] + assert data["without"] + assert data["with"] + data["without"] == data["total"] -@pytest.mark.skip(reason='test does not conform to latest dictionary') +@pytest.mark.skip(reason="test does not conform to latest dictionary") def test_counts_with_path_filter_multiple_paths( - client, submitter, pg_driver_clean, cgci_blgsp): + client, submitter, pg_driver_clean, cgci_blgsp +): post_example_entities_together(client, pg_driver_clean, submitter) # create multiple paths @@ -476,36 +583,54 @@ def test_counts_with_path_filter_multiple_paths( aliquot.samples = [sample] s.merge(aliquot) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { with: _sample_count(with_path_to: {type: "aliquot"}) } - """})) - print r.data - data = r.json['data'] - assert data['with'] == 1 + """ + } + ), + ) + print(r.data) + data = r.json["data"] + assert data["with"] == 1 def test_with_path_negative(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { case (with_path_to: { type: "portion", submitter_id: "incorrect"}) { submitter_id } } -"""})) - assert len(r.json['data']['case']) == 0, r.data +""" + } + ), + ) + assert 
len(r.json["data"]["case"]) == 0, r.data -@pytest.mark.skip(reason='test does not conform to latest dictionary') +@pytest.mark.skip(reason="test does not conform to latest dictionary") def test_with_path_multiple(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { case (with_path_to: [ {type: "sample", submitter_id: "BLGSP-71-06-00019s"}, @@ -513,86 +638,120 @@ def test_with_path_multiple(client, submitter, pg_driver_clean, cgci_blgsp): submitter_id } } -"""})) - assert r.json['data']['case'][0]['submitter_id'] == "BLGSP-71-06-00019",\ - r.data +""" + } + ), + ) + assert r.json["data"]["case"][0]["submitter_id"] == "BLGSP-71-06-00019", r.data def test_order_by_asc_id(client, submitter, pg_driver_clean, cgci_blgsp): utils.reset_transactions(pg_driver_clean) post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { case (order_by_asc: "id") { id }}"""})) - print r.data - _original = r.json['data']['case'] - _sorted = sorted(_original, cmp=(lambda a, b: cmp(a['id'], b['id']))) + r = client.post( + path, + headers=submitter, + data=json.dumps( + {"query": """query Test { case (order_by_asc: "id") { id }}"""} + ), + ) + print(r.data) + _original = r.json["data"]["case"] + _sorted = sorted(_original, key=(lambda x: x["id"])) assert _original == _sorted, r.data def test_order_by_desc_id(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { case (order_by_desc: "id") { id }}"""})) - print r.data - _original = r.json['data']['case'] - _sorted = sorted(_original, cmp=(lambda a, b: cmp(b['id'], a['id']))) + r = client.post( + path, + headers=submitter, + data=json.dumps( + {"query": """query Test { case (order_by_desc: "id") { id }}"""} + ), + ) + print(r.data) + _original = r.json["data"]["case"] + _sorted = sorted(_original, key=(lambda x: x["id"]), reverse=True) assert _original == _sorted, r.data def test_order_by_asc_prop(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { case (order_by_asc: "submitter_id") { + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """query Test { case (order_by_asc: "submitter_id") { submitter_id - }}"""})) - print r.data - _original = r.json['data']['case'] - _sorted = sorted(_original, cmp=( - lambda a, b: cmp(a['submitter_id'], b['submitter_id']))) + }}""" + } + ), + ) + print(r.data) + _original = r.json["data"]["case"] + _sorted = sorted(_original, key=(lambda x: x["submitter_id"])) assert _original == _sorted, r.data def test_order_by_desc_prop(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { case (order_by_desc: "submitter_id") { + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """query Test { case (order_by_desc: "submitter_id") { submitter_id - }}"""})) - print r.data - _original = r.json['data']['case'] - _sorted = 
sorted(_original, cmp=( - lambda a, b: cmp(b['submitter_id'], a['submitter_id']))) + }}""" + } + ), + ) + print(r.data) + _original = r.json["data"]["case"] + _sorted = sorted(_original, key=(lambda x: x["submitter_id"]), reverse=True) assert _original == _sorted, r.data -@pytest.mark.skip(reason='test does not conform to latest dictionary') +@pytest.mark.skip(reason="test does not conform to latest dictionary") def test_auth_node_subclass(client, submitter, pg_driver_clean, cgci_blgsp): with pg_driver_clean.session_scope(): - blgsp = pg_driver_clean.nodes(models.Project).props(code='BLGSP').one() - blgsp.cases += [models.Case('id1', project_id='CGCI-BLGSP')] - blgsp.cases += [models.Case('id2', project_id='OTHER-OTHER')] - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { case { project_id }}"""})) + blgsp = pg_driver_clean.nodes(models.Project).props(code="BLGSP").one() + blgsp.cases += [models.Case("id1", project_id="CGCI-BLGSP")] + blgsp.cases += [models.Case("id2", project_id="OTHER-OTHER")] + r = client.post( + path, + headers=submitter, + data=json.dumps({"query": """query Test { case { project_id }}"""}), + ) with pg_driver_clean.session_scope(): - assert len(r.json['data']['case']) == 1 + assert len(r.json["data"]["case"]) == 1 def test_auth_node_subclass_links(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope() as s: - cases = pg_driver_clean.nodes(models.Case).subq_path('samples').all() + cases = pg_driver_clean.nodes(models.Case).subq_path("samples").all() for case in cases: for sample in case.samples: - sample.project_id = 'OTHER-OTHER' + sample.project_id = "OTHER-OTHER" s.merge(sample) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { case (with_links: ["samples"]) { - submitter_id samples { id } _samples_count }}"""})) - print r.data + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """query Test { case (with_links: ["samples"]) { + submitter_id samples { id } _samples_count }}""" + } + ), + ) + print(r.data) with pg_driver_clean.session_scope(): - for case in r.json['data']['case']: - assert len(case['samples']) == 0, r.data - assert case['_samples_count'] == 0, r.data + for case in r.json["data"]["case"]: + assert len(case["samples"]) == 0, r.data + assert case["_samples_count"] == 0, r.data @pytest.mark.skip(reason='"clinicals" is not a link name') @@ -600,24 +759,24 @@ def test_with_links_any(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope(): ncases = pg_driver_clean.nodes(models.Case).count() - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """query Test { a: _case_count (with_links_any: []) b: _case_count (with_links_any: ["clinicals"]) c: _case_count (with_links_any: ["samples"]) d: _case_count (with_links_any: ["samples", "clinicals"]) e: _case_count (with_links_any: ["clinicals", "samples"]) f: _case_count (with_links_any: ["clinicals", "samples", "projects"]) - }"""})) + }""" + } + ), + ) assert r.json == { - 'data': { - 'a': 1, - 'b': 0, - 'c': 1, - 'd': 1, - 'e': 1, - 'f': ncases, - } + "data": {"a": 1, "b": 0, "c": 1, "d": 1, "e": 1, "f": ncases,} }, r.data @@ -628,18 +787,25 @@ def test_auth_counts(client, submitter, pg_driver_clean, 
cgci_blgsp): with pg_driver_clean.session_scope() as s: cases = pg_driver_clean.nodes(models.Case).limit(n).all() for case in cases: - case.project_id = 'OTHER-OTHER' + case.project_id = "OTHER-OTHER" s.merge(case) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { _case_count }"""})) + r = client.post( + path, + headers=submitter, + data=json.dumps({"query": """query Test { _case_count }"""}), + ) with pg_driver_clean.session_scope(): - assert r.json['data']['_case_count'] == 0 + assert r.json["data"]["_case_count"] == 0 def test_transaction_logs(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { transaction_log ( first: 1, @@ -649,14 +815,13 @@ def test_transaction_logs(client, submitter, pg_driver_clean, cgci_blgsp): submitter } } - """})) - assert len(r.json['data']['transaction_log']) == 1, r.data + """ + } + ), + ) + assert len(r.json["data"]["transaction_log"]) == 1, r.data assert r.json == { - "data": { - "transaction_log": [{ - 'project_id': 'CGCI-BLGSP', 'submitter': None - }] - } + "data": {"transaction_log": [{"project_id": "CGCI-BLGSP", "submitter": None}]} } @@ -665,35 +830,52 @@ def test_auth_transaction_logs(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope() as s: log = pg_driver_clean.nodes(models.submission.TransactionLog).one() - log.program = 'OTHER' + log.program = "OTHER" s.merge(log) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """query Test { transaction_log { id } }"""})) + r = client.post( + path, + headers=submitter, + data=json.dumps({"query": """query Test { transaction_log { id } }"""}), + ) with pg_driver_clean.session_scope(): - assert len(r.json['data']['transaction_log']) == 0, r.data + assert len(r.json["data"]["transaction_log"]) == 0, r.data def test_with_path_to(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope(): - case_sub_id = pg_driver_clean.nodes(models.Case).path('samples')\ - .first().submitter_id - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + case_sub_id = ( + pg_driver_clean.nodes(models.Case).path("samples").first().submitter_id + ) + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test {{ aliquot (with_path_to: {{type: "case", submitter_id: "{}"}}) {{ a: submitter_id }} - }}""".format(case_sub_id)})) - assert r.json['data']['aliquot'] == [{'a': 'BLGSP-71-06-00019-01A-11D'}] + }}""".format( + case_sub_id + ) + } + ), + ) + assert r.json["data"]["aliquot"] == [{"a": "BLGSP-71-06-00019-01A-11D"}] def test_variable(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope(): - case = pg_driver_clean.nodes(models.Case).path('samples').one() - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + case = pg_driver_clean.nodes(models.Case).path("samples").one() + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test ($caseId: String) { a: case (id: $caseId) { submitter_id @@ -703,14 +885,16 @@ def test_variable(client, 
submitter, pg_driver_clean, cgci_blgsp): } } """, - 'variables': {'caseId': case.node_id} - })) + "variables": {"caseId": case.node_id}, + } + ), + ) - print r.data + print(r.data) assert r.json == { "data": { - 'a': [{"submitter_id": case.submitter_id}], - 'b': [{'cases': [{"submitter_id": case.submitter_id}]}], + "a": [{"submitter_id": case.submitter_id}], + "b": [{"cases": [{"submitter_id": case.submitter_id}]}], } } @@ -718,38 +902,36 @@ def test_variable(client, submitter, pg_driver_clean, cgci_blgsp): def test_null_variable(client, submitter, pg_driver_clean, cgci_blgsp): utils.reset_transactions(pg_driver_clean) post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test ($projectId: [String]) { a: _case_count (project_id: $projectId) t: _transaction_log_count(project_id: $projectId) } """, - })) + } + ), + ) with pg_driver_clean.session_scope(): cases = pg_driver_clean.nodes(models.Case).count() - print r.data - assert r.json == { - "data": { - 'a': cases, - 't': 1, - } - } + print(r.data) + assert r.json == {"data": {"a": cases, "t": 1,}} def test_property_lists(client, submitter, pg_driver_clean, cgci_blgsp): utils.reset_transactions(pg_driver_clean) post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope() as s: - s.merge( - models.Case('case1', submitter_id='s1', project_id='CGCI-BLGSP') - ) - s.merge( - models.Case('case2', submitter_id='s2', project_id='CGCI-BLGSP') - ) - data = json.dumps({ - 'query': """{ + s.merge(models.Case("case1", submitter_id="s1", project_id="CGCI-BLGSP")) + s.merge(models.Case("case2", submitter_id="s2", project_id="CGCI-BLGSP")) + data = json.dumps( + { + "query": """{ case (submitter_id: ["s1", "s2"]) { id submitter_id }, @@ -757,51 +939,58 @@ def test_property_lists(client, submitter, pg_driver_clean, cgci_blgsp): c2: _transaction_log_count(project_id: ["CGCI-FAKE"]) c3: _transaction_log_count(project_id: "CGCI-BLGSP") }""", - }) + } + ) response = client.post(path, headers=submitter, data=data) # fix for the unicode artifacts - expected_json = json.loads(json.dumps({ - "data": { - 'case': [ - {"id": "case1", "submitter_id": "s1"}, - {"id": "case2", "submitter_id": "s2"}, - ], - 'c1': 1, - 'c2': 0, - 'c3': 1, - } - })) + expected_json = json.loads( + json.dumps( + { + "data": { + "case": [ + {"id": "case1", "submitter_id": "s1"}, + {"id": "case2", "submitter_id": "s2"}, + ], + "c1": 1, + "c2": 0, + "c3": 1, + } + } + ) + ) assert response.json == expected_json, response.data def test_not_property(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope() as s: - s.merge( - models.Case('case1', submitter_id='s1', project_id='CGCI-BLGSP') - ) - s.merge( - models.Case('case2', submitter_id='s2', project_id='CGCI-BLGSP') - ) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """{ + s.merge(models.Case("case1", submitter_id="s1", project_id="CGCI-BLGSP")) + s.merge(models.Case("case2", submitter_id="s2", project_id="CGCI-BLGSP")) + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """{ case (not: {submitter_id: "s1"}, submitter_id: ["s1", "s2"]) { id submitter_id } }""", - })) + } + ), + ) assert r.json == { - "data": { - 'case': [ - {"id": "case2", "submitter_id": "s2"}, - ], - } + 
"data": {"case": [{"id": "case2", "submitter_id": "s2"},],} }, r.data def test_schema(client, submitter, pg_driver_clean): - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query IntrospectionQuery { __schema { queryType { name } @@ -878,17 +1067,27 @@ def test_schema(client, submitter, pg_driver_clean): } } } - """})) + """ + } + ), + ) assert r.status_code == 200 # Check the watermark of known types - assert len(r.json['data']['__schema']['types']) > 30 + assert len(r.json["data"]["__schema"]["types"]) > 30 def test_special_case_project_id( - client, submitter, pg_driver_clean, cgci_blgsp, put_tcga_brca, mock_arborist_requests): - data = json.dumps({ - 'query': """ + client, + submitter, + pg_driver_clean, + cgci_blgsp, + put_tcga_brca, + mock_arborist_requests, +): + data = json.dumps( + { + "query": """ { valid: project (project_id: "CGCI-BLGSP") { ...f } invalid: project (project_id: "TCGA-TEST") { ...f } @@ -896,92 +1095,89 @@ def test_special_case_project_id( } fragment f on project { project_id code } """ - }) + } + ) # the user has read access to CGCI-BLGSP and TCGA-BRCA - mock_arborist_requests(auth_mapping={ - "/programs/CGCI/projects/BLGSP": [ - { - "service": "peregrine", "method": "read" - } - ], - "/programs/TCGA/projects/BRCA": [ - { - "service": "peregrine", "method": "read" - } - ] - }) + mock_arborist_requests( + auth_mapping={ + "/programs/CGCI/projects/BLGSP": [ + {"service": "peregrine", "method": "read"} + ], + "/programs/TCGA/projects/BRCA": [ + {"service": "peregrine", "method": "read"} + ], + } + ) r = client.post(path, headers=submitter, data=data) - print r.data + print(r.data) assert r.json == { "data": { - 'valid': [{ - 'project_id': 'CGCI-BLGSP', - 'code': 'BLGSP' - }], - 'invalid': [], - 'multiple': [{ - 'project_id': 'TCGA-BRCA', - 'code': 'BRCA' - }, { - 'project_id': 'CGCI-BLGSP', - 'code': 'BLGSP' - }], + "valid": [{"project_id": "CGCI-BLGSP", "code": "BLGSP"}], + "invalid": [], + "multiple": [ + {"project_id": "TCGA-BRCA", "code": "BRCA"}, + {"project_id": "CGCI-BLGSP", "code": "BLGSP"}, + ], } } def test_catch_language_error(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """{ case-1: case (first: 1) { id }} """})) + r = client.post( + path, + headers=submitter, + data=json.dumps({"query": """{ case-1: case (first: 1) { id }} """}), + ) assert r.status_code == 400, r.data { "data": None, - "errors": [( - "Syntax Error GraphQL request (1:7) Expected Name, found Int" - " \"-1\"\n\n1: { case-1: case (first: 1) { id }} \n ^\n" - )] + "errors": [ + ( + "Syntax Error GraphQL request (1:7) Expected Name, found Int" + ' "-1"\n\n1: { case-1: case (first: 1) { id }} \n ^\n' + ) + ], } -@pytest.mark.skip(reason='must rewrite query') + +@pytest.mark.skip(reason="must rewrite query") def test_filter_empty_prop_list( - client, submitter, pg_driver_clean, cgci_blgsp, monkeypatch): + client, submitter, pg_driver_clean, cgci_blgsp, monkeypatch +): post_example_entities_together(client, pg_driver_clean, submitter) - utils.put_entity_from_file(client, 'read_group.json', submitter) + utils.put_entity_from_file(client, "read_group.json", submitter) utils.patch_indexclient(monkeypatch) - utils.put_entity_from_file( - client, 'submitted_unaligned_reads.json', submitter - ) + utils.put_entity_from_file(client, 
"submitted_unaligned_reads.json", submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """{ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """{ a: _case_count(submitter_id: []) b: _submitted_unaligned_reads_count c: _submitted_unaligned_reads_count(file_state: []) }""" - })) + } + ), + ) - assert r.json == { - 'data': { - 'a': 1, - 'b': 1, - 'c': 1, - } - } + assert r.json == {"data": {"a": 1, "b": 1, "c": 1,}} def test_submitted_unaligned_reads_with_path_to_read_group( - client, submitter, pg_driver_clean, cgci_blgsp): + client, submitter, pg_driver_clean, cgci_blgsp +): """Regression for incorrect counts""" post_example_entities_together(client, pg_driver_clean, submitter) - utils.put_entity_from_file(client, 'read_group.json', submitter) + utils.put_entity_from_file(client, "read_group.json", submitter) files = [ - models.SubmittedUnalignedReads( - 'file_{}'.format(i), project_id='CGCI-BLGSP' - ) + models.SubmittedUnalignedReads("file_{}".format(i), project_id="CGCI-BLGSP") for i in range(3) ] @@ -990,8 +1186,12 @@ def test_submitted_unaligned_reads_with_path_to_read_group( rg.submitted_unaligned_reads_files = files rg = s.merge(rg) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """{ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """{ read_group(id: "%s") { id _submitted_unaligned_reads_files_count @@ -999,22 +1199,22 @@ def test_submitted_unaligned_reads_with_path_to_read_group( submitted_unaligned_reads(with_path_to:{type: "read_group"}) { id } - }""" % rg.node_id - })) + }""" + % rg.node_id + } + ), + ) assert r.json == { "data": { - "read_group": [{ - "_submitted_unaligned_reads_files_count": 3, - "id": rg.node_id - }], - "submitted_unaligned_reads": [{ - "id": "file_0" - }, { - "id": "file_1" - }, { - "id": "file_2" - }] + "read_group": [ + {"_submitted_unaligned_reads_files_count": 3, "id": rg.node_id} + ], + "submitted_unaligned_reads": [ + {"id": "file_0"}, + {"id": "file_1"}, + {"id": "file_2"}, + ], } } @@ -1029,20 +1229,24 @@ def test_without_path_order(client, submitter, pg_driver_clean, cgci_blgsp): # Also remove samples links. 
cases = pg_driver_clean.nodes(models.Case).all() for c in cases: - if c.submitter_id == 'BLGSP-71-06-00019': - c.created_datetime = '2019-01-03T12:34:19.017404-06:00' - elif c.submitter_id == 'BLGSP-71-06-00020': - c.created_datetime = '2019-01-03T12:34:20.017404-06:00' - elif c.submitter_id == 'BLGSP-71-06-00021': - c.created_datetime = '2019-01-03T12:34:21.017404-06:00' - elif c.submitter_id == 'BLGSP-71-06-00022': - c.created_datetime = '2019-01-03T12:34:22.017404-06:00' - elif c.submitter_id == 'BLGSP-71-06-00023': - c.created_datetime = '2019-01-03T12:34:23.017404-06:00' + if c.submitter_id == "BLGSP-71-06-00019": + c.created_datetime = "2019-01-03T12:34:19.017404-06:00" + elif c.submitter_id == "BLGSP-71-06-00020": + c.created_datetime = "2019-01-03T12:34:20.017404-06:00" + elif c.submitter_id == "BLGSP-71-06-00021": + c.created_datetime = "2019-01-03T12:34:21.017404-06:00" + elif c.submitter_id == "BLGSP-71-06-00022": + c.created_datetime = "2019-01-03T12:34:22.017404-06:00" + elif c.submitter_id == "BLGSP-71-06-00023": + c.created_datetime = "2019-01-03T12:34:23.017404-06:00" c.samples = [] - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { case ( order_by_desc: "created_datetime", @@ -1051,7 +1255,9 @@ def test_without_path_order(client, submitter, pg_driver_clean, cgci_blgsp): { submitter_id } } """ - })) + } + ), + ) assert r.json == { "data": { @@ -1060,108 +1266,106 @@ def test_without_path_order(client, submitter, pg_driver_clean, cgci_blgsp): {"submitter_id": "BLGSP-71-06-00022"}, {"submitter_id": "BLGSP-71-06-00021"}, {"submitter_id": "BLGSP-71-06-00020"}, - {"submitter_id": "BLGSP-71-06-00019"} + {"submitter_id": "BLGSP-71-06-00019"}, ] } }, r.data -def test_read_group_with_path_to_case( - client, submitter, pg_driver_clean, cgci_blgsp): +def test_read_group_with_path_to_case(client, submitter, pg_driver_clean, cgci_blgsp): """Regression for incorrect counts""" put_example_entities_together(client, pg_driver_clean, submitter) - utils.put_entity_from_file(client, 'read_group.json', submitter) - data = json.dumps({ - 'query': """ + utils.put_entity_from_file(client, "read_group.json", submitter) + data = json.dumps( + { + "query": """ { _read_group_count (with_path_to: {type: "case"}) } """, - }) - r = client.post(path, headers=submitter, data=data) - assert r.json == { - "data": { - "_read_group_count": 1, } - } - - + ) + r = client.post(path, headers=submitter, data=data) + assert r.json == {"data": {"_read_group_count": 1,}} def test_tx_logs_async_fields(pg_driver_clean, graphql_client, cgci_blgsp): - assert graphql_client("""{ + assert ( + graphql_client( + """{ tx_log: transaction_log { is_dry_run, state, committed_by } - }""").json == { - "data": { - 'tx_log': [{ - "is_dry_run": False, - "state": "PENDING", - "committed_by": None - }], + }""" + ).json + == { + "data": { + "tx_log": [ + {"is_dry_run": False, "state": "PENDING", "committed_by": None} + ], + } } - } + ) def test_tx_logs_state(pg_driver_clean, graphql_client, cgci_blgsp, mock_tx_log): - assert graphql_client("""{ + assert ( + graphql_client( + """{ total: _transaction_log_count succeeded: _transaction_log_count(state: "SUCCEEDED") failed: _transaction_log_count(state: "FAILED") - }""").json == { - "data": { - "total": 1, - "succeeded": 1, - "failed": 0, - } - } + }""" + ).json + == {"data": {"total": 1, "succeeded": 1, "failed": 0,}} + ) def test_tx_logs_is_dry_run(pg_driver_clean, 
cgci_blgsp, mock_tx_log, graphql_client): - assert graphql_client("""{ + assert ( + graphql_client( + """{ total: _transaction_log_count is: _transaction_log_count(is_dry_run: true) isnt: _transaction_log_count(is_dry_run: false) - }""").json == { - "data": { - "total": 1, - "is": 1, - "isnt": 0, - } - } + }""" + ).json + == {"data": {"total": 1, "is": 1, "isnt": 0,}} + ) def test_tx_logs_committed_by(pg_driver_clean, cgci_blgsp, mock_tx_log, graphql_client): - assert graphql_client("""{ + assert ( + graphql_client( + """{ total: _transaction_log_count right: _transaction_log_count(committed_by: 12345) wrong: _transaction_log_count(committed_by: 54321) - }""").json == { - "data": { - "total": 1, - "right": 1, - "wrong": 0, - } - } + }""" + ).json + == {"data": {"total": 1, "right": 1, "wrong": 0,}} + ) def test_tx_logs_committable(pg_driver_clean, graphql_client, cgci_blgsp, mock_tx_log): - assert graphql_client("""{ + assert ( + graphql_client( + """{ total: _transaction_log_count committable: _transaction_log_count(committable: true) not_committable: _transaction_log_count(committable: false) - }""").json == { - "data": { - "total": 1, - "committable": 0, - "not_committable": 1, - } - } + }""" + ).json + == {"data": {"total": 1, "committable": 0, "not_committable": 1,}} + ) -@pytest.mark.skip(reason='we have different data') -def test_tx_logs_deletion(pg_driver_clean, graphql_client, cgci_blgsp, failed_deletion_transaction): - response = graphql_client("""{ + +@pytest.mark.skip(reason="we have different data") +def test_tx_logs_deletion( + pg_driver_clean, graphql_client, cgci_blgsp, failed_deletion_transaction +): + response = graphql_client( + """{ transaction_log(id: %s) { id type @@ -1178,30 +1382,44 @@ def test_tx_logs_deletion(pg_driver_clean, graphql_client, cgci_blgsp, failed_de } } } - """ % failed_deletion_transaction) + """ + % failed_deletion_transaction + ) assert response.json == { "data": { - "transaction_log": [{ - "documents": [{ - "response": { - "entities": [{ - 'related_cases': [{ - 'submitter_id': 'BLGSP-71-06-00019' - }], - 'unique_keys': json.dumps([{ - "project_id": "CGCI-BLGSP", - "submitter_id": "BLGSP-71-06-00019s", - }]), - "errors": [{ - "message": "Unable to delete entity because 4 others directly or indirectly depend on it. You can only delete this entity by deleting its dependents prior to, or during the same transaction as this one." - }] - }] - } - }], - "id": failed_deletion_transaction, - "type": "delete" - }] + "transaction_log": [ + { + "documents": [ + { + "response": { + "entities": [ + { + "related_cases": [ + {"submitter_id": "BLGSP-71-06-00019"} + ], + "unique_keys": json.dumps( + [ + { + "project_id": "CGCI-BLGSP", + "submitter_id": "BLGSP-71-06-00019s", + } + ] + ), + "errors": [ + { + "message": "Unable to delete entity because 4 others directly or indirectly depend on it. You can only delete this entity by deleting its dependents prior to, or during the same transaction as this one." 
+ } + ], + } + ] + } + } + ], + "id": failed_deletion_transaction, + "type": "delete", + } + ] } } @@ -1255,58 +1473,74 @@ def test_tx_logs_deletion(pg_driver_clean, graphql_client, cgci_blgsp, failed_de def test_tx_log_comprehensive_query_failed_upload( - pg_driver_clean, graphql_client, cgci_blgsp, failed_upload_transaction): + pg_driver_clean, graphql_client, cgci_blgsp, failed_upload_transaction +): """Test a comprehensive tx_log query for a failed upload""" response = graphql_client(COMPREHENSIVE_TX_LOG_QUERY) assert response.status_code == 200, response.data - assert 'errors' not in response.json, response.data + assert "errors" not in response.json, response.data def test_tx_log_comprehensive_query_upload( - pg_driver_clean, graphql_client, populated_blgsp): + pg_driver_clean, graphql_client, populated_blgsp +): """Test a comprehensive tx_log query for a successful upload""" response = graphql_client(COMPREHENSIVE_TX_LOG_QUERY) assert response.status_code == 200, response.data - assert 'errors' not in response.json, response.data + assert "errors" not in response.json, response.data def test_tx_log_comprehensive_query_failed_deletion( - pg_driver_clean, graphql_client, cgci_blgsp, failed_deletion_transaction): + pg_driver_clean, graphql_client, cgci_blgsp, failed_deletion_transaction +): """Test a comprehensive tx_log query for a failed deletion""" response = graphql_client(COMPREHENSIVE_TX_LOG_QUERY) assert response.status_code == 200, response.data - assert 'errors' not in response.json, response.data + assert "errors" not in response.json, response.data def test_nodetype_interface(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - category = dictionary.schema.values()[0]['category'] + category = list(dictionary.schema.values())[0]["category"] - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test {{ _node_type (category: "{}", first: 1) {{ id title category }} - }}""".format(category)})) + }}""".format( + category + ) + } + ), + ) - results = r.json.get('data', {}).get('_node_type', {}) + results = r.json.get("data", {}).get("_node_type", {}) assert len(results) == 1 for node in results: - assert 'id' in node - assert 'title' in node - assert 'category' in node - assert node['category'] == category + assert "id" in node + assert "title" in node + assert "category" in node + assert node["category"] == category def test_array_type_arg(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { case0: case (consent_codes: ["cc1"]) { consent_codes } case1: case (consent_codes: ["cc2"]) { consent_codes } @@ -1318,65 +1552,71 @@ def test_array_type_arg(client, submitter, pg_driver_clean, cgci_blgsp): case5: case (consent_codes: ["xcc1"]) { consent_codes } case6: case (consent_codes: ["cc1x"]) { consent_codes } } - """})) + """ + } + ), + ) expected_dict = { "data": { "case0": [ {"consent_codes": ["cc1", "cc2", "cc3"]}, - {"consent_codes": ["cc1"]} + {"consent_codes": ["cc1"]}, ], "case1": [ {"consent_codes": ["cc2"]}, - {"consent_codes": ["cc1", "cc2", "cc3"]} - ], - "case2": [ {"consent_codes": ["cc1", "cc2", "cc3"]}, ], - "case3": [ - ], + "case2": [{"consent_codes": ["cc1", 
"cc2", "cc3"]},], + "case3": [], "case4": [ {"consent_codes": ["cc1"]}, - {"consent_codes": ["cc1", "cc2", "cc3"]} - ], - "case5": [ - {"consent_codes": ["xcc1"]} + {"consent_codes": ["cc1", "cc2", "cc3"]}, ], - "case6": [ - {"consent_codes": ["cc1x"]} - ] + "case5": [{"consent_codes": ["xcc1"]}], + "case6": [{"consent_codes": ["cc1x"]}], } } # Lists are ordered but here order does not matter so we sort them before comparing. for k, v in expected_dict["data"].items(): - expected_dict["data"][k] = sorted(v) + expected_dict["data"][k] = sorted(v, key=(lambda x: sorted(x.items()))) for k, v in r.json["data"].items(): - r.json["data"][k] = sorted(v) - assert json.dumps(r.json, sort_keys=True) == json.dumps(expected_dict, sort_keys=True) + r.json["data"][k] = sorted(v, key=(lambda x: sorted(x.items()))) + assert json.dumps(r.json, sort_keys=True) == json.dumps( + expected_dict, sort_keys=True + ) def test_invalid_array_arg(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { case0: case (project_id : [["list", "of"], ["lists"]]) { id } } """ - })) + } + ), + ) assert r.json == { - 'data': None, - 'errors': ['Argument "project_id" has invalid value [["list", "of"], ["lists"]].\nIn element #0: Expected type "String", found ["list", "of"].\nIn element #1: Expected type "String", found ["lists"].'] + "data": None, + "errors": [ + 'Argument "project_id" has invalid value [["list", "of"], ["lists"]].\nIn element #0: Expected type "String", found ["list", "of"].\nIn element #1: Expected type "String", found ["lists"].' + ], } def test_datanode(graphql_client, client, submitter, pg_driver_clean, cgci_blgsp): obj_id = str(random.random())[:8] post_example_entities_together(client, pg_driver_clean, submitter) - utils.put_entity_from_file(client, 'read_group.json', submitter) + utils.put_entity_from_file(client, "read_group.json", submitter) files = [ models.SubmittedUnalignedReads( - 'file_131', project_id='CGCI-BLGSP', object_id=obj_id, + "file_131", project_id="CGCI-BLGSP", object_id=obj_id, ) ] @@ -1384,7 +1624,7 @@ def test_datanode(graphql_client, client, submitter, pg_driver_clean, cgci_blgsp rg = pg_driver_clean.nodes(models.ReadGroup).one() rg.submitted_unaligned_reads_files = files s.merge(rg) - j1 = graphql_client('{datanode {object_id}}').json + j1 = graphql_client("{datanode {object_id}}").json j2 = graphql_client('{datanode(object_id: "%s") {object_id}}' % obj_id).json assert j1 == j2 @@ -1393,58 +1633,82 @@ def test_boolean_filter(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) # make sure the existing data is what is expected - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ { experiment { copy_numbers_identified } } """ - })) + } + ), + ) print("Existing data should contain a single experiment:") print(r.data) assert len(r.json["data"]["experiment"]) == 1 assert r.json["data"]["experiment"][0]["copy_numbers_identified"] == True # test boolean filter true - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ { experiment (copy_numbers_identified: true) { 
copy_numbers_identified } } """ - })) + } + ), + ) print("Filtering by boolean=true should return the experiment:") print(r.data) assert len(r.json["data"]["experiment"]) == 1 # test boolean filter false - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ { experiment (copy_numbers_identified: false) { copy_numbers_identified } } """ - })) + } + ), + ) print("Filtering by boolean=false should not return any data:") print(r.data) assert len(r.json["data"]["experiment"]) == 0 # test boolean filter [true,false] - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ { experiment (copy_numbers_identified: [true,false]) { copy_numbers_identified } } """ - })) + } + ), + ) print("Filtering by boolean=[true,false] should return the experiment:") print(r.data) assert len(r.json["data"]["experiment"]) == 1 @@ -1454,8 +1718,12 @@ def test_datetime_filters(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) # make sure the existing data is what is expected - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ { experiment { id @@ -1464,7 +1732,9 @@ def test_datetime_filters(client, submitter, pg_driver_clean, cgci_blgsp): } } """ - })) + } + ), + ) print("in DB:", r.data) assert len(r.json["data"]["experiment"]) == 1 experiment_id = r.json["data"]["experiment"][0]["id"] @@ -1478,61 +1748,95 @@ def test_datetime_filters(client, submitter, pg_driver_clean, cgci_blgsp): yesterday_str = (created_datetime - timedelta(days=1)).strftime(date_format) # test created_after filter - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ {{ experiment (created_after: "{}") {{ id }} }} - """.format(yesterday_str) - })) + """.format( + yesterday_str + ) + } + ), + ) print("created_after query result:", r.data) assert len(r.json["data"]["experiment"]) == 1 assert r.json["data"]["experiment"][0]["id"] == experiment_id # test created_before filter - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ {{ experiment (created_before: "{}") {{ id }} }} - """.format(yesterday_str) - })) + """.format( + yesterday_str + ) + } + ), + ) print("created_before query result:", r.data) assert len(r.json["data"]["experiment"]) == 0 # test updated_after filter - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ {{ experiment (updated_after: "{}") {{ id }} }} - """.format(yesterday_str) - })) + """.format( + yesterday_str + ) + } + ), + ) print("updated_after query result:", r.data) assert len(r.json["data"]["experiment"]) == 1 assert r.json["data"]["experiment"][0]["id"] == experiment_id # test updated_before filter - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ {{ experiment (updated_before: "{}") {{ id }} }} - """.format(yesterday_str) - })) + """.format( + yesterday_str + ) + } + ), + ) print("updated_before query 
result:", r.data) assert len(r.json["data"]["experiment"]) == 0 -def test_arborist_unknown_user(client, pg_driver_clean, submitter, cgci_blgsp, mock_arborist_requests): +def test_arborist_unknown_user( + client, pg_driver_clean, submitter, cgci_blgsp, mock_arborist_requests +): """ Tests that if a logged in user does not exist in the DB, peregrine does not throw an error but gracefully returns no data @@ -1540,8 +1844,6 @@ def test_arborist_unknown_user(client, pg_driver_clean, submitter, cgci_blgsp, m post_example_entities_together(client, pg_driver_clean, submitter) mock_arborist_requests(known_user=False) r = client.post( - path, - headers=submitter, - data=json.dumps({"query": "{ project { code } }"}) + path, headers=submitter, data=json.dumps({"query": "{ project { code } }"}) ) - assert r.json == { "data": { "project": [] } } + assert r.json == {"data": {"project": []}} diff --git a/tests/graphql/test_transaction_logs.py b/tests/graphql/test_transaction_logs.py index 4c49ef2d..ec7ed7b9 100644 --- a/tests/graphql/test_transaction_logs.py +++ b/tests/graphql/test_transaction_logs.py @@ -2,12 +2,12 @@ import pytest from datamodelutils import models -from test_graphql import post_example_entities_together +from .test_graphql import post_example_entities_together from peregrine.resources.submission import graphql from tests.graphql import utils -path = '/v0/submission/graphql' +path = "/v0/submission/graphql" query_dry_run = """ @@ -44,9 +44,7 @@ not_committable: _transaction_log_count(committable: false) } """ -result_committable = { - "data": {"total": 1, "committable": 0, "not_committable": 1} -} +result_committable = {"data": {"total": 1, "committable": 0, "not_committable": 1}} query_async_fields = """ { @@ -55,19 +53,21 @@ } } """ -result_async_fields = json.loads(json.dumps({ - 'data': { - 'tx_log': [{ - "is_dry_run": True, - "state": "SUCCEEDED", - "committed_by": '12345', - }], - } -})) +result_async_fields = json.loads( + json.dumps( + { + "data": { + "tx_log": [ + {"is_dry_run": True, "state": "SUCCEEDED", "committed_by": "12345",} + ], + } + } + ) +) @pytest.mark.parametrize( - 'query, expected_json', + "query, expected_json", [ (query_dry_run, result_dry_run), (query_state, result_state), @@ -75,36 +75,33 @@ (query_committable, result_committable), (query_async_fields, result_async_fields), ], - ids=[ - 'dry_run', - 'state', - 'committed_by', - 'committable', - 'async_fields', - ] + ids=["dry_run", "state", "committed_by", "committable", "async_fields",], ) def test_transaction_logs_queries( - pg_driver_clean, cgci_blgsp, mock_tx_log, graphql_client, query, - expected_json): + pg_driver_clean, cgci_blgsp, mock_tx_log, graphql_client, query, expected_json +): with pg_driver_clean.session_scope() as session: session.query(models.submission.TransactionSnapshot).delete() session.query(models.submission.TransactionDocument).delete() session.query(models.submission.TransactionLog).delete() - session.merge(models.submission.TransactionLog( - is_dry_run=True, - program='CGCI', - project='BLGSP', - role='create', - state='SUCCEEDED', - committed_by=12345, - closed=False, - )) + session.merge( + models.submission.TransactionLog( + is_dry_run=True, + program="CGCI", + project="BLGSP", + role="create", + state="SUCCEEDED", + committed_by=12345, + closed=False, + ) + ) assert graphql_client(query).json == expected_json -@pytest.mark.skip(reason='fails with AuthError in failed_deletion_transaction') +@pytest.mark.skip(reason="fails with AuthError in failed_deletion_transaction") def 
test_transaction_logs_deletion( - pg_driver_clean, graphql_client, failed_deletion_transaction): + pg_driver_clean, graphql_client, failed_deletion_transaction +): query = """ { transaction_log(id: %s) { @@ -127,32 +124,44 @@ def test_transaction_logs_deletion( response = graphql_client(query % failed_deletion_transaction) expected_json = { "data": { - "transaction_log": [{ - "documents": [{ - "response": { - "entities": [{ - 'related_cases': [{ - 'submitter_id': 'BLGSP-71-06-00019' - }], - 'unique_keys': json.dumps([{ - "project_id": "CGCI-BLGSP", - "submitter_id": "BLGSP-71-06-00019s", - }]), - "errors": [{ - "message": ( - 'Unable to delete entity because 4 others' - ' directly or indirectly depend on it. You' - ' can only delete this entity by deleting' - ' its dependents prior to, or during the' - ' same transaction as this one.' - ) - }] - }] - } - }], - "id": failed_deletion_transaction, - "type": "delete" - }] + "transaction_log": [ + { + "documents": [ + { + "response": { + "entities": [ + { + "related_cases": [ + {"submitter_id": "BLGSP-71-06-00019"} + ], + "unique_keys": json.dumps( + [ + { + "project_id": "CGCI-BLGSP", + "submitter_id": "BLGSP-71-06-00019s", + } + ] + ), + "errors": [ + { + "message": ( + "Unable to delete entity because 4 others" + " directly or indirectly depend on it. You" + " can only delete this entity by deleting" + " its dependents prior to, or during the" + " same transaction as this one." + ) + } + ], + } + ] + } + } + ], + "id": failed_deletion_transaction, + "type": "delete", + } + ] } } assert response.json == expected_json @@ -206,30 +215,33 @@ def test_transaction_logs_deletion( }""" -@pytest.mark.skip(reason='fails with AuthError in failed_upload_transaction') +@pytest.mark.skip(reason="fails with AuthError in failed_upload_transaction") def test_transaction_log_comprehensive_query_failed_upload( - pg_driver_clean, graphql_client, cgci_blgsp, failed_upload_transaction): + pg_driver_clean, graphql_client, cgci_blgsp, failed_upload_transaction +): """Test a comprehensive transaction_log query for a failed upload""" response = graphql_client(COMPREHENSIVE_TRANSACTION_LOG_QUERY) assert response.status_code == 200, response.data - assert 'errors' not in response.json, response.data + assert "errors" not in response.json, response.data def test_transaction_log_comprehensive_query_upload( - pg_driver_clean, graphql_client, populated_blgsp): + pg_driver_clean, graphql_client, populated_blgsp +): """Test a comprehensive transaction_log query for a successful upload""" response = graphql_client(COMPREHENSIVE_TRANSACTION_LOG_QUERY) assert response.status_code == 200, response.data - assert 'errors' not in response.json, response.data + assert "errors" not in response.json, response.data -@pytest.mark.skip(reason='fails with AuthError in failed_deletion_transaction') +@pytest.mark.skip(reason="fails with AuthError in failed_deletion_transaction") def test_transaction_log_comprehensive_query_failed_deletion( - pg_driver_clean, graphql_client, failed_deletion_transaction): + pg_driver_clean, graphql_client, failed_deletion_transaction +): """Test a comprehensive transaction_log query for a failed deletion""" response = graphql_client(COMPREHENSIVE_TRANSACTION_LOG_QUERY) assert response.status_code == 200, response.data - assert 'errors' not in response.json, response.data + assert "errors" not in response.json, response.data def test_transaction_logs(client, submitter, pg_driver_clean, cgci_blgsp): @@ -239,78 +251,97 @@ def test_transaction_logs(client, 
submitter, pg_driver_clean, cgci_blgsp): assert pg_driver_clean.nodes(models.submission.TransactionLog).count() == 1 -@pytest.mark.skip(reason='deprecated') -def test_transaction_log_related_cases( - client, submitter, pg_driver_clean, cgci_blgsp): +@pytest.mark.skip(reason="deprecated") +def test_transaction_log_related_cases(client, submitter, pg_driver_clean, cgci_blgsp): utils.reset_transactions(pg_driver_clean) post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """ query Test { a: transaction_log (first: 1) { type documents {response { entities {related_cases { id submitter_id }}}}} - }"""})) + }""" + } + ), + ) assert r.status_code == 200 print(r.data) - related_case = (r.json['data'] - ['a'][0] - ['documents'][0] - ['response'] - ['entities'][1] - ['related_cases'][0]) - assert 'submitter_id' in related_case - assert 'id' in related_case + related_case = r.json["data"]["a"][0]["documents"][0]["response"]["entities"][1][ + "related_cases" + ][0] + assert "submitter_id" in related_case + assert "id" in related_case -@pytest.mark.skip(reason='deprecated') +@pytest.mark.skip(reason="deprecated") def test_transaction_log_related_cases_filter( - client, submitter, pg_driver_clean, cgci_blgsp): + client, submitter, pg_driver_clean, cgci_blgsp +): utils.reset_transactions(pg_driver_clean) post_example_entities_together(client, pg_driver_clean, submitter) - data = json.dumps({ - 'query': """ + data = json.dumps( + { + "query": """ {a: transaction_log (first: 1) { related_cases { id }}} """ - }) + } + ) r = client.post(path, headers=submitter, data=data) assert r.status_code == 200 - print r.data - case_id = r.json['data']['a'][0]['related_cases'][0]['id'] - data = json.dumps({ - 'query': """ + print(r.data) + case_id = r.json["data"]["a"][0]["related_cases"][0]["id"] + data = json.dumps( + { + "query": """ query Test($caseId: String) { a: transaction_log (related_cases: [$caseId]) { related_cases { id submitter_id } } } """, - "variables": {"caseId": case_id}, - }) + "variables": {"caseId": case_id}, + } + ) r = client.post(path, headers=submitter, data=data) assert r.status_code == 200 - print r.data - related_case_doc = r.json['data']['a'][0]['related_cases'][0] - assert related_case_doc['id'] == case_id - assert related_case_doc['submitter_id'] + print(r.data) + related_case_doc = r.json["data"]["a"][0]["related_cases"][0] + assert related_case_doc["id"] == case_id + assert related_case_doc["submitter_id"] def test_transaction_log_type(client, submitter, pg_driver_clean, cgci_blgsp): utils.reset_transactions(pg_driver_clean) post_example_entities_together(client, pg_driver_clean, submitter) - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """{ a: transaction_log { role type }}"""})) - print r.data - type_ = graphql.transaction.TransactionLog.TYPE_MAP['create'] - assert r.json['data']['a'][0]['type'] == type_ - r = client.post(path, headers=submitter, data=json.dumps({ - 'query': """{ + r = client.post( + path, + headers=submitter, + data=json.dumps({"query": """{ a: transaction_log { role type }}"""}), + ) + print(r.data) + type_ = graphql.transaction.TransactionLog.TYPE_MAP["create"] + assert r.json["data"]["a"][0]["type"] == type_ + r = client.post( + path, + headers=submitter, + data=json.dumps( + { + "query": """{ a: transaction_log(type: "%s") { role type } - }""" % type_})) - print r.data 
-    assert r.json['data']['a']
+        }"""
+                % type_
+            }
+        ),
+    )
+    print(r.data)
+    assert r.json["data"]["a"]


 def test_transaction_log_type_map():
@@ -328,30 +359,45 @@ def test_transaction_log_entities(client, submitter, pg_driver_clean, cgci_blgsp
     post_example_entities_together(client, pg_driver_clean, submitter)

     # using response
-    r = client.post(path, headers=submitter, data=json.dumps({
-        'query': """{ log: transaction_log {
-        doc: documents { resp: response { ent: entities { type }}}}}"""}))
+    r = client.post(
+        path,
+        headers=submitter,
+        data=json.dumps(
+            {
+                "query": """{ log: transaction_log {
+        doc: documents { resp: response { ent: entities { type }}}}}"""
+            }
+        ),
+    )
     assert r.status_code == 200
-    entities = r.json['data']['log'][0]['doc'][0]['resp']['ent']
-    assert all(e['type'] for e in entities)
+    entities = r.json["data"]["log"][0]["doc"][0]["resp"]["ent"]
+    assert all(e["type"] for e in entities)

     # using response_json
-    r = client.post(path, headers=submitter, data=json.dumps({
-        'query': """{ log: transaction_log {
-        doc: documents { resp: response_json }}}"""}))
+    r = client.post(
+        path,
+        headers=submitter,
+        data=json.dumps(
+            {
+                "query": """{ log: transaction_log {
+        doc: documents { resp: response_json }}}"""
+            }
+        ),
+    )
     assert r.status_code == 200
-    resp = json.loads(r.json['data']['log'][0]['doc'][0]['resp'])
-    assert all(entity['type'] for entity in resp['entities'])
+    resp = json.loads(r.json["data"]["log"][0]["doc"][0]["resp"])
+    assert all(entity["type"] for entity in resp["entities"])


 def test_transaction_log_entities_errors(
-        client, submitter, pg_driver_clean, cgci_blgsp):
+    client, submitter, pg_driver_clean, cgci_blgsp
+):
     utils.reset_transactions(pg_driver_clean)
     post_example_entities_together(client, pg_driver_clean, submitter)
     put_response = utils.put_entity_from_file(
-        client, 'read_group_invalid.json', submitter=submitter, validate=False
+        client, "read_group_invalid.json", submitter=submitter, validate=False
     )
-    transaction_id = put_response.json.get('transaction_id')
+    transaction_id = put_response.json.get("transaction_id")

     # using response
     query = """
@@ -360,11 +406,9 @@ def test_transaction_log_entities_errors(
         err: errors {{ type keys message }} }} }} }} }} }}
     """
     query = query.format(transaction_id)
-    r = client.post(path, headers=submitter, data=json.dumps({
-        'query': query
-    }))
+    r = client.post(path, headers=submitter, data=json.dumps({"query": query}))
     assert r.status_code == 200
-    error = r.json['data']['log'][0]['doc'][0]['resp']['ent'][0]['err'][0]
+    error = r.json["data"]["log"][0]["doc"][0]["resp"]["ent"][0]["err"][0]

     # using response_json
     query = """
@@ -373,25 +417,30 @@ def test_transaction_log_entities_errors(
     }} }} }}
     """
     query = query.format(transaction_id)
-    r = client.post(path, headers=submitter, data=json.dumps({
-        'query': query
-    }))
+    r = client.post(path, headers=submitter, data=json.dumps({"query": query}))
     assert r.status_code == 200
-    resp = json.loads(r.json['data']['log'][0]['doc'][0]['resp'])
-    error = resp['entities'][0]['errors'][0]
-    assert all(key in error for key in ('type', 'keys', 'message'))
+    resp = json.loads(r.json["data"]["log"][0]["doc"][0]["resp"])
+    error = resp["entities"][0]["errors"][0]
+    assert all(key in error for key in ("type", "keys", "message"))


 def test_transaction_log_documents(client, submitter, pg_driver_clean, cgci_blgsp):
     utils.reset_transactions(pg_driver_clean)
     post_example_entities_together(client, pg_driver_clean, submitter)
-    r = client.post(path, headers=submitter, data=json.dumps({
-        'query': """{ log: transaction_log {
-        doc: documents { doc_size name }}}"""}))
-    doc = r.json['data']['log'][0]['doc'][0]
-    assert doc['name'] is None
-    assert isinstance(doc['doc_size'], int)
+    r = client.post(
+        path,
+        headers=submitter,
+        data=json.dumps(
+            {
+                "query": """{ log: transaction_log {
+        doc: documents { doc_size name }}}"""
+            }
+        ),
+    )
+    doc = r.json["data"]["log"][0]["doc"][0]
+    assert doc["name"] is None
+    assert isinstance(doc["doc_size"], int)


 def test_transaction_logs_order_asc(client, submitter, pg_driver_clean, cgci_blgsp):
@@ -399,17 +448,24 @@ def test_transaction_logs_order_asc(client, submitter, pg_driver_clean, cgci_blg
     post_example_entities_together(client, pg_driver_clean, submitter)
     with pg_driver_clean.session_scope():
         assert pg_driver_clean.nodes(models.submission.TransactionLog).count() == 1
-    r = client.post(path, headers=submitter, data=json.dumps({
-        'query': """{
+    r = client.post(
+        path,
+        headers=submitter,
+        data=json.dumps(
+            {
+                "query": """{
         a: transaction_log (order_by_asc: "id") {
           id project_id created_datetime
         }
-    }"""}))
-    print r.data
-    _original = r.json['data']['a']
-    _sorted = sorted(_original, cmp=(lambda a, b: cmp(a['id'], b['id'])))
+    }"""
+            }
+        ),
+    )
+    print(r.data)
+    _original = r.json["data"]["a"]
+    _sorted = sorted(_original, key=(lambda x: x["id"]))
     assert _original == _sorted, r.data


@@ -418,36 +474,46 @@ def test_transaction_logs_order_desc(client, submitter, pg_driver_clean, cgci_bl
     post_example_entities_together(client, pg_driver_clean, submitter)
     with pg_driver_clean.session_scope():
         assert pg_driver_clean.nodes(models.submission.TransactionLog).count() == 1
-    r = client.post(path, headers=submitter, data=json.dumps({
-        'query': """{
+    r = client.post(
+        path,
+        headers=submitter,
+        data=json.dumps(
+            {
+                "query": """{
         a: transaction_log (order_by_desc: "id") {
           id project_id created_datetime
        }
-    }"""}))
-    print r.data
-    _original = r.json['data']['a']
-    _sorted = sorted(_original, cmp=(lambda a, b: cmp(b['id'], a['id'])))
+    }"""
+            }
+        ),
+    )
+    print(r.data)
+    _original = r.json["data"]["a"]
+    _sorted = sorted(_original, key=(lambda x: x["id"]), reverse=True)
     assert _original == _sorted, r.data


-def test_transaction_logs_quick_search(
-        client, submitter, pg_driver_clean, cgci_blgsp):
+def test_transaction_logs_quick_search(client, submitter, pg_driver_clean, cgci_blgsp):
     utils.reset_transactions(pg_driver_clean)
     post_example_entities_together(client, pg_driver_clean, submitter)
     with pg_driver_clean.session_scope():
         id_ = str(pg_driver_clean.nodes(models.submission.TransactionLog).first().id)
-    r = client.post(path, headers=submitter, data=json.dumps({
-        'query': """{
+    r = client.post(
+        path,
+        headers=submitter,
+        data=json.dumps(
+            {
+                "query": """{
            a: transaction_log (quick_search: "%s") { id }
            b: transaction_log (quick_search: %s) { id }
            c: transaction_log (quick_search: "A") { id }
-        }""" % (id_, id_)}))
+        }"""
+                % (id_, id_)
+            }
+        ),
+    )
     assert r.json == {
-        'data': {
-            'a': [{'id': id_}],
-            'b': [{'id': id_}],
-            'c': [],
-        }
+        "data": {"a": [{"id": id_}], "b": [{"id": id_}], "c": [],}
     }, r.data
diff --git a/tests/graphql/utils.py b/tests/graphql/utils.py
index a8c6cdde..ca6a4f91 100644
--- a/tests/graphql/utils.py
+++ b/tests/graphql/utils.py
@@ -7,34 +7,35 @@
 from datamodelutils import models

-DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
+DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")

 # https://stackoverflow.com/questions/373194/python-regex-for-md5-hash
-re_md5 = re.compile(r'(i?)(?