Skip to content

Commit

Permalink
Download visuals using client (#810)
Browse files Browse the repository at this point in the history
* wip add visual download to client

* fix login issue

* refactor and add docstring

* add test (skipped) and lint

* add success message

* ruff ruff

* rewrite with a context manager

* continue

* try to fix ci for now

* create a context manager for visual downloads

* update tests

* unused import

* revise class location and fix circular import

* fix imports

* remove unused functions

* establish cookie based session for playwright

* lint

* try to fix tests

* raise HawcClientException instead of timing out

* fix so we can run from notebooks

* write a test script!

* cleanup

* complete test suite

* wait for page to load

* wait

* require python 3.10+ w/ match statement

* simplify plotly logic

* hide django toolbar if present

---------

Co-authored-by: Andy Shapiro <shapiromatron@gmail.com>
  • Loading branch information
munnsmunns and shapiromatron authored Jan 9, 2024
1 parent 08d22f4 commit f5f6812
Show file tree
Hide file tree
Showing 14 changed files with 267 additions and 10 deletions.
4 changes: 4 additions & 0 deletions client/hawc_client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .epimeta import EpiMetaClient
from .epiv2 import EpiV2Client
from .exceptions import HawcClientException, HawcServerException
from .interactive import InteractiveHawcClient
from .invitro import InvitroClient
from .literature import LiteratureClient
from .riskofbias import RiskOfBiasClient
Expand Down Expand Up @@ -73,3 +74,6 @@ def set_authentication_token(self, token: str, login: bool = False) -> bool:
bool: Returns true if session is valid
"""
return self.session.set_authentication_token(token, login)

def interactive(self, headless: bool = True) -> InteractiveHawcClient:
    """Create an interactive client that wraps this client.

    Args:
        headless (bool, optional): Passed through to InteractiveHawcClient. Defaults to True.

    Returns:
        InteractiveHawcClient: a new instance constructed with this client.
    """
    interactive_client = InteractiveHawcClient(client=self, headless=headless)
    return interactive_client
151 changes: 151 additions & 0 deletions client/hawc_client/interactive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
from io import BytesIO
from pathlib import Path

from playwright._impl._api_structures import SetCookieParam
from playwright.async_api import Page, expect
from playwright.async_api._context_manager import PlaywrightContextManager as pcm

from .client import BaseClient
from .exceptions import HawcClientException


async def remove_dj_toolbar(page: Page):
    """Remove the `#djDebug` element from the page, if one exists.

    The existence check and the removal happen inside a single script evaluated in
    the page. This avoids returning a DOM node to Python (DOM nodes are not
    serializable values, so their Python-side truthiness is not something to rely on)
    and avoids a second round-trip between the check and the removal.

    Args:
        page (Page): a page instance
    """
    await page.evaluate(
        "() => { const el = document.querySelector('#djDebug'); if (el) { el.remove(); } }"
    )


async def fetch_png(page: Page) -> BytesIO:
    """Download the visual shown on the current page as a PNG.

    The page is expected to expose its visual type in a
    `<meta name="hawc-viz-type" data-viz-type="...">` tag; that value selects which
    page controls are clicked to trigger the download.

    Args:
        page (Page): a page instance, already navigated to a visualization

    Returns:
        BytesIO: The PNG image, in bytes

    Raises:
        ValueError: if the visual type is not recognized, or if the download has no path.
    """
    # visual types handled by clicking a button containing a download icon, then the
    # "Download as a PNG" text
    menu_types = (
        "data pivot",
        "animal bioassay endpoint aggregation",
        "animal bioassay endpoint crossview",
        "risk of bias heatmap",
        "risk of bias barchart",
        "literature tagtree",
        "exploratory heatmap",
    )

    # wait until the page has loaded and no loading indicators remain
    await page.wait_for_load_state("load")
    await expect(page.locator(".is-loading")).to_have_count(0)
    await remove_dj_toolbar(page)

    viz_type = await page.evaluate(
        "document.querySelector('meta[name=hawc-viz-type]').dataset.vizType"
    )

    # static images have no download control; screenshot the image element instead
    if viz_type == "static image":
        screenshot = await page.locator("#visual-image img").screenshot(type="png")
        return BytesIO(screenshot)

    if viz_type in menu_types:
        opener = page.locator("button", has=page.locator("i.fa-download"))
        trigger = page.locator("text=Download as a PNG")
    elif viz_type == "embedded external website":
        frame = page.frame_locator("iframe")
        opener = frame.locator('div[role="button"]:has-text("Download")')
        trigger = frame.locator('button:has-text("Image")')
    elif viz_type == "plotly":
        # a single click on the first modebar button starts the download
        opener = None
        trigger = page.locator(".js-plotly-plot .modebar-btn").first
    else:
        raise ValueError(f"Unknown visual type: {viz_type}")

    if opener is not None:
        await opener.click()

    async with page.expect_download() as download_info:
        await trigger.click()
    download = await download_info.value
    downloaded_path = await download.path()
    if downloaded_path is None:
        raise ValueError("Download failed")
    return BytesIO(downloaded_path.read_bytes())


PathLike = Path | str | None


def write_to_file(data: BytesIO, path: PathLike) -> None:
"""Write to a file, given a path-like object"""
if path is None:
return
if isinstance(path, str):
path = Path(path)
path.write_bytes(data.getvalue())


class InteractiveHawcClient:
    """
    An asynchronous context manager for downloading assessment visuals.

    On entry, a Chromium browser is launched with playwright and the cookies from
    the client's session are added to the browser context. On exit, the browser
    context is closed and playwright is stopped.

    Example:
        async with client.interactive() as iclient:
            await iclient.download_visual(123, "visual.png")
    """

    def __init__(self, client: BaseClient, headless: bool = True):
        self.client = client
        self.headless = headless

    async def __aenter__(self):
        # Validate cookies before launching anything: __aexit__ is not called when
        # __aenter__ raises, so failing early avoids leaking a running browser.
        cookies = [
            SetCookieParam(name=k, value=v, url=self.client.session.root_url)
            for k, v in self.client.session._session.cookies.items()
        ]
        if not cookies:
            raise HawcClientException(
                403,
                "No cookies found on client session;\nwhen setting authorization token; set login to True",
            )
        self.playwright = await pcm().start()
        try:
            browser = await self.playwright.chromium.launch(headless=self.headless)
            self.context = await browser.new_context()
            self.page = await self.context.new_page()
            await self.context.add_cookies(cookies)
        except BaseException:
            # setup failed part-way; stop playwright since __aexit__ will not run
            await self.playwright.stop()
            raise
        return self

    async def __aexit__(self, *args) -> None:
        try:
            await self.context.close()
        finally:
            # always stop playwright, even if closing the context fails
            await self.playwright.stop()

    async def _download(self, url: str, fn: PathLike) -> BytesIO:
        """Navigate to a URL, fetch its visual as a PNG, and optionally write it to disk."""
        # ensure response is OK before waiting
        response = await self.page.goto(url)
        if response and not response.ok:
            raise HawcClientException(response.status, response.status_text)
        # attempt to fetch PNG
        data = await fetch_png(self.page)
        write_to_file(data, fn)
        return data

    async def download_visual(self, id: int, fn: PathLike = None) -> BytesIO:
        """Download a PNG visualization given a visual ID

        Args:
            id (int): The visual ID
            fn (PathLike, optional): If a path or string is specified, the PNG is written to
                that location. If None (default), no data is written to a Path.

        Returns:
            BytesIO: the PNG representation of the visual, in bytes.

        Raises:
            HawcClientException: if the page response is not OK.
        """
        url = f"{self.client.session.root_url}/summary/visual/{id}/"
        return await self._download(url, fn)

    async def download_data_pivot(self, id: int, fn: PathLike = None) -> BytesIO:
        """Download a PNG data pivot given a data pivot ID

        Args:
            id (int): The data pivot ID
            fn (PathLike, optional): If a path or string is specified, the PNG is written to
                that location. If None (default), no data is written to a Path.

        Returns:
            BytesIO: the PNG representation of the data pivot, in bytes.

        Raises:
            HawcClientException: if the page response is not OK.
        """
        url = f"{self.client.session.root_url}/summary/data-pivot/{id}/"
        return await self._download(url, fn)
5 changes: 3 additions & 2 deletions client/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,16 @@ keywords = [
]
classifiers = [
"Intended Audience :: Science/Research",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
requires-python = ">=3.9"
requires-python = ">=3.10"
dependencies = [
"rapidfuzz",
"requests",
"pandas",
"playwright",
"tqdm",
]

Expand Down
13 changes: 13 additions & 0 deletions docs/docs/client.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,20 @@ client.set_authentication_token(token=getpass())

# get all references for an assessment
client.lit.references(assessment_id=123)
```

An interactive client also exists which downloads figures or visualizations:

```python
from getpass import getpass
from hawc_client import HawcClient

client = HawcClient("https://hawcproject.org")
client.set_authentication_token(getpass(), login=True) # must set login to True

async with client.interactive(headless=False) as iclient:
    await iclient.download_visual(123, 'visual.png')
    await iclient.download_data_pivot(456, 'data-pivot.png')
```

There are many more commands available in the HAWC client that aren't documented here. It is recommended to use an interactive terminal session using a jupyter notebook to browse the available methods and their docstrings for more details.
Expand Down
20 changes: 16 additions & 4 deletions docs/docs/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ make startdb
```

## Local Settings

### Django settings inheritance

HAWC settings are structured according to the django settings framework. Within ``hawc/main/settings``, there are a number of settings files that inherit using the following pattern:
Expand Down Expand Up @@ -218,7 +219,6 @@ HAWC settings are structured according to the django settings framework. Within

To make changes to your local environment, create (and then modify) ``hawc/main/settings/local.py``. This file is not created by default (and is not tracked in git), but a template can be copied and renamed from ``hawc/main/settings/local.example.py`` as a starting point. You can make changes to this file to configure your local environment, such as which database is used or the "flavor" of HAWC (see "More Settings").


## Testing HAWC

### The test database
Expand Down Expand Up @@ -272,7 +272,6 @@ If tests aren't working after the database has changed (ie., migrated); try drop

Some tests compare large exports on disk to ensure the generated output is the same as expected. In some cases, these export files should change. Therefore, you can set a flag in `tests/conftest.py` to set `rewrite_data_files` to True. This will rewrite all saved files, so please review the changes to ensure they're expected. A test is in CI to ensure that `rewrite_data_files` is False.


### Loading a database dump

If you have a database dump saved locally, you can load that in instead. If you have multiple databases, you can switch them on the fly in your local.py settings (see Django Settings Inheritance above).
Expand Down Expand Up @@ -301,7 +300,6 @@ manage scrub_db
pg_dump -U hawc hawc | gzip > db_dump.sql.gz
```


### Mocking external resources in tests

When writing tests for code that accesses external resources (e.g., data from PubMed API endpoints), the ``vcr`` python package is used to save "cassettes" of expected responses for faster tests and stability in case external resources are intermittently offline. These cassettes can be rebuilt by running ``make test-refresh``, which will delete the ``cassettes`` directory and run the python test suite, which will recreate the cassettes based on actual responses.
Expand Down Expand Up @@ -386,7 +384,6 @@ export PWDEBUG=1
py.test -sv tests/integration/test_myuser.py --pdb
```


## More settings

### Visual Studio Code
Expand Down Expand Up @@ -521,3 +518,18 @@ To generate a report on the lines of code, install [cloc](https://github.com/AlD
```bash
make loc
```

### Testing the client

Most tests for the `hawc-client` package are integrated into our standard test suite using pytest. However, the interactive tests, which require interacting with the HTML DOM to download figures and images, couldn't be integrated into the standard test suite without significant effort. Therefore, they must be run manually with a script against our test fixture.

To run this script, start the django webserver using the hawc-fixture database. Both the django webserver and the node javascript server must be running, and the django webserver must be listening on port 8000. Make sure that the django debug toolbar is not enabled:

```bash
export "DJANGO_SETTINGS_MODULE=hawc.main.settings.unittest"
createdb -U hawc-fixture
python manage.py load_test_db
python scripts/test_iclient.py
```

Make sure the standard unit tests pass as well as the additional `test_iclient.py` tests before distributing a new version of the `hawc-client` package.
2 changes: 1 addition & 1 deletion frontend/shared/components/Loading.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import React from "react";
class Loading extends React.Component {
render() {
return (
<div>
<div className="is-loading">
<p>
Loading, please wait...&nbsp;
<span className="fa fa-spin fa-spinner" />
Expand Down
2 changes: 1 addition & 1 deletion frontend/summary/summary/TableauDashboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class TableauDashboard extends Component {
let fullPath = queryArgs && queryArgs.length > 0 ? `${path}?${queryArgs.join("&")}` : path;

return (
<tableau-viz src={hostUrl + fullPath} height={height} width={width}>
<tableau-viz src={hostUrl + fullPath} height={height} width={width} class="tableau-viz">
{filters.map((filter, i) => {
return (
<viz-filter key={i} field={filter.field} value={filter.value}></viz-filter>
Expand Down
2 changes: 1 addition & 1 deletion hawc/apps/common/templatetags/bs4.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def plotly(fig: Figure | None, **kw) -> str:
return mark_safe(
dedent(
f"""
<div id="{id}"><span class="text-muted">Loading...</span></div>
<div id="{id}"><span class="is-loading text-muted">Loading, please wait...</span></div>
<script>document.addEventListener("{event}", {func}, false);</script>"""
)
)
Expand Down
4 changes: 4 additions & 0 deletions hawc/apps/summary/templates/summary/datapivot_detail.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

{% load static %}

{% block extrahead %}
<meta name="hawc-viz-type" data-viz-type="data pivot" />
{% endblock %}

{% block content %}
<div class="d-flex">
<h2>{{object}}</h2>
Expand Down
4 changes: 4 additions & 0 deletions hawc/apps/summary/templates/summary/visual_detail.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{% extends 'assessment-rooted.html' %}

{% block extrahead %}
<meta name="hawc-viz-type" data-viz-type="{{object.get_visual_type_display}}" />
{% endblock %}

{% block content %}
<div class='visualization'></div>
{% endblock %}
Expand Down
4 changes: 4 additions & 0 deletions hawc/apps/summary/templates/summary/visual_detail_image.html
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
{% extends 'assessment-rooted.html' %}
{% load static %}

{% block extrahead %}
<meta name="hawc-viz-type" data-viz-type="{{object.get_visual_type_display}}" />
{% endblock %}

{% block content %}
<div class="visualization">
<div class='d-flex'>
Expand Down
4 changes: 4 additions & 0 deletions hawc/apps/summary/templates/summary/visual_detail_plotly.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{% extends 'assessment-rooted.html' %}

{% block extrahead %}
<meta name="hawc-viz-type" data-viz-type="{{object.get_visual_type_display}}" />
{% endblock %}

{% block content %}
<div class="visualization">
<div class='d-flex'>
Expand Down
2 changes: 1 addition & 1 deletion requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pytest==7.4.2
pytest-django==4.5.2
vcrpy==5.1.0
pytest-vcr==1.0.2
playwright==1.39.0
playwright==1.40.0
pytest-playwright==0.4.3

# hawc; save in editable mode so it doesn't copy to venv
Expand Down
Loading

0 comments on commit f5f6812

Please sign in to comment.