Download visuals using client (#810)

* wip add visual download to client * fix login issue * refactor and add docstring * add test (skipped) and lint * add success message * ruff ruff * rewrite with a context manager * continue * try to fix ci for now * create a context manager for visual downloads * update tests * unused import * revise class location and fix circular import * fix imports * remove unused functions * establish cookie based session for playwright * lint * try to fix tests * raise HawcClientException instead of timing out * fix so we can run from notebooks * write a test script! * cleanup * complete test suite * wait for page to load * wait * require python 3.10+ w/ match statement * simplify plotly logic * hide django toolbar if present --------- Co-authored-by: Andy Shapiro <shapiromatron@gmail.com>
shapiromatron · Jan 9, 2024 · f5f6812 · f5f6812
1 parent 08d22f4
commit f5f6812
Show file tree

Hide file tree

Showing 14 changed files with 267 additions and 10 deletions.
diff --git a/client/hawc_client/__init__.py b/client/hawc_client/__init__.py
@@ -6,6 +6,7 @@
 from .epimeta import EpiMetaClient
 from .epiv2 import EpiV2Client
 from .exceptions import HawcClientException, HawcServerException
+from .interactive import InteractiveHawcClient
 from .invitro import InvitroClient
 from .literature import LiteratureClient
 from .riskofbias import RiskOfBiasClient
@@ -73,3 +74,6 @@ def set_authentication_token(self, token: str, login: bool = False) -> bool:
             bool: Returns true if session is valid
         """
         return self.session.set_authentication_token(token, login)
+
+    def interactive(self, headless: bool = True) -> InteractiveHawcClient:
+        """Build an interactive client that wraps this client.
+
+        Args:
+            headless (bool, optional): Forwarded unchanged to InteractiveHawcClient.
+                Defaults to True.
+
+        Returns:
+            InteractiveHawcClient: A new instance constructed with this client.
+        """
+        interactive_client = InteractiveHawcClient(client=self, headless=headless)
+        return interactive_client
diff --git a/client/hawc_client/interactive.py b/client/hawc_client/interactive.py
@@ -0,0 +1,151 @@
+from io import BytesIO
+from pathlib import Path
+
+from playwright._impl._api_structures import SetCookieParam
+from playwright.async_api import Page, expect
+from playwright.async_api._context_manager import PlaywrightContextManager as pcm
+
+from .client import BaseClient
+from .exceptions import HawcClientException
+
+
+async def remove_dj_toolbar(page: Page):
+    """Remove the django debug toolbar element (`#djDebug`) from a page, if present.
+
+    The lookup and the removal run in a single script evaluation, so there is only
+    one round trip to the browser, the element cannot disappear between the check
+    and the removal, and nothing depends on how a DOM node would be serialized back
+    to Python.
+
+    Args:
+        page (Page): a page instance
+    """
+    # optional chaining makes this a no-op when the element does not exist
+    await page.evaluate("document.querySelector('#djDebug')?.remove()")
+
+
+async def fetch_png(page: Page) -> BytesIO:
+    """Retrieve the PNG rendering of the visualization shown on a page.
+
+    Waits for the page "load" state and for every ".is-loading" element to be gone,
+    removes the django debug toolbar if present, and then follows the download flow
+    that matches the value of the page's `hawc-viz-type` meta tag.
+
+    Args:
+        page (Page): a page instance
+
+    Returns:
+        BytesIO: The PNG image, in bytes
+
+    Raises:
+        ValueError: If the visual type is not recognized, or the download has no path.
+    """
+    await page.wait_for_load_state("load")
+    await expect(page.locator(".is-loading")).to_have_count(0)
+    await remove_dj_toolbar(page)
+
+    viz_type = await page.evaluate(
+        "document.querySelector('meta[name=hawc-viz-type]').dataset.vizType"
+    )
+
+    if viz_type == "static image":
+        # nothing to download; capture the image element directly
+        png = await page.locator("#visual-image img").screenshot(type="png")
+        return BytesIO(png)
+
+    # visual types which share the same "download button, then PNG option" flow
+    menu_download_types = (
+        "data pivot",
+        "animal bioassay endpoint aggregation",
+        "animal bioassay endpoint crossview",
+        "risk of bias heatmap",
+        "risk of bias barchart",
+        "literature tagtree",
+        "exploratory heatmap",
+    )
+
+    # `opener` is an optional first click; `trigger` is the click that starts the download
+    opener = None
+    if viz_type in menu_download_types:
+        opener = page.locator("button", has=page.locator("i.fa-download"))
+        trigger = page.locator("text=Download as a PNG")
+    elif viz_type == "embedded external website":
+        frame = page.frame_locator("iframe")
+        opener = frame.locator('div[role="button"]:has-text("Download")')
+        trigger = frame.locator('button:has-text("Image")')
+    elif viz_type == "plotly":
+        trigger = page.locator(".js-plotly-plot .modebar-btn").first
+    else:
+        raise ValueError(f"Unknown visual type: {viz_type}")
+
+    if opener is not None:
+        await opener.click()
+    async with page.expect_download() as download_info:
+        await trigger.click()
+    download = await download_info.value
+    saved_path = await download.path()
+    if saved_path is None:
+        raise ValueError("Download failed")
+    return BytesIO(saved_path.read_bytes())
+
+
+PathLike = Path | str | None
+
+
+def write_to_file(data: BytesIO, path: PathLike) -> None:
+    """Save the contents of a buffer to disk; do nothing when no path is given.
+
+    Args:
+        data (BytesIO): buffer whose entire contents are written
+        path (PathLike): destination as a Path or a string; if None, nothing is written
+    """
+    if path is not None:
+        destination = Path(path) if isinstance(path, str) else path
+        destination.write_bytes(data.getvalue())
+
+
+class InteractiveHawcClient:
+    """
+    An async context manager for downloading assessment visuals.
+
+    On entry, a Chromium browser is launched with Playwright and the cookies from the
+    client's session are copied into the browser context. On exit, the browser context,
+    the browser, and the Playwright driver are shut down. Use it with `async with`.
+    """
+
+    def __init__(self, client: BaseClient, headless: bool = True):
+        """Store settings; no browser is started until the context is entered.
+
+        Args:
+            client (BaseClient): client whose session cookies and root URL are used
+            headless (bool, optional): passed to the Chromium launch call. Defaults to True.
+        """
+        self.client = client
+        self.headless = headless
+
+    async def __aenter__(self):
+        """Start the browser and copy the client's session cookies into it.
+
+        Raises:
+            HawcClientException: If the client session has no cookies.
+        """
+        # Validate cookies before starting Playwright: `__aexit__` is not called when
+        # `__aenter__` raises, so anything started before a failure would be leaked.
+        cookies = [
+            SetCookieParam(name=k, value=v, url=self.client.session.root_url)
+            for k, v in self.client.session._session.cookies.items()
+        ]
+        if not cookies:
+            raise HawcClientException(
+                403,
+                "No cookies found on client session;\nwhen setting authorization token; set login to True",
+            )
+        self.playwright = await pcm().start()
+        try:
+            self.browser = await self.playwright.chromium.launch(headless=self.headless)
+            self.context = await self.browser.new_context()
+            self.page = await self.context.new_page()
+            await self.context.add_cookies(cookies)
+        except BaseException:
+            # clean up and re-raise (this also covers task cancellation)
+            await self.playwright.stop()
+            raise
+        return self
+
+    async def __aexit__(self, *args) -> None:
+        """Close the browser context and browser, then stop the Playwright driver."""
+        try:
+            await self.context.close()
+            await self.browser.close()
+        finally:
+            # always stop the driver, even if closing the context or browser fails
+            await self.playwright.stop()
+
+    async def _download(self, url: str, fn: PathLike) -> BytesIO:
+        """Navigate to a URL, fetch the PNG on that page, and optionally write it to disk."""
+        # ensure response is OK before waiting
+        response = await self.page.goto(url)
+        if response and not response.ok:
+            raise HawcClientException(response.status, response.status_text)
+        # attempt to fetch PNG
+        data = await fetch_png(self.page)
+        write_to_file(data, fn)
+        return data
+
+    async def download_visual(self, id: int, fn: PathLike = None) -> BytesIO:
+        """Download a PNG visualization given a visual ID
+
+        Args:
+            id (int): The visual ID
+            fn (PathLike, optional): If a path or string is specified, the PNG is written to
+                that location. If None (default), no data is written to a Path.
+
+        Returns:
+            BytesIO: the PNG representation of the visual, in bytes.
+
+        Raises:
+            HawcClientException: If the page response is not OK.
+        """
+        url = f"{self.client.session.root_url}/summary/visual/{id}/"
+        return await self._download(url, fn)
+
+    async def download_data_pivot(self, id: int, fn: PathLike = None) -> BytesIO:
+        """Download a PNG data pivot given a data pivot ID
+
+        Args:
+            id (int): The data pivot ID
+            fn (PathLike, optional): If a path or string is specified, the PNG is written to
+                that location. If None (default), no data is written to a Path.
+
+        Returns:
+            BytesIO: the PNG representation of the data pivot, in bytes.
+
+        Raises:
+            HawcClientException: If the page response is not OK.
+        """
+        url = f"{self.client.session.root_url}/summary/data-pivot/{id}/"
+        return await self._download(url, fn)
diff --git a/client/pyproject.toml b/client/pyproject.toml
@@ -13,15 +13,16 @@ keywords = [
 ]
 classifiers = [
   "Intended Audience :: Science/Research",
-  "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
 ]
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
   "rapidfuzz",
   "requests",
   "pandas",
+  "playwright",
   "tqdm",
 ]
 

diff --git a/docs/docs/client.md b/docs/docs/client.md
@@ -29,7 +29,20 @@ client.set_authentication_token(token=getpass())
 
 # get all references for an assessment
 client.lit.references(assessment_id=123)
+```
+
+An interactive client also exists which downloads figures or visualizations:
+
+```python
+from getpass import getpass
+from hawc_client import HawcClient
+
+client = HawcClient("https://hawcproject.org")
+client.set_authentication_token(getpass(), login=True)  # must set login to True
 
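+# the interactive client is asynchronous; run this in an async context
+# (e.g., a Jupyter notebook cell or inside an `async def` function)
+async with client.interactive(headless=False) as iclient:
+    await iclient.download_visual(123, 'visual.png')
+    await iclient.download_data_pivot(456, 'data-pivot.png')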
 ```
 
 There are many more commands available in the HAWC client that aren't documented here. It is recommended to use an interactive terminal session using a jupyter notebook to browse the available methods and their docstrings for more details.

diff --git a/docs/docs/development.md b/docs/docs/development.md
@@ -191,6 +191,7 @@ make startdb
 ```
 
 ## Local Settings
+
 ### Django settings inheritance
 
 HAWC settings are structured according to the django settings framework. Within ``hawc/main/settings``, there are a number of settings files that inherit using the following pattern:
@@ -218,7 +219,6 @@ HAWC settings are structured according to the django settings framework. Within
 
 To make changes to your local environment, create (and then modify) ``hawc/main/settings/local.py``. This file is not created by default (and is not tracked in git), but a template can be copied and renamed from ``hawc/main/settings/local.example.py`` as a starting point. You can make changes to this file to configure your local environment, such as which database is used or the "flavor" of HAWC (see "More Settings").
 
-
 ## Testing HAWC
 
 ### The test database
@@ -272,7 +272,6 @@ If tests aren't working after the database has changed (ie., migrated); try drop
 
 Some tests compare large exports on disk to ensure the generated output is the same as expected. In some cases, these export files should change. Therefore, you can set a flag in the `tests/conftest.py` to set `rewrite_data_files` to True. This will rewrite all saved files, so please review the changes to ensure they're expected. A test is in CI to ensure that `rewrite_data_files` is False.
 
-
 ### Loading a database dump
 
 If you have a database dump saved locally, you can load that in instead. If you have multiple databases, you can switch them on the fly in your local.py settings (see Django Settings Inheritance above).
@@ -301,7 +300,6 @@ manage scrub_db
 pg_dump -U hawc hawc | gzip > db_dump.sql.gz
 ```
 
-
 ### Mocking external resources in tests
 
 When writing tests for code that accesses external resources (e.g., data from PubMed API endpoints), the ``vcr`` python package is used to save "cassettes" of expected responses for faster tests and stability in case external resources are intermittently offline. These cassettes can be rebuilt by running ``make test-refresh``, which will delete the ``cassettes`` directory and run the python test suite, which will recreate the cassettes based on actual responses.
@@ -386,7 +384,6 @@ export PWDEBUG=1
 py.test -sv tests/integration/test_myuser.py --pdb
 ```
 
-
 ## More settings
 
 ### Visual Studio Code
@@ -521,3 +518,18 @@ To generate a report on the lines of code, install [cloc](https://github.com/AlD
 ```bash
 make loc
 ```
+
+### Testing the client
+
+Most tests for the `hawc-client` package are integrated into our standard test suite using pytest. However, the interactive tests, which require interacting with the HTML DOM to download figures and images, couldn't be integrated into the standard test suite without significant effort. Therefore, they must be run manually using a test script against our test fixture.
+
+To run this script, start both the django webserver and the node javascript server, using the hawc-fixture database. The django webserver must be running on port 8000. Make sure that the django debug toolbar is not enabled:
+
+```bash
+export "DJANGO_SETTINGS_MODULE=hawc.main.settings.unittest"
+createdb -U hawc-fixture
+python manage.py load_test_db
+python scripts/test_iclient.py
+```
+
+Make sure the standard unit tests pass as well as the additional `test_iclient.py` tests before distributing a new version of the `hawc-client` package.
diff --git a/frontend/shared/components/Loading.js b/frontend/shared/components/Loading.js
@@ -3,7 +3,7 @@ import React from "react";
 class Loading extends React.Component {
     render() {
         return (
-            <div>
+            <div className="is-loading">
                 <p>
                     Loading, please wait...&nbsp;
                     <span className="fa fa-spin fa-spinner" />

diff --git a/frontend/summary/summary/TableauDashboard.js b/frontend/summary/summary/TableauDashboard.js
@@ -32,7 +32,7 @@ class TableauDashboard extends Component {
         let fullPath = queryArgs && queryArgs.length > 0 ? `${path}?${queryArgs.join("&")}` : path;
 
         return (
-            <tableau-viz src={hostUrl + fullPath} height={height} width={width}>
+            <tableau-viz src={hostUrl + fullPath} height={height} width={width} class="tableau-viz">
                 {filters.map((filter, i) => {
                     return (
                         <viz-filter key={i} field={filter.field} value={filter.value}></viz-filter>

diff --git a/hawc/apps/common/templatetags/bs4.py b/hawc/apps/common/templatetags/bs4.py
@@ -105,7 +105,7 @@ def plotly(fig: Figure | None, **kw) -> str:
     return mark_safe(
         dedent(
             f"""
-    <div id="{id}"><span class="text-muted">Loading...</span></div>
+    <div id="{id}"><span class="is-loading text-muted">Loading, please wait...</span></div>
     <script>document.addEventListener("{event}", {func}, false);</script>"""
         )
     )

diff --git a/hawc/apps/summary/templates/summary/datapivot_detail.html b/hawc/apps/summary/templates/summary/datapivot_detail.html
@@ -2,6 +2,10 @@
 
 {% load static %}
 
+{% block extrahead %}
+<meta name="hawc-viz-type" data-viz-type="data pivot" />
+{% endblock %}
+
 {% block content %}
 <div class="d-flex">
   <h2>{{object}}</h2>

diff --git a/hawc/apps/summary/templates/summary/visual_detail.html b/hawc/apps/summary/templates/summary/visual_detail.html
@@ -1,5 +1,9 @@
 {% extends 'assessment-rooted.html' %}
 
+{% block extrahead %}
+<meta name="hawc-viz-type" data-viz-type="{{object.get_visual_type_display}}" />
+{% endblock %}
+
 {% block content %}
   <div class='visualization'></div>
 {% endblock %}

diff --git a/hawc/apps/summary/templates/summary/visual_detail_image.html b/hawc/apps/summary/templates/summary/visual_detail_image.html
@@ -1,6 +1,10 @@
 {% extends 'assessment-rooted.html' %}
 {% load static %}
 
+{% block extrahead %}
+<meta name="hawc-viz-type" data-viz-type="{{object.get_visual_type_display}}" />
+{% endblock %}
+
 {% block content %}
 <div class="visualization">
   <div class='d-flex'>

diff --git a/hawc/apps/summary/templates/summary/visual_detail_plotly.html b/hawc/apps/summary/templates/summary/visual_detail_plotly.html
@@ -1,5 +1,9 @@
 {% extends 'assessment-rooted.html' %}
 
+{% block extrahead %}
+<meta name="hawc-viz-type" data-viz-type="{{object.get_visual_type_display}}" />
+{% endblock %}
+
 {% block content %}
 <div class="visualization">
   <div class='d-flex'>

diff --git a/requirements/dev.txt b/requirements/dev.txt
@@ -19,7 +19,7 @@ pytest==7.4.2
 pytest-django==4.5.2
 vcrpy==5.1.0
 pytest-vcr==1.0.2
-playwright==1.39.0
+playwright==1.40.0
 pytest-playwright==0.4.3
 
 # hawc; save in editable mode so it doesn't copy to venv