Skip to content

Commit

Permalink
fix: cache-warmup fails (#31173)
Browse files Browse the repository at this point in the history
Co-authored-by: Sivarajan Narayanan <narayanan_sivarajan@apple.com>
Co-authored-by: Pat Heard <patrick.heard@cds-snc.ca>
(cherry picked from commit 592564b)
  • Loading branch information
nsivarajan authored and sadpandajoe committed Jan 22, 2025
1 parent 9946596 commit 41bcc9d
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 14 deletions.
8 changes: 7 additions & 1 deletion UPDATING.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ under the License.
This file documents any backwards-incompatible changes in Superset and
assists people when migrating to a new version.

## 4.1.2

- [31173](https://github.com/apache/superset/pull/31173) Modified `fetch_csrf_token` to align with HTTP standards, particularly regarding how cookies are handled. If you encounter any issues related to CSRF functionality, please report them as a new issue and reference this PR for context.

### Potential Downtime

## 4.1.0

- [29274](https://github.com/apache/superset/pull/29274): We made it easier to trigger CI on your
Expand Down Expand Up @@ -229,7 +235,7 @@ assists people when migrating to a new version.
- [19231](https://github.com/apache/superset/pull/19231): The `ENABLE_REACT_CRUD_VIEWS` feature flag has been removed (permanently enabled). Any deployments which had set this flag to false will need to verify that the React views support their use case.
- [19230](https://github.com/apache/superset/pull/19230): The `ROW_LEVEL_SECURITY` feature flag has been removed (permanently enabled). Any deployments which had set this flag to false will need to verify that the presence of the Row Level Security feature does not interfere with their use case.
- [19168](https://github.com/apache/superset/pull/19168): Celery upgrade to 5.X resulted in breaking changes to its command line invocation.
html#step-1-adjust-your-command-line-invocation) instructions for adjustments. Also consider migrating you Celery config per [here](https://docs.celeryq.dev/en/stable/userguide/configuration.html#conf-old-settings-map).
html#step-1-adjust-your-command-line-invocation) instructions for adjustments. Also consider migrating you Celery config per [here](https://docs.celeryq.dev/en/stable/userguide/configuration.html#conf-old-settings-map).
- [19142](https://github.com/apache/superset/pull/19142): The `VERSIONED_EXPORT` config key is now `True` by default.
- [19113](https://github.com/apache/superset/pull/19113): The `ENABLE_JAVASCRIPT_CONTROLS` config key has moved from an app config to a feature flag. Any deployments who overrode this setting will now need to override the feature flag from here onward.
- [19107](https://github.com/apache/superset/pull/19107): The `SQLLAB_BACKEND_PERSISTENCE` feature flag is now `True` by default, which enables persisting SQL Lab tabs in the backend instead of the browser's `localStorage`.
Expand Down
9 changes: 7 additions & 2 deletions superset/tasks/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from superset.utils import json
from superset.utils.date_parser import parse_human_datetime
from superset.utils.machine_auth import MachineAuthProvider
from superset.utils.urls import get_url_path
from superset.utils.urls import get_url_path, is_secure_url

logger = get_task_logger(__name__)
logger.setLevel(logging.INFO)
Expand Down Expand Up @@ -220,10 +220,15 @@ def fetch_url(data: str, headers: dict[str, str]) -> dict[str, str]:
"""
result = {}
try:
url = get_url_path("ChartRestApi.warm_up_cache")

if is_secure_url(url):
logger.info("URL '%s' is secure. Adding Referer header.", url)
headers.update({"Referer": url})

# Fetch CSRF token for API request
headers.update(fetch_csrf_token(headers))

url = get_url_path("ChartRestApi.warm_up_cache")
logger.info("Fetching %s with payload %s", url, data)
req = request.Request(
url, data=bytes(data, "utf-8"), headers=headers, method="PUT"
Expand Down
2 changes: 1 addition & 1 deletion superset/tasks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def fetch_csrf_token(
data = json.loads(body)
res = {"X-CSRF-Token": data["result"]}
if session_cookie is not None:
res["Cookie"] = session_cookie
res["Cookie"] = f"{session_cookie_name}={session_cookie}"
return res

logger.error("Error fetching CSRF token, status code: %s", response.status)
Expand Down
12 changes: 12 additions & 0 deletions superset/utils/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.
import urllib
from typing import Any
from urllib.parse import urlparse

from flask import current_app, url_for

Expand Down Expand Up @@ -50,3 +51,14 @@ def modify_url_query(url: str, **kwargs: Any) -> str:
f"{k}={urllib.parse.quote(str(v[0]))}" for k, v in params.items()
)
return urllib.parse.urlunsplit(parts)


def is_secure_url(url: str) -> bool:
"""
Validates if a URL is secure (uses HTTPS).
:param url: The URL to validate.
:return: True if the URL uses HTTPS (secure), False if it uses HTTP (non-secure).
"""
parsed_url = urlparse(url)
return parsed_url.scheme == "https"
46 changes: 39 additions & 7 deletions tests/integration_tests/tasks/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,32 @@


@pytest.mark.parametrize(
"base_url",
"base_url, expected_referer",
[
"http://base-url",
"http://base-url/",
("http://base-url", None),
("http://base-url/", None),
("https://base-url", "https://base-url/api/v1/chart/warm_up_cache"),
("https://base-url/", "https://base-url/api/v1/chart/warm_up_cache"),
],
ids=[
"Without trailing slash (HTTP)",
"With trailing slash (HTTP)",
"Without trailing slash (HTTPS)",
"With trailing slash (HTTPS)",
],
ids=["Without trailing slash", "With trailing slash"],
)
@mock.patch("superset.tasks.cache.fetch_csrf_token")
@mock.patch("superset.tasks.cache.request.Request")
@mock.patch("superset.tasks.cache.request.urlopen")
def test_fetch_url(mock_urlopen, mock_request_cls, mock_fetch_csrf_token, base_url):
@mock.patch("superset.tasks.cache.is_secure_url")
def test_fetch_url(
mock_is_secure_url,
mock_urlopen,
mock_request_cls,
mock_fetch_csrf_token,
base_url,
expected_referer,
):
from superset.tasks.cache import fetch_url

mock_request = mock.MagicMock()
Expand All @@ -41,8 +56,17 @@ def test_fetch_url(mock_urlopen, mock_request_cls, mock_fetch_csrf_token, base_u
mock_urlopen.return_value = mock.MagicMock()
mock_urlopen.return_value.code = 200

# Mock the URL validation to return True for HTTPS and False for HTTP
mock_is_secure_url.return_value = base_url.startswith("https")

initial_headers = {"Cookie": "cookie", "key": "value"}
csrf_headers = initial_headers | {"X-CSRF-Token": "csrf_token"}

# Conditionally add the Referer header and assert its presence
if expected_referer:
csrf_headers = csrf_headers | {"Referer": expected_referer}
assert csrf_headers["Referer"] == expected_referer

mock_fetch_csrf_token.return_value = csrf_headers

app.config["WEBDRIVER_BASEURL"] = base_url
Expand All @@ -51,13 +75,21 @@ def test_fetch_url(mock_urlopen, mock_request_cls, mock_fetch_csrf_token, base_u

result = fetch_url(data, initial_headers)

assert data == result["success"]
expected_url = (
f"{base_url}/api/v1/chart/warm_up_cache"
if not base_url.endswith("/")
else f"{base_url}api/v1/chart/warm_up_cache"
)

mock_fetch_csrf_token.assert_called_once_with(initial_headers)

mock_request_cls.assert_called_once_with(
"http://base-url/api/v1/chart/warm_up_cache",
expected_url, # Use the dynamic URL based on base_url
data=data_encoded,
headers=csrf_headers,
method="PUT",
)
# assert the same Request object is used
mock_urlopen.assert_called_once_with(mock_request, timeout=mock.ANY)

assert data == result["success"]
19 changes: 16 additions & 3 deletions tests/integration_tests/tasks/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,15 @@
[
"http://base-url",
"http://base-url/",
"https://base-url",
"https://base-url/",
],
ids=[
"Without trailing slash (HTTP)",
"With trailing slash (HTTP)",
"Without trailing slash (HTTPS)",
"With trailing slash (HTTPS)",
],
ids=["Without trailing slash", "With trailing slash"],
)
@mock.patch("superset.tasks.cache.request.Request")
@mock.patch("superset.tasks.cache.request.urlopen")
Expand All @@ -52,13 +59,19 @@ def test_fetch_csrf_token(mock_urlopen, mock_request_cls, base_url, app_context)

result_headers = fetch_csrf_token(headers)

expected_url = (
f"{base_url}/api/v1/security/csrf_token/"
if not base_url.endswith("/")
else f"{base_url}api/v1/security/csrf_token/"
)

mock_request_cls.assert_called_with(
"http://base-url/api/v1/security/csrf_token/",
expected_url,
headers=headers,
method="GET",
)

assert result_headers["X-CSRF-Token"] == "csrf_token"
assert result_headers["Cookie"] == "new_session_cookie"
assert result_headers["Cookie"] == "session=new_session_cookie" # Updated assertion
# assert the same Request object is used
mock_urlopen.assert_called_once_with(mock_request, timeout=mock.ANY)

0 comments on commit 41bcc9d

Please sign in to comment.