From 41bcc9d0a6150ddb5081b208d378d0736c5e2b88 Mon Sep 17 00:00:00 2001 From: nsivarajan <117266407+nsivarajan@users.noreply.github.com> Date: Sat, 7 Dec 2024 07:11:38 +0530 Subject: [PATCH] fix: cache-warmup fails (#31173) Co-authored-by: Sivarajan Narayanan Co-authored-by: Pat Heard (cherry picked from commit 592564b623a18c5eb553300b63f407e249b744e3) --- UPDATING.md | 8 +++- superset/tasks/cache.py | 9 +++- superset/tasks/utils.py | 2 +- superset/utils/urls.py | 12 ++++++ tests/integration_tests/tasks/test_cache.py | 46 +++++++++++++++++---- tests/integration_tests/tasks/test_utils.py | 19 +++++++-- 6 files changed, 82 insertions(+), 14 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 6908c1f906f49..290417fa6c941 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -22,6 +22,12 @@ under the License. This file documents any backwards-incompatible changes in Superset and assists people when migrating to a new version. +## 4.1.2 + +- [31173](https://github.com/apache/superset/pull/31173) Modified `fetch_csrf_token` to align with HTTP standards, particularly regarding how cookies are handled. If you encounter any issues related to CSRF functionality, please report them as a new issue and reference this PR for context. + +### Potential Downtime + ## 4.1.0 - [29274](https://github.com/apache/superset/pull/29274): We made it easier to trigger CI on your @@ -229,7 +235,7 @@ assists people when migrating to a new version. - [19231](https://github.com/apache/superset/pull/19231): The `ENABLE_REACT_CRUD_VIEWS` feature flag has been removed (permanently enabled). Any deployments which had set this flag to false will need to verify that the React views support their use case. - [19230](https://github.com/apache/superset/pull/19230): The `ROW_LEVEL_SECURITY` feature flag has been removed (permanently enabled). Any deployments which had set this flag to false will need to verify that the presence of the Row Level Security feature does not interfere with their use case. - [19168](https://github.com/apache/superset/pull/19168): Celery upgrade to 5.X resulted in breaking changes to its command line invocation. -html#step-1-adjust-your-command-line-invocation) instructions for adjustments. Also consider migrating you Celery config per [here](https://docs.celeryq.dev/en/stable/userguide/configuration.html#conf-old-settings-map). + html#step-1-adjust-your-command-line-invocation) instructions for adjustments. Also consider migrating you Celery config per [here](https://docs.celeryq.dev/en/stable/userguide/configuration.html#conf-old-settings-map). - [19142](https://github.com/apache/superset/pull/19142): The `VERSIONED_EXPORT` config key is now `True` by default. - [19113](https://github.com/apache/superset/pull/19113): The `ENABLE_JAVASCRIPT_CONTROLS` config key has moved from an app config to a feature flag. Any deployments who overrode this setting will now need to override the feature flag from here onward. - [19107](https://github.com/apache/superset/pull/19107): The `SQLLAB_BACKEND_PERSISTENCE` feature flag is now `True` by default, which enables persisting SQL Lab tabs in the backend instead of the browser's `localStorage`. diff --git a/superset/tasks/cache.py b/superset/tasks/cache.py index 1509b0d0624aa..18ca7d7165eef 100644 --- a/superset/tasks/cache.py +++ b/superset/tasks/cache.py @@ -33,7 +33,7 @@ from superset.utils import json from superset.utils.date_parser import parse_human_datetime from superset.utils.machine_auth import MachineAuthProvider -from superset.utils.urls import get_url_path +from superset.utils.urls import get_url_path, is_secure_url logger = get_task_logger(__name__) logger.setLevel(logging.INFO) @@ -220,10 +220,15 @@ def fetch_url(data: str, headers: dict[str, str]) -> dict[str, str]: """ result = {} try: + url = get_url_path("ChartRestApi.warm_up_cache") + + if is_secure_url(url): + logger.info("URL '%s' is secure. Adding Referer header.", url) + headers.update({"Referer": url}) + # Fetch CSRF token for API request headers.update(fetch_csrf_token(headers)) - url = get_url_path("ChartRestApi.warm_up_cache") logger.info("Fetching %s with payload %s", url, data) req = request.Request( url, data=bytes(data, "utf-8"), headers=headers, method="PUT" diff --git a/superset/tasks/utils.py b/superset/tasks/utils.py index 6fc799c4abc2c..5e3bc148082b9 100644 --- a/superset/tasks/utils.py +++ b/superset/tasks/utils.py @@ -133,7 +133,7 @@ def fetch_csrf_token( data = json.loads(body) res = {"X-CSRF-Token": data["result"]} if session_cookie is not None: - res["Cookie"] = session_cookie + res["Cookie"] = f"{session_cookie_name}={session_cookie}" return res logger.error("Error fetching CSRF token, status code: %s", response.status) diff --git a/superset/utils/urls.py b/superset/utils/urls.py index 57a1b63dd41d3..9b186f54f31e6 100644 --- a/superset/utils/urls.py +++ b/superset/utils/urls.py @@ -16,6 +16,7 @@ # under the License. import urllib from typing import Any +from urllib.parse import urlparse from flask import current_app, url_for @@ -50,3 +51,14 @@ def modify_url_query(url: str, **kwargs: Any) -> str: f"{k}={urllib.parse.quote(str(v[0]))}" for k, v in params.items() ) return urllib.parse.urlunsplit(parts) + + +def is_secure_url(url: str) -> bool: + """ + Validates if a URL is secure (uses HTTPS). + + :param url: The URL to validate. + :return: True if the URL uses HTTPS (secure), False if it uses HTTP (non-secure). + """ + parsed_url = urlparse(url) + return parsed_url.scheme == "https" diff --git a/tests/integration_tests/tasks/test_cache.py b/tests/integration_tests/tasks/test_cache.py index 6e8d3ffe03b4d..368cb1ebf0afb 100644 --- a/tests/integration_tests/tasks/test_cache.py +++ b/tests/integration_tests/tasks/test_cache.py @@ -22,17 +22,32 @@ @pytest.mark.parametrize( - "base_url", + "base_url, expected_referer", [ - "http://base-url", - "http://base-url/", + ("http://base-url", None), + ("http://base-url/", None), + ("https://base-url", "https://base-url/api/v1/chart/warm_up_cache"), + ("https://base-url/", "https://base-url/api/v1/chart/warm_up_cache"), + ], + ids=[ + "Without trailing slash (HTTP)", + "With trailing slash (HTTP)", + "Without trailing slash (HTTPS)", + "With trailing slash (HTTPS)", ], - ids=["Without trailing slash", "With trailing slash"], ) @mock.patch("superset.tasks.cache.fetch_csrf_token") @mock.patch("superset.tasks.cache.request.Request") @mock.patch("superset.tasks.cache.request.urlopen") -def test_fetch_url(mock_urlopen, mock_request_cls, mock_fetch_csrf_token, base_url): +@mock.patch("superset.tasks.cache.is_secure_url") +def test_fetch_url( + mock_is_secure_url, + mock_urlopen, + mock_request_cls, + mock_fetch_csrf_token, + base_url, + expected_referer, +): from superset.tasks.cache import fetch_url mock_request = mock.MagicMock() @@ -41,8 +56,17 @@ def test_fetch_url(mock_urlopen, mock_request_cls, mock_fetch_csrf_token, base_u mock_urlopen.return_value = mock.MagicMock() mock_urlopen.return_value.code = 200 + # Mock the URL validation to return True for HTTPS and False for HTTP + mock_is_secure_url.return_value = base_url.startswith("https") + initial_headers = {"Cookie": "cookie", "key": "value"} csrf_headers = initial_headers | {"X-CSRF-Token": "csrf_token"} + + # Conditionally add the Referer header and assert its presence + if expected_referer: + csrf_headers = csrf_headers | {"Referer": expected_referer} + assert csrf_headers["Referer"] == expected_referer + mock_fetch_csrf_token.return_value = csrf_headers app.config["WEBDRIVER_BASEURL"] = base_url @@ -51,13 +75,21 @@ def test_fetch_url(mock_urlopen, mock_request_cls, mock_fetch_csrf_token, base_u result = fetch_url(data, initial_headers) - assert data == result["success"] + expected_url = ( + f"{base_url}/api/v1/chart/warm_up_cache" + if not base_url.endswith("/") + else f"{base_url}api/v1/chart/warm_up_cache" + ) + mock_fetch_csrf_token.assert_called_once_with(initial_headers) + mock_request_cls.assert_called_once_with( - "http://base-url/api/v1/chart/warm_up_cache", + expected_url, # Use the dynamic URL based on base_url data=data_encoded, headers=csrf_headers, method="PUT", ) # assert the same Request object is used mock_urlopen.assert_called_once_with(mock_request, timeout=mock.ANY) + + assert data == result["success"] diff --git a/tests/integration_tests/tasks/test_utils.py b/tests/integration_tests/tasks/test_utils.py index b1213b78c85a0..29e5f38319cb9 100644 --- a/tests/integration_tests/tasks/test_utils.py +++ b/tests/integration_tests/tasks/test_utils.py @@ -26,8 +26,15 @@ [ "http://base-url", "http://base-url/", + "https://base-url", + "https://base-url/", + ], + ids=[ + "Without trailing slash (HTTP)", + "With trailing slash (HTTP)", + "Without trailing slash (HTTPS)", + "With trailing slash (HTTPS)", ], - ids=["Without trailing slash", "With trailing slash"], ) @mock.patch("superset.tasks.cache.request.Request") @mock.patch("superset.tasks.cache.request.urlopen") @@ -52,13 +59,19 @@ def test_fetch_csrf_token(mock_urlopen, mock_request_cls, base_url, app_context) result_headers = fetch_csrf_token(headers) + expected_url = ( + f"{base_url}/api/v1/security/csrf_token/" + if not base_url.endswith("/") + else f"{base_url}api/v1/security/csrf_token/" + ) + mock_request_cls.assert_called_with( - "http://base-url/api/v1/security/csrf_token/", + expected_url, headers=headers, method="GET", ) assert result_headers["X-CSRF-Token"] == "csrf_token" - assert result_headers["Cookie"] == "new_session_cookie" + assert result_headers["Cookie"] == "session=new_session_cookie" # Updated assertion # assert the same Request object is used mock_urlopen.assert_called_once_with(mock_request, timeout=mock.ANY)