From 6e8f646bf9aa071154069bfcace22e53b4d35574 Mon Sep 17 00:00:00 2001 From: Usiel Riedl Date: Wed, 22 Nov 2023 18:19:04 +0800 Subject: [PATCH] Fix for infinite recursion due to secrets_masker (#35048) * Fix for infinite recursion due to secrets_masker We can get into trouble for types that cannot be initiated with re2's `type(obj)()` call. The `secrets_masker` thus fails, which triggers a warning log, which also fails because we pass the object to the logger, which is then masked again, and so forth. We can break the recursion by emitting a log without trying to redact the value again (this ensures no new bug will cause a stack overflow). This issue has occured previously: https://github.com/apache/airflow/issues/19816#issuecomment-983311373 Additionally, we fix this particular bug by ensuring whatever re2 receives is a simple `str`. I noticed this issue while working with a DAG that calls Airflow's DB cleanup function. Example DAG: ``` from datetime import datetime from airflow import DAG from airflow.models import Variable from airflow.operators.python import PythonOperator class MyStringClass(str): def __init__(self, required_arg): pass def fail(task_instance): # make sure the `SecretsMasker` has a replacer Variable.set(key="secret", value="secret_value") Variable.get("secret") # trigger the infinite recursion task_instance.log.info("%s", MyStringClass("secret_value")) with DAG( dag_id="secrets_masker_recursion", start_date=datetime(2023, 9, 26), ): PythonOperator(task_id="fail", python_callable=fail) ``` * Improve error message --------- Co-authored-by: Tzu-ping Chung --- airflow/utils/log/secrets_masker.py | 9 +++++---- tests/utils/log/test_secrets_masker.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/airflow/utils/log/secrets_masker.py b/airflow/utils/log/secrets_masker.py index 246377c169c0b..0b1b65f8405c1 100644 --- a/airflow/utils/log/secrets_masker.py +++ b/airflow/utils/log/secrets_masker.py @@ -261,7 +261,7 @@ def _redact(self, item: Redactable, name: str | None, depth: int, max_depth: int # We can't replace specific values, but the key-based redacting # can still happen, so we can't short-circuit, we need to walk # the structure. - return self.replacer.sub("***", item) + return self.replacer.sub("***", str(item)) return item elif isinstance(item, (tuple, set)): # Turn set in to tuple! @@ -276,14 +276,15 @@ def _redact(self, item: Redactable, name: str | None, depth: int, max_depth: int return item # I think this should never happen, but it does not hurt to leave it just in case # Well. It happened (see https://github.com/apache/airflow/issues/19816#issuecomment-983311373) - # but it caused infinite recursion, so we need to cast it to str first. + # but it caused infinite recursion, to avoid this we mark the log as already filtered. except Exception as exc: log.warning( - "Unable to redact %r, please report this via . " - "Error was: %s: %s", + "Unable to redact value of type %s, please report this via " + ". Error was: %s: %s", item, type(exc).__name__, exc, + extra={self.ALREADY_FILTERED_FLAG: True}, ) return item diff --git a/tests/utils/log/test_secrets_masker.py b/tests/utils/log/test_secrets_masker.py index ffaf2977ae1c5..4657a7c1f3275 100644 --- a/tests/utils/log/test_secrets_masker.py +++ b/tests/utils/log/test_secrets_masker.py @@ -305,6 +305,24 @@ def test_redact_max_depth(self, val, expected, max_depth): got = redact(val, max_depth=max_depth) assert got == expected + def test_redact_with_str_type(self, logger, caplog): + """ + SecretsMasker's re2 replacer has issues handling a redactable item of type + `str` with required constructor args. This test ensures there is a shim in + place that avoids any issues. + See: https://github.com/apache/airflow/issues/19816#issuecomment-983311373 + """ + + class StrLikeClassWithRequiredConstructorArg(str): + def __init__(self, required_arg): + pass + + text = StrLikeClassWithRequiredConstructorArg("password") + logger.info("redacted: %s", text) + + # we expect the object's __str__() output to be logged (no warnings due to a failed masking) + assert caplog.messages == ["redacted: ***"] + @pytest.mark.parametrize( "state, expected", [