From 283b3d21bc8af5a9721737fa8892cd98396bbc4a Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Mon, 1 Aug 2022 21:41:50 +0100 Subject: [PATCH] Add option to mask sensitive data in UI configuration page (#25346) * Add option to mask sensitive data in UI configuration page This PR adds an option to mask sensitive data in the UI configuration page, making it possible to view the page with redated data --- airflow/configuration.py | 56 +++++++++++---- airflow/www/views.py | 26 ++++++- setup.cfg | 1 + tests/core/test_configuration.py | 34 ++++++++++ tests/www/views/test_views_configuration.py | 75 +++++++++++++++++++++ 5 files changed, 177 insertions(+), 15 deletions(-) create mode 100644 tests/www/views/test_views_configuration.py diff --git a/airflow/configuration.py b/airflow/configuration.py index e8d592dacb100..139172d0f3c74 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -142,6 +142,20 @@ def default_config_yaml() -> List[Dict[str, Any]]: return yaml.safe_load(config_file) +SENSITIVE_CONFIG_VALUES = { + ('database', 'sql_alchemy_conn'), + ('core', 'fernet_key'), + ('celery', 'broker_url'), + ('celery', 'flower_basic_auth'), + ('celery', 'result_backend'), + ('atlas', 'password'), + ('smtp', 'smtp_password'), + ('webserver', 'secret_key'), + # The following options are deprecated + ('core', 'sql_alchemy_conn'), +} + + class AirflowConfigParser(ConfigParser): """Custom Airflow Configparser supporting defaults and deprecated options""" @@ -150,18 +164,8 @@ class AirflowConfigParser(ConfigParser): # is to not store password on boxes in text files. # These configs can also be fetched from Secrets backend # following the "{section}__{name}__secret" pattern - sensitive_config_values: Set[Tuple[str, str]] = { - ('database', 'sql_alchemy_conn'), - ('core', 'fernet_key'), - ('celery', 'broker_url'), - ('celery', 'flower_basic_auth'), - ('celery', 'result_backend'), - ('atlas', 'password'), - ('smtp', 'smtp_password'), - ('webserver', 'secret_key'), - # The following options are deprecated - ('core', 'sql_alchemy_conn'), - } + + sensitive_config_values: Set[Tuple[str, str]] = SENSITIVE_CONFIG_VALUES # A mapping of (new section, new option) -> (old section, old option, since_version). # When reading new option, the old option will be checked to see if it exists. If it does a @@ -887,6 +891,15 @@ def as_dict( :return: Dictionary, where the key is the name of the section and the content is the dictionary with the name of the parameter and its value. """ + if not display_sensitive: + # We want to hide the sensitive values at the appropriate methods + # since envs from cmds, secrets can be read at _include_envs method + if not all([include_env, include_cmds, include_secret]): + raise ValueError( + "If display_sensitive is false, then include_env, " + "include_cmds, include_secret must all be set as True" + ) + config_sources: ConfigSourcesType = {} configs = [ ('default', self.airflow_defaults), @@ -922,6 +935,20 @@ def as_dict( else: self._filter_by_source(config_sources, display_source, self._get_secret_option) + if not display_sensitive: + # This ensures the ones from config file is hidden too + # if they are not provided through env, cmd and secret + hidden = '< hidden >' + for (section, key) in self.sensitive_config_values: + if not config_sources.get(section): + continue + if config_sources[section].get(key, None): + if display_source: + source = config_sources[section][key][1] + config_sources[section][key] = (hidden, source) + else: + config_sources[section][key] = hidden + return config_sources def _include_secrets( @@ -987,7 +1014,10 @@ def _include_envs( log.warning("Ignoring unknown env var '%s'", env_var) continue if not display_sensitive and env_var != self._env_var_name('core', 'unit_test_mode'): - opt = '< hidden >' + # Don't hide cmd/secret values here + if not env_var.lower().endswith('cmd') and not env_var.lower().endswith("secret"): + opt = '< hidden >' + elif raw: opt = opt.replace('%', '%%') if display_source: diff --git a/airflow/www/views.py b/airflow/www/views.py index 2b51bbbf88188..d4bfc28c940bc 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -35,6 +35,7 @@ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union from urllib.parse import parse_qsl, unquote, urlencode, urlparse +import configupdater import lazy_object_proxy import markupsafe import nvd3 @@ -3752,12 +3753,33 @@ def conf(self): raw = request.args.get('raw') == "true" title = "Airflow Configuration" subtitle = AIRFLOW_CONFIG + + expose_config = conf.get('webserver', 'expose_config') + # Don't show config when expose_config variable is False in airflow config - if conf.getboolean("webserver", "expose_config"): + # Don't show sensitive config values if expose_config variable is 'non-sensitive-only' + # in airflow config + if expose_config.lower() == 'non-sensitive-only': + from airflow.configuration import SENSITIVE_CONFIG_VALUES + + updater = configupdater.ConfigUpdater() + updater.read(AIRFLOW_CONFIG) + for sect, key in SENSITIVE_CONFIG_VALUES: + if updater.has_option(sect, key): + updater[sect][key].value = '< hidden >' + config = str(updater) + + table = [ + (section, key, str(value), source) + for section, parameters in conf.as_dict(True, False).items() + for key, (value, source) in parameters.items() + ] + elif expose_config.lower() in ['true', 't', '1']: + with open(AIRFLOW_CONFIG) as file: config = file.read() table = [ - (section, key, value, source) + (section, key, str(value), source) for section, parameters in conf.as_dict(True, True).items() for key, (value, source) in parameters.items() ] diff --git a/setup.cfg b/setup.cfg index 1cc2e4b6adc21..550ab9beaf7ec 100644 --- a/setup.cfg +++ b/setup.cfg @@ -95,6 +95,7 @@ install_requires = # Colorlog 6.x merges TTYColoredFormatter into ColoredFormatter, breaking backwards compatibility with 4.x # Update CustomTTYColoredFormatter to remove colorlog>=4.0.2, <5.0 + configupdater>=3.1.1 connexion[swagger-ui,flask]>=2.10.0 cron-descriptor>=1.2.24 croniter>=0.3.17 diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py index 3532aac7e52fb..15c8bbffc1499 100644 --- a/tests/core/test_configuration.py +++ b/tests/core/test_configuration.py @@ -261,6 +261,31 @@ def test_config_from_secret_backend(self, mock_hvac): assert 'sqlite:////Users/airflow/airflow/airflow.db' == test_conf.get('test', 'sql_alchemy_conn') + def test_hidding_of_sensitive_config_values(self): + test_config = '''[test] + sql_alchemy_conn_secret = sql_alchemy_conn + ''' + test_config_default = '''[test] + sql_alchemy_conn = airflow + ''' + test_conf = AirflowConfigParser(default_config=parameterized_config(test_config_default)) + test_conf.read_string(test_config) + test_conf.sensitive_config_values = test_conf.sensitive_config_values | { + ('test', 'sql_alchemy_conn'), + } + + assert 'airflow' == test_conf.get('test', 'sql_alchemy_conn') + # Hide sensitive fields + asdict = test_conf.as_dict(display_sensitive=False) + assert '< hidden >' == asdict['test']['sql_alchemy_conn'] + # If display_sensitive is false, then include_cmd, include_env,include_secrets must all be True + # This ensures that cmd and secrets env are hidden at the appropriate method and no surprises + with pytest.raises(ValueError): + test_conf.as_dict(display_sensitive=False, include_cmds=False) + # Test that one of include_cmds, include_env, include_secret can be false when display_sensitive + # is True + assert test_conf.as_dict(display_sensitive=True, include_cmds=False) + @mock.patch("airflow.providers.hashicorp._internal_client.vault_client.hvac") @conf_vars( { @@ -559,6 +584,15 @@ def test_command_from_env(self): # the environment variable's echo command assert test_cmdenv_conf.get('testcmdenv', 'notacommand') == 'OK' + @pytest.mark.parametrize('display_sensitive, result', [(True, 'OK'), (False, '< hidden >')]) + def test_as_dict_display_sensitivewith_command_from_env(self, display_sensitive, result): + + test_cmdenv_conf = AirflowConfigParser() + test_cmdenv_conf.sensitive_config_values.add(('testcmdenv', 'itsacommand')) + with unittest.mock.patch.dict('os.environ'): + asdict = test_cmdenv_conf.as_dict(True, display_sensitive) + assert asdict['testcmdenv']['itsacommand'] == (result, 'cmd') + def test_parameterized_config_gen(self): config = textwrap.dedent( """ diff --git a/tests/www/views/test_views_configuration.py b/tests/www/views/test_views_configuration.py new file mode 100644 index 0000000000000..df55fac43a7e1 --- /dev/null +++ b/tests/www/views/test_views_configuration.py @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import html + +from airflow.configuration import SENSITIVE_CONFIG_VALUES, conf +from tests.test_utils.config import conf_vars +from tests.test_utils.www import check_content_in_response, check_content_not_in_response + + +@conf_vars({("webserver", "expose_config"): 'False'}) +def test_user_cant_view_configuration(admin_client): + resp = admin_client.get('configuration', follow_redirects=True) + check_content_in_response( + "Your Airflow administrator chose not to expose the configuration, " + "most likely for security reasons.", + resp, + ) + + +@conf_vars({("webserver", "expose_config"): 'True'}) +def test_user_can_view_configuration(admin_client): + resp = admin_client.get('configuration', follow_redirects=True) + for section, key in SENSITIVE_CONFIG_VALUES: + value = conf.get(section, key, fallback='') + if not value: + continue + check_content_in_response(html.escape(value), resp) + + +@conf_vars({("webserver", "expose_config"): 'non-sensitive-only'}) +def test_configuration_redacted(admin_client): + resp = admin_client.get('configuration', follow_redirects=True) + for section, key in SENSITIVE_CONFIG_VALUES: + value = conf.get(section, key, fallback='') + if not value or value == 'airflow': + continue + if value.startswith('db+postgresql'): # this is in configuration comment + continue + check_content_not_in_response(value, resp) + + +@conf_vars({("webserver", "expose_config"): 'non-sensitive-only'}) +def test_configuration_redacted_in_running_configuration(admin_client): + resp = admin_client.get('configuration', follow_redirects=True) + for section, key in SENSITIVE_CONFIG_VALUES: + value = conf.get(section, key, fallback='') + if not value or value == 'airflow': + continue + check_content_not_in_response("" + html.escape(value) + '