Skip to content

Commit

Permalink
Accept non URL-encoded headers (#4103)
Browse files Browse the repository at this point in the history
  • Loading branch information
xrmx authored Aug 5, 2024
1 parent 2f928a2 commit 57d30b3
Show file tree
Hide file tree
Showing 9 changed files with 109 additions and 28 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Update log export example to not use root logger ([#4090](https://github.com/open-telemetry/opentelemetry-python/pull/4090))
- sdk: Add OS resource detector
([#3992](https://github.com/open-telemetry/opentelemetry-python/pull/3992))
- sdk: Accept non URL-encoded headers in `OTEL_EXPORTER_OTLP_*HEADERS` to match other languages SDKs
([#4103](https://github.com/open-telemetry/opentelemetry-python/pull/4103))
- Update semantic conventions to version 1.27.0
([#4104](https://github.com/open-telemetry/opentelemetry-python/pull/4104))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def __init__(

self._headers = headers or environ.get(OTEL_EXPORTER_OTLP_HEADERS)
if isinstance(self._headers, str):
temp_headers = parse_env_headers(self._headers)
temp_headers = parse_env_headers(self._headers, liberal=True)
self._headers = tuple(temp_headers.items())
elif isinstance(self._headers, dict):
self._headers = tuple(self._headers.items())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ def __init__(
OTEL_EXPORTER_OTLP_LOGS_HEADERS,
environ.get(OTEL_EXPORTER_OTLP_HEADERS, ""),
)
self._headers = headers or parse_env_headers(headers_string)
self._headers = headers or parse_env_headers(
headers_string, liberal=True
)
self._timeout = timeout or int(
environ.get(
OTEL_EXPORTER_OTLP_LOGS_TIMEOUT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@ def __init__(
OTEL_EXPORTER_OTLP_METRICS_HEADERS,
environ.get(OTEL_EXPORTER_OTLP_HEADERS, ""),
)
self._headers = headers or parse_env_headers(headers_string)
self._headers = headers or parse_env_headers(
headers_string, liberal=True
)
self._timeout = timeout or int(
environ.get(
OTEL_EXPORTER_OTLP_METRICS_TIMEOUT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ def __init__(
OTEL_EXPORTER_OTLP_TRACES_HEADERS,
environ.get(OTEL_EXPORTER_OTLP_HEADERS, ""),
)
self._headers = headers or parse_env_headers(headers_string)
self._headers = headers or parse_env_headers(
headers_string, liberal=True
)
self._timeout = timeout or int(
environ.get(
OTEL_EXPORTER_OTLP_TRACES_TIMEOUT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ def test_headers_parse_from_env(self):
(
"Header format invalid! Header values in environment "
"variables must be URL encoded per the OpenTelemetry "
"Protocol Exporter specification: missingValue"
"Protocol Exporter specification or a comma separated "
"list of name=value occurrences: missingValue"
),
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ def test_headers_parse_from_env(self):
(
"Header format invalid! Header values in environment "
"variables must be URL encoded per the OpenTelemetry "
"Protocol Exporter specification: missingValue"
"Protocol Exporter specification or a comma separated "
"list of name=value occurrences: missingValue"
),
)

Expand Down
56 changes: 45 additions & 11 deletions opentelemetry-api/src/opentelemetry/util/re.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,23 @@
_KEY_VALUE_FORMAT = rf"{_OWS}{_KEY_FORMAT}{_OWS}={_OWS}{_VALUE_FORMAT}{_OWS}"

_HEADER_PATTERN = compile(_KEY_VALUE_FORMAT)
_LIBERAL_HEADER_PATTERN = compile(
rf"{_OWS}{_KEY_FORMAT}{_OWS}={_OWS}[\w ]*{_OWS}"
)
_DELIMITER_PATTERN = compile(r"[ \t]*,[ \t]*")

_BAGGAGE_PROPERTY_FORMAT = rf"{_KEY_VALUE_FORMAT}|{_OWS}{_KEY_FORMAT}{_OWS}"

_INVALID_HEADER_ERROR_MESSAGE_STRICT_TEMPLATE = (
"Header format invalid! Header values in environment variables must be "
"URL encoded per the OpenTelemetry Protocol Exporter specification: %s"
)

_INVALID_HEADER_ERROR_MESSAGE_LIBERAL_TEMPLATE = (
"Header format invalid! Header values in environment variables must be "
"URL encoded per the OpenTelemetry Protocol Exporter specification or "
"a comma separated list of name=value occurrences: %s"
)

# pylint: disable=invalid-name

Expand All @@ -49,30 +62,51 @@ def parse_headers(s: str) -> Mapping[str, str]:
return parse_env_headers(s)


def parse_env_headers(s: str) -> Mapping[str, str]:
def parse_env_headers(s: str, liberal: bool = False) -> Mapping[str, str]:
"""
Parse ``s``, which is a ``str`` instance containing HTTP headers encoded
for use in ENV variables per the W3C Baggage HTTP header format at
https://www.w3.org/TR/baggage/#baggage-http-header-format, except that
additional semi-colon delimited metadata is not supported.
If ``liberal`` is True we try to parse ``s`` anyway to be more compatible
with other languages SDKs that accept non URL-encoded headers by default.
"""
headers: Dict[str, str] = {}
headers_list: List[str] = split(_DELIMITER_PATTERN, s)
for header in headers_list:
if not header: # empty string
continue
match = _HEADER_PATTERN.fullmatch(header.strip())
if not match:
header_match = _HEADER_PATTERN.fullmatch(header.strip())
if not header_match and not liberal:
_logger.warning(
"Header format invalid! Header values in environment variables must be "
"URL encoded per the OpenTelemetry Protocol Exporter specification: %s",
header,
_INVALID_HEADER_ERROR_MESSAGE_STRICT_TEMPLATE, header
)
continue
# value may contain any number of `=`
name, value = match.string.split("=", 1)
name = unquote(name).strip().lower()
value = unquote(value).strip()
headers[name] = value

if header_match:
match_string: str = header_match.string
# value may contain any number of `=`
name, value = match_string.split("=", 1)
name = unquote(name).strip().lower()
value = unquote(value).strip()
headers[name] = value
else:
# this is not url-encoded and does not match the spec but we decided to be
# liberal in what we accept to match other languages SDKs behaviour
liberal_header_match = _LIBERAL_HEADER_PATTERN.fullmatch(
header.strip()
)
if not liberal_header_match:
_logger.warning(
_INVALID_HEADER_ERROR_MESSAGE_LIBERAL_TEMPLATE, header
)
continue

liberal_match_string: str = liberal_header_match.string
# value may contain any number of `=`
name, value = liberal_match_string.split("=", 1)
name = name.strip().lower()
value = value.strip()
headers[name] = value

return headers
59 changes: 48 additions & 11 deletions opentelemetry-api/tests/util/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@


class TestParseHeaders(unittest.TestCase):
def test_parse_env_headers(self):
inp = [
@staticmethod
def _common_test_cases():
return [
# invalid header name
("=value", [], True),
("}key=value", [], True),
Expand Down Expand Up @@ -59,18 +60,54 @@ def test_parse_env_headers(self):
True,
),
]

def test_parse_env_headers(self):
inp = self._common_test_cases() + [
# invalid header value
("key=value othervalue", [], True),
]
for case_ in inp:
headers, expected, warn = case_
if warn:
with self.assertLogs(level="WARNING") as cm:
with self.subTest(headers=headers):
if warn:
with self.assertLogs(level="WARNING") as cm:
self.assertEqual(
parse_env_headers(headers), dict(expected)
)
self.assertTrue(
"Header format invalid! Header values in environment "
"variables must be URL encoded per the OpenTelemetry "
"Protocol Exporter specification:"
in cm.records[0].message,
)
else:
self.assertEqual(
parse_env_headers(headers), dict(expected)
)
self.assertTrue(
"Header format invalid! Header values in environment "
"variables must be URL encoded per the OpenTelemetry "
"Protocol Exporter specification:"
in cm.records[0].message,

def test_parse_env_headers_liberal(self):
inp = self._common_test_cases() + [
# valid header value
("key=value othervalue", [("key", "value othervalue")], False),
]
for case_ in inp:
headers, expected, warn = case_
with self.subTest(headers=headers):
if warn:
with self.assertLogs(level="WARNING") as cm:
self.assertEqual(
parse_env_headers(headers, liberal=True),
dict(expected),
)
self.assertTrue(
"Header format invalid! Header values in environment "
"variables must be URL encoded per the OpenTelemetry "
"Protocol Exporter specification or a comma separated "
"list of name=value occurrences:"
in cm.records[0].message,
)
else:
self.assertEqual(
parse_env_headers(headers, liberal=True),
dict(expected),
)
else:
self.assertEqual(parse_env_headers(headers), dict(expected))

0 comments on commit 57d30b3

Please sign in to comment.