Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Flask integration based on WSGI ext #206

Merged
merged 17 commits into from
Nov 23, 2019
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

import opentelemetry.ext.http_requests
from opentelemetry import propagators, trace
from opentelemetry.ext.wsgi import OpenTelemetryMiddleware
from opentelemetry.ext.wsgi.flask_util import wrap_flask
from opentelemetry.sdk.context.propagation.b3_format import B3Format
from opentelemetry.sdk.trace import Tracer

Expand Down Expand Up @@ -57,7 +57,7 @@ def configure_opentelemetry(flask_app: flask.Flask):
# and the frameworks and libraries that are used together, automatically
# creating Spans and propagating context as appropriate.
opentelemetry.ext.http_requests.enable(trace.tracer())
flask_app.wsgi_app = OpenTelemetryMiddleware(flask_app.wsgi_app)
wrap_flask(flask_app)


app = flask.Flask(__name__)
Expand Down
4 changes: 4 additions & 0 deletions ext/opentelemetry-ext-http-requests/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Note: This package is not named "requests" because of
# https://github.com/PyCQA/pylint/issues/2648

import os

import setuptools
Expand Down
3 changes: 3 additions & 0 deletions ext/opentelemetry-ext-wsgi/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,8 @@ packages=find_namespace:
install_requires =
opentelemetry-api

[options.extras_require]
test = flask~=1.0

[options.packages.find]
where = src
169 changes: 95 additions & 74 deletions ext/opentelemetry-ext-wsgi/src/opentelemetry/ext/wsgi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,75 +24,108 @@

from opentelemetry import propagators, trace
from opentelemetry.ext.wsgi.version import __version__ # noqa
from opentelemetry.util import time_ns


def get_header_from_environ(
environ: dict, header_name: str
) -> typing.List[str]:
"""Retrieve a HTTP header value from the PEP3333-conforming WSGI environ.

Returns:
A list with a single string with the header value if it exists, else an empty list.
"""
environ_key = "HTTP_" + header_name.upper().replace("-", "_")
value = environ.get(environ_key)
if value is not None:
return [value]
return []


def add_request_attributes(span, environ):
"""Adds HTTP request attributes from the PEP3333-conforming WSGI environ to span."""

span.set_attribute("component", "http")
span.set_attribute("http.method", environ["REQUEST_METHOD"])

host = environ.get("HTTP_HOST")
if not host:
host = environ["SERVER_NAME"]
port = environ["SERVER_PORT"]
scheme = environ["wsgi.url_scheme"]
if (
scheme == "http"
and port != "80"
or scheme == "https"
and port != "443"
):
host += ":" + port

# NOTE: Nonstandard (but see
# https://github.com/open-telemetry/opentelemetry-specification/pull/263)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since this will likely be merged anyway (3 approvals), maybe just remove the comment?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not yet, according to that spec PR the http.host must only be set if there is an http.host header.

span.set_attribute("http.host", host)

url = environ.get("REQUEST_URI") or environ.get("RAW_URI")

if url:
if url[0] == "/":
# We assume that no scheme-relative URLs will be in url here.
# After all, if a request is made to http://myserver//foo, we may get
# //foo which looks like scheme-relative but isn't.
url = environ["wsgi.url_scheme"] + "://" + host + url
elif not url.startswith(environ["wsgi.url_scheme"] + ":"):
# Something fishy is in RAW_URL. Let's fall back to request_uri()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when does this happen? Just out of curiosity. Sounds like a common occurence.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you think this is common? Personally, I don't really know how often that occurs in practice, but I imagine a buggy client could send a line like GET foobar HTTP/1.0 instead of GET /foobar HTTP/1.0.

url = wsgiref_util.request_uri(environ)
else:
url = wsgiref_util.request_uri(environ)

span.set_attribute("http.url", url)


def add_response_attributes(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is now a public API (no underscore & docstring), I thought it is better to add the response_haders argument, since we might very well need it in the future, should we ever want to capture certain response headers as span attributes.

span, start_response_status, response_headers
): # pylint: disable=unused-argument
"""Adds HTTP response attributes to span using the arguments
passed to a PEP3333-conforming start_response callable."""

status_code, status_text = start_response_status.split(" ", 1)
span.set_attribute("http.status_text", status_text)

try:
status_code = int(status_code)
except ValueError:
pass
else:
span.set_attribute("http.status_code", status_code)


def get_default_span_name(environ):
"""Calculates a (generic) span name for an incoming HTTP request based on the PEP3333 conforming WSGI environ."""

# TODO: Update once
# https://github.com/open-telemetry/opentelemetry-specification/issues/270
# is resolved
return environ.get("PATH_INFO", "/")


class OpenTelemetryMiddleware:
"""The WSGI application middleware.

This class is used to create and annotate spans for requests to a WSGI
application.
This class is a PEP 3333 conforming WSGI middleware that starts and
annotates spans for any requests it is invoked with.

Args:
wsgi: The WSGI application callable.
wsgi: The WSGI application callable to forward requests to.
"""

def __init__(self, wsgi):
self.wsgi = wsgi

@staticmethod
def _add_request_attributes(span, environ):
span.set_attribute("component", "http")
span.set_attribute("http.method", environ["REQUEST_METHOD"])

host = environ.get("HTTP_HOST")
if not host:
host = environ["SERVER_NAME"]
port = environ["SERVER_PORT"]
scheme = environ["wsgi.url_scheme"]
if (
scheme == "http"
and port != "80"
or scheme == "https"
and port != "443"
):
host += ":" + port

# NOTE: Nonstandard
span.set_attribute("http.host", host)

url = environ.get("REQUEST_URI") or environ.get("RAW_URI")

if url:
if url[0] == "/":
# We assume that no scheme-relative URLs will be in url here.
# After all, if a request is made to http://myserver//foo, we may get
# //foo which looks like scheme-relative but isn't.
url = environ["wsgi.url_scheme"] + "://" + host + url
elif not url.startswith(environ["wsgi.url_scheme"] + ":"):
# Something fishy is in RAW_URL. Let's fall back to request_uri()
url = wsgiref_util.request_uri(environ)
else:
url = wsgiref_util.request_uri(environ)

span.set_attribute("http.url", url)

@staticmethod
def _add_response_attributes(span, status):
status_code, status_text = status.split(" ", 1)
span.set_attribute("http.status_text", status_text)

try:
status_code = int(status_code)
except ValueError:
pass
else:
span.set_attribute("http.status_code", status_code)

@classmethod
def _create_start_response(cls, span, start_response):
def _create_start_response(span, start_response):
@functools.wraps(start_response)
def _start_response(status, response_headers, *args, **kwargs):
cls._add_response_attributes(span, status)
add_response_attributes(span, status, response_headers)
return start_response(status, response_headers, *args, **kwargs)

return _start_response
Expand All @@ -105,17 +138,20 @@ def __call__(self, environ, start_response):
start_response: The WSGI start_response callable.
"""

start_timestamp = time_ns()

tracer = trace.tracer()
path_info = environ["PATH_INFO"] or "/"
parent_span = propagators.extract(_get_header_from_environ, environ)
parent_span = propagators.extract(get_header_from_environ, environ)
span_name = get_default_span_name(environ)

span = tracer.create_span(
path_info, parent_span, kind=trace.SpanKind.SERVER
span_name, parent_span, kind=trace.SpanKind.SERVER
)
span.start()
span.start(start_timestamp)

try:
with tracer.use_span(span):
self._add_request_attributes(span, environ)
add_request_attributes(span, environ)
start_response = self._create_start_response(
span, start_response
)
Expand All @@ -127,21 +163,6 @@ def __call__(self, environ, start_response):
raise


def _get_header_from_environ(
environ: dict, header_name: str
) -> typing.List[str]:
"""Retrieve the header value from the wsgi environ dictionary.

Returns:
A string with the header value if it exists, else None.
"""
environ_key = "HTTP_" + header_name.upper().replace("-", "_")
value = environ.get(environ_key)
if value:
return [value]
return []


# Put this in a subfunction to not delay the call to the wrapped
# WSGI application (instrumentation should change the application
# behavior as little as possible).
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Note: This package is not named "flask" because of
# https://github.com/PyCQA/pylint/issues/2648

import logging

from flask import request as flask_request

import opentelemetry.ext.wsgi as otel_wsgi
from opentelemetry import propagators, trace
from opentelemetry.util import time_ns

logger = logging.getLogger(__name__)

_ENVIRON_STARTTIME_KEY = object()
_ENVIRON_SPAN_KEY = object()
_ENVIRON_ACTIVATION_KEY = object()


def wrap_flask(flask):
wsgi = flask.wsgi_app

def wrapped_app(environ, start_response):
# We want to measure the time for route matching, etc.
# In theory, we could start the span here and use update_name later
# but that API is "highly discouraged" so we better avoid it.
environ[_ENVIRON_STARTTIME_KEY] = time_ns()

def _start_response(status, response_headers, *args, **kwargs):
span = flask_request.environ.get(_ENVIRON_SPAN_KEY)
if span:
otel_wsgi.add_response_attributes(
span, status, response_headers
)
else:
logger.warning(
"Flask environ's OTel span missing at _start_response(%s)",
status,
)
return start_response(status, response_headers, *args, **kwargs)

return wsgi(environ, _start_response)

flask.wsgi_app = wrapped_app

flask.before_request(_before_flask_request)
flask.teardown_request(_teardown_flask_request)


def _before_flask_request():
environ = flask_request.environ
span_name = flask_request.endpoint or otel_wsgi.get_default_span_name(
environ
)
parent_span = propagators.extract(
otel_wsgi.get_header_from_environ, environ
)

tracer = trace.tracer()

span = tracer.create_span(
span_name, parent_span, kind=trace.SpanKind.SERVER
)
span.start(environ.get(_ENVIRON_STARTTIME_KEY))
activation = tracer.use_span(span, end_on_exit=True)
activation.__enter__()
environ[_ENVIRON_ACTIVATION_KEY] = activation
environ[_ENVIRON_SPAN_KEY] = span
otel_wsgi.add_request_attributes(span, environ)
if flask_request.url_rule:
# For 404 that result from no route found, etc, we don't have a url_rule.
span.set_attribute("http.route", flask_request.url_rule.rule)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this default to the method then? as recommended in the opentelemetry-specification#270?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

open-telemetry/opentelemetry-specification#270 only talks about the span name, the http.route attribute is unrelated.



def _teardown_flask_request(exc):
activation = flask_request.environ.get(_ENVIRON_ACTIVATION_KEY)
if not activation:
logger.warning(
"Flask environ's OTel activation missing at _teardown_flask_request(%s)",
exc,
)
return

if exc is None:
activation.__exit__(None, None, None)
else:
activation.__exit__(
type(exc), exc, getattr(exc, "__traceback__", None)
)
Loading