Skip to content

Commit

Permalink
Merge pull request #901 from lsst/tickets/DM-41367
Browse files Browse the repository at this point in the history
DM-41367: Make Butler server deployable
  • Loading branch information
dhirving authored Nov 3, 2023
2 parents 97f57a3 + c50bf28 commit 60026e2
Show file tree
Hide file tree
Showing 9 changed files with 441 additions and 119 deletions.
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ dynamic = ["version"]

[project.optional-dependencies]
postgres = ["psycopg2"]
server = [
"fastapi",
"safir >= 3.4.0"
]
test = [
"pytest >= 3.2",
"pytest-openfiles >= 0.5.0",
Expand Down
98 changes: 98 additions & 0 deletions python/lsst/daf/butler/remote_butler/_authentication.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ()

import os
from fnmatch import fnmatchcase
from urllib.parse import urlparse

_SERVER_WHITELIST = ["*.lsst.cloud"]
_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY = "BUTLER_RUBIN_ACCESS_TOKEN"
_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY = "ACCESS_TOKEN"


def get_authentication_token_from_environment(server_url: str) -> str | None:
"""Search the environment for a Rubin Science Platform access token.
The token may come from the following sources in this order:
1. The ``BUTLER_RUBIN_ACCESS_TOKEN`` environment variable.
This environment variable is meant primarily for development use,
running outside the Rubin Science Platform. This token will be sent
to EVERY server that we connect to, so be careful when connecting to
untrusted servers.
2. The ``ACCESS_TOKEN`` environment variable.
This environment variable is provided by the Rubin Science Platform
Jupyter notebooks. It will only be returned if the given ``server_url``
is in a whitelist of servers known to belong to the Rubin Science
Platform. Because this is a long-lived token that can be used to
impersonate the user with their full access rights, it should not be
sent to untrusted servers.
Parameters
----------
server_url : `str`
URL of the Butler server that the caller intends to connect to.
Returns
-------
access_token: `str` or `None`
A Rubin Science Platform access token, or `None` if no token was
configured in the environment.
"""
explicit_butler_token = os.getenv(_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY)
if explicit_butler_token:
return explicit_butler_token

hostname = urlparse(server_url.lower()).hostname
hostname_in_whitelist = any(
(hostname and fnmatchcase(hostname, pattern) for pattern in _SERVER_WHITELIST)
)
notebook_token = os.getenv(_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY)
if hostname_in_whitelist and notebook_token:
return notebook_token

return None


def get_authentication_headers(access_token: str) -> dict[str, str]:
    """Build the HTTP headers that authenticate a user to Rubin Science
    Platform's Gafaelfawr service.

    Parameters
    ----------
    access_token : `str`
        Rubin Science Platform access token.

    Returns
    -------
    header_map : `dict` [`str`, `str`]
        Mapping from HTTP header name to header value.
    """
    # The token is an opaque bearer token; see https://sqr-069.lsst.io/
    bearer_value = f"Bearer {access_token}"
    return {"Authorization": bearer_value}
34 changes: 27 additions & 7 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

import httpx
from lsst.daf.butler import __version__
from lsst.daf.butler.repo_relocation import replaceRoot
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name

Expand All @@ -52,6 +53,7 @@
from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, Registry, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
from ..transfers import RepoExportContext
from ._authentication import get_authentication_headers, get_authentication_token_from_environment
from ._config import RemoteButlerConfigModel
from .server import FindDatasetModel

Expand All @@ -69,10 +71,22 @@ def __init__(
inferDefaults: bool = True,
# Parameters unique to RemoteButler
http_client: httpx.Client | None = None,
access_token: str | None = None,
**kwargs: Any,
):
butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
# There is a convention in Butler config files where <butlerRoot> in a
# configuration option refers to the directory containing the
# configuration file. We allow this for the remote butler's URL so
# that the server doesn't have to know which hostname it is being
# accessed from.
server_url_key = ("remote_butler", "url")
if server_url_key in butler_config:
butler_config[server_url_key] = replaceRoot(
butler_config[server_url_key], butler_config.configDir
)
self._config = RemoteButlerConfigModel.model_validate(butler_config)

self._dimensions: DimensionUniverse | None = None
# TODO: RegistryDefaults should have finish() called on it, but this
# requires getCollectionSummary() which is not yet implemented
Expand All @@ -83,8 +97,16 @@ def __init__(
# This is generally done for testing.
self._client = http_client
else:
server_url = str(self._config.remote_butler.url)
auth_headers = {}
if access_token is None:
access_token = get_authentication_token_from_environment(server_url)
if access_token is not None:
auth_headers = get_authentication_headers(access_token)

headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
self._client = httpx.Client(headers=headers, base_url=str(self._config.remote_butler.url))
headers.update(auth_headers)
self._client = httpx.Client(headers=headers, base_url=server_url)

def isWriteable(self) -> bool:
# Docstring inherited.
Expand Down Expand Up @@ -420,20 +442,18 @@ def pruneDatasets(
raise NotImplementedError()

def _get_url(self, path: str, version: str = "v1") -> str:
    """Form the complete path to an endpoint on the server.

    Parameters
    ----------
    path : `str`
        The relative path to the server endpoint.
    version : `str`, optional
        Version string to prepend to path. Defaults to "v1".

    Returns
    -------
    path : `str`
        The full path to the endpoint.
    """
    # No fixed "butler" prefix is added here.
    # NOTE(review): presumably the result is resolved against the HTTP
    # client's ``base_url``, which would carry any deployment-specific
    # prefix -- confirm against the callers.
    return f"{version}/{path}"
43 changes: 43 additions & 0 deletions python/lsst/daf/butler/remote_butler/server/_dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from functools import cache

from lsst.daf.butler import Butler

from ._config import get_config_from_env
from ._factory import Factory


@cache
def _make_global_butler() -> Butler:
    """Create the `Butler` shared by this server process.

    The function takes no arguments and is wrapped in `functools.cache`,
    so the Butler is constructed on the first call and the same instance
    is returned on every later call.

    Returns
    -------
    butler : `Butler`
        Butler built from the ``config_uri`` of the configuration returned
        by ``get_config_from_env``.
    """
    server_config = get_config_from_env()
    repository_uri = server_config.config_uri
    return Butler.from_config(repository_uri)


def factory_dependency() -> Factory:
    """Return a new `Factory` wrapping the cached global Butler.

    Returns
    -------
    factory : `Factory`
        A fresh `Factory` instance on each call; the Butler passed to it
        comes from ``_make_global_butler``.
    """
    shared_butler = _make_global_butler()
    return Factory(butler=shared_butler)
Loading

0 comments on commit 60026e2

Please sign in to comment.