From 93580f76374d370a8519702f3c980143a77af8f2 Mon Sep 17 00:00:00 2001 From: brimoor Date: Fri, 27 Dec 2024 15:54:56 -0600 Subject: [PATCH] handle tree paths in GitHub identifiers --- fiftyone/plugins/core.py | 26 ++++++++++++++--- fiftyone/plugins/utils.py | 21 ++++++++++++-- fiftyone/utils/github.py | 60 +++++++++++++++++++++++++++++++++------ 3 files changed, 91 insertions(+), 16 deletions(-) diff --git a/fiftyone/plugins/core.py b/fiftyone/plugins/core.py index 7e1c8830190..1862e5c8731 100644 --- a/fiftyone/plugins/core.py +++ b/fiftyone/plugins/core.py @@ -22,6 +22,7 @@ import fiftyone as fo import fiftyone.constants as foc from fiftyone.core.config import locate_app_config +import fiftyone.core.storage as fos import fiftyone.core.utils as fou from fiftyone.plugins.definitions import PluginDefinition from fiftyone.utils.github import GitHubRepository @@ -172,6 +173,8 @@ def download_plugin(url_or_gh_repo, plugin_names=None, overwrite=False): ``https://github.com///tree/`` or ``https://github.com///commit/`` - a GitHub ref string like ``/[/]`` + - a GitHub tree path like + ``https://github.com///tree//`` - a publicly accessible URL of an archive (eg zip or tar) file plugin_names (None): a plugin name or iterable of plugin names to @@ -186,11 +189,11 @@ def download_plugin(url_or_gh_repo, plugin_names=None, overwrite=False): repo = None if etaw.is_url(url_or_gh_repo): if "github" in url_or_gh_repo: - repo = GitHubRepository(url_or_gh_repo) + repo = GitHubRepository(url_or_gh_repo, safe=True) else: url = url_or_gh_repo else: - repo = GitHubRepository(url_or_gh_repo) + repo = GitHubRepository(url_or_gh_repo, safe=True) if etau.is_str(plugin_names): plugin_names = {plugin_names} @@ -201,18 +204,33 @@ def download_plugin(url_or_gh_repo, plugin_names=None, overwrite=False): downloaded_plugins = {} with etau.TempDir() as tmpdir: + root_dir = tmpdir + if repo is not None: logger.info(f"Downloading {repo.identifier}...") repo.download(tmpdir) + + # Limit search to tree path if requested + if repo.safe_path is not None: + # There should be exactly one subdir; if there's not then + # something is wrong and we better not limit the search tree + subdirs = fos.list_subdirs(root_dir) + if len(subdirs) == 1: + root_dir = os.path.join( + root_dir, + subdirs[0], + *repo.safe_path.split("/"), + ) else: logger.info(f"Downloading {url}...") _download_archive(url, tmpdir) metadata_paths = list( - _iter_plugin_metadata_files(root_dir=tmpdir, strict=True) + _iter_plugin_metadata_files(root_dir=root_dir, strict=True) ) if not metadata_paths: - logger.info(f"No plugin YAML files found in {url}") + src = repo.identifier if repo is not None else url + logger.info(f"No plugin YAML files found in {src}") for metadata_path in metadata_paths: try: diff --git a/fiftyone/plugins/utils.py b/fiftyone/plugins/utils.py index 58c1bc69236..3936e9858b9 100644 --- a/fiftyone/plugins/utils.py +++ b/fiftyone/plugins/utils.py @@ -79,6 +79,12 @@ def find_plugins(gh_repo, path=None, info=False): plugins = fopu.find_plugins("https://github.com/voxel51/fiftyone-plugins") print(plugins) + # Search a specific tree root + plugins = fopu.find_plugins( + "https://github.com/voxel51/fiftyone-plugins/tree/main/plugins/annotation" + ) + print(plugins) + # Search a specific branch + subdirectory plugins = fopu.find_plugins( "https://github.com/voxel51/fiftyone-plugins/tree/main", @@ -87,16 +93,19 @@ def find_plugins(gh_repo, path=None, info=False): print(plugins) Args: - gh_repo: the GitHub repository or identifier, which can be: + gh_repo: the GitHub repository, identifier, or tree path, which can be: - a GitHub repo URL like ``https://github.com//`` - a GitHub ref like ``https://github.com///tree/`` or ``https://github.com///commit/`` - a GitHub ref string like ``/[/]`` + - a GitHub tree path like + ``https://github.com///tree//`` path (None): an optional subdirectory of the repository to which to - restrict the search + restrict the search. If ``gh_repo`` also contains a ````, it + is prepended to this value info (False): whether to retrieve full plugin info for each plugin (True) or just return paths to the fiftyone YAML files (False) @@ -104,7 +113,13 @@ def find_plugins(gh_repo, path=None, info=False): a list of paths to fiftyone YAML files or plugin info dicts """ root = path - repo = GitHubRepository(gh_repo) + repo = GitHubRepository(gh_repo, safe=True) + + if repo.safe_path is not None: + if path is not None: + root = repo.safe_path + "/" + path + else: + root = repo.safe_path paths = [] for d in repo.list_repo_contents(recursive=True): diff --git a/fiftyone/utils/github.py b/fiftyone/utils/github.py index a2bcd47f85e..2d2aa529ffb 100644 --- a/fiftyone/utils/github.py +++ b/fiftyone/utils/github.py @@ -34,9 +34,13 @@ class GitHubRepository(object): ``https://github.com///tree/`` or ``https://github.com///commit/`` - a GitHub ref string like ``/[/]`` + safe (False): whether to allow ``repo`` to contain a tree path like + ``https://github.com///tree//``. If + ``safe=True`` and a ```` is found, it is extracted and stored + in the :meth:`safe_path` property """ - def __init__(self, repo): + def __init__(self, repo, safe=False): if etaw.is_url(repo): params = self.parse_url(repo) else: @@ -45,8 +49,41 @@ def __init__(self, repo): self._user = params.get("user") self._repo = params.get("repo") self._ref = params.get("ref", None) + self._safe_path = None self._session = None + if safe: + self._handle_safe_path() + + def _handle_safe_path(self): + if self._ref is None or "/" not in self._ref: + return + + api_root = f"https://api.github.com/repos/{self.user}/{self.repo}" + + # Unfortunately, branch/tag names may contain slashes, so the only way + # to disambiguate / is to query the API to see what exists + chunks = self._ref.split("/") + for i in range(len(chunks), 0, -1): + ref = "/".join(chunks[:i]) + urls = [ + f"{api_root}/git/ref/heads/{ref}", # branch + f"{api_root}/git/ref/tags/{ref}", # tag + ] + if "/" not in ref: + urls.append(f"{api_root}/commits/{ref}") # commit + + for url in urls: + try: + _ = self._get(url) + if ref != self._ref: + self._ref = ref + self._safe_path = "/".join(chunks[i:]) + + return + except: + pass + @property def user(self): """The username of the repo.""" @@ -62,9 +99,10 @@ def ref(self): """The ref (e.g. branch, tag, commit hash), if any.""" return self._ref - @ref.setter - def ref(self, ref): - self._ref = ref + @property + def safe_path(self): + """The path that was extracted from the provided ref, if any.""" + return self._safe_path @property def identifier(self): @@ -183,24 +221,28 @@ def _get_session(self): def _make_session(self): session = requests.Session() - token = os.environ.get("GITHUB_TOKEN", None) + token = self._get_token() if token: logger.debug("Using GitHub token as authorization") session.headers.update({"Authorization": "token " + token}) return session + def _get_token(self): + return os.environ.get("GITHUB_TOKEN", None) + def _get(self, url, json=True): try: resp = self._get_session().get(url) resp.raise_for_status() except requests.exceptions.HTTPError as e: - if e.response.status_code in (403, 404): + if e.response.status_code in (403, 404) and not self._get_token(): raise requests.exceptions.HTTPError( ( - "You can interact with private repositories and avoid " - "rate limit errors by providing a personal access " - "token via the 'GITHUB_TOKEN' environment variable" + f"{e}.\n\nDid you know? You can interact with private " + "repositories and avoid rate limit errors by " + "providing a personal access token via the " + "'GITHUB_TOKEN' environment variable" ), response=e.response, )