Skip to content

Commit

Permalink
Merge pull request #5324 from voxel51/bugfix/github-tree-path
Browse files Browse the repository at this point in the history
Handle tree paths in GitHub identifiers
  • Loading branch information
brimoor authored Dec 28, 2024
2 parents 18dde78 + 93580f7 commit 35c5b78
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 16 deletions.
26 changes: 22 additions & 4 deletions fiftyone/plugins/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import fiftyone as fo
import fiftyone.constants as foc
from fiftyone.core.config import locate_app_config
import fiftyone.core.storage as fos
import fiftyone.core.utils as fou
from fiftyone.plugins.definitions import PluginDefinition
from fiftyone.utils.github import GitHubRepository
Expand Down Expand Up @@ -172,6 +173,8 @@ def download_plugin(url_or_gh_repo, plugin_names=None, overwrite=False):
``https://github.com/<user>/<repo>/tree/<branch>`` or
``https://github.com/<user>/<repo>/commit/<commit>``
- a GitHub ref string like ``<user>/<repo>[/<ref>]``
- a GitHub tree path like
``https://github.com/<user>/<repo>/tree/<branch>/<path>``
- a publicly accessible URL of an archive (eg zip or tar) file
plugin_names (None): a plugin name or iterable of plugin names to
Expand All @@ -186,11 +189,11 @@ def download_plugin(url_or_gh_repo, plugin_names=None, overwrite=False):
repo = None
if etaw.is_url(url_or_gh_repo):
if "github" in url_or_gh_repo:
repo = GitHubRepository(url_or_gh_repo)
repo = GitHubRepository(url_or_gh_repo, safe=True)
else:
url = url_or_gh_repo
else:
repo = GitHubRepository(url_or_gh_repo)
repo = GitHubRepository(url_or_gh_repo, safe=True)

if etau.is_str(plugin_names):
plugin_names = {plugin_names}
Expand All @@ -201,18 +204,33 @@ def download_plugin(url_or_gh_repo, plugin_names=None, overwrite=False):

downloaded_plugins = {}
with etau.TempDir() as tmpdir:
root_dir = tmpdir

if repo is not None:
logger.info(f"Downloading {repo.identifier}...")
repo.download(tmpdir)

# Limit search to tree path if requested
if repo.safe_path is not None:
# There should be exactly one subdir; if there's not then
# something is wrong and we better not limit the search tree
subdirs = fos.list_subdirs(root_dir)
if len(subdirs) == 1:
root_dir = os.path.join(
root_dir,
subdirs[0],
*repo.safe_path.split("/"),
)
else:
logger.info(f"Downloading {url}...")
_download_archive(url, tmpdir)

metadata_paths = list(
_iter_plugin_metadata_files(root_dir=tmpdir, strict=True)
_iter_plugin_metadata_files(root_dir=root_dir, strict=True)
)
if not metadata_paths:
logger.info(f"No plugin YAML files found in {url}")
src = repo.identifier if repo is not None else url
logger.info(f"No plugin YAML files found in {src}")

for metadata_path in metadata_paths:
try:
Expand Down
21 changes: 18 additions & 3 deletions fiftyone/plugins/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ def find_plugins(gh_repo, path=None, info=False):
plugins = fopu.find_plugins("https://github.com/voxel51/fiftyone-plugins")
print(plugins)
# Search a specific tree root
plugins = fopu.find_plugins(
"https://github.com/voxel51/fiftyone-plugins/tree/main/plugins/annotation"
)
print(plugins)
# Search a specific branch + subdirectory
plugins = fopu.find_plugins(
"https://github.com/voxel51/fiftyone-plugins/tree/main",
Expand All @@ -87,24 +93,33 @@ def find_plugins(gh_repo, path=None, info=False):
print(plugins)
Args:
gh_repo: the GitHub repository or identifier, which can be:
gh_repo: the GitHub repository, identifier, or tree path, which can be:
- a GitHub repo URL like ``https://github.com/<user>/<repo>``
- a GitHub ref like
``https://github.com/<user>/<repo>/tree/<branch>`` or
``https://github.com/<user>/<repo>/commit/<commit>``
- a GitHub ref string like ``<user>/<repo>[/<ref>]``
- a GitHub tree path like
``https://github.com/<user>/<repo>/tree/<branch>/<path>``
path (None): an optional subdirectory of the repository to which to
restrict the search
restrict the search. If ``gh_repo`` also contains a ``<path>``, it
is prepended to this value
info (False): whether to retrieve full plugin info for each plugin
(True) or just return paths to the fiftyone YAML files (False)
Returns:
a list of paths to fiftyone YAML files or plugin info dicts
"""
root = path
repo = GitHubRepository(gh_repo)
repo = GitHubRepository(gh_repo, safe=True)

if repo.safe_path is not None:
if path is not None:
root = repo.safe_path + "/" + path
else:
root = repo.safe_path

paths = []
for d in repo.list_repo_contents(recursive=True):
Expand Down
60 changes: 51 additions & 9 deletions fiftyone/utils/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,13 @@ class GitHubRepository(object):
``https://github.com/<user>/<repo>/tree/<branch>`` or
``https://github.com/<user>/<repo>/commit/<commit>``
- a GitHub ref string like ``<user>/<repo>[/<ref>]``
safe (False): whether to allow ``repo`` to contain a tree path like
``https://github.com/<user>/<repo>/tree/<branch>/<path>``. If
``safe=True`` and a ``<path>`` is found, it is extracted and stored
in the :meth:`safe_path` property
"""

def __init__(self, repo):
def __init__(self, repo, safe=False):
if etaw.is_url(repo):
params = self.parse_url(repo)
else:
Expand All @@ -45,8 +49,41 @@ def __init__(self, repo):
self._user = params.get("user")
self._repo = params.get("repo")
self._ref = params.get("ref", None)
self._safe_path = None
self._session = None

if safe:
self._handle_safe_path()

def _handle_safe_path(self):
if self._ref is None or "/" not in self._ref:
return

api_root = f"https://api.github.com/repos/{self.user}/{self.repo}"

# Unfortunately, branch/tag names may contain slashes, so the only way
# to disambiguate <ref>/<path> is to query the API to see what exists
chunks = self._ref.split("/")
for i in range(len(chunks), 0, -1):
ref = "/".join(chunks[:i])
urls = [
f"{api_root}/git/ref/heads/{ref}", # branch
f"{api_root}/git/ref/tags/{ref}", # tag
]
if "/" not in ref:
urls.append(f"{api_root}/commits/{ref}") # commit

for url in urls:
try:
_ = self._get(url)
if ref != self._ref:
self._ref = ref
self._safe_path = "/".join(chunks[i:])

return
except:
pass

@property
def user(self):
"""The username of the repo."""
Expand All @@ -62,9 +99,10 @@ def ref(self):
"""The ref (e.g. branch, tag, commit hash), if any."""
return self._ref

@ref.setter
def ref(self, ref):
self._ref = ref
@property
def safe_path(self):
"""The path that was extracted from the provided ref, if any."""
return self._safe_path

@property
def identifier(self):
Expand Down Expand Up @@ -183,24 +221,28 @@ def _get_session(self):

def _make_session(self):
session = requests.Session()
token = os.environ.get("GITHUB_TOKEN", None)
token = self._get_token()
if token:
logger.debug("Using GitHub token as authorization")
session.headers.update({"Authorization": "token " + token})

return session

def _get_token(self):
return os.environ.get("GITHUB_TOKEN", None)

def _get(self, url, json=True):
try:
resp = self._get_session().get(url)
resp.raise_for_status()
except requests.exceptions.HTTPError as e:
if e.response.status_code in (403, 404):
if e.response.status_code in (403, 404) and not self._get_token():
raise requests.exceptions.HTTPError(
(
"You can interact with private repositories and avoid "
"rate limit errors by providing a personal access "
"token via the 'GITHUB_TOKEN' environment variable"
f"{e}.\n\nDid you know? You can interact with private "
"repositories and avoid rate limit errors by "
"providing a personal access token via the "
"'GITHUB_TOKEN' environment variable"
),
response=e.response,
)
Expand Down

0 comments on commit 35c5b78

Please sign in to comment.