Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle tree paths in GitHub identifiers #5324

Merged
merged 1 commit into from
Dec 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions fiftyone/plugins/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import fiftyone as fo
import fiftyone.constants as foc
from fiftyone.core.config import locate_app_config
import fiftyone.core.storage as fos
import fiftyone.core.utils as fou
from fiftyone.plugins.definitions import PluginDefinition
from fiftyone.utils.github import GitHubRepository
Expand Down Expand Up @@ -172,6 +173,8 @@ def download_plugin(url_or_gh_repo, plugin_names=None, overwrite=False):
``https://github.com/<user>/<repo>/tree/<branch>`` or
``https://github.com/<user>/<repo>/commit/<commit>``
- a GitHub ref string like ``<user>/<repo>[/<ref>]``
- a GitHub tree path like
``https://github.com/<user>/<repo>/tree/<branch>/<path>``
- a publicly accessible URL of an archive (eg zip or tar) file

plugin_names (None): a plugin name or iterable of plugin names to
Expand All @@ -186,11 +189,11 @@ def download_plugin(url_or_gh_repo, plugin_names=None, overwrite=False):
repo = None
if etaw.is_url(url_or_gh_repo):
if "github" in url_or_gh_repo:
repo = GitHubRepository(url_or_gh_repo)
repo = GitHubRepository(url_or_gh_repo, safe=True)
else:
url = url_or_gh_repo
else:
repo = GitHubRepository(url_or_gh_repo)
repo = GitHubRepository(url_or_gh_repo, safe=True)

if etau.is_str(plugin_names):
plugin_names = {plugin_names}
Expand All @@ -201,18 +204,33 @@ def download_plugin(url_or_gh_repo, plugin_names=None, overwrite=False):

downloaded_plugins = {}
with etau.TempDir() as tmpdir:
root_dir = tmpdir

if repo is not None:
logger.info(f"Downloading {repo.identifier}...")
repo.download(tmpdir)

# Limit search to tree path if requested
if repo.safe_path is not None:
# There should be exactly one subdir; if there's not then
# something is wrong and we better not limit the search tree
subdirs = fos.list_subdirs(root_dir)
if len(subdirs) == 1:
root_dir = os.path.join(
root_dir,
subdirs[0],
*repo.safe_path.split("/"),
)
else:
logger.info(f"Downloading {url}...")
_download_archive(url, tmpdir)

metadata_paths = list(
_iter_plugin_metadata_files(root_dir=tmpdir, strict=True)
_iter_plugin_metadata_files(root_dir=root_dir, strict=True)
)
if not metadata_paths:
logger.info(f"No plugin YAML files found in {url}")
src = repo.identifier if repo is not None else url
logger.info(f"No plugin YAML files found in {src}")

for metadata_path in metadata_paths:
try:
Expand Down
21 changes: 18 additions & 3 deletions fiftyone/plugins/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ def find_plugins(gh_repo, path=None, info=False):
plugins = fopu.find_plugins("https://github.com/voxel51/fiftyone-plugins")
print(plugins)

# Search a specific tree root
plugins = fopu.find_plugins(
"https://github.com/voxel51/fiftyone-plugins/tree/main/plugins/annotation"
)
print(plugins)

# Search a specific branch + subdirectory
plugins = fopu.find_plugins(
"https://github.com/voxel51/fiftyone-plugins/tree/main",
Expand All @@ -87,24 +93,33 @@ def find_plugins(gh_repo, path=None, info=False):
print(plugins)

Args:
gh_repo: the GitHub repository or identifier, which can be:
gh_repo: the GitHub repository, identifier, or tree path, which can be:

- a GitHub repo URL like ``https://github.com/<user>/<repo>``
- a GitHub ref like
``https://github.com/<user>/<repo>/tree/<branch>`` or
``https://github.com/<user>/<repo>/commit/<commit>``
- a GitHub ref string like ``<user>/<repo>[/<ref>]``
- a GitHub tree path like
``https://github.com/<user>/<repo>/tree/<branch>/<path>``

path (None): an optional subdirectory of the repository to which to
restrict the search
restrict the search. If ``gh_repo`` also contains a ``<path>``, it
is prepended to this value
info (False): whether to retrieve full plugin info for each plugin
(True) or just return paths to the fiftyone YAML files (False)

Returns:
a list of paths to fiftyone YAML files or plugin info dicts
"""
root = path
repo = GitHubRepository(gh_repo)
repo = GitHubRepository(gh_repo, safe=True)

if repo.safe_path is not None:
if path is not None:
root = repo.safe_path + "/" + path
else:
root = repo.safe_path

paths = []
for d in repo.list_repo_contents(recursive=True):
Expand Down
60 changes: 51 additions & 9 deletions fiftyone/utils/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,13 @@ class GitHubRepository(object):
``https://github.com/<user>/<repo>/tree/<branch>`` or
``https://github.com/<user>/<repo>/commit/<commit>``
- a GitHub ref string like ``<user>/<repo>[/<ref>]``
safe (False): whether to allow ``repo`` to contain a tree path like
``https://github.com/<user>/<repo>/tree/<branch>/<path>``. If
``safe=True`` and a ``<path>`` is found, it is extracted and stored
in the :meth:`safe_path` property
"""

def __init__(self, repo):
def __init__(self, repo, safe=False):
if etaw.is_url(repo):
params = self.parse_url(repo)
else:
Expand All @@ -45,8 +49,41 @@ def __init__(self, repo):
self._user = params.get("user")
self._repo = params.get("repo")
self._ref = params.get("ref", None)
self._safe_path = None
self._session = None

if safe:
self._handle_safe_path()

def _handle_safe_path(self):
if self._ref is None or "/" not in self._ref:
return

api_root = f"https://api.github.com/repos/{self.user}/{self.repo}"

# Unfortunately, branch/tag names may contain slashes, so the only way
# to disambiguate <ref>/<path> is to query the API to see what exists
chunks = self._ref.split("/")
for i in range(len(chunks), 0, -1):
ref = "/".join(chunks[:i])
urls = [
f"{api_root}/git/ref/heads/{ref}", # branch
f"{api_root}/git/ref/tags/{ref}", # tag
]
if "/" not in ref:
urls.append(f"{api_root}/commits/{ref}") # commit

for url in urls:
try:
_ = self._get(url)
if ref != self._ref:
self._ref = ref
self._safe_path = "/".join(chunks[i:])

return
except:
pass

@property
def user(self):
"""The username of the repo."""
Expand All @@ -62,9 +99,10 @@ def ref(self):
"""The ref (e.g. branch, tag, commit hash), if any."""
return self._ref

@ref.setter
def ref(self, ref):
self._ref = ref
@property
def safe_path(self):
"""The path that was extracted from the provided ref, if any."""
return self._safe_path

@property
def identifier(self):
Expand Down Expand Up @@ -183,24 +221,28 @@ def _get_session(self):

def _make_session(self):
session = requests.Session()
token = os.environ.get("GITHUB_TOKEN", None)
token = self._get_token()
if token:
logger.debug("Using GitHub token as authorization")
session.headers.update({"Authorization": "token " + token})

return session

def _get_token(self):
return os.environ.get("GITHUB_TOKEN", None)

def _get(self, url, json=True):
try:
resp = self._get_session().get(url)
resp.raise_for_status()
except requests.exceptions.HTTPError as e:
if e.response.status_code in (403, 404):
if e.response.status_code in (403, 404) and not self._get_token():
raise requests.exceptions.HTTPError(
(
"You can interact with private repositories and avoid "
"rate limit errors by providing a personal access "
"token via the 'GITHUB_TOKEN' environment variable"
f"{e}.\n\nDid you know? You can interact with private "
"repositories and avoid rate limit errors by "
"providing a personal access token via the "
"'GITHUB_TOKEN' environment variable"
),
response=e.response,
)
Expand Down
Loading