Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fallback for find_files on git/hg archives #752

Merged
merged 5 commits into from
Aug 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ setuptools.finalize_distribution_options =
setuptools_scm.files_command =
.hg = setuptools_scm.file_finder_hg:hg_find_files
.git = setuptools_scm.file_finder_git:git_find_files
setuptools_scm.files_command_fallback =
.hg_archival.txt = setuptools_scm.file_finder_hg:hg_archive_find_files
.git_archival.txt = setuptools_scm.file_finder_git:git_archive_find_files
setuptools_scm.local_scheme =
node-and-date = setuptools_scm.version:get_local_node_and_date
node-and-timestamp = setuptools_scm.version:get_local_node_and_timestamp
Expand Down
17 changes: 12 additions & 5 deletions src/setuptools_scm/file_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@


def scm_find_files(
path: _t.PathT, scm_files: set[str], scm_dirs: set[str]
path: _t.PathT,
scm_files: set[str],
scm_dirs: set[str],
force_all_files: bool = False,
) -> list[str]:
""" setuptools compatible file finder that follows symlinks

Expand All @@ -20,6 +23,7 @@ def scm_find_files(
(including symlinks to directories)
- scm_dirs: set of scm controlled directories
(including directories containing no scm controlled files)
- force_all_files: ignore ``scm_files`` and ``scm_dirs`` and list everything.

scm_files and scm_dirs must be absolute with symlinks resolved (realpath),
with normalized case (normcase)
Expand All @@ -38,7 +42,7 @@ def _link_not_in_scm(n: str) -> bool:
fn = os.path.join(realdirpath, os.path.normcase(n))
return os.path.islink(fn) and fn not in scm_files

if realdirpath not in scm_dirs:
if not force_all_files and realdirpath not in scm_dirs:
# directory not in scm, don't walk it's content
dirnames[:] = []
continue
Expand All @@ -54,13 +58,16 @@ def _link_not_in_scm(n: str) -> bool:
# symlink loop protection
dirnames[:] = []
continue
dirnames[:] = [dn for dn in dirnames if not _link_not_in_scm(dn)]
dirnames[:] = [
dn for dn in dirnames if force_all_files or not _link_not_in_scm(dn)
]
for filename in filenames:
if _link_not_in_scm(filename):
if not force_all_files and _link_not_in_scm(filename):
continue
# dirpath + filename with symlinks preserved
fullfilename = os.path.join(dirpath, filename)
if os.path.normcase(os.path.realpath(fullfilename)) in scm_files:
is_tracked = os.path.normcase(os.path.realpath(fullfilename)) in scm_files
if force_all_files or is_tracked:
res.append(os.path.join(path, os.path.relpath(fullfilename, realpath)))
seen.add(realdirpath)
return res
Expand Down
18 changes: 18 additions & 0 deletions src/setuptools_scm/file_finder_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from .file_finder import is_toplevel_acceptable
from .file_finder import scm_find_files
from .utils import data_from_mime
from .utils import do_ex
from .utils import trace

Expand Down Expand Up @@ -101,3 +102,20 @@ def git_find_files(path: _t.PathT = "") -> list[str]:
trace("toplevel mismatch", toplevel, fullpath)
git_files, git_dirs = _git_ls_files_and_dirs(toplevel)
return scm_find_files(path, git_files, git_dirs)


def git_archive_find_files(path: _t.PathT = "") -> list[str]:
    """List all files below ``path`` when it looks like an unpacked git archive.

    This function assumes that ``path`` is obtained from a git archive
    and therefore all the files that should be ignored were already removed.

    :param path: directory expected to contain ``.git_archival.txt``.
    :return: the result of ``scm_find_files`` called with
        ``force_all_files=True``, or an empty list when ``.git_archival.txt``
        is absent, is not a regular file, or its ``node`` entry still
        contains an unexpanded ``$Format`` placeholder.
    """
    archival = os.path.join(path, ".git_archival.txt")
    # ``isfile`` rather than ``exists``: only a regular file (or a symlink to
    # one) is handed to ``data_from_mime``; a directory of that name is not
    # an archive marker.
    if not os.path.isfile(archival):
        return []

    data = data_from_mime(archival)

    if "$Format" in data.get("node", ""):
        # Substitutions have not been performed, so not a reliable archive
        return []

    trace("git archive detected - fallback to listing all files")
    return scm_find_files(path, set(), set(), force_all_files=True)
23 changes: 23 additions & 0 deletions src/setuptools_scm/file_finder_hg.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@

import os
import subprocess
from typing import TYPE_CHECKING

from .file_finder import is_toplevel_acceptable
from .file_finder import scm_find_files
from .utils import data_from_mime
from .utils import do_ex
from .utils import trace

if TYPE_CHECKING:
from . import _types as _t


def _hg_toplevel(path: str) -> str | None:
Expand Down Expand Up @@ -49,3 +55,20 @@ def hg_find_files(path: str = "") -> list[str]:
assert toplevel is not None
hg_files, hg_dirs = _hg_ls_files_and_dirs(toplevel)
return scm_find_files(path, hg_files, hg_dirs)


def hg_archive_find_files(path: _t.PathT = "") -> list[str]:
    """List all files below ``path`` when it looks like an unpacked hg archive.

    This function assumes that ``path`` is obtained from a mercurial archive
    and therefore all the files that should be ignored were already removed.

    :param path: directory expected to contain ``.hg_archival.txt``.
    :return: the result of ``scm_find_files`` called with
        ``force_all_files=True``, or an empty list when ``.hg_archival.txt``
        is absent, is not a regular file, or has no ``node`` entry.
    """
    archival = os.path.join(path, ".hg_archival.txt")
    # ``isfile`` rather than ``exists``: only a regular file (or a symlink to
    # one) is handed to ``data_from_mime``; a directory of that name is not
    # an archive marker.
    if not os.path.isfile(archival):
        return []

    data = data_from_mime(archival)

    if "node" not in data:
        # Ensure file is valid
        return []

    trace("hg archive detected - fallback to listing all files")
    return scm_find_files(path, set(), set(), force_all_files=True)
6 changes: 5 additions & 1 deletion src/setuptools_scm/integration.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import itertools
import os
import warnings
from typing import Any
Expand Down Expand Up @@ -91,7 +92,10 @@ def version_keyword(


def find_files(path: _t.PathT = "") -> list[str]:
for ep in iter_entry_points("setuptools_scm.files_command"):
for ep in itertools.chain(
iter_entry_points("setuptools_scm.files_command"),
iter_entry_points("setuptools_scm.files_command_fallback"),
):
command = ep.load()
if isinstance(command, str):
# this technique is deprecated
Expand Down
30 changes: 30 additions & 0 deletions testing/test_file_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,33 @@ def test_symlink_not_in_scm_while_target_is(inwd: WorkDir) -> None:
@pytest.mark.skip_commit
def test_not_commited(inwd: WorkDir) -> None:
assert find_files() == []


def test_unexpanded_git_archival(wd: WorkDir, monkeypatch: pytest.MonkeyPatch) -> None:
    """Unexpanded ``$Format`` placeholders must not trigger the fallback.

    A ``.git_archival.txt`` whose substitutions were never performed is not
    treated as an archive marker, so no files are listed.
    """
    root = wd.cwd
    monkeypatch.chdir(root)

    archival = root / ".git_archival.txt"
    archival.write_text("node: $Format:%H$", encoding="utf-8")

    # An extra file that would be listed if the fallback kicked in.
    extra = root / "file1.txt"
    extra.touch()

    listed = find_files()
    assert listed == []


@pytest.mark.parametrize("archive_file", (".git_archival.txt", ".hg_archival.txt"))
def test_archive(
    wd: WorkDir, monkeypatch: pytest.MonkeyPatch, archive_file: str
) -> None:
    """A valid archival file makes ``find_files`` list every file present.

    When the archival file carries an expanded ``node`` entry, the directory
    is treated as an unpacked archive: the archival file itself, regular
    files and links are all expected in the result.
    """
    monkeypatch.chdir(wd.cwd)
    sha = "a1bda3d984d1a40d7b00ae1d0869354d6d503001"
    (wd.cwd / archive_file).write_text(f"node: {sha}", encoding="utf-8")
    (wd.cwd / "data").mkdir()
    (wd.cwd / "data" / "datafile").touch()

    datalink = wd.cwd / "data" / "datalink"
    if sys.platform != "win32":
        # A relative symlink target is resolved against the directory that
        # contains the link (``data/``), so it must be the bare file name;
        # "data/datafile" would point at the non-existent data/data/datafile.
        datalink.symlink_to("datafile")
    else:
        # Hard-link source is resolved against the cwd set by ``chdir`` above.
        os.link("data/datafile", datalink)

    assert set(find_files()) == _sep({archive_file, "data/datafile", "data/datalink"})