Skip to content

Commit

Permalink
Merge pull request aboutcode-org#2 from nexB/visit-symlinks
Browse files Browse the repository at this point in the history
Modify walk(), is_file(), and is_dir() to have the option to follow symlinks
  • Loading branch information
JonoYang authored Oct 20, 2020
2 parents 3ae7ac3 + 94a95ae commit d89c6b6
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 23 deletions.
7 changes: 4 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ license_file = apache-2.0.LICENSE
name = commoncode
author = nexB. Inc. and others
author_email = info@aboutcode.org
description = commoncode
description = commoncode
long_description = file:README.rst
url = https://github.com/nexB/commoncode
classifiers =
Expand All @@ -16,7 +16,7 @@ classifiers =
Programming Language :: Python :: 3
Topic :: Software Development
Topic :: Utilities
keywords =
keywords =

[options]
package_dir=
Expand All @@ -33,7 +33,8 @@ install_requires =
requests >= 2.7.0, < 3.0.0
intbitset >= 2.3.0, < 3.0
saneyaml
setup_requires = setuptools_scm >= 4
typing >=3.6, < 3.7
setup_requires = setuptools_scm[toml] >= 4

[options.packages.find]
where=src
Expand Down
16 changes: 10 additions & 6 deletions src/commoncode/filetype.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,24 @@ def is_link(location):
return location and os.path.islink(location)


def is_file(location, follow_symlinks=False):
    """
    Return True if `location` is a file.

    Symbolic links (including broken ones) are rejected unless
    `follow_symlinks` is True, in which case the os.path.isfile() result is
    returned without any link check.
    """
    found = location and os.path.isfile(location)
    if not follow_symlinks:
        return found and not is_link(location) and not is_broken_link(location)
    return found


def is_dir(location, follow_symlinks=False):
    """
    Return True if `location` is a directory.

    Symbolic links (including broken ones) are rejected unless
    `follow_symlinks` is True, in which case no link check is applied to
    `location`.
    """
    # is_file() is deliberately called with its default (links not followed),
    # exactly as before.
    found = location and os.path.isdir(location) and not is_file(location)
    if not follow_symlinks:
        return found and not is_link(location) and not is_broken_link(location)
    return found


def is_regular(location):
Expand Down
36 changes: 22 additions & 14 deletions src/commoncode/fileutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def prepare_path(pth):
if not isinstance(pth, bytes):
pth = fsencode(pth)
return pth
else:
else:
if not isinstance(pth, compat.unicode):
return fsdecode(pth)
return pth
Expand Down Expand Up @@ -365,16 +365,19 @@ def ignore_nothing(_):
return False


def walk(location, ignored=None):
def walk(location, ignored=None, follow_symlinks=False):
"""
Walk location returning the same tuples as os.walk but with a different
behavior:
- always walk top-down, breadth-first.
- always ignore and never follow symlinks,
- always ignore and never follow symlinks (unless `follow_symlinks` is True),
- always ignore special files (FIFOs, etc.)
- optionally ignore files and directories by invoking the `ignored`
callable on files and directories returning True if it should be ignored.
- location is a directory or a file: for a file, the file is returned.
If `follow_symlinks` is True, symlinks are not ignored: they are followed and
collected like regular files and directories.
"""
if on_linux and py2:
location = fsencode(location)
Expand All @@ -387,33 +390,38 @@ def walk(location, ignored=None):
logger_debug('walk: ignored:', location, is_ignored)
return

if filetype.is_file(location) :
if filetype.is_file(location, follow_symlinks=follow_symlinks) :
yield parent_directory(location), [], [file_name(location)]

elif filetype.is_dir(location):
elif filetype.is_dir(location, follow_symlinks=follow_symlinks):
dirs = []
files = []
# TODO: consider using scandir
for name in os.listdir(location):
loc = os.path.join(location, name)
if filetype.is_special(loc) or (ignored and ignored(loc)):
if TRACE:
ign = ignored and ignored(loc)
logger_debug('walk: ignored:', loc, ign)
continue
if (follow_symlinks
and filetype.is_link(loc)
and not filetype.is_broken_link(loc)):
pass
else:
if TRACE:
ign = ignored and ignored(loc)
logger_debug('walk: ignored:', loc, ign)
continue
# special files are always ignored; symlinks are ignored unless follow_symlinks is True
if filetype.is_dir(loc):
if filetype.is_dir(loc, follow_symlinks=follow_symlinks):
dirs.append(name)
elif filetype.is_file(loc):
elif filetype.is_file(loc, follow_symlinks=follow_symlinks):
files.append(name)
yield location, dirs, files

for dr in dirs:
for tripple in walk(os.path.join(location, dr), ignored):
for tripple in walk(os.path.join(location, dr), ignored, follow_symlinks=follow_symlinks):
yield tripple


def resource_iter(location, ignored=ignore_nothing, with_dirs=True):
def resource_iter(location, ignored=ignore_nothing, with_dirs=True, follow_symlinks=False):
"""
Return an iterable of paths at `location` recursively.
Expand All @@ -424,7 +432,7 @@ def resource_iter(location, ignored=ignore_nothing, with_dirs=True):
"""
if on_linux and py2:
location = fsencode(location)
for top, dirs, files in walk(location, ignored):
for top, dirs, files in walk(location, ignored, follow_symlinks=follow_symlinks):
if with_dirs:
for d in dirs:
yield os.path.join(top, d)
Expand Down
Empty file.
Empty file.
Empty file.
22 changes: 22 additions & 0 deletions tests/commoncode/test_filetype.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,25 @@ def test_get_file_count(self):
for test_file, count in tests:
result = filetype.get_file_count(os.path.join(test_dir, test_file))
assert count == result


class SymlinkTest(FileBasedTesting):
    """
    Check that filetype.is_file() and filetype.is_dir() honor the
    `follow_symlinks` flag when given a symbolic link.
    """
    # NOTE: this must be a class (not a function) for the test runner to
    # collect the test methods below.
    test_data_dir = os.path.join(os.path.dirname(__file__), 'data')

    @skipIf(on_windows, 'os.symlink does not work on Windows')
    def test_is_file(self):
        # A link to a regular file is reported as a file only when links
        # are followed.
        test_file = self.get_test_loc('symlink/test', copy=True)
        temp_dir = fileutils.get_temp_dir()
        test_link = os.path.join(temp_dir, 'test-link')
        os.symlink(test_file, test_link)
        assert filetype.is_file(test_link, follow_symlinks=True)
        assert not filetype.is_file(test_link, follow_symlinks=False)

    @skipIf(on_windows, 'os.symlink does not work on Windows')
    def test_is_dir(self):
        # A link to a directory is reported as a directory only when links
        # are followed.
        test_dir = self.get_test_loc('symlink', copy=True)
        temp_dir = fileutils.get_temp_dir()
        test_link = os.path.join(temp_dir, 'test-dir-link')
        os.symlink(test_dir, test_link)
        assert filetype.is_dir(test_link, follow_symlinks=True)
        assert not filetype.is_dir(test_link, follow_symlinks=False)
27 changes: 27 additions & 0 deletions tests/commoncode/test_fileutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,20 @@ def test_os_walk_can_walk_non_utf8_path_from_unicode_path(self):
_dirpath, _dirnames, filenames = result
assert 18 == len(filenames)

@skipIf(on_windows, 'os.symlink does not work on Windows')
def test_walk_on_symlinks(self):
    """
    Walking a symlink to a directory with `follow_symlinks=True` yields the
    tree behind the link, rooted at the link name.
    """
    source_dir = self.get_test_loc('symlink/walk', copy=True)
    link = join(fileutils.get_temp_dir(), 'test-dir-link')
    os.symlink(source_dir, link)
    # Keep only the last path segment of each visited directory so the
    # comparison does not depend on the temporary directory location.
    walked = [
        (os.path.basename(top), dirs, files)
        for top, dirs, files in list(fileutils.walk(link, follow_symlinks=True))
    ]
    assert [
        ('test-dir-link', ['dir'], ['a']),
        ('dir', [], ['b'])
    ] == walked


class TestFileUtilsIter(FileBasedTesting):
test_data_dir = os.path.join(os.path.dirname(__file__), 'data')
Expand Down Expand Up @@ -553,6 +567,19 @@ def test_resource_iter_can_walk_non_utf8_path_from_unicode_path(self):
result = list(fileutils.resource_iter(test_dir, with_dirs=False))
assert 18 == len(result)

@skipIf(on_windows, 'os.symlink does not work on Windows')
def test_resource_iter_follow_symlinks(self):
    """
    resource_iter() on a symlink to a directory with `follow_symlinks=True`
    returns the paths found behind the link.
    """
    test_dir = self.get_test_loc('symlink/walk', copy=True)
    temp_dir = fileutils.get_temp_dir()
    test_link = join(temp_dir, 'test-dir-link')
    os.symlink(test_dir, test_link)
    # Iterate through the link itself (not the real directory); otherwise
    # the follow_symlinks code path is never exercised.
    result = [
        os.path.basename(f)
        for f in fileutils.resource_iter(test_link, follow_symlinks=True)
    ]
    expected = [
        'dir',
        'a',
        'b'
    ]
    assert sorted(expected) == sorted(result)


class TestBaseName(FileBasedTesting):
test_data_dir = os.path.join(os.path.dirname(__file__), 'data')
Expand Down

0 comments on commit d89c6b6

Please sign in to comment.