From 39dffedc075cbf3908c1609547471bf9a086d896 Mon Sep 17 00:00:00 2001
From: Barney Gale <barney.gale@gmail.com>
Date: Wed, 29 May 2024 21:11:30 +0100
Subject: [PATCH] GH-89727: Partially fix `shutil.rmtree()` recursion error on
 deep trees (#119634)

Make `shutil._rmtree_unsafe()` call `os.walk()`, which is implemented
without recursion.

`shutil._rmtree_safe_fd()` is not affected and can still raise a recursion
error.

Co-authored-by: Jelle Zijlstra <jelle.zijlstra@gmail.com>
---
 Lib/os.py                                     |  9 ++++-
 Lib/shutil.py                                 | 38 ++++++-------------
 Lib/test/test_shutil.py                       | 11 ++++++
 ...4-05-29-20-42-17.gh-issue-89727.5lPTTW.rst |  3 ++
 4 files changed, 33 insertions(+), 28 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2024-05-29-20-42-17.gh-issue-89727.5lPTTW.rst

diff --git a/Lib/os.py b/Lib/os.py
index 7661ce68ca3be2b..ae9e646361e82c8 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -281,6 +281,10 @@ def renames(old, new):
 
 __all__.extend(["makedirs", "removedirs", "renames"])
 
+# Private sentinel that makes walk() classify all symlinks and junctions as
+# regular files.
+_walk_symlinks_as_files = object()
+
 def walk(top, topdown=True, onerror=None, followlinks=False):
     """Directory tree generator.
 
@@ -382,7 +386,10 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
                     break
 
                 try:
-                    is_dir = entry.is_dir()
+                    if followlinks is _walk_symlinks_as_files:
+                        is_dir = entry.is_dir(follow_symlinks=False) and not entry.is_junction()
+                    else:
+                        is_dir = entry.is_dir()
                 except OSError:
                     # If is_dir() raises an OSError, consider the entry not to
                     # be a directory, same behaviour as os.path.isdir().
diff --git a/Lib/shutil.py b/Lib/shutil.py
index c9b4da34b1e19ba..03a9d7560304303 100644
--- a/Lib/shutil.py
+++ b/Lib/shutil.py
@@ -606,37 +606,21 @@ def _rmtree_islink(st):
 
 # version vulnerable to race conditions
 def _rmtree_unsafe(path, onexc):
-    try:
-        with os.scandir(path) as scandir_it:
-            entries = list(scandir_it)
-    except FileNotFoundError:
-        return
-    except OSError as err:
-        onexc(os.scandir, path, err)
-        entries = []
-    for entry in entries:
-        fullname = entry.path
-        try:
-            is_dir = entry.is_dir(follow_symlinks=False)
-        except FileNotFoundError:
-            continue
-        except OSError:
-            is_dir = False
-
-        if is_dir and not entry.is_junction():
+    def onerror(err):
+        if not isinstance(err, FileNotFoundError):
+            onexc(os.scandir, err.filename, err)
+    results = os.walk(path, topdown=False, onerror=onerror, followlinks=os._walk_symlinks_as_files)
+    for dirpath, dirnames, filenames in results:
+        for name in dirnames:
+            fullname = os.path.join(dirpath, name)
             try:
-                if entry.is_symlink():
-                    # This can only happen if someone replaces
-                    # a directory with a symlink after the call to
-                    # os.scandir or entry.is_dir above.
-                    raise OSError("Cannot call rmtree on a symbolic link")
+                os.rmdir(fullname)
             except FileNotFoundError:
                 continue
             except OSError as err:
-                onexc(os.path.islink, fullname, err)
-                continue
-            _rmtree_unsafe(fullname, onexc)
-        else:
+                onexc(os.rmdir, fullname, err)
+        for name in filenames:
+            fullname = os.path.join(dirpath, name)
             try:
                 os.unlink(fullname)
             except FileNotFoundError:
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py
index df9e7a660bf29e4..01f139073dcd97e 100644
--- a/Lib/test/test_shutil.py
+++ b/Lib/test/test_shutil.py
@@ -741,6 +741,17 @@ def _onexc(fn, path, exc):
             shutil.rmtree(TESTFN)
             raise
 
+    @unittest.skipIf(shutil._use_fd_functions, "fd-based functions remain unfixed (GH-89727)")
+    def test_rmtree_above_recursion_limit(self):
+        recursion_limit = 40
+        # directory_depth > recursion_limit
+        directory_depth = recursion_limit + 10
+        base = os.path.join(TESTFN, *(['d'] * directory_depth))
+        os.makedirs(base)
+
+        with support.infinite_recursion(recursion_limit):
+            shutil.rmtree(TESTFN)
+
 
 class TestCopyTree(BaseTest, unittest.TestCase):
 
diff --git a/Misc/NEWS.d/next/Library/2024-05-29-20-42-17.gh-issue-89727.5lPTTW.rst b/Misc/NEWS.d/next/Library/2024-05-29-20-42-17.gh-issue-89727.5lPTTW.rst
new file mode 100644
index 000000000000000..3b73d2789fd6f9e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-05-29-20-42-17.gh-issue-89727.5lPTTW.rst
@@ -0,0 +1,3 @@
+Partially fix an issue with :func:`shutil.rmtree` where a
+:exc:`RecursionError` is raised on deep directory trees. A recursion error is
+no longer raised when :data:`!rmtree.avoids_symlink_attacks` is false.