diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 01dabe286969bb..15d496ec4cbe56 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -850,6 +850,51 @@ call fails (for example because the path doesn't exist). .. versionadded:: 3.5 +.. classmethod:: Path.from_uri(uri) + + Return a new path object from parsing a 'file' URI conforming to + :rfc:`8089`. For example:: + + >>> Path.from_uri('file:///etc/hosts') + PosixPath('/etc/hosts') + + On Windows, DOS device and UNC paths may be parsed from URIs:: + + >>> Path.from_uri('file:///c:/windows') + WindowsPath('c:/windows') + >>> Path.from_uri('file://server/share') + WindowsPath('//server/share') + + Several variant forms are supported:: + + >>> Path.from_uri('file:////server/share') + WindowsPath('//server/share') + >>> Path.from_uri('file://///server/share') + WindowsPath('//server/share') + >>> Path.from_uri('file:c:/windows') + WindowsPath('c:/windows') + >>> Path.from_uri('file:/c|/windows') + WindowsPath('c:/windows') + >>> Path.from_uri('file://///c:/windows') + WindowsPath('c:/windows') + + URIs with no slash after the scheme (and no drive letter) are parsed as + relative paths:: + + >>> Path.from_uri('file:foo/bar') + WindowsPath('foo/bar') + + Users may wish to test the result with :meth:`~PurePath.is_absolute` and + reject relative paths, as these are not portable across processes with + differing working directories. + + :func:`os.fsdecode` is used to decode percent-escaped byte sequences, and + so file URIs are not portable across machines with differing + :ref:`filesystem encodings <filesystem-encoding>`. + + .. versionadded:: 3.13 + + .. method:: Path.stat(*, follow_symlinks=True) Return a :class:`os.stat_result` object containing information about this path, like :func:`os.stat`.
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 479d08b24b112a..ca79f5495a52de 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -118,6 +118,9 @@ pathlib :exc:`NotImplementedError` when a path operation isn't supported. (Contributed by Barney Gale in :gh:`89812`.) +* Add :meth:`pathlib.Path.from_uri` classmethod. + (Contributed by Barney Gale in :gh:`107465`.) + * Add support for recursive wildcards in :meth:`pathlib.PurePath.match`. (Contributed by Barney Gale in :gh:`73435`.) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8ff4d4ea19168f..c59593baffbdb3 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -17,7 +17,6 @@ from _collections_abc import Sequence from errno import ENOENT, ENOTDIR, EBADF, ELOOP from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO -from urllib.parse import quote_from_bytes as urlquote_from_bytes __all__ = [ @@ -433,7 +432,8 @@ def as_uri(self): # It's a posix path => 'file:///etc/hosts' prefix = 'file://' path = str(self) - return prefix + urlquote_from_bytes(os.fsencode(path)) + from urllib.parse import quote_from_bytes + return prefix + quote_from_bytes(os.fsencode(path)) @property def _str_normcase(self): @@ -1178,6 +1178,22 @@ def __new__(cls, *args, **kwargs): cls = WindowsPath if os.name == 'nt' else PosixPath return object.__new__(cls) + @classmethod + def from_uri(cls, uri): + """Return a new path from the given 'file' URI.""" + uri = uri.removeprefix('file:') + if uri[:3] == '///': + # Remove empty authority + uri = uri[2:] + if uri[:1] == '/' and (uri[2:3] in (':', '|') or uri[1:3] == '//'): + # Remove slash before DOS device/UNC path + uri = uri[1:] + if uri[1:2] == '|': + # Replace bar with colon in DOS drive + uri = uri[:1] + ':' + uri[2:] + from urllib.parse import unquote_to_bytes + return cls(os.fsdecode(unquote_to_bytes(uri))) + @classmethod def cwd(cls): """Return a new path pointing to the current working directory.""" diff --git a/Lib/test/test_pathlib.py
b/Lib/test/test_pathlib.py index 78948e3b720320..67492a29593d34 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -11,6 +11,7 @@ import tempfile import unittest from unittest import mock +from urllib.request import pathname2url from test.support import import_helper from test.support import set_recursion_limit @@ -2913,6 +2914,20 @@ def test_passing_kwargs_deprecated(self): with self.assertWarns(DeprecationWarning): self.cls(foo="bar") + def test_from_uri_common(self): + P = self.cls + self.assertEqual(P.from_uri('file:foo/bar'), P('foo/bar')) + self.assertEqual(P.from_uri('file:/foo/bar'), P('/foo/bar')) + self.assertEqual(P.from_uri('file://foo/bar'), P('//foo/bar')) + self.assertEqual(P.from_uri('file:///foo/bar'), P('/foo/bar')) + self.assertEqual(P.from_uri('file:////foo/bar'), P('//foo/bar')) + + def test_from_uri_pathname2url_common(self): + P = self.cls + self.assertEqual(P.from_uri(pathname2url('foo/bar')), P('foo/bar')) + self.assertEqual(P.from_uri(pathname2url('/foo/bar')), P('/foo/bar')) + self.assertEqual(P.from_uri(pathname2url('//foo/bar')), P('//foo/bar')) + class WalkTests(unittest.TestCase): @@ -3441,7 +3456,23 @@ def check(): env['HOME'] = 'C:\\Users\\eve' check() + def test_from_uri(self): + P = self.cls + # DOS drive paths + self.assertEqual(P.from_uri('file:c:/path/to/file'), P('c:/path/to/file')) + self.assertEqual(P.from_uri('file:c|/path/to/file'), P('c:/path/to/file')) + self.assertEqual(P.from_uri('file:/c|/path/to/file'), P('c:/path/to/file')) + self.assertEqual(P.from_uri('file:///c|/path/to/file'), P('c:/path/to/file')) + self.assertEqual(P.from_uri('file://///c|/path/to/file'), P('c:/path/to/file')) + # UNC paths + self.assertEqual(P.from_uri('file://server/path/to/file'), P('//server/path/to/file')) + self.assertEqual(P.from_uri('file:////server/path/to/file'), P('//server/path/to/file')) + self.assertEqual(P.from_uri('file://///server/path/to/file'), P('//server/path/to/file')) + def 
test_from_uri_pathname2url(self): + P = self.cls + self.assertEqual(P.from_uri(pathname2url(r'c:\path\to\file')), P('c:/path/to/file')) + self.assertEqual(P.from_uri(pathname2url(r'\\server\path\to\file')), P('//server/path/to/file')) class PathSubclassTest(PathTest): class cls(pathlib.Path):