diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 775aa32df99a46..2b6905a845b423 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -2175,7 +2175,7 @@ features: Accepts a :term:`path-like object`. -.. function:: lstat(path, *, dir_fd=None) +.. function:: lstat(path, *, dir_fd=None, fast=False) Perform the equivalent of an :c:func:`lstat` system call on the given path. Similar to :func:`~os.stat`, but does not follow symbolic links. Return a @@ -2184,8 +2184,15 @@ features: On platforms that do not support symbolic links, this is an alias for :func:`~os.stat`. + Passing *fast* as ``True`` may omit some information on some platforms + for the sake of performance. These omissions are not guaranteed (that is, + the information may be returned anyway), and may change between Python + releases without a deprecation period or due to operating system updates + without warning. See :class:`stat_result` documentation for the fields + that are guaranteed to be present under this option. + As of Python 3.3, this is equivalent to ``os.stat(path, dir_fd=dir_fd, - follow_symlinks=False)``. + follow_symlinks=False, fast=fast)``. This function can also support :ref:`paths relative to directory descriptors `. @@ -2209,6 +2216,9 @@ features: Other kinds of reparse points are resolved by the operating system as for :func:`~os.stat`. + .. versionchanged:: 3.12 + Added the *fast* parameter. + .. function:: mkdir(path, mode=0o777, *, dir_fd=None) @@ -2781,7 +2791,7 @@ features: for :class:`bytes` paths on Windows. -.. function:: stat(path, *, dir_fd=None, follow_symlinks=True) +.. function:: stat(path, *, dir_fd=None, follow_symlinks=True, fast=False) Get the status of a file or a file descriptor. Perform the equivalent of a :c:func:`stat` system call on the given path. *path* may be specified as @@ -2806,6 +2816,13 @@ features: possible and call :func:`lstat` on the result. 
This does not apply to dangling symlinks or junction points, which will raise the usual exceptions. + Passing *fast* as ``True`` may omit some information on some platforms + for the sake of performance. These omissions are not guaranteed (that is, + the information may be returned anyway), and may change between Python + releases without a deprecation period or due to operating system updates + without warning. See :class:`stat_result` documentation for the fields + that are guaranteed to be present under this option. + .. index:: module: stat Example:: @@ -2838,6 +2855,9 @@ features: returns the information for the original path as if ``follow_symlinks=False`` had been specified instead of raising an error. + .. versionchanged:: 3.12 + Added the *fast* parameter. + .. class:: stat_result @@ -2845,12 +2865,22 @@ features: :c:type:`stat` structure. It is used for the result of :func:`os.stat`, :func:`os.fstat` and :func:`os.lstat`. + When the *fast* argument to :func:`os.stat` or :func:`os.lstat` is ``True``, + some information may be reduced or omitted. Those attributes that are + guaranteed to be valid, and those currently known to be omitted, are + marked in the documentation below. If you depend on an attribute that is + not marked as guaranteed, explicitly pass *fast* as ``False`` to ensure + it is calculated. + Attributes: .. attribute:: st_mode File mode: file type and file mode bits (permissions). + When *fast* is ``True``, only the file type bits are guaranteed + to be valid (the mode bits may be zero). + .. attribute:: st_ino Platform dependent, but if non-zero, uniquely identifies the @@ -2865,6 +2895,8 @@ features: Identifier of the device on which this file resides. + On Windows, when *fast* is ``True``, this may be zero. + .. attribute:: st_nlink Number of hard links. @@ -2883,6 +2915,8 @@ features: The size of a symbolic link is the length of the pathname it contains, without a terminating null byte. + This field is guaranteed to be filled when *fast* is ``True``. + Timestamps: ..
attribute:: st_atime @@ -2893,6 +2927,8 @@ features: Time of most recent content modification expressed in seconds. + This field is guaranteed to be filled when *fast* is ``True``. + .. attribute:: st_ctime Platform dependent: @@ -2909,6 +2945,9 @@ features: Time of most recent content modification expressed in nanoseconds as an integer. + This field is guaranteed to be filled when *fast* is ``True``, subject + to the note below. + .. attribute:: st_ctime_ns Platform dependent: @@ -2998,12 +3037,16 @@ features: :c:func:`GetFileInformationByHandle`. See the ``FILE_ATTRIBUTE_*`` constants in the :mod:`stat` module. + This field is guaranteed to be filled when *fast* is ``True``. + .. attribute:: st_reparse_tag When :attr:`st_file_attributes` has the ``FILE_ATTRIBUTE_REPARSE_POINT`` set, this field contains the tag identifying the type of reparse point. See the ``IO_REPARSE_TAG_*`` constants in the :mod:`stat` module. + This field is guaranteed to be filled when *fast* is ``True``. + The standard module :mod:`stat` defines functions and constants that are useful for extracting information from a :c:type:`stat` structure. (On Windows, some items are filled with dummy values.) @@ -3039,6 +3082,10 @@ features: files as :const:`S_IFCHR`, :const:`S_IFIFO` or :const:`S_IFBLK` as appropriate. + .. versionchanged:: 3.12 + Defined the minimum set of fields returned when the *fast* argument + of :func:`os.stat` or :func:`os.lstat` is ``True``. + .. function:: statvfs(path) Perform a :c:func:`statvfs` system call on the given path.
The return value is diff --git a/Include/internal/pycore_fileutils_windows.h b/Include/internal/pycore_fileutils_windows.h new file mode 100644 index 00000000000000..d1545eff30f51a --- /dev/null +++ b/Include/internal/pycore_fileutils_windows.h @@ -0,0 +1,96 @@ +#ifndef Py_INTERNAL_FILEUTILS_WINDOWS_H +#define Py_INTERNAL_FILEUTILS_WINDOWS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "Py_BUILD_CORE must be defined to include this header" +#endif + +#ifdef MS_WINDOWS + +/* Fallback declarations, compiled only when the SDK headers in use do not + report NTDDI_VERSION >= NTDDI_WIN10_NI. NOTE(review): presumably these + mirror the SDK's own definitions member for member -- confirm. */ +#if !defined(NTDDI_WIN10_NI) || !(NTDDI_VERSION >= NTDDI_WIN10_NI) +typedef struct _FILE_STAT_BASIC_INFORMATION { + LARGE_INTEGER FileId; + LARGE_INTEGER CreationTime; + LARGE_INTEGER LastAccessTime; + LARGE_INTEGER LastWriteTime; + LARGE_INTEGER ChangeTime; + LARGE_INTEGER AllocationSize; + LARGE_INTEGER EndOfFile; + ULONG FileAttributes; + ULONG ReparseTag; + ULONG NumberOfLinks; + ULONG DeviceType; + ULONG DeviceCharacteristics; +} FILE_STAT_BASIC_INFORMATION; + +typedef enum _FILE_INFO_BY_NAME_CLASS { + FileStatByNameInfo, + FileStatLxByNameInfo, + FileCaseSensitiveByNameInfo, + FileStatBasicByNameInfo, + MaximumFileInfoByNameClass +} FILE_INFO_BY_NAME_CLASS; +#endif + +/* Pointer type for the GetFileInformationByName export resolved below. */ +typedef BOOL (WINAPI *PGetFileInformationByName)( + PCWSTR FileName, + FILE_INFO_BY_NAME_CLASS FileInformationClass, + PVOID FileInfoBuffer, + ULONG FileInfoBufferSize +); + +/* Call GetFileInformationByName, looking it up at runtime on first use. + The export is searched for in "api-ms-win-core-file-l2-1-4". If the + library or the export is not found, returns FALSE with the last error + set to ERROR_NOT_SUPPORTED; otherwise forwards the arguments unchanged + and returns the API's result. */ +static inline BOOL GetFileInformationByName( + PCWSTR FileName, + FILE_INFO_BY_NAME_CLASS FileInformationClass, + PVOID FileInfoBuffer, + ULONG FileInfoBufferSize +) { + /* Resolved export. Deliberately not named like this wrapper, so the + call at the end cannot be mistaken for recursion. */ + static PGetFileInformationByName pGetFileInformationByName = NULL; + /* -1: not looked up yet, 0: unavailable, 1: available */ + static int GetFileInformationByName_init = -1; + + if (GetFileInformationByName_init < 0) { + PGetFileInformationByName proc = NULL; + HMODULE hMod = LoadLibraryW(L"api-ms-win-core-file-l2-1-4"); + if (hMod) { + proc = (PGetFileInformationByName)GetProcAddress( + hMod, "GetFileInformationByName"); + if (!proc) {
+ FreeLibrary(hMod); + } + } + /* Store the pointer first and the state flag last, once the lookup + has finished. Writing the "unavailable" state before the lookup + would let a second caller arriving meanwhile read 0 and fail + with ERROR_NOT_SUPPORTED although the API exists. */ + pGetFileInformationByName = proc; + GetFileInformationByName_init = proc ? 1 : 0; + } + + if (GetFileInformationByName_init <= 0) { + SetLastError(ERROR_NOT_SUPPORTED); + return FALSE; + } + return pGetFileInformationByName(FileName, FileInformationClass, FileInfoBuffer, FileInfoBufferSize); +} + +#endif /* MS_WINDOWS */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_FILEUTILS_WINDOWS_H */ diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 494bcf293cdb7b..379c00adf8d736 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -903,6 +903,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(false)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(family)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fanout)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fast)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fd)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fd2)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fdel)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index b0cb8365933e77..d3d6a26f19026b 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -389,6 +389,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(false) STRUCT_FOR_ID(family) STRUCT_FOR_ID(fanout) + STRUCT_FOR_ID(fast) STRUCT_FOR_ID(fd) STRUCT_FOR_ID(fd2) STRUCT_FOR_ID(fdel) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 4b128da54555b7..b580e36a74e008 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -895,6 +895,7 @@ extern "C" { INIT_ID(false), \ INIT_ID(family), \ INIT_ID(fanout), \ + INIT_ID(fast), \ INIT_ID(fd), \ INIT_ID(fd2), \ INIT_ID(fdel), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index 7ef1f7e94ddead..efb42da89413ab 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -684,6 +684,8 @@ _PyUnicode_InitStaticStrings(void) { PyUnicode_InternInPlace(&string); string = &_Py_ID(fanout); PyUnicode_InternInPlace(&string); + string = &_Py_ID(fast); + PyUnicode_InternInPlace(&string); string = &_Py_ID(fd); PyUnicode_InternInPlace(&string); string = &_Py_ID(fd2); diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 94db8bb7737acd..76c2a75e6813fb 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -613,6 +613,18 @@ def test_stat_result_pickle(self): unpickled = pickle.loads(p) self.assertEqual(result, unpickled) + def test_stat_result_fast(self): + # Minimum guaranteed fields when requesting incomplete info + result_1 = os.stat(self.fname, fast=True) + result_2 = os.stat(self.fname, fast=False) + result_3 = os.stat(self.fname) + self.assertEqual(stat.S_IFMT(result_1.st_mode), + stat.S_IFMT(result_2.st_mode)) + self.assertEqual(result_1.st_size, result_2.st_size) + self.assertEqual(result_1.st_mtime, result_2.st_mtime) + # Ensure the default matches fast=False + self.assertEqual(result_2, result_3) + @unittest.skipUnless(hasattr(os, 'statvfs'), 'test needs os.statvfs()') def test_statvfs_attributes(self): result = os.statvfs(self.fname) diff --git a/Misc/NEWS.d/next/Library/2022-11-23-15-15-59.gh-issue-99726.6m-YhG.rst b/Misc/NEWS.d/next/Library/2022-11-23-15-15-59.gh-issue-99726.6m-YhG.rst new file mode 100644 index 00000000000000..138800bd684896 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-23-15-15-59.gh-issue-99726.6m-YhG.rst @@ -0,0 +1,2 @@ +Adds the *fast* argument to :func:`os.stat` and :func:`os.lstat` to enable +performance optimizations by skipping some fields in the result.
diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index f9f6ca372ec6c7..33d4f01fdd6f67 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -9,7 +9,7 @@ preserve PyDoc_STRVAR(os_stat__doc__, -"stat($module, /, path, *, dir_fd=None, follow_symlinks=True)\n" +"stat($module, /, path, *, dir_fd=None, follow_symlinks=True, fast=False)\n" "--\n" "\n" "Perform a stat system call on the given path.\n" @@ -25,6 +25,9 @@ PyDoc_STRVAR(os_stat__doc__, " If False, and the last element of the path is a symbolic link,\n" " stat will examine the symbolic link itself instead of the file\n" " the link points to.\n" +" fast\n" +" If True, certain data may be omitted on some platforms to\n" +" allow faster results. See the documentation for specific cases.\n" "\n" "dir_fd and follow_symlinks may not be implemented\n" " on your platform. If they are unavailable, using them will raise a\n" @@ -37,7 +40,8 @@ PyDoc_STRVAR(os_stat__doc__, {"stat", _PyCFunction_CAST(os_stat), METH_FASTCALL|METH_KEYWORDS, os_stat__doc__}, static PyObject * -os_stat_impl(PyObject *module, path_t *path, int dir_fd, int follow_symlinks); +os_stat_impl(PyObject *module, path_t *path, int dir_fd, int follow_symlinks, + int fast); static PyObject * os_stat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -45,14 +49,14 @@ os_stat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD PyObject *ob_item[NUM_KEYWORDS]; } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(path), &_Py_ID(dir_fd), &_Py_ID(follow_symlinks), }, + .ob_item = { &_Py_ID(path), &_Py_ID(dir_fd), &_Py_ID(follow_symlinks), &_Py_ID(fast), }, }; #undef NUM_KEYWORDS #define KWTUPLE 
(&_kwtuple.ob_base.ob_base) @@ -61,18 +65,19 @@ os_stat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"path", "dir_fd", "follow_symlinks", NULL}; + static const char * const _keywords[] = {"path", "dir_fd", "follow_symlinks", "fast", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "stat", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; path_t path = PATH_T_INITIALIZE("stat", "path", 0, 1); int dir_fd = DEFAULT_DIR_FD; int follow_symlinks = 1; + int fast = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); if (!args) { @@ -92,12 +97,21 @@ os_stat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn goto skip_optional_kwonly; } } - follow_symlinks = PyObject_IsTrue(args[2]); - if (follow_symlinks < 0) { + if (args[2]) { + follow_symlinks = PyObject_IsTrue(args[2]); + if (follow_symlinks < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + fast = PyObject_IsTrue(args[3]); + if (fast < 0) { goto exit; } skip_optional_kwonly: - return_value = os_stat_impl(module, &path, dir_fd, follow_symlinks); + return_value = os_stat_impl(module, &path, dir_fd, follow_symlinks, fast); exit: /* Cleanup for path */ @@ -107,11 +121,15 @@ os_stat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn } PyDoc_STRVAR(os_lstat__doc__, -"lstat($module, /, path, *, dir_fd=None)\n" +"lstat($module, /, path, *, dir_fd=None, fast=False)\n" "--\n" "\n" "Perform a stat system call on the given path, without following symbolic links.\n" "\n" +" fast\n" +" If True, certain data may be omitted on some platforms to\n" +" allow faster results. 
See the documentation for specific cases.\n" +"\n" "Like stat(), but do not follow symbolic links.\n" "Equivalent to stat(path, follow_symlinks=False)."); @@ -119,7 +137,7 @@ PyDoc_STRVAR(os_lstat__doc__, {"lstat", _PyCFunction_CAST(os_lstat), METH_FASTCALL|METH_KEYWORDS, os_lstat__doc__}, static PyObject * -os_lstat_impl(PyObject *module, path_t *path, int dir_fd); +os_lstat_impl(PyObject *module, path_t *path, int dir_fd, int fast); static PyObject * os_lstat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -127,14 +145,14 @@ os_lstat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD PyObject *ob_item[NUM_KEYWORDS]; } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(path), &_Py_ID(dir_fd), }, + .ob_item = { &_Py_ID(path), &_Py_ID(dir_fd), &_Py_ID(fast), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -143,17 +161,18 @@ os_lstat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"path", "dir_fd", NULL}; + static const char * const _keywords[] = {"path", "dir_fd", "fast", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "lstat", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; path_t path = PATH_T_INITIALIZE("lstat", "path", 0, 0); int dir_fd = DEFAULT_DIR_FD; + int fast = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); if (!args) { @@ -165,11 +184,20 @@ os_lstat(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw if (!noptargs) { goto skip_optional_kwonly; } - if (!FSTATAT_DIR_FD_CONVERTER(args[1], &dir_fd)) { + if (args[1]) { + if (!FSTATAT_DIR_FD_CONVERTER(args[1], &dir_fd)) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + fast = PyObject_IsTrue(args[2]); + if (fast < 0) { goto exit; } skip_optional_kwonly: - return_value = os_lstat_impl(module, &path, dir_fd); + return_value = os_lstat_impl(module, &path, dir_fd, fast); exit: /* Cleanup for path */ @@ -11549,4 +11577,4 @@ os_waitstatus_to_exitcode(PyObject *module, PyObject *const *args, Py_ssize_t na #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=4192d8e09e216300 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8653c0259a7b7c5e input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 8185517b06b5dd..cf682ace4fb832 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -36,6 +36,7 @@ # include "posixmodule.h" #else # include "winreparse.h" +# include "pycore_fileutils_windows.h" // GetFileInformationByName() #endif #if !defined(EX_OK) && defined(EXIT_SUCCESS) @@ -664,6 +665,8 @@ PyOS_AfterFork(void) void _Py_time_t_to_FILE_TIME(time_t, int, FILETIME *); void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *, ULONG, struct _Py_stat_struct *); +void _Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *, + struct _Py_stat_struct *); #endif @@ -1834,17 +1837,110 @@ attributes_from_dir(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *re return TRUE; } 
+static void +win32_xstat_fixup_exec_mode(const wchar_t *path, + struct _Py_stat_struct *result) +{ /* Adds the execute bits (0111) to result->st_mode when result is not a directory and path ends in .exe, .bat, .cmd or .com, compared case-insensitively. */ + if (!(result->st_file_attributes & FILE_ATTRIBUTE_DIRECTORY)) { + /* Fix the file execute permissions. This hack sets S_IEXEC if + the filename has an extension that is commonly used by files + that CreateProcessW can execute. A real implementation calls + GetSecurityInfo, OpenThreadToken/OpenProcessToken, and + AccessCheck to check for generic read, write, and execute + access. */ + const wchar_t *fileExtension = wcsrchr(path, '.'); + if (fileExtension) { + if (_wcsicmp(fileExtension, L".exe") == 0 || + _wcsicmp(fileExtension, L".bat") == 0 || + _wcsicmp(fileExtension, L".cmd") == 0 || + _wcsicmp(fileExtension, L".com") == 0) { + result->st_mode |= 0111; + } + } + } +} + +static int +win32_xstat_get_st_dev(const wchar_t *path, + struct _Py_stat_struct *result) +{ /* Fills result->st_dev and result->st_rdev with the volume serial number that GetVolumeInformationW reports for the root of path. Returns 0 on success or when there is nothing to do, -1 when GetVolumeInformationW fails. */ + const wchar_t *rootEnd; + WCHAR rootBuffer[MAX_PATH]; + WCHAR *root = rootBuffer; + DWORD oldErrorMode; + DWORD vsn = 0; /* volume serial number, stored into both st_dev and st_rdev */ + int retval = 0; + + if (result->st_dev || !path || !path[0]) { /* st_dev is already set, or there is no path to take a root from */ + return 0; + } + + if (PathCchSkipRoot(path, &rootEnd) || + wcsncpy_s(rootBuffer, MAX_PATH, path, (rootEnd - path))) { + /* No root for the path, so let it use the current volume. NOTE(review): this branch is also taken when wcsncpy_s reports an error; confirm how a root of MAX_PATH or more characters is handled. */ + root = NULL; + } + + /* Change the thread's error mode to avoid popping up dialogs for + "no disk in drive" situations.
*/ + if (!SetThreadErrorMode(SEM_FAILCRITICALERRORS, &oldErrorMode)) { + oldErrorMode = 0; /* fallback value for the restore call below when the previous mode was not returned */ + } + + if (GetVolumeInformationW(root, NULL, 0, &vsn, NULL, NULL, NULL, 0)) { + result->st_dev = vsn; + result->st_rdev = vsn; + } else { + retval = -1; /* st_dev and st_rdev are left as they were */ + } + + SetThreadErrorMode(oldErrorMode, NULL); + return retval; +} + static int win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, - BOOL traverse) + BOOL traverse, BOOL fast) { HANDLE hFile; + FILE_STAT_BASIC_INFORMATION statInfo; BY_HANDLE_FILE_INFORMATION fileInfo; FILE_ATTRIBUTE_TAG_INFO tagInfo = { 0 }; - DWORD fileType, error; + DWORD fileType, error = 0; BOOL isUnhandledTag = FALSE; int retval = 0; + /* Try the fast path first. This is an lstat equivalent, but if we + don't find a symlink it'll be faster to try it first */ + if (GetFileInformationByName(path, FileStatBasicByNameInfo, + &statInfo, sizeof(statInfo))) { + /* Fast path succeeded. If we're not traversing or the file isn't + a name surrogate reparse point, we can continue */ + if (!traverse || + !(statInfo.FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT && + IsReparseTagNameSurrogate(statInfo.ReparseTag))) { + _Py_stat_basic_info_to_stat(&statInfo, result); + if (!fast) { /* with fast set, the result is returned without the exec-bit fixup and without the st_dev lookup */ + win32_xstat_fixup_exec_mode(path, result); + /* st_dev is not included in FileStatBasicByName, + so we get it separately */ + if (win32_xstat_get_st_dev(path, result) != 0) { + return -1; + } + } + return 0; + } + } else { + /* Some errors aren't worth retrying with the slow path */ + switch(GetLastError()) { + case ERROR_FILE_NOT_FOUND: + case ERROR_PATH_NOT_FOUND: + case ERROR_NOT_READY: + case ERROR_BAD_NET_NAME: + return -1; /* no slow-path retry; nothing has run since the failed call, so its last error is still set */ + } + } + DWORD access = FILE_READ_ATTRIBUTES; DWORD flags = FILE_FLAG_BACKUP_SEMANTICS; /* Allow opening directories. */ if (!traverse) { @@ -1969,7 +2065,7 @@ win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, for an unhandled tag.
*/ } else if (!isUnhandledTag) { CloseHandle(hFile); - return win32_xstat_impl(path, result, TRUE); + return win32_xstat_impl(path, result, TRUE, fast); } } } @@ -1991,23 +2087,8 @@ win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, } _Py_attribute_data_to_stat(&fileInfo, tagInfo.ReparseTag, result); - - if (!(fileInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { - /* Fix the file execute permissions. This hack sets S_IEXEC if - the filename has an extension that is commonly used by files - that CreateProcessW can execute. A real implementation calls - GetSecurityInfo, OpenThreadToken/OpenProcessToken, and - AccessCheck to check for generic read, write, and execute - access. */ - const wchar_t *fileExtension = wcsrchr(path, '.'); - if (fileExtension) { - if (_wcsicmp(fileExtension, L".exe") == 0 || - _wcsicmp(fileExtension, L".bat") == 0 || - _wcsicmp(fileExtension, L".cmd") == 0 || - _wcsicmp(fileExtension, L".com") == 0) { - result->st_mode |= 0111; - } - } + if (!fast) { + win32_xstat_fixup_exec_mode(path, result); } cleanup: @@ -2026,11 +2107,12 @@ win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, } static int -win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) +win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, + BOOL traverse, BOOL fast) { /* Protocol violation: we explicitly clear errno, instead of setting it to a POSIX error. Callers should use GetLastError.
*/ - int code = win32_xstat_impl(path, result, traverse); + int code = win32_xstat_impl(path, result, traverse, fast); errno = 0; return code; } @@ -2047,13 +2129,13 @@ win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) static int win32_lstat(const wchar_t* path, struct _Py_stat_struct *result) { - return win32_xstat(path, result, FALSE); + return win32_xstat(path, result, FALSE, FALSE); /* fast=FALSE: this two-argument helper gives its callers no way to opt in to a reduced result, so it keeps returning the complete one */ } static int win32_stat(const wchar_t* path, struct _Py_stat_struct *result) { - return win32_xstat(path, result, TRUE); + return win32_xstat(path, result, TRUE, FALSE); /* fast=FALSE, for the same reason as in win32_lstat() */ } #endif /* MS_WINDOWS */ @@ -2463,7 +2545,7 @@ _pystat_fromstructstat(PyObject *module, STRUCT_STAT *st) static PyObject * posix_do_stat(PyObject *module, const char *function_name, path_t *path, - int dir_fd, int follow_symlinks) + int dir_fd, int follow_symlinks, int fast) { STRUCT_STAT st; int result; @@ -2486,10 +2568,8 @@ posix_do_stat(PyObject *module, const char *function_name, path_t *path, if (path->fd != -1) result = FSTAT(path->fd, &st); #ifdef MS_WINDOWS - else if (follow_symlinks) - result = win32_stat(path->wide, &st); else - result = win32_lstat(path->wide, &st); + result = win32_xstat(path->wide, &st, follow_symlinks, fast); #else else #if defined(HAVE_LSTAT) @@ -2837,6 +2917,10 @@ os.stat stat will examine the symbolic link itself instead of the file the link points to. + fast: bool = False + If True, certain data may be omitted on some platforms to + allow faster results. See the documentation for specific cases. + Perform a stat system call on the given path.
dir_fd and follow_symlinks may not be implemented @@ -2849,10 +2933,12 @@ It's an error to use dir_fd or follow_symlinks when specifying path as [clinic start generated code]*/ static PyObject * -os_stat_impl(PyObject *module, path_t *path, int dir_fd, int follow_symlinks) -/*[clinic end generated code: output=7d4976e6f18a59c5 input=01d362ebcc06996b]*/ +os_stat_impl(PyObject *module, path_t *path, int dir_fd, int follow_symlinks, + int fast) +/*[clinic end generated code: output=2657ee2ccb8586f6 input=ec99c0b72e50d965]*/ { - return posix_do_stat(module, "stat", path, dir_fd, follow_symlinks); + return posix_do_stat(module, "stat", path, dir_fd, + follow_symlinks, fast); /* all work is delegated to posix_do_stat(); fast is passed through unchanged */ } @@ -2865,6 +2951,10 @@ os.lstat dir_fd : dir_fd(requires='fstatat') = None + fast: bool = False + If True, certain data may be omitted on some platforms to + allow faster results. See the documentation for specific cases. + Perform a stat system call on the given path, without following symbolic links. Like stat(), but do not follow symbolic links. @@ -2872,11 +2962,12 @@ Equivalent to stat(path, follow_symlinks=False).
[clinic start generated code]*/ static PyObject * -os_lstat_impl(PyObject *module, path_t *path, int dir_fd) -/*[clinic end generated code: output=ef82a5d35ce8ab37 input=0b7474765927b925]*/ +os_lstat_impl(PyObject *module, path_t *path, int dir_fd, int fast) +/*[clinic end generated code: output=e7fc00813e269d21 input=4311ddb7b2baed54]*/ { int follow_symlinks = 0; - return posix_do_stat(module, "lstat", path, dir_fd, follow_symlinks); + return posix_do_stat(module, "lstat", path, dir_fd, + follow_symlinks, fast); } diff --git a/Python/fileutils.c b/Python/fileutils.c index 244bd899b3bd24..5309d1379cf49c 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -9,6 +9,8 @@ # include # include # include // PathCchCombineEx +# include <winioctl.h> // FILE_DEVICE_* constants +# include "pycore_fileutils_windows.h" // FILE_STAT_BASIC_INFORMATION extern int winerror_to_errno(int); #endif @@ -1048,6 +1050,18 @@ FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out) *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t); } +static void +LARGE_INTEGER_to_time_t_nsec(LARGE_INTEGER *in_ptr, time_t *time_out, int* nsec_out) +{ + /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */ + /* Cannot simply cast and dereference in_ptr, + since it might not be aligned properly */ + __int64 in; + memcpy(&in, in_ptr, sizeof(in)); + *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec.
*/ + *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t); +} + void _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr) { @@ -1104,6 +1118,61 @@ _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, } result->st_file_attributes = info->dwFileAttributes; } + +/* Fill *result from the FILE_STAT_BASIC_INFORMATION record *info. + st_dev and st_rdev are always left as 0: the record has no member to + take them from, so callers that need them must look them up separately. */ +void +_Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *info, + struct _Py_stat_struct *result) +{ + memset(result, 0, sizeof(*result)); + result->st_mode = attributes_to_mode(info->FileAttributes); + result->st_size = info->EndOfFile.QuadPart; + result->st_dev = 0; + result->st_rdev = 0; + LARGE_INTEGER_to_time_t_nsec(&info->CreationTime, &result->st_ctime, &result->st_ctime_nsec); + LARGE_INTEGER_to_time_t_nsec(&info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec); + LARGE_INTEGER_to_time_t_nsec(&info->LastAccessTime, &result->st_atime, &result->st_atime_nsec); + result->st_nlink = info->NumberOfLinks; + result->st_ino = info->FileId.QuadPart; + /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will + open other name surrogate reparse points without traversing them. To + detect/handle these, check st_file_attributes and st_reparse_tag.
*/ + result->st_reparse_tag = info->ReparseTag; + if (info->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT && + info->ReparseTag == IO_REPARSE_TAG_SYMLINK) { + /* set the bits that make this a symlink */ + result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK; + } + result->st_file_attributes = info->FileAttributes; + /* DeviceType holds a FILE_DEVICE_* code. Those are numbered differently + from the FILE_TYPE_* values returned by GetFileType(). */ + switch (info->DeviceType) { + case FILE_DEVICE_DISK: + case FILE_DEVICE_VIRTUAL_DISK: + case FILE_DEVICE_CD_ROM: + break; + case FILE_DEVICE_NULL: + case FILE_DEVICE_CONSOLE: + case FILE_DEVICE_SERIAL_PORT: + case FILE_DEVICE_PARALLEL_PORT: + /* \\.\nul */ + result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFCHR; + break; + case FILE_DEVICE_NAMED_PIPE: + /* \\.\pipe\spam */ + result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFIFO; + break; + default: + if (info->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + /* \\.\pipe\ or \\.\mailslot\ */ + result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFDIR; + } + break; + } +} + #endif /* Return information about a file.