Skip to content

Commit

Permalink
GH-87235: Make sure "python /dev/fd/9 9</path/to/script.py" works on …
Browse files Browse the repository at this point in the history
…macOS (#99768)

On macOS all file descriptors for a particular file in /dev/fd
share the same file offset, that is ``open("/dev/fd/9", "r")`` behaves
more like ``dup(9)`` than a regular open.

This causes problems when a user tries to run "/dev/fd/9" as a script
because zipimport changes the file offset to try to read a zipfile
directory. Therefore change zipimport to reset the file offset after
trying to read the zipfile directory.
  • Loading branch information
ronaldoussoren authored Nov 27, 2022
1 parent 9c9f085 commit d08fb25
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 98 deletions.
14 changes: 14 additions & 0 deletions Lib/test/test_cmd_line_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,20 @@ def test_nonexisting_script(self):
self.assertIn(": can't open file ", err)
self.assertNotEqual(proc.returncode, 0)

@unittest.skipUnless(os.path.exists('/dev/fd/0'), 'requires /dev/fd platform')
def test_script_as_dev_fd(self):
# GH-87235: On macOS passing a non-trivial script to /dev/fd/N can cause
# problems because all open /dev/fd/N file descriptors share the same
# offset.
script = 'print("12345678912345678912345")'
with os_helper.temp_dir() as work_dir:
script_name = _make_test_script(work_dir, 'script.py', script)
with open(script_name, "r") as fp:
p = spawn_python(f"/dev/fd/{fp.fileno()}", close_fds=False, pass_fds=(0,1,2,fp.fileno()))
out, err = p.communicate()
self.assertEqual(out, b"12345678912345678912345\n")



def tearDownModule():
support.reap_children()
Expand Down
203 changes: 105 additions & 98 deletions Lib/zipimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,114 +347,121 @@ def _read_directory(archive):
raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)

with fp:
# GH-87235: On macOS all file descriptors for /dev/fd/N share the same
# file offset, reset the file offset after scanning the zipfile diretory
# to not cause problems when some runs 'python3 /dev/fd/9 9<some_script'
start_offset = fp.tell()
try:
fp.seek(-END_CENTRAL_DIR_SIZE, 2)
header_position = fp.tell()
buffer = fp.read(END_CENTRAL_DIR_SIZE)
except OSError:
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
if len(buffer) != END_CENTRAL_DIR_SIZE:
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
if buffer[:4] != STRING_END_ARCHIVE:
# Bad: End of Central Dir signature
# Check if there's a comment.
try:
fp.seek(0, 2)
file_size = fp.tell()
except OSError:
raise ZipImportError(f"can't read Zip file: {archive!r}",
path=archive)
max_comment_start = max(file_size - MAX_COMMENT_LEN -
END_CENTRAL_DIR_SIZE, 0)
try:
fp.seek(max_comment_start)
data = fp.read()
except OSError:
raise ZipImportError(f"can't read Zip file: {archive!r}",
path=archive)
pos = data.rfind(STRING_END_ARCHIVE)
if pos < 0:
raise ZipImportError(f'not a Zip file: {archive!r}',
path=archive)
buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
if len(buffer) != END_CENTRAL_DIR_SIZE:
raise ZipImportError(f"corrupt Zip file: {archive!r}",
path=archive)
header_position = file_size - len(data) + pos

header_size = _unpack_uint32(buffer[12:16])
header_offset = _unpack_uint32(buffer[16:20])
if header_position < header_size:
raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
if header_position < header_offset:
raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
header_position -= header_size
arc_offset = header_position - header_offset
if arc_offset < 0:
raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

files = {}
# Start of Central Directory
count = 0
try:
fp.seek(header_position)
except OSError:
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
while True:
buffer = fp.read(46)
if len(buffer) < 4:
raise EOFError('EOF read where not expected')
# Start of file header
if buffer[:4] != b'PK\x01\x02':
break # Bad: Central Dir File Header
if len(buffer) != 46:
raise EOFError('EOF read where not expected')
flags = _unpack_uint16(buffer[8:10])
compress = _unpack_uint16(buffer[10:12])
time = _unpack_uint16(buffer[12:14])
date = _unpack_uint16(buffer[14:16])
crc = _unpack_uint32(buffer[16:20])
data_size = _unpack_uint32(buffer[20:24])
file_size = _unpack_uint32(buffer[24:28])
name_size = _unpack_uint16(buffer[28:30])
extra_size = _unpack_uint16(buffer[30:32])
comment_size = _unpack_uint16(buffer[32:34])
file_offset = _unpack_uint32(buffer[42:46])
header_size = name_size + extra_size + comment_size
if file_offset > header_offset:
raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
file_offset += arc_offset

try:
name = fp.read(name_size)
fp.seek(-END_CENTRAL_DIR_SIZE, 2)
header_position = fp.tell()
buffer = fp.read(END_CENTRAL_DIR_SIZE)
except OSError:
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
if len(name) != name_size:
if len(buffer) != END_CENTRAL_DIR_SIZE:
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
# On Windows, calling fseek to skip over the fields we don't use is
# slower than reading the data because fseek flushes stdio's
# internal buffers. See issue #8745.
if buffer[:4] != STRING_END_ARCHIVE:
# Bad: End of Central Dir signature
# Check if there's a comment.
try:
fp.seek(0, 2)
file_size = fp.tell()
except OSError:
raise ZipImportError(f"can't read Zip file: {archive!r}",
path=archive)
max_comment_start = max(file_size - MAX_COMMENT_LEN -
END_CENTRAL_DIR_SIZE, 0)
try:
fp.seek(max_comment_start)
data = fp.read()
except OSError:
raise ZipImportError(f"can't read Zip file: {archive!r}",
path=archive)
pos = data.rfind(STRING_END_ARCHIVE)
if pos < 0:
raise ZipImportError(f'not a Zip file: {archive!r}',
path=archive)
buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
if len(buffer) != END_CENTRAL_DIR_SIZE:
raise ZipImportError(f"corrupt Zip file: {archive!r}",
path=archive)
header_position = file_size - len(data) + pos

header_size = _unpack_uint32(buffer[12:16])
header_offset = _unpack_uint32(buffer[16:20])
if header_position < header_size:
raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
if header_position < header_offset:
raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
header_position -= header_size
arc_offset = header_position - header_offset
if arc_offset < 0:
raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

files = {}
# Start of Central Directory
count = 0
try:
if len(fp.read(header_size - name_size)) != header_size - name_size:
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
fp.seek(header_position)
except OSError:
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
while True:
buffer = fp.read(46)
if len(buffer) < 4:
raise EOFError('EOF read where not expected')
# Start of file header
if buffer[:4] != b'PK\x01\x02':
break # Bad: Central Dir File Header
if len(buffer) != 46:
raise EOFError('EOF read where not expected')
flags = _unpack_uint16(buffer[8:10])
compress = _unpack_uint16(buffer[10:12])
time = _unpack_uint16(buffer[12:14])
date = _unpack_uint16(buffer[14:16])
crc = _unpack_uint32(buffer[16:20])
data_size = _unpack_uint32(buffer[20:24])
file_size = _unpack_uint32(buffer[24:28])
name_size = _unpack_uint16(buffer[28:30])
extra_size = _unpack_uint16(buffer[30:32])
comment_size = _unpack_uint16(buffer[32:34])
file_offset = _unpack_uint32(buffer[42:46])
header_size = name_size + extra_size + comment_size
if file_offset > header_offset:
raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
file_offset += arc_offset

if flags & 0x800:
# UTF-8 file names extension
name = name.decode()
else:
# Historical ZIP filename encoding
try:
name = name.decode('ascii')
except UnicodeDecodeError:
name = name.decode('latin1').translate(cp437_table)

name = name.replace('/', path_sep)
path = _bootstrap_external._path_join(archive, name)
t = (path, compress, data_size, file_size, file_offset, time, date, crc)
files[name] = t
count += 1
name = fp.read(name_size)
except OSError:
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
if len(name) != name_size:
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
# On Windows, calling fseek to skip over the fields we don't use is
# slower than reading the data because fseek flushes stdio's
# internal buffers. See issue #8745.
try:
if len(fp.read(header_size - name_size)) != header_size - name_size:
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
except OSError:
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)

if flags & 0x800:
# UTF-8 file names extension
name = name.decode()
else:
# Historical ZIP filename encoding
try:
name = name.decode('ascii')
except UnicodeDecodeError:
name = name.decode('latin1').translate(cp437_table)

name = name.replace('/', path_sep)
path = _bootstrap_external._path_join(archive, name)
t = (path, compress, data_size, file_size, file_offset, time, date, crc)
files[name] = t
count += 1
finally:
fp.seek(start_offset)
_bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
return files

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
On macOS ``python3 /dev/fd/9 9</path/to/script.py`` failed for any script longer than a couple of bytes.

0 comments on commit d08fb25

Please sign in to comment.