-
-
Notifications
You must be signed in to change notification settings - Fork 31k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-99554: marshal bytecode more efficiently
#99555
Changes from 3 commits
4034318
8b6b393
5f81b82
e34c460
23ba802
6ed83af
5534112
ead8f26
bcd7980
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Modify the :mod:`marshal` format to serialize bytecode more efficiently. |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,13 +17,13 @@ | |
from typing import Dict, FrozenSet, TextIO, Tuple | ||
|
||
import umarshal | ||
import opcode_for_build as opcode | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This isn't a normal import so it shouldn't look like one. Maybe something like |
||
from generate_global_objects import get_identifiers_and_strings | ||
|
||
verbose = False | ||
identifiers, strings = get_identifiers_and_strings() | ||
|
||
# This must be kept in sync with opcode.py | ||
RESUME = 151 | ||
RESUME = opcode.opmap["RESUME"] | ||
|
||
def isprintable(b: bytes) -> bool: | ||
return all(0x20 <= c < 0x7f for c in b) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
""" | ||
Parts of our build process (looking at you, deepfreeze) need the opcode module | ||
for the Python *being built*, not the Python *doing the building*. | ||
|
||
This basically just loads ../../Lib/opcode.py and re-exports everything: | ||
|
||
>>> import opcode_for_build as opcode | ||
""" | ||
|
||
import os | ||
|
||
_opcode_path = os.path.join( | ||
os.path.dirname(__file__), os.pardir, os.pardir, "Lib", "opcode.py" | ||
) | ||
with open(_opcode_path, encoding="utf-8") as _opcode_file: | ||
# Don't try this at home, kids: | ||
exec(_opcode_file.read()) |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,6 +1,7 @@ | ||||||
# Implement marshal.loads() in pure Python | ||||||
|
||||||
import ast | ||||||
import opcode_for_build as opcode | ||||||
|
||||||
from typing import Any, Tuple | ||||||
|
||||||
|
@@ -47,6 +48,8 @@ class Type: | |||||
CO_FAST_CELL = 0x40 | ||||||
CO_FAST_FREE = 0x80 | ||||||
|
||||||
CACHE = opcode.opmap["CACHE"] | ||||||
|
||||||
|
||||||
class Code: | ||||||
def __init__(self, **kwds: Any): | ||||||
|
@@ -178,6 +181,23 @@ def r_object(self) -> Any: | |||||
finally: | ||||||
self.level = old_level | ||||||
|
||||||
def r_bytecode(self) -> bytes: | ||||||
nbytes = self.r_long() * 2 | ||||||
bytecode = bytearray() | ||||||
while len(bytecode) < nbytes: | ||||||
opcode_byte = self.r_byte() | ||||||
if opcode.HAVE_ARGUMENT <= opcode_byte: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks weird. I'm so used to
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have a weird habit where I always use […]. I'll change it, though. |
||||||
oparg_byte = self.r_byte() | ||||||
else: | ||||||
oparg_byte = 0 | ||||||
assert 0x00 <= opcode_byte < 0x100 | ||||||
assert 0x00 <= oparg_byte < 0x100 | ||||||
bytecode.extend([opcode_byte, oparg_byte]) | ||||||
for _ in range(opcode._inline_cache_entries[opcode_byte]): | ||||||
bytecode.extend([CACHE, 0]) | ||||||
assert len(bytecode) == nbytes | ||||||
return bytes(bytecode) | ||||||
|
||||||
def _r_object(self) -> Any: | ||||||
code = self.r_byte() | ||||||
flag = code & FLAG_REF | ||||||
|
@@ -279,7 +299,7 @@ def R_REF(obj: Any) -> Any: | |||||
retval.co_kwonlyargcount = self.r_long() | ||||||
retval.co_stacksize = self.r_long() | ||||||
retval.co_flags = self.r_long() | ||||||
retval.co_code = self.r_object() | ||||||
retval.co_code = self.r_bytecode() | ||||||
retval.co_consts = self.r_object() | ||||||
retval.co_names = self.r_object() | ||||||
retval.co_localsplusnames = self.r_object() | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe assert that `i < size` here, since there is potential for a buffer overrun.