Skip to content

Commit

Permalink
Slim down stack frames
Browse files Browse the repository at this point in the history
This reduces the stack frame size of mp_builtin___import__ by
limiting the supported path length of files from 256 to 96. This
function can be called recursively for nested imports, so it adds up.

Also reduce the mp_execute_bytecode (vm.c) stack frame from 206 per
bytecode (bc) call to 124. This function is recursive too, so it also
adds up. It is reduced by preventing some inlining, which may slightly
decrease performance when importing and unpacking.

Adds two new scripts for debugging. One is used from gdb to print
frame sizes in a backtrace. The other prints what pcs use a
particular stack offset. This helps find infrequently used stack
space.

Fixes #8053.
  • Loading branch information
tannewt committed Jun 6, 2023
1 parent 475ffc3 commit dd71ae1
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 7 deletions.
4 changes: 2 additions & 2 deletions py/circuitpy_mpconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ extern void common_hal_mcu_enable_interrupts(void);
//
// default is 128; consider raising to reduce fragmentation.
#define MICROPY_ALLOC_PARSE_CHUNK_INIT (16)
// default is 512.
#define MICROPY_ALLOC_PATH_MAX (256)
// default is 512. Longest path in .py bundle as of June 6th, 2023 is 73 characters.
#define MICROPY_ALLOC_PATH_MAX (96)
#define MICROPY_CAN_OVERRIDE_BUILTINS (1)
#define MICROPY_COMP_CONST (1)
#define MICROPY_COMP_DOUBLE_TUPLE_ASSIGN (1)
Expand Down
10 changes: 5 additions & 5 deletions py/runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ mp_obj_t MICROPY_WRAP_MP_LOAD_GLOBAL(mp_load_global)(qstr qst) {
return elem->value;
}

mp_obj_t mp_load_build_class(void) {
mp_obj_t __attribute__((noinline)) mp_load_build_class(void) {
DEBUG_OP_printf("load_build_class\n");
#if MICROPY_CAN_OVERRIDE_BUILTINS
if (MP_STATE_VM(mp_module_builtins_override_dict) != NULL) {
Expand Down Expand Up @@ -858,7 +858,7 @@ mp_obj_t mp_call_method_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_ob
}

// unpacked items are stored in reverse order into the array pointed to by items
void mp_unpack_sequence(mp_obj_t seq_in, size_t num, mp_obj_t *items) {
void __attribute__((noinline,)) mp_unpack_sequence(mp_obj_t seq_in, size_t num, mp_obj_t *items) {
size_t seq_len;
if (mp_obj_is_type(seq_in, &mp_type_tuple) || mp_obj_is_type(seq_in, &mp_type_list)) {
mp_obj_t *seq_items;
Expand Down Expand Up @@ -905,7 +905,7 @@ void mp_unpack_sequence(mp_obj_t seq_in, size_t num, mp_obj_t *items) {
}

// unpacked items are stored in reverse order into the array pointed to by items
void mp_unpack_ex(mp_obj_t seq_in, size_t num_in, mp_obj_t *items) {
void __attribute__((noinline)) mp_unpack_ex(mp_obj_t seq_in, size_t num_in, mp_obj_t *items) {
size_t num_left = num_in & 0xff;
size_t num_right = (num_in >> 8) & 0xff;
DEBUG_OP_printf("unpack ex " UINT_FMT " " UINT_FMT "\n", num_left, num_right);
Expand Down Expand Up @@ -1482,7 +1482,7 @@ mp_obj_t mp_import_name(qstr name, mp_obj_t fromlist, mp_obj_t level) {
return mp_builtin___import__(5, args);
}

mp_obj_t mp_import_from(mp_obj_t module, qstr name) {
mp_obj_t __attribute__((noinline,)) mp_import_from(mp_obj_t module, qstr name) {
DEBUG_printf("import from %p %s\n", module, qstr_str(name));

mp_obj_t dest[2];
Expand Down Expand Up @@ -1528,7 +1528,7 @@ mp_obj_t mp_import_from(mp_obj_t module, qstr name) {
#endif
}

void mp_import_all(mp_obj_t module) {
void __attribute__((noinline)) mp_import_all(mp_obj_t module) {
DEBUG_printf("import all %p\n", module);

// TODO: Support __all__
Expand Down
64 changes: 64 additions & 0 deletions tools/gdb-stack-size.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Source this file into gdb `source ../../tools/gdb-stack-size.py` then run
`stack-size` to print a backtrace with each frame size next to it."""


class StackSize(gdb.Command):
    """gdb command `stack-size`: backtrace annotated with per-frame stack usage.

    A frame's size is the difference between the `sp` register of the next
    older frame and the frame's own `sp`. The oldest frame has no older frame
    to compare against, so it is not printed or counted.
    """

    def __init__(self):
        super(StackSize, self).__init__("stack-size", gdb.COMMAND_USER)

    @staticmethod
    def _print_frame_symbols(block, frame, prefix):
        """Print every symbol in `block` that needs a frame, evaluated in `frame`."""
        for symbol in block:
            if symbol.needs_frame:
                value = symbol.value(frame)
                print(prefix, symbol.addr_class, value.address, symbol.type.sizeof, symbol, symbol.type, value)

    def _print_frame_detail(self, frame, func):
        """Dump the blocks and frame-resident symbols relevant to `frame`.

        First walks from the frame's block up through its superblocks, stopping
        at a static block or once the superblock's function is `func`. Then
        scans the pc range of the last block visited in steps of 2 and prints
        each distinct block found along the way.
        """
        block = frame.block()
        last_block = None
        while not block.is_static:
            print(" block", hex(block.start), hex(block.end), block.function)
            self._print_frame_symbols(block, frame, " ")
            last_block = block
            block = block.superblock

            if block.function == func:
                break

        print("pc scan", hex(last_block.start), hex(last_block.end))
        visited = set()
        for pc in range(last_block.start, last_block.end, 2):
            pc_block = gdb.block_for_pc(pc)
            bounds = (pc_block.start, pc_block.end)
            if bounds in visited:
                # Many pcs map to the same block; report each block only once.
                continue
            visited.add(bounds)
            print(" ", hex(pc), hex(pc_block.start), hex(pc_block.end), pc_block.function)
            self._print_frame_symbols(pc_block, frame, " ")

    def invoke(self, arg, from_tty):
        """Print one line per frame (newest first) followed by the total size."""
        current = gdb.newest_frame()
        total_size = 0
        while current:
            stack_pointer = current.read_register("sp")
            caller = current.older()
            if not caller:
                break
            func = current.function()
            level = current.level()
            # Single-digit levels get a trailing space so the columns line up.
            label = "#" + str(level) + (" " if level < 10 else "")
            size = caller.read_register("sp") - stack_pointer
            total_size += size
            padding = " " * (40 - len(str(func)))
            print(label, stack_pointer, current.type(), func, padding, size)
            # Tweak this condition to get more detail for a specific function.
            if False and func.name == "mp_execute_bytecode":
                self._print_frame_detail(current, func)
            current = caller
        print("total size:", total_size)


# Instantiating the command registers it with gdb.
StackSize()
28 changes: 28 additions & 0 deletions tools/stack-loc-to-pc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Prints the pcs that access each stack location in a function. Useful for finding
infrequently used stack space.
Pipe in disassembly like so:
arm-none-eabi-objdump --disassemble=mp_execute_bytecode build-metro_m0_express/firmware.elf | python ../../tools/stack-loc-to-pc.py
"""

import sys
import re

# Matches an sp-relative immediate such as `[sp, #12]` or `sub sp, #124`.
offset = re.compile(r"sp, #(\d+)")


def collect_offsets(lines):
    """Map each sp-relative offset to the pcs of the lines that mention it.

    Args:
        lines: iterable of objdump disassembly lines (`<pc>: <bytes> <insn>`).

    Returns:
        dict mapping the integer offset to a list of pc strings, in input order.

    Lines that do not contain an `sp, #<n>` operand are skipped. This includes
    lines that merely contain the letters "sp" elsewhere, such as a symbol name
    like `mp_obj_str_split` or a register move like `mov r7, sp`.
    """
    offsets = {}
    for line in lines:
        m = offset.search(line)
        if m is None:
            continue
        # The pc is the text before the first colon on the line.
        pc = line.split(":")[0].strip()
        offsets.setdefault(int(m.group(1)), []).append(pc)
    return offsets


def main():
    """Read disassembly from stdin and print a tab-separated offset table."""
    offsets = collect_offsets(sys.stdin)

    print("Offset", "Size", "PCs", sep="\t")
    last_o = 0
    for o in sorted(offsets):
        # "Size" is the gap between this offset and the previous one seen.
        print(o, o - last_o, offsets[o], sep="\t")
        last_o = o


if __name__ == "__main__":
    main()

0 comments on commit dd71ae1

Please sign in to comment.