diff --git a/py/circuitpy_mpconfig.h b/py/circuitpy_mpconfig.h
index cc7886cb3765..e1909a064f54 100644
--- a/py/circuitpy_mpconfig.h
+++ b/py/circuitpy_mpconfig.h
@@ -59,8 +59,8 @@ extern void common_hal_mcu_enable_interrupts(void);
 //
 // default is 128; consider raising to reduce fragmentation.
 #define MICROPY_ALLOC_PARSE_CHUNK_INIT (16)
-// default is 512.
-#define MICROPY_ALLOC_PATH_MAX (256)
+// default is 512. Longest path in .py bundle as of June 6th, 2023 is 73 characters.
+#define MICROPY_ALLOC_PATH_MAX (96)
 #define MICROPY_CAN_OVERRIDE_BUILTINS (1)
 #define MICROPY_COMP_CONST (1)
 #define MICROPY_COMP_DOUBLE_TUPLE_ASSIGN (1)
diff --git a/py/runtime.c b/py/runtime.c
index 804b955e0745..ebe1590484b7 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -201,7 +201,7 @@ mp_obj_t MICROPY_WRAP_MP_LOAD_GLOBAL(mp_load_global)(qstr qst) {
     return elem->value;
 }
 
-mp_obj_t mp_load_build_class(void) {
+mp_obj_t __attribute__((noinline)) mp_load_build_class(void) {
     DEBUG_OP_printf("load_build_class\n");
     #if MICROPY_CAN_OVERRIDE_BUILTINS
     if (MP_STATE_VM(mp_module_builtins_override_dict) != NULL) {
@@ -858,7 +858,7 @@ mp_obj_t mp_call_method_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_ob
 }
 
 // unpacked items are stored in reverse order into the array pointed to by items
-void mp_unpack_sequence(mp_obj_t seq_in, size_t num, mp_obj_t *items) {
+void __attribute__((noinline)) mp_unpack_sequence(mp_obj_t seq_in, size_t num, mp_obj_t *items) {
     size_t seq_len;
     if (mp_obj_is_type(seq_in, &mp_type_tuple) || mp_obj_is_type(seq_in, &mp_type_list)) {
         mp_obj_t *seq_items;
@@ -905,7 +905,7 @@ void mp_unpack_sequence(mp_obj_t seq_in, size_t num, mp_obj_t *items) {
 }
 
 // unpacked items are stored in reverse order into the array pointed to by items
-void mp_unpack_ex(mp_obj_t seq_in, size_t num_in, mp_obj_t *items) {
+void __attribute__((noinline)) mp_unpack_ex(mp_obj_t seq_in, size_t num_in, mp_obj_t *items) {
     size_t num_left = num_in & 0xff;
     size_t num_right = (num_in >> 8) & 0xff;
     DEBUG_OP_printf("unpack ex " UINT_FMT " " UINT_FMT "\n", num_left, num_right);
@@ -1482,7 +1482,7 @@ mp_obj_t mp_import_name(qstr name, mp_obj_t fromlist, mp_obj_t level) {
     return mp_builtin___import__(5, args);
 }
 
-mp_obj_t mp_import_from(mp_obj_t module, qstr name) {
+mp_obj_t __attribute__((noinline)) mp_import_from(mp_obj_t module, qstr name) {
     DEBUG_printf("import from %p %s\n", module, qstr_str(name));
 
     mp_obj_t dest[2];
@@ -1528,7 +1528,7 @@ mp_obj_t mp_import_from(mp_obj_t module, qstr name) {
     #endif
 }
 
-void mp_import_all(mp_obj_t module) {
+void __attribute__((noinline)) mp_import_all(mp_obj_t module) {
     DEBUG_printf("import all %p\n", module);
 
     // TODO: Support __all__
diff --git a/tools/gdb-stack-size.py b/tools/gdb-stack-size.py
new file mode 100644
index 000000000000..4d3fc9fe08aa
--- /dev/null
+++ b/tools/gdb-stack-size.py
@@ -0,0 +1,64 @@
+"""Source this file into gdb `source ../../tools/gdb-stack-size.py` then run
+   `stack-size` to print a backtrace with each frame size next to it."""
+
+
+class StackSize(gdb.Command):
+    def __init__(self):
+        super(StackSize, self).__init__("stack-size", gdb.COMMAND_USER)
+
+    def invoke(self, arg, from_tty):
+        frame = gdb.newest_frame()
+        total_size = 0
+        while frame:
+            sp = frame.read_register("sp")
+            frame_up = frame.older()
+            if not frame_up:
+                break
+            f = frame.function()
+            l = frame.level()
+            if l < 10:
+                l = "#" + str(l) + " "
+            else:
+                l = "#" + str(l)
+            size = frame_up.read_register("sp") - sp
+            total_size += size
+            print(l, sp, frame.type(), f, " " * (40 - len(str(f))), size)
+            # print(dir(f))
+            # Tweak this if for more detail for a specific function.
+            if False and f.name == "mp_execute_bytecode":
+                b = frame.block()
+                prev_b = None
+                while not b.is_static:
+                    print(" block", hex(b.start), hex(b.end), b.function)
+                    for sym in b:
+                        if not sym.needs_frame:
+                            continue
+                        v = sym.value(frame)
+                        print(" ", sym.addr_class, v.address, sym.type.sizeof, sym, sym.type, v)
+                    prev_b = b
+                    b = b.superblock
+
+                    if b.function == f:
+                        break
+                b = prev_b
+                print("pc scan", hex(b.start), hex(b.end))
+                seen = set()
+                for pc in range(b.start, b.end, 2):
+                    b = gdb.block_for_pc(pc)
+                    r = (b.start, b.end)
+                    if r in seen:
+                        continue
+                    seen.add(r)
+                    print(" ", hex(pc), hex(b.start), hex(b.end), b.function)
+                    for sym in b:
+                        if not sym.needs_frame:
+                            continue
+                        # if sym.type.sizeof <= 4:
+                        #     continue
+                        v = sym.value(frame)
+                        print(" ", sym.addr_class, v.address, sym.type.sizeof, sym, sym.type, v)
+            frame = frame_up
+        print("total size:", total_size)
+
+
+StackSize()
diff --git a/tools/stack-loc-to-pc.py b/tools/stack-loc-to-pc.py
new file mode 100644
index 000000000000..a1ce788f2b65
--- /dev/null
+++ b/tools/stack-loc-to-pc.py
@@ -0,0 +1,29 @@
+"""Prints the pcs that access each stack location in a function. Useful for finding
+   infrequently used stack space.
+
+   Pipe in disassembly like so:
+
+   arm-none-eabi-objdump --disassemble=mp_execute_bytecode build-metro_m0_express/firmware.elf | python ../../tools/stack-loc-to-pc.py
+   """
+
+import sys
+import re
+
+offset = re.compile(r"sp, #(\d+)")
+
+offsets = {}
+for line in sys.stdin:
+    # Lines that mention sp without an "sp, #<imm>" operand carry no stack offset; skip them.
+    m = offset.search(line)
+    if m:
+        o = int(m.group(1))
+        pc = line.split(":")[0]
+        if o not in offsets:
+            offsets[o] = []
+        offsets[o].append(pc.strip())
+
+print("Offset", "Size", "PCs", sep="\t")
+last_o = 0
+for o in sorted(offsets):
+    print(o, o - last_o, offsets[o], sep="\t")
+    last_o = o