From cae02f1cdfdf0c4b3f67d83a4abc12b2f5f36a94 Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Tue, 20 Jun 2023 11:01:46 -0500 Subject: [PATCH] Speed LTO builds by using multiple threads On my i5-1235U laptop this speeds LTO "partition=balanced" builds substantially, because each "partition" can be run on a separate CPU thread. I used "pygamer" as my test build with a parallelism of `-j4`, and took the best elapsed time reported over 4 builds. The improvement was from 34.6s to 24.0s (-30%). A link-only build (rm build-pygamer/firmware.elf; make -j...) improved from 17.4s to 5.1s (-70%). The size of the resulting firmware is unchanged. Boards that are nearly full use "-flto-partition=one" to improve code size optimization. When LTO partition is "one", this feature doesn't help but it doesn't seem to negatively affect anything either (tested building trinket_m0). --- py/circuitpy_defns.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/circuitpy_defns.mk b/py/circuitpy_defns.mk index ae4b1bf3dbff..4bfd55b0c9e3 100644 --- a/py/circuitpy_defns.mk +++ b/py/circuitpy_defns.mk @@ -70,7 +70,7 @@ endif CIRCUITPY_LTO ?= 0 CIRCUITPY_LTO_PARTITION ?= balanced ifeq ($(CIRCUITPY_LTO),1) -CFLAGS += -flto -flto-partition=$(CIRCUITPY_LTO_PARTITION) -DCIRCUITPY_LTO=1 +CFLAGS += -flto=jobserver -flto-partition=$(CIRCUITPY_LTO_PARTITION) -DCIRCUITPY_LTO=1 else CFLAGS += -DCIRCUITPY_LTO=0 endif