Skip to content

Commit

Permalink
cmd/internal/obj/x86: use push/pop instead of mov to store/load FP
Browse files Browse the repository at this point in the history
This CL changes how the x86 compiler stores and loads the frame pointer
on each function prologue and epilogue, with the goal to reduce the
final binary size without affecting performance.

The compiler is currently using MOV instructions to load and store BP,
which can take from 5 to 8 bytes each.

This CL changes this approach so it emits PUSH/POP instructions instead,
which always take only 1 byte each (when operating with BP). It can also
avoid using the SUBQ/ADDQ to grow the stack for functions that have
frame pointer but does not have local variables.

On Windows, this CL reduces the go toolchain size from 15,697,920 bytes
to 15,584,768 bytes, a reduction of 0.7%.

Example of epilog and prologue for a function with 0x10 bytes of
local variables:

Before

===
 SUBQ    $0x18, SP
 MOVQ    BP, 0x10(SP)
 LEAQ    0x10(SP), BP

 ... function body ...

 MOVQ    0x10(SP), BP
 ADDQ    $0x18, SP
 RET
===

After

===
  PUSHQ   BP
  LEAQ    0(SP), BP
  SUBQ    $0x10, SP

  ... function body ...

  MOVQ    ADDQ $0x10, SP
  POPQ    BP
  RET
===

Updates golang#6853

Change-Id: Ice9e14bbf8dff083c5f69feb97e9a764c3ca7785
Reviewed-on: https://go-review.googlesource.com/c/go/+/462300
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Quim Muntal <quimmuntal@gmail.com>
  • Loading branch information
qmuntal authored and johanbrandhorst committed Feb 12, 2023
1 parent 1715282 commit 1b4b504
Showing 1 changed file with 33 additions and 37 deletions.
70 changes: 33 additions & 37 deletions src/cmd/internal/obj/x86/obj6.go
Original file line number Diff line number Diff line change
Expand Up @@ -684,37 +684,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p, regg = loadG(ctxt, cursym, p, newprog)
}

// Delve debugger would like the next instruction to be noted as the end of the function prologue.
// TODO: are there other cases (e.g., wrapper functions) that need marking?
markedPrologue := false

if autoffset != 0 {
if autoffset%int32(ctxt.Arch.RegSize) != 0 {
ctxt.Diag("unaligned stack size %d", autoffset)
}
p = obj.Appendp(p, newprog)
p.As = AADJSP
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(autoffset)
p.Spadj = autoffset
p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
markedPrologue = true
}

if bpsize > 0 {
// Save caller's BP
p = obj.Appendp(p, newprog)

p.As = AMOVQ
p.As = APUSHQ
p.From.Type = obj.TYPE_REG
p.From.Reg = REG_BP
p.To.Type = obj.TYPE_MEM
p.To.Reg = REG_SP
p.To.Scale = 1
p.To.Offset = int64(autoffset) - int64(bpsize)
if !markedPrologue {
p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
}

// Move current frame to BP
p = obj.Appendp(p, newprog)
Expand All @@ -723,11 +699,32 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p.From.Type = obj.TYPE_MEM
p.From.Reg = REG_SP
p.From.Scale = 1
p.From.Offset = int64(autoffset) - int64(bpsize)
p.From.Offset = 0
p.To.Type = obj.TYPE_REG
p.To.Reg = REG_BP
}

if autoffset%int32(ctxt.Arch.RegSize) != 0 {
ctxt.Diag("unaligned stack size %d", autoffset)
}

// localoffset is autoffset discounting the frame pointer,
// which has already been allocated in the stack.
localoffset := autoffset - int32(bpsize)
if localoffset != 0 {
p = obj.Appendp(p, newprog)
p.As = AADJSP
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(localoffset)
p.Spadj = localoffset
}

// Delve debugger would like the next instruction to be noted as the end of the function prologue.
// TODO: are there other cases (e.g., wrapper functions) that need marking?
if autoffset != 0 {
p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
}

if cursym.Func().Text.From.Sym.Wrapper() {
// if g._panic != nil && g._panic.argp == FP {
// g._panic.argp = bottom-of-frame
Expand Down Expand Up @@ -933,24 +930,23 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
if autoffset != 0 {
to := p.To // Keep To attached to RET for retjmp below
p.To = obj.Addr{}
if localoffset != 0 {
p.As = AADJSP
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(-localoffset)
p.Spadj = -localoffset
p = obj.Appendp(p, newprog)
}

if bpsize > 0 {
// Restore caller's BP
p.As = AMOVQ

p.From.Type = obj.TYPE_MEM
p.From.Reg = REG_SP
p.From.Scale = 1
p.From.Offset = int64(autoffset) - int64(bpsize)
p.As = APOPQ
p.To.Type = obj.TYPE_REG
p.To.Reg = REG_BP
p.Spadj = -int32(bpsize)
p = obj.Appendp(p, newprog)
}

p.As = AADJSP
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(-autoffset)
p.Spadj = -autoffset
p = obj.Appendp(p, newprog)
p.As = obj.ARET
p.To = to

Expand Down

0 comments on commit 1b4b504

Please sign in to comment.