Skip to content

Commit

Permalink
[mono] Optimizing arm64 trampolines (dotnet#93547)
Browse files Browse the repository at this point in the history
* Tweaking trampolines.

* Typo.
  • Loading branch information
jandupej authored Oct 17, 2023
1 parent e68a4cd commit c5e7081
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 18 deletions.
14 changes: 10 additions & 4 deletions src/mono/mono/arch/arm64/arm64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -486,18 +486,19 @@ MONO_RESTORE_WARNING
/*
 * arm_neon_ldrq_lit:
 * Emit an LDR (literal) into 128-bit SIMD register RD. The 19-bit displacement
 * computed by arm_get_disp19 from P to TARGET is placed at bit 5 and up.
 * The size field is built as an unsigned value so the shift into bit 31 is
 * well defined.
 */
#define arm_neon_ldrq_lit(p, rd, target) arm_emit ((p), 0b00011100000000000000000000000000 | ((guint32)0b10 << 30) | (arm_get_disp19 ((p), (target)) << 5) | (rd))
/*
 * arm_neon_ldrq_lit_fixup:
 * Patch the instruction word at P so that its displacement field refers to
 * TARGET. The mask 0xff00001f keeps bits 31..24 and 4..0 of the existing word
 * and clears bits 23..5 before the new displacement is OR-ed in.
 * P is parenthesized before the cast so that pointer expressions such as
 * `buf + 4` are cast as a whole instead of having the offset rescaled.
 */
#define arm_neon_ldrq_lit_fixup(p, target) (*((guint32*)(p)) = (*((guint32*)(p)) & 0xff00001f) | (arm_get_disp19 ((p), (target)) << 5))

/* Largest immediate accepted by the arithmetic (immediate) helpers below: the all-ones 12-bit value. */
#define ARM_MAX_ARITH_IMM (0xfff)

/* Arithmetic (immediate) */
/*
 * arm_encode_arith_imm:
 * Validate IMM for use as an arithmetic immediate and return it as the
 * immediate field value. *SHIFT is always set to 0, i.e. only the unshifted
 * form is produced.
 *
 * The accepted range is 0..ARM_MAX_ARITH_IMM inclusive: callers split large
 * adjustments into chunks of exactly ARM_MAX_ARITH_IMM, so the upper bound
 * itself has to be encodable.
 */
static G_GNUC_UNUSED inline guint32
arm_encode_arith_imm (int imm, guint32 *shift)
{
	// FIXME: values above ARM_MAX_ARITH_IMM are rejected rather than encoded with a shift.
	g_assert ((imm >= 0) && (imm <= ARM_MAX_ARITH_IMM));
	*shift = 0;
	return (guint32)imm;
}

// FIXME: must accept exactly the range that arm_encode_arith_imm () asserts on.
#define arm_is_arith_imm(imm) (((imm) >= 0) && ((imm) <= ARM_MAX_ARITH_IMM))

#define arm_format_alu_imm(p, sf, op, S, rd, rn, imm) do { \
guint32 _imm12, _shift; \
Expand Down Expand Up @@ -1054,7 +1055,12 @@ arm_encode_arith_imm (int imm, guint32 *shift)
#define TYPE_F32 0
#define TYPE_F64 1

/* NEON :: move SIMD register*/
/* NEON :: paired loads/stores */
/*
 * arm_neon_ldp_stp:
 * Emit a paired SIMD load/store. Field placement, as built below:
 * OPC at bit 30, L at bit 22, IMM7 at bit 15, RT2 at bit 10, RN at bit 5,
 * RT1 at bit 0. Every field is converted to guint32 before shifting so that
 * moving OPC into bit 31 is a well-defined unsigned shift.
 */
#define arm_neon_ldp_stp(p, opc, l, rt1, rt2, rn, imm7) arm_emit ((p), 0b00101101000000000000000000000000 | ((guint32)(opc) << 30) | ((guint32)(l) << 22) | ((guint32)(imm7) << 15) | ((guint32)(rt2) << 10) | ((guint32)(rn) << 5) | (guint32)(rt1))
/* Store (L = 0) / load (L = 1) a pair of 16-byte registers at RN + IMM; IMM is encoded via arm_encode_imm7 with a scale of 16. */
#define arm_neon_stp_16b(p, rt1, rt2, rn, imm) arm_neon_ldp_stp ((p), 0b10, 0b0, (rt1), (rt2), (rn), arm_encode_imm7 ((imm), 16))
#define arm_neon_ldp_16b(p, rt1, rt2, rn, imm) arm_neon_ldp_stp ((p), 0b10, 0b1, (rt1), (rt2), (rn), arm_encode_imm7 ((imm), 16))

/* NEON :: move SIMD register */
/*
 * Both moves are expressed as arm_neon_orr with RN passed as both source
 * operands; the two variants differ only in the width selector they pass
 * (VREG_FULL vs. VREG_LOW).
 */
#define arm_neon_mov(p, rd, rn) arm_neon_orr ((p), VREG_FULL, (rd), (rn), (rn))
#define arm_neon_mov_8b(p, rd, rn) arm_neon_orr ((p), VREG_LOW, (rd), (rn), (rn))

Expand Down
42 changes: 28 additions & 14 deletions src/mono/mono/mini/tramp-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf
/* Setup stack frame */
imm = frame_size;
mono_add_unwind_op_def_cfa (unwind_ops, code, buf, ARMREG_SP, 0);
while (imm > 256) {
while (imm > 256) { // TODO: can this be changed to ARM_MAX_ARITH_IMM?
arm_subx_imm (code, ARMREG_SP, ARMREG_SP, 256);
imm -= 256;
mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, frame_size - imm);
Expand All @@ -159,8 +159,15 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf
gregs_regset = ~((1 << ARMREG_FP) | (1 << ARMREG_SP));
code = mono_arm_emit_store_regarray (code, gregs_regset, ARMREG_FP, gregs_offset);
/* Save fregs */
for (i = 0; i < num_fregs; ++i)
arm_strfpq (code, i, ARMREG_FP, fregs_offset + (i * 16));
for (i = 0; i < num_fregs; ++i) {
int offs = fregs_offset + (i * 16);
if (i+1 < num_fregs && arm_is_imm7_scaled (offs, 16)) {
arm_neon_stp_16b (code, i, i+1, ARMREG_FP, offs);
i++;
} else {
arm_strfpq (code, i, ARMREG_FP, offs);
}
}
/* Save trampoline arg */
arm_strx (code, ARMREG_IP1, ARMREG_FP, arg_offset);

Expand All @@ -173,9 +180,9 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf
/* Save caller sp */
arm_movx (code, ARMREG_IP1, ARMREG_FP);
imm = frame_size;
while (imm > 256) {
arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 256);
imm -= 256;
while (imm > ARM_MAX_ARITH_IMM) {
arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, ARM_MAX_ARITH_IMM);
imm -= ARM_MAX_ARITH_IMM;
}
arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, imm);
arm_strx (code, ARMREG_IP1, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, gregs) + (MONO_ARCH_LMF_REG_SP * 8));
Expand Down Expand Up @@ -264,8 +271,15 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf
/* Only have to load the argument regs (r0..r8) and the rgctx reg */
code = mono_arm_emit_load_regarray (code, 0x1ff | (1 << ARMREG_LR) | (1 << MONO_ARCH_RGCTX_REG), ARMREG_FP, gregs_offset);
/* Restore fregs */
for (i = 0; i < num_fregs; ++i)
arm_ldrfpq (code, i, ARMREG_FP, fregs_offset + (i * 16));
for (i = 0; i < num_fregs; ++i) {
int offs = fregs_offset + (i * 16);
if (i+1 < num_fregs && arm_is_imm7_scaled (offs, 16)) {
arm_neon_ldp_16b (code, i, i+1, ARMREG_FP, offs);
i++;
} else {
arm_ldrfpq (code, i, ARMREG_FP, offs);
}
}

/* Load the result */
arm_ldrx (code, ARMREG_IP1, ARMREG_FP, res_offset);
Expand Down Expand Up @@ -567,9 +581,9 @@ mono_arch_create_sdb_trampoline (gboolean single_step, MonoTrampInfo **info, gbo

/* Setup stack frame */
imm = frame_size;
while (imm > 256) {
arm_subx_imm (code, ARMREG_SP, ARMREG_SP, 256);
imm -= 256;
while (imm > ARM_MAX_ARITH_IMM) {
arm_subx_imm (code, ARMREG_SP, ARMREG_SP, ARM_MAX_ARITH_IMM);
imm -= ARM_MAX_ARITH_IMM;
}
arm_subx_imm (code, ARMREG_SP, ARMREG_SP, imm);
arm_stpx (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, 0);
Expand All @@ -585,9 +599,9 @@ mono_arch_create_sdb_trampoline (gboolean single_step, MonoTrampInfo **info, gbo
/* Save caller sp */
arm_movx (code, ARMREG_IP1, ARMREG_FP);
imm = frame_size;
while (imm > 256) {
arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 256);
imm -= 256;
while (imm > ARM_MAX_ARITH_IMM) {
arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, ARM_MAX_ARITH_IMM);
imm -= ARM_MAX_ARITH_IMM;
}
arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, imm);
arm_strx (code, ARMREG_IP1, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_SP * 8));
Expand Down

0 comments on commit c5e7081

Please sign in to comment.