diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h index 6c2ce0d5fee55f..adb16f148bf21b 100644 --- a/src/mono/mono/arch/arm64/arm64-codegen.h +++ b/src/mono/mono/arch/arm64/arm64-codegen.h @@ -486,18 +486,19 @@ MONO_RESTORE_WARNING #define arm_neon_ldrq_lit(p, rd, target) arm_emit ((p), 0b00011100000000000000000000000000 | (0b10 << 30) | (arm_get_disp19 ((p), (target)) << 5) | (rd)) #define arm_neon_ldrq_lit_fixup(p, target) *((guint32*)p) = (*((guint32*)p) & 0xff00001f) | (arm_get_disp19 ((p), (target)) << 5) +#define ARM_MAX_ARITH_IMM (0xfff) /* largest unshifted ADD/SUB immediate (12 bits); the bound is inclusive */ + /* Arithmetic (immediate) */ static G_GNUC_UNUSED inline guint32 arm_encode_arith_imm (int imm, guint32 *shift) { // FIXME: - g_assert ((imm >= 0) && (imm < 0xfff)); + g_assert ((imm >= 0) && (imm <= ARM_MAX_ARITH_IMM)); *shift = 0; return (guint32)imm; } - // FIXME: -#define arm_is_arith_imm(imm) (((imm) >= 0) && ((imm) < 0xfff)) +#define arm_is_arith_imm(imm) (((imm) >= 0) && ((imm) <= ARM_MAX_ARITH_IMM)) #define arm_format_alu_imm(p, sf, op, S, rd, rn, imm) do { \ guint32 _imm12, _shift; \ @@ -1054,7 +1055,12 @@ arm_encode_arith_imm (int imm, guint32 *shift) #define TYPE_F32 0 #define TYPE_F64 1 -/* NEON :: move SIMD register*/ +/* NEON :: paired loads/stores */ +#define arm_neon_ldp_stp(p, opc, l, rt1, rt2, rn, imm7) arm_emit ((p), 0b00101101000000000000000000000000 | (opc) << 30 | (l) << 22 | (imm7) << 15 | (rt2) << 10 | (rn) << 5 | (rt1)) +#define arm_neon_stp_16b(p, rt1, rt2, rn, imm) arm_neon_ldp_stp ((p), 0b10, 0b0, (rt1), (rt2), (rn), arm_encode_imm7 ((imm), 16)) +#define arm_neon_ldp_16b(p, rt1, rt2, rn, imm) arm_neon_ldp_stp ((p), 0b10, 0b1, (rt1), (rt2), (rn), arm_encode_imm7 ((imm), 16)) + +/* NEON :: move SIMD register */ #define arm_neon_mov(p, rd, rn) arm_neon_orr ((p), VREG_FULL, (rd), (rn), (rn)) #define arm_neon_mov_8b(p, rd, rn) arm_neon_orr ((p), VREG_LOW, (rd), (rn), (rn)) diff --git a/src/mono/mono/mini/tramp-arm64.c b/src/mono/mono/mini/tramp-arm64.c index 
af394802ecd98a..d26e16b2bbd322 100644 --- a/src/mono/mono/mini/tramp-arm64.c +++ b/src/mono/mono/mini/tramp-arm64.c @@ -139,7 +139,7 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf /* Setup stack frame */ imm = frame_size; mono_add_unwind_op_def_cfa (unwind_ops, code, buf, ARMREG_SP, 0); - while (imm > 256) { + while (imm > 256) { // TODO: can this be changed to ARM_MAX_ARITH_IMM? arm_subx_imm (code, ARMREG_SP, ARMREG_SP, 256); imm -= 256; mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, frame_size - imm); @@ -159,8 +159,15 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf gregs_regset = ~((1 << ARMREG_FP) | (1 << ARMREG_SP)); code = mono_arm_emit_store_regarray (code, gregs_regset, ARMREG_FP, gregs_offset); /* Save fregs */ - for (i = 0; i < num_fregs; ++i) - arm_strfpq (code, i, ARMREG_FP, fregs_offset + (i * 16)); + for (i = 0; i < num_fregs; ++i) { + int offs = fregs_offset + (i * 16); + if (i+1 < num_fregs && arm_is_imm7_scaled (offs, 16)) { + arm_neon_stp_16b (code, i, i+1, ARMREG_FP, offs); + i++; + } else { + arm_strfpq (code, i, ARMREG_FP, offs); + } + } /* Save trampoline arg */ arm_strx (code, ARMREG_IP1, ARMREG_FP, arg_offset); @@ -173,9 +180,9 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf /* Save caller sp */ arm_movx (code, ARMREG_IP1, ARMREG_FP); imm = frame_size; - while (imm > 256) { - arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 256); - imm -= 256; + while (imm > (ARM_MAX_ARITH_IMM & ~0xf)) { /* step by 0xff0: strictly inside the range arm_encode_arith_imm asserts on, and so is the remainder */ + arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, (ARM_MAX_ARITH_IMM & ~0xf)); + imm -= (ARM_MAX_ARITH_IMM & ~0xf); } arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, imm); arm_strx (code, ARMREG_IP1, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, gregs) + (MONO_ARCH_LMF_REG_SP * 8)); @@ -264,8 +271,15 @@ mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInf /* Only have to load the argument regs (r0..r8) and the rgctx reg */ code = mono_arm_emit_load_regarray 
(code, 0x1ff | (1 << ARMREG_LR) | (1 << MONO_ARCH_RGCTX_REG), ARMREG_FP, gregs_offset); /* Restore fregs */ - for (i = 0; i < num_fregs; ++i) - arm_ldrfpq (code, i, ARMREG_FP, fregs_offset + (i * 16)); + for (i = 0; i < num_fregs; ++i) { + int offs = fregs_offset + (i * 16); + if (i+1 < num_fregs && arm_is_imm7_scaled (offs, 16)) { + arm_neon_ldp_16b (code, i, i+1, ARMREG_FP, offs); + i++; + } else { + arm_ldrfpq (code, i, ARMREG_FP, offs); + } + } /* Load the result */ arm_ldrx (code, ARMREG_IP1, ARMREG_FP, res_offset); @@ -567,9 +581,9 @@ mono_arch_create_sdb_trampoline (gboolean single_step, MonoTrampInfo **info, gbo /* Setup stack frame */ imm = frame_size; - while (imm > 256) { - arm_subx_imm (code, ARMREG_SP, ARMREG_SP, 256); - imm -= 256; + while (imm > (ARM_MAX_ARITH_IMM & ~0xf)) { /* step by 0xff0: inside the asserted immediate range and a multiple of 16, so each step leaves SP's 16-byte alignment unchanged */ + arm_subx_imm (code, ARMREG_SP, ARMREG_SP, (ARM_MAX_ARITH_IMM & ~0xf)); + imm -= (ARM_MAX_ARITH_IMM & ~0xf); } arm_subx_imm (code, ARMREG_SP, ARMREG_SP, imm); arm_stpx (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, 0); @@ -585,9 +599,9 @@ mono_arch_create_sdb_trampoline (gboolean single_step, MonoTrampInfo **info, gbo /* Save caller sp */ arm_movx (code, ARMREG_IP1, ARMREG_FP); imm = frame_size; - while (imm > 256) { - arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 256); - imm -= 256; + while (imm > (ARM_MAX_ARITH_IMM & ~0xf)) { /* mirror the stack-setup loop: 0xff0 per step keeps every emitted immediate inside the asserted range */ + arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, (ARM_MAX_ARITH_IMM & ~0xf)); + imm -= (ARM_MAX_ARITH_IMM & ~0xf); } arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, imm); arm_strx (code, ARMREG_IP1, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_SP * 8));