From c8a1352c3e265399992329b4c5c33acbdaf81ec0 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 23 Oct 2022 19:58:26 +0200 Subject: [PATCH 01/10] Allow some intrinsics in Tier0 --- src/coreclr/jit/compiler.h | 25 ++++++++++++++++++++++++- src/coreclr/jit/importer.cpp | 2 +- src/coreclr/jit/importercalls.cpp | 29 ++++++++++++++++++++++++++++- 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 05dfb0bfe30111..fc357c58c5f849 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9037,6 +9037,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX COUNT_OPT_CODE }; + enum OptimizationsKind + { + OPT_Lightweight, + OPT_All + }; + struct Options { JitFlags* jitFlags; // all flags passed from the EE @@ -9106,8 +9112,25 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX { return MinOpts() || compDbgCode; } - bool OptimizationEnabled() const + + //------------------------------------------------------------------------ + // OptimizationEnabled: Are optimizations of the specified kind allowed? + // + // Arguments: + // kind - there are two kinds of optimizations: + // OPT_All: all kinds of optimizations + // OPT_Lightweight: only those which won't regress JIT throughput + // + // Return Value: + // true if JIT is alowed to perform optimizations of the given kind. + // + bool OptimizationEnabled(OptimizationsKind kind = OPT_All) const { + if (kind == OPT_Lightweight) + { + // Quick optimizations are allowed for all cases except debug code mode + return !compDbgCode; + } return !OptimizationDisabled(); } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index e6041c12c5aae3..9f092ef6d27eb1 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3791,7 +3791,7 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken) // and the other you get // *(temp+4) = expr - if (opts.OptimizationDisabled()) + if (!opts.OptimizationEnabled(OPT_Lightweight)) { // For minopts/debug code, try and minimize the total number // of box temps by reusing an existing temp when possible. diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 839ffcfd7423fe..f39981e73f5fba 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -2476,6 +2476,33 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, break; } + // Allow some lighweight intrinsics in Tier0 which can improve throughput + // we introduced betterToExpand here because we're fine if intrinsic decides to not expand itself + // in this case unlike mustExpand. + bool betterToExpand = mustExpand; + if (!mustExpand && opts.OptimizationEnabled(OPT_Lightweight)) + { + switch (ni) + { + // HasFlag allocates in Tier0 and produces a fairly large IR tree + case NI_System_Enum_HasFlag: + + // This one is just `return true/false` + case NI_System_Runtime_CompilerServices_RuntimeHelpers_IsKnownConstant: + + // This is faster to expand than emitting get_Length call + case NI_System_String_get_Length: + + betterToExpand = true; + break; + + default: + // Unsafe.* are all small enough to prefer expansions. + betterToExpand = ni >= NI_SRCS_UNSAFE_START && ni <= NI_SRCS_UNSAFE_END; + break; + } + } + GenTree* retNode = nullptr; // Under debug and minopts, only expand what is required. @@ -2483,7 +2510,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, // If that call is an intrinsic and is expanded, codegen for NextCallReturnAddress will fail. // To avoid that we conservatively expand only required intrinsics in methods that call // the NextCallReturnAddress intrinsic. - if (!mustExpand && (opts.OptimizationDisabled() || info.compHasNextCallRetAddr)) + if (!betterToExpand && !mustExpand && (opts.OptimizationDisabled() || info.compHasNextCallRetAddr)) { *pIntrinsicName = NI_Illegal; return retNode; From 78aac501e0f1c80ff1260e4eb587e6fa09e19655 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 23 Oct 2022 20:39:03 +0200 Subject: [PATCH 02/10] Update compiler.h --- src/coreclr/jit/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index fc357c58c5f849..057b787fd2121a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9129,7 +9129,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX if (kind == OPT_Lightweight) { // Quick optimizations are allowed for all cases except debug code mode - return !compDbgCode; + return !compDbgCode && !jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT); } return !OptimizationDisabled(); } From e6c4669e69af96fcc9bde5df7ecfcc23ab1640f7 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 24 Oct 2022 02:44:05 +0200 Subject: [PATCH 03/10] fix size regressions --- src/coreclr/jit/importer.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 9f092ef6d27eb1..6d475e8608305f 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3791,10 +3791,20 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken) // and the other you get // *(temp+4) = expr - if (!opts.OptimizationEnabled(OPT_Lightweight)) + // For minopts/debug code, try and minimize the total number + // of box temps by reusing an existing temp when possible. + bool useSharedBoxTemp = opts.OptimizationDisabled(); + + // However, when we're allowed to perform quick opts we want to still have exact classes for boxed enums + // in case if we hit Enum.HasFlag. + if (useSharedBoxTemp && opts.OptimizationEnabled(OPT_Lightweight) && + (info.compCompHnd->getTypeForPrimitiveNumericClass(pResolvedToken->hClass) == CORINFO_TYPE_UNDEF)) + { + useSharedBoxTemp = false; + } + + if (useSharedBoxTemp) { - // For minopts/debug code, try and minimize the total number - // of box temps by reusing an existing temp when possible. if (impBoxTempInUse || impBoxTemp == BAD_VAR_NUM) { impBoxTemp = lvaGrabTemp(true DEBUGARG("Reusable Box Helper")); From dec24a5419b3b5ca47d8f8132f722fed7b738d4e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 24 Oct 2022 16:56:16 +0200 Subject: [PATCH 04/10] limit to 128 locals --- src/coreclr/jit/fgbasic.cpp | 3 +++ src/coreclr/jit/importer.cpp | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 20c880ddc66646..830ce8107e471c 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -82,6 +82,9 @@ void Compiler::fgInit() fgGlobalMorph = false; fgModified = false; + impBoxTempInUse = false; + impBoxTemp = BAD_VAR_NUM; + #ifdef DEBUG fgSafeBasicBlockCreation = true; #endif // DEBUG diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 6d475e8608305f..ebb91f5046af30 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3797,7 +3797,8 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken) // However, when we're allowed to perform quick opts we want to still have exact classes for boxed enums // in case if we hit Enum.HasFlag. - if (useSharedBoxTemp && opts.OptimizationEnabled(OPT_Lightweight) && + const bool tooManyLocals = lvaCount > 128; + if (useSharedBoxTemp && opts.OptimizationEnabled(OPT_Lightweight) && !tooManyLocals && (info.compCompHnd->getTypeForPrimitiveNumericClass(pResolvedToken->hClass) == CORINFO_TYPE_UNDEF)) { useSharedBoxTemp = false; From 1eb5e104ccf3997ab445bc32d0f0c68d8a57898d Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 10 Feb 2023 21:57:25 +0100 Subject: [PATCH 05/10] Add span intrinsics --- src/coreclr/jit/compiler.h | 21 --------------------- src/coreclr/jit/importercalls.cpp | 12 +++++++++--- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 06459841f17caa..53ee6a1bddec72 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9284,27 +9284,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return MinOpts() || compDbgCode; } - //------------------------------------------------------------------------ - // OptimizationEnabled: Are optimizations of the specified kind allowed? - // - // Arguments: - // kind - there are two kinds of optimizations: - // OPT_All: all kinds of optimizations - // OPT_Lightweight: only those which won't regress JIT throughput - // - // Return Value: - // true if JIT is alowed to perform optimizations of the given kind. - // - bool OptimizationEnabled(OptimizationsKind kind = OPT_All) const - { - if (kind == OPT_Lightweight) - { - // Quick optimizations are allowed for all cases except debug code mode - return !compDbgCode && !jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT); - } - return !OptimizationDisabled(); - } - void SetMinOpts(bool val) { assert(!compMinOptsIsUsed); diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index bdfd9397e50c32..c605ec4e7dd785 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -2601,7 +2601,9 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, // we introduced betterToExpand here because we're fine if intrinsic decides to not expand itself // in this case unlike mustExpand. bool betterToExpand = mustExpand; - if (!mustExpand && opts.OptimizationEnabled(OPT_Lightweight)) + // NOTE: MinOpts() currently is always true for Tier0 so we check explicit flags instead + // to be fixed in https://github.com/dotnet/runtime/pull/77465 + if (!mustExpand && !opts.compDbgCode && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT)) { switch (ni) { @@ -2611,9 +2613,13 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, // This one is just `return true/false` case NI_System_Runtime_CompilerServices_RuntimeHelpers_IsKnownConstant: - // This is faster to expand than emitting get_Length call + // Simple cases + case NI_System_String_get_Chars: case NI_System_String_get_Length: - + case NI_System_Span_get_Item: + case NI_System_Span_get_Length: + case NI_System_ReadOnlySpan_get_Item: + case NI_System_ReadOnlySpan_get_Length: betterToExpand = true; break; From 759e39f0ecdce728c9d8ecf4168f43a4e04cb84e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 10 Feb 2023 22:00:40 +0100 Subject: [PATCH 06/10] Clean up --- src/coreclr/jit/compiler.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 53ee6a1bddec72..1f517b52326f39 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9208,12 +9208,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX COUNT_OPT_CODE }; - enum OptimizationsKind - { - OPT_Lightweight, - OPT_All - }; - struct Options { JitFlags* jitFlags; // all flags passed from the EE @@ -9283,6 +9277,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX { return MinOpts() || compDbgCode; } + bool OptimizationEnabled() const + { + return !OptimizationDisabled(); + } void SetMinOpts(bool val) { From 3b5c1c3ec571f66991ac5e0bec4155241a064d3d Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 10 Feb 2023 22:10:48 +0100 Subject: [PATCH 07/10] clean up --- src/coreclr/jit/importer.cpp | 5 ++++- src/coreclr/jit/importercalls.cpp | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index a40d924de65834..8b38175000b6b4 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3545,7 +3545,10 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken) // However, when we're allowed to perform quick opts we want to still have exact classes for boxed enums // in case if we hit Enum.HasFlag. const bool tooManyLocals = lvaCount > 128; - if (useSharedBoxTemp && opts.OptimizationEnabled(OPT_Lightweight) && !tooManyLocals && + // NOTE: MinOpts() is always true for Tier0 so we have to check explicit flags instead. + // To be fixed in https://github.com/dotnet/runtime/pull/77465 + if (useSharedBoxTemp && !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT) && + !tooManyLocals && (info.compCompHnd->getTypeForPrimitiveNumericClass(pResolvedToken->hClass) == CORINFO_TYPE_UNDEF)) { useSharedBoxTemp = false; diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index c605ec4e7dd785..8011672a983045 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -2601,9 +2601,9 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, // we introduced betterToExpand here because we're fine if intrinsic decides to not expand itself // in this case unlike mustExpand. bool betterToExpand = mustExpand; - // NOTE: MinOpts() currently is always true for Tier0 so we check explicit flags instead - // to be fixed in https://github.com/dotnet/runtime/pull/77465 - if (!mustExpand && !opts.compDbgCode && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT)) + // NOTE: MinOpts() is always true for Tier0 so we have to check explicit flags instead. + // To be fixed in https://github.com/dotnet/runtime/pull/77465 + if (!mustExpand && !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT)) { switch (ni) { From 10d3cfe33f6d03977ca5927e39e035e4812cf0c8 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 12 Feb 2023 12:17:22 +0100 Subject: [PATCH 08/10] Clean up, handle typeof() == typeof() --- src/coreclr/jit/fgbasic.cpp | 3 --- src/coreclr/jit/importer.cpp | 20 +++----------------- src/coreclr/jit/importercalls.cpp | 14 ++++++++++---- 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index e3b8df510c40f1..6718fcaad3dc64 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -81,9 +81,6 @@ void Compiler::fgInit() fgGlobalMorph = false; fgModified = false; - impBoxTempInUse = false; - impBoxTemp = BAD_VAR_NUM; - #ifdef DEBUG fgSafeBasicBlockCreation = true; #endif // DEBUG diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 8b38175000b6b4..5f671605558497 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3538,24 +3538,10 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken) // and the other you get // *(temp+4) = expr - // For minopts/debug code, try and minimize the total number - // of box temps by reusing an existing temp when possible. - bool useSharedBoxTemp = opts.OptimizationDisabled(); - - // However, when we're allowed to perform quick opts we want to still have exact classes for boxed enums - // in case if we hit Enum.HasFlag. - const bool tooManyLocals = lvaCount > 128; - // NOTE: MinOpts() is always true for Tier0 so we have to check explicit flags instead. - // To be fixed in https://github.com/dotnet/runtime/pull/77465 - if (useSharedBoxTemp && !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT) && - !tooManyLocals && - (info.compCompHnd->getTypeForPrimitiveNumericClass(pResolvedToken->hClass) == CORINFO_TYPE_UNDEF)) - { - useSharedBoxTemp = false; - } - - if (useSharedBoxTemp) + if (opts.OptimizationDisabled()) { + // For minopts/debug code, try and minimize the total number + // of box temps by reusing an existing temp when possible. if (impBoxTempInUse || impBoxTemp == BAD_VAR_NUM) { impBoxTemp = lvaGrabTemp(true DEBUGARG("Reusable Box Helper")); diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 8011672a983045..a3e247b222c00b 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -2601,18 +2601,24 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, // we introduced betterToExpand here because we're fine if intrinsic decides to not expand itself // in this case unlike mustExpand. bool betterToExpand = mustExpand; + // NOTE: MinOpts() is always true for Tier0 so we have to check explicit flags instead. // To be fixed in https://github.com/dotnet/runtime/pull/77465 - if (!mustExpand && !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT)) + const bool tier0opts = !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT); + + if (!mustExpand && tier0opts) { switch (ni) { - // HasFlag allocates in Tier0 and produces a fairly large IR tree - case NI_System_Enum_HasFlag: - // This one is just `return true/false` case NI_System_Runtime_CompilerServices_RuntimeHelpers_IsKnownConstant: + // We need these to be able to fold "typeof(...) == typeof(...)" + case NI_System_RuntimeTypeHandle_GetValueInternal: + case NI_System_Type_GetTypeFromHandle: + case NI_System_Type_op_Equality: + case NI_System_Type_op_Inequality: + // Simple cases case NI_System_String_get_Chars: case NI_System_String_get_Length: From 7f17843a636b2c6e768f7b7642bc6207e2a31530 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 12 Feb 2023 12:31:11 +0100 Subject: [PATCH 09/10] Update importercalls.cpp --- src/coreclr/jit/importercalls.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index a3e247b222c00b..d7f695bc8827f4 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -2600,7 +2600,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, // Allow some lighweight intrinsics in Tier0 which can improve throughput // we introduced betterToExpand here because we're fine if intrinsic decides to not expand itself // in this case unlike mustExpand. - bool betterToExpand = mustExpand; + bool betterToExpand = false; // NOTE: MinOpts() is always true for Tier0 so we have to check explicit flags instead. // To be fixed in https://github.com/dotnet/runtime/pull/77465 From 341bce71482d9165650d76c42f10387c72aef289 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 12 Feb 2023 15:33:45 +0100 Subject: [PATCH 10/10] Update src/coreclr/jit/importercalls.cpp Co-authored-by: Jakob Botsch Nielsen --- src/coreclr/jit/importercalls.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index d7f695bc8827f4..8b0e1e5610c8a8 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -2643,7 +2643,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, // If that call is an intrinsic and is expanded, codegen for NextCallReturnAddress will fail. // To avoid that we conservatively expand only required intrinsics in methods that call // the NextCallReturnAddress intrinsic. - if (!betterToExpand && !mustExpand && (opts.OptimizationDisabled() || info.compHasNextCallRetAddr)) + if (!mustExpand && ((opts.OptimizationDisabled() && !betterToExpand) || info.compHasNextCallRetAddr)) { *pIntrinsicName = NI_Illegal; return retNode;