From 848668699c15e2c3c1395f7142fc62cd499eb05e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 20 Jun 2024 15:43:46 +0200 Subject: [PATCH 1/4] JIT: Use a hash table for HW intrinsic name -> ID lookups This uses a lazily allocated global-memory hash table to perform these lookups. I noticed that on benchmarks.run_pgo, this function was disproportionately hot compared to how infrequently it is called; even being called for only every 20th compilation on average, it accounted for around 0.7% of JIT time. When jitting a method using a single HW intrinsic the function accounted for 5.55% of JIT time. With this change, that falls to 0.08%. --- src/coreclr/jit/compiler.cpp | 2 + src/coreclr/jit/hwintrinsic.cpp | 146 ++++++++++++++++++++++++++------ src/coreclr/jit/hwintrinsic.h | 2 + 3 files changed, 125 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 9b035278d66d53..03597abcdf07d4 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -1393,6 +1393,8 @@ void Compiler::compShutdown() DisplayNowayAssertMap(); #endif // MEASURE_NOWAY + HWIntrinsicInfo::onJitShutdown(); + /* Shut down the emitter */ emitter::emitDone(); diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 3c6f394e395d18..fb4d3015d1bf72 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -469,6 +469,117 @@ CorInfoType Compiler::getBaseJitTypeFromArgIfNeeded(NamedIntrinsic intrins return (diffInsCount >= 2); } +struct IntrinsicKey +{ + CORINFO_InstructionSet Isa; + int NumArgs; + const char* Name; + + IntrinsicKey(CORINFO_InstructionSet isa, int numArgs, const char* name) + : Isa(isa) + , NumArgs(numArgs) + , Name(name) + { + } +}; + +struct IntrinsicKeyFuncs +{ + static bool Equals(const IntrinsicKey& lhs, const IntrinsicKey& rhs) + { + return (lhs.Isa == rhs.Isa) && (lhs.NumArgs == rhs.NumArgs) && (strcmp(lhs.Name, rhs.Name) == 0); + } + + static int GetHashCode(const IntrinsicKey& key) + { + uint32_t hash = static_cast(key.Isa); + hash ^= key.NumArgs + 0x9e3779b9 + (hash << 19) + (hash >> 13); + hash ^= HashStringA(key.Name) + 0x9e3779b9 + (hash << 19) + (hash >> 13); + return static_cast(hash); + } +}; + +struct MallocAllocator +{ + template + T* allocate(size_t count) + { + void* p = malloc(count * sizeof(T)); + if (p == nullptr) + { + NOMEM(); + } + + return static_cast(p); + } + + void deallocate(void* p) + { + free(p); + } +}; + +typedef JitHashTable IntrinsicsHashTable; + +static IntrinsicsHashTable* volatile s_intrinsicsHashTable; + +//------------------------------------------------------------------------ +// getIntrinsicsHashTable: Get the hash table for name -> ID lookups. +// +// Return Value: +// Hash table. +// +static IntrinsicsHashTable* getIntrinsicsHashTable() +{ + if (s_intrinsicsHashTable != nullptr) + { + return s_intrinsicsHashTable; + } + + MallocAllocator mallocator; + char* mem = mallocator.allocate(sizeof(IntrinsicsHashTable)); + if (mem == nullptr) + { + NOMEM(); + } + + IntrinsicsHashTable* hashTable = new (mem, jitstd::placement_t()) IntrinsicsHashTable(mallocator); + + hashTable->Reallocate(NI_HW_INTRINSIC_END - NI_HW_INTRINSIC_START - 1); + + for (int i = 0; i < (NI_HW_INTRINSIC_END - NI_HW_INTRINSIC_START - 1); i++) + { + const HWIntrinsicInfo& intrinsicInfo = hwIntrinsicInfoArray[i]; + hashTable->Set(IntrinsicKey(static_cast(intrinsicInfo.isa), intrinsicInfo.numArgs, + intrinsicInfo.name), + intrinsicInfo.id); + } + + IntrinsicsHashTable* observed = InterlockedCompareExchangeT(&s_intrinsicsHashTable, hashTable, nullptr); + if (observed != nullptr) + { + hashTable->~IntrinsicsHashTable(); + mallocator.deallocate(hashTable); + return observed; + } + + return hashTable; +} + +//------------------------------------------------------------------------ +// onJitShutdown: Free the intrinsics hash table on JIT shutdown. +// +void HWIntrinsicInfo::onJitShutdown() +{ + IntrinsicsHashTable* hashTable = s_intrinsicsHashTable; + if (hashTable != nullptr) + { + hashTable->~IntrinsicsHashTable(); + free(hashTable); + s_intrinsicsHashTable = nullptr; + } +} + //------------------------------------------------------------------------ // lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet // @@ -621,35 +732,20 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, } #endif - for (int i = 0; i < (NI_HW_INTRINSIC_END - NI_HW_INTRINSIC_START - 1); i++) + IntrinsicsHashTable* hashTable = getIntrinsicsHashTable(); + NamedIntrinsic ni; + if (hashTable->Lookup(IntrinsicKey(isa, sig->numArgs, methodName), &ni) || + hashTable->Lookup(IntrinsicKey(isa, -1, methodName), &ni)) { - const HWIntrinsicInfo& intrinsicInfo = hwIntrinsicInfoArray[i]; - - if (isa != hwIntrinsicInfoArray[i].isa) - { - continue; - } - - int numArgs = static_cast(intrinsicInfo.numArgs); - - if ((numArgs != -1) && (sig->numArgs != static_cast(intrinsicInfo.numArgs))) +#if defined(TARGET_XARCH) + // on AVX1-only CPUs we only support a subset of intrinsics in Vector256 + if (isLimitedVector256Isa && !AvxOnlyCompatible(ni)) { - continue; + return NI_Illegal; } - - if (strcmp(methodName, intrinsicInfo.name) == 0) - { - NamedIntrinsic ni = intrinsicInfo.id; - -#if defined(TARGET_XARCH) - // on AVX1-only CPUs we only support a subset of intrinsics in Vector256 - if (isLimitedVector256Isa && !AvxOnlyCompatible(ni)) - { - return NI_Illegal; - } #endif - return ni; - } + + return ni; } // There are several helper intrinsics that are implemented in managed code diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index d9d04086723442..c77cb2167275e8 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -508,6 +508,8 @@ struct HWIntrinsicInfo int8_t numArgs; // 1-byte HWIntrinsicCategory category; // 1-byte + static void onJitShutdown(); + static const HWIntrinsicInfo& lookup(NamedIntrinsic id); static NamedIntrinsic lookupId(Compiler* comp, From 0b2c3377acdf645533173fa66321121bed8a54b4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 20 Jun 2024 15:52:59 +0200 Subject: [PATCH 2/4] Remove unnecessary allocation success check --- src/coreclr/jit/hwintrinsic.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index fb4d3015d1bf72..b27d01aec4b783 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -538,10 +538,6 @@ static IntrinsicsHashTable* getIntrinsicsHashTable() MallocAllocator mallocator; char* mem = mallocator.allocate(sizeof(IntrinsicsHashTable)); - if (mem == nullptr) - { - NOMEM(); - } IntrinsicsHashTable* hashTable = new (mem, jitstd::placement_t()) IntrinsicsHashTable(mallocator); From 2039178292b43bdb2d90ded2f0e14ce4d606de1f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 20 Jun 2024 16:11:27 +0200 Subject: [PATCH 3/4] Add an include --- src/coreclr/jit/compiler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 03597abcdf07d4..e6b17ed3fa4c6d 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -21,6 +21,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lower.h" #include "stacklevelsetter.h" #include "patchpointinfo.h" +#include "hwintrinsic.h" #include "jitstd/algorithm.h" extern ICorJitHost* g_jitHost; From 9106976a2fa2ef937f6fe2742738d5e2aa07010c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 20 Jun 2024 17:17:05 +0200 Subject: [PATCH 4/4] Just leak it --- src/coreclr/jit/compiler.cpp | 3 --- src/coreclr/jit/hwintrinsic.cpp | 14 -------------- src/coreclr/jit/hwintrinsic.h | 2 -- 3 files changed, 19 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index e6b17ed3fa4c6d..9b035278d66d53 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -21,7 +21,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lower.h" #include "stacklevelsetter.h" #include "patchpointinfo.h" -#include "hwintrinsic.h" #include "jitstd/algorithm.h" extern ICorJitHost* g_jitHost; @@ -1394,8 +1393,6 @@ void Compiler::compShutdown() DisplayNowayAssertMap(); #endif // MEASURE_NOWAY - HWIntrinsicInfo::onJitShutdown(); - /* Shut down the emitter */ emitter::emitDone(); diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index b27d01aec4b783..4f2c6d46802895 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -562,20 +562,6 @@ static IntrinsicsHashTable* getIntrinsicsHashTable() return hashTable; } -//------------------------------------------------------------------------ -// onJitShutdown: Free the intrinsics hash table on JIT shutdown. -// -void HWIntrinsicInfo::onJitShutdown() -{ - IntrinsicsHashTable* hashTable = s_intrinsicsHashTable; - if (hashTable != nullptr) - { - hashTable->~IntrinsicsHashTable(); - free(hashTable); - s_intrinsicsHashTable = nullptr; - } -} - //------------------------------------------------------------------------ // lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet // diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index c77cb2167275e8..d9d04086723442 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -508,8 +508,6 @@ struct HWIntrinsicInfo int8_t numArgs; // 1-byte HWIntrinsicCategory category; // 1-byte - static void onJitShutdown(); - static const HWIntrinsicInfo& lookup(NamedIntrinsic id); static NamedIntrinsic lookupId(Compiler* comp,