-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update engine hash id generator with model name/model content/metadata (#13015)
**Update engine hash id generator with model name/model content/metadata** **Description**: * Updated the engine id generator, which now uses model name/model inputs & outputs/env metadata (instead of the model path) to generate the hash * New bridged APIs were introduced in order to enable the id generator in the TRTEP utility **Motivation and Context** - Why is this change required? What problem does it solve? To fix this [issue](triton-inference-server/server#4587) caused by the id generator using the model path How to use: * Call [TRTGenerateMetaDefId(const GraphViewer& graph_viewer, HashValue& model_hash)](https://github.com/microsoft/onnxruntime/blob/0fcce74a565478b4c83fac5a3230e9786bb53ab3/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc#L715) to generate the hash id for the TRT engine cache How to test: * On Windows, run: * .\onnxruntime_test_all.exe --gtest_filter=TensorrtExecutionProviderTest.TRTMetadefIdGeneratorUsingModelHashing * .\onnxruntime_test_all.exe --gtest_filter=TensorrtExecutionProviderTest.TRTSubgraphIdGeneratorUsingModelHashing **Appendix** * [Existing engine id generator that uses model path](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/framework/execution_provider.cc#L112-L182)
- Loading branch information
Showing
9 changed files
with
561 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,349 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
#include "murmurhash3.h" | ||
|
||
// Original source: https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp | ||
//----------------------------------------------------------------------------- | ||
// MurmurHash3 was written by Austin Appleby, and is placed in the public | ||
// domain. The author hereby disclaims copyright to this source code. | ||
|
||
// Note - The x86 and x64 versions do _not_ produce the same results, as the | ||
// algorithms are optimized for their respective platforms. You can still | ||
// compile and run any of them on any platform, but your performance with the | ||
// non-native version will be less than optimal. | ||
|
||
/* Modifications Copyright (c) Microsoft. */ | ||
|
||
#include "core/framework/endian.h" | ||
|
||
//----------------------------------------------------------------------------- | ||
// Platform-specific functions and macros | ||
|
||
// Microsoft Visual Studio | ||
|
||
#if defined(_MSC_VER) | ||
|
||
#define FORCE_INLINE __forceinline | ||
|
||
#include <stdlib.h> | ||
|
||
#define ROTL32(x, y) _rotl(x, y) | ||
#define ROTL64(x, y) _rotl64(x, y) | ||
|
||
#define BIG_CONSTANT(x) (x) | ||
|
||
// Other compilers | ||
|
||
#else // defined(_MSC_VER) | ||
|
||
#define FORCE_INLINE inline __attribute__((always_inline)) | ||
|
||
// Rotates the 32-bit value `x` left by `r` bit positions.
//
// Both shift counts are reduced modulo 32, so r == 0 (or any multiple of 32)
// returns `x` unchanged instead of shifting by the full width of the type,
// which is undefined. For r in 1..31 the result matches the plain
// (x << r) | (x >> (32 - r)) form.
inline uint32_t rotl32(uint32_t x, int8_t r) {
  constexpr int kShiftMask = 31;
  return (x << (r & kShiftMask)) | (x >> ((32 - r) & kShiftMask));
}
|
||
// Rotates the 64-bit value `x` left by `r` bit positions.
//
// Both shift counts are reduced modulo 64, so r == 0 (or any multiple of 64)
// returns `x` unchanged instead of shifting by the full width of the type,
// which is undefined. For r in 1..63 the result matches the plain
// (x << r) | (x >> (64 - r)) form.
inline uint64_t rotl64(uint64_t x, int8_t r) {
  constexpr int kShiftMask = 63;
  return (x << (r & kShiftMask)) | (x >> ((64 - r) & kShiftMask));
}
|
||
#define ROTL32(x, y) rotl32(x, y) | ||
#define ROTL64(x, y) rotl64(x, y) | ||
|
||
#define BIG_CONSTANT(x) (x##LLU) | ||
|
||
#endif // !defined(_MSC_VER) | ||
#include <cstddef>
#include <cstring>
//----------------------------------------------------------------------------- | ||
// Block read - on little-endian machines this is a single load, | ||
// while on big-endian or unknown machines the byte accesses should | ||
// still get optimized into the most efficient instruction. | ||
// | ||
// Changes to support big-endian from https://github.com/explosion/murmurhash/pull/27/ | ||
// were manually applied to original murmurhash3 source code. | ||
// Returns the 32-bit word stored at p[i], interpreted as little-endian.
// `i` may be negative: the hash loops in this file index backwards from the
// end of the block region.
//
// The word is read through its bytes rather than by dereferencing `p`,
// because `p` is derived from an arbitrary byte buffer and is not guaranteed
// to be suitably aligned for a uint32_t load. On little-endian targets the
// bytes are copied straight into the result; otherwise they are assembled
// explicitly in little-endian order.
FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) {
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(p + i);
  if constexpr (onnxruntime::endian::native == onnxruntime::endian::little) {
    uint32_t word = 0;
    std::memcpy(&word, bytes, sizeof(word));
    return word;
  } else {
    return (static_cast<uint32_t>(bytes[0])) |
           (static_cast<uint32_t>(bytes[1]) << 8) |
           (static_cast<uint32_t>(bytes[2]) << 16) |
           (static_cast<uint32_t>(bytes[3]) << 24);
  }
}
|
||
// Returns the 64-bit word stored at p[i], interpreted as little-endian.
//
// The word is read through its bytes rather than by dereferencing `p`,
// because `p` may be derived from an arbitrary byte buffer and is not
// guaranteed to be suitably aligned for a uint64_t load. On little-endian
// targets the bytes are copied straight into the result; otherwise they are
// assembled explicitly in little-endian order.
FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) {
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(p + i);
  if constexpr (onnxruntime::endian::native == onnxruntime::endian::little) {
    uint64_t word = 0;
    std::memcpy(&word, bytes, sizeof(word));
    return word;
  } else {
    return (static_cast<uint64_t>(bytes[0])) |
           (static_cast<uint64_t>(bytes[1]) << 8) |
           (static_cast<uint64_t>(bytes[2]) << 16) |
           (static_cast<uint64_t>(bytes[3]) << 24) |
           (static_cast<uint64_t>(bytes[4]) << 32) |
           (static_cast<uint64_t>(bytes[5]) << 40) |
           (static_cast<uint64_t>(bytes[6]) << 48) |
           (static_cast<uint64_t>(bytes[7]) << 56);
  }
}
|
||
//----------------------------------------------------------------------------- | ||
// Finalization mix - force all bits of a hash block to avalanche | ||
|
||
// 32-bit finalization mix: alternates xor-with-right-shift steps and
// multiplications so that every bit of the input influences the result.
FORCE_INLINE constexpr uint32_t fmix32(uint32_t h) {
  constexpr uint32_t kFirstMultiplier = 0x85ebca6b;
  constexpr uint32_t kSecondMultiplier = 0xc2b2ae35;

  h = (h ^ (h >> 16)) * kFirstMultiplier;
  h = (h ^ (h >> 13)) * kSecondMultiplier;
  return h ^ (h >> 16);
}
|
||
//---------- | ||
|
||
// 64-bit finalization mix: alternates xor-with-right-shift steps and
// multiplications so that every bit of the input influences the result.
FORCE_INLINE constexpr uint64_t fmix64(uint64_t k) {
  constexpr uint64_t kFirstMultiplier = BIG_CONSTANT(0xff51afd7ed558ccd);
  constexpr uint64_t kSecondMultiplier = BIG_CONSTANT(0xc4ceb9fe1a85ec53);

  k = (k ^ (k >> 33)) * kFirstMultiplier;
  k = (k ^ (k >> 33)) * kSecondMultiplier;
  return k ^ (k >> 33);
}
|
||
//----------------------------------------------------------------------------- | ||
|
||
namespace onnxruntime { | ||
void MurmurHash3::x86_32(const void* key, int len, | ||
uint32_t seed, void* out) { | ||
const uint8_t* data = (const uint8_t*)key; | ||
const int nblocks = len / 4; | ||
|
||
uint32_t h1 = seed; | ||
|
||
constexpr uint32_t c1 = 0xcc9e2d51; | ||
constexpr uint32_t c2 = 0x1b873593; | ||
|
||
//---------- | ||
// body | ||
|
||
const uint32_t* blocks = (const uint32_t*)(data + static_cast<ptrdiff_t>(nblocks) * 4); | ||
|
||
for (int i = -nblocks; i; i++) { | ||
uint32_t k1 = getblock32(blocks, i); | ||
|
||
k1 *= c1; | ||
k1 = ROTL32(k1, 15); | ||
k1 *= c2; | ||
|
||
h1 ^= k1; | ||
h1 = ROTL32(h1, 13); | ||
h1 = h1 * 5 + 0xe6546b64; | ||
} | ||
|
||
//---------- | ||
// tail | ||
|
||
const uint8_t* tail = (const uint8_t*)(data + static_cast<ptrdiff_t>(nblocks) * 4); | ||
|
||
uint32_t k1 = 0; | ||
|
||
switch (len & 3) { | ||
case 3: | ||
k1 ^= tail[2] << 16; | ||
[[fallthrough]]; | ||
case 2: | ||
k1 ^= tail[1] << 8; | ||
[[fallthrough]]; | ||
case 1: | ||
k1 ^= tail[0]; | ||
k1 *= c1; | ||
k1 = ROTL32(k1, 15); | ||
k1 *= c2; | ||
h1 ^= k1; | ||
}; | ||
|
||
//---------- | ||
// finalization | ||
|
||
h1 ^= len; | ||
|
||
h1 = fmix32(h1); | ||
|
||
*(uint32_t*)out = h1; | ||
} | ||
|
||
//----------------------------------------------------------------------------- | ||
|
||
void MurmurHash3::x86_128(const void* key, int len, uint32_t seed, void* out) { | ||
const uint8_t* data = (const uint8_t*)key; | ||
const int nblocks = len / 16; | ||
|
||
uint32_t h1 = seed; | ||
uint32_t h2 = seed; | ||
uint32_t h3 = seed; | ||
uint32_t h4 = seed; | ||
|
||
constexpr uint32_t c1 = 0x239b961b; | ||
constexpr uint32_t c2 = 0xab0e9789; | ||
constexpr uint32_t c3 = 0x38b34ae5; | ||
constexpr uint32_t c4 = 0xa1e38b93; | ||
|
||
//---------- | ||
// body | ||
|
||
const uint32_t* blocks = (const uint32_t*)(data + static_cast<ptrdiff_t>(nblocks) * 16); | ||
|
||
for (int i = -nblocks; i; i++) { | ||
uint32_t k1 = getblock32(blocks, i * 4 + 0); | ||
uint32_t k2 = getblock32(blocks, i * 4 + 1); | ||
uint32_t k3 = getblock32(blocks, i * 4 + 2); | ||
uint32_t k4 = getblock32(blocks, i * 4 + 3); | ||
|
||
k1 *= c1; | ||
k1 = ROTL32(k1, 15); | ||
k1 *= c2; | ||
h1 ^= k1; | ||
|
||
h1 = ROTL32(h1, 19); | ||
h1 += h2; | ||
h1 = h1 * 5 + 0x561ccd1b; | ||
|
||
k2 *= c2; | ||
k2 = ROTL32(k2, 16); | ||
k2 *= c3; | ||
h2 ^= k2; | ||
|
||
h2 = ROTL32(h2, 17); | ||
h2 += h3; | ||
h2 = h2 * 5 + 0x0bcaa747; | ||
|
||
k3 *= c3; | ||
k3 = ROTL32(k3, 17); | ||
k3 *= c4; | ||
h3 ^= k3; | ||
|
||
h3 = ROTL32(h3, 15); | ||
h3 += h4; | ||
h3 = h3 * 5 + 0x96cd1c35; | ||
|
||
k4 *= c4; | ||
k4 = ROTL32(k4, 18); | ||
k4 *= c1; | ||
h4 ^= k4; | ||
|
||
h4 = ROTL32(h4, 13); | ||
h4 += h1; | ||
h4 = h4 * 5 + 0x32ac3b17; | ||
} | ||
|
||
//---------- | ||
// tail | ||
|
||
const uint8_t* tail = (const uint8_t*)(data + static_cast<ptrdiff_t>(nblocks) * 16); | ||
|
||
uint32_t k1 = 0; | ||
uint32_t k2 = 0; | ||
uint32_t k3 = 0; | ||
uint32_t k4 = 0; | ||
|
||
switch (len & 15) { | ||
case 15: | ||
k4 ^= tail[14] << 16; | ||
[[fallthrough]]; | ||
case 14: | ||
k4 ^= tail[13] << 8; | ||
[[fallthrough]]; | ||
case 13: | ||
k4 ^= tail[12] << 0; | ||
k4 *= c4; | ||
k4 = ROTL32(k4, 18); | ||
k4 *= c1; | ||
h4 ^= k4; | ||
[[fallthrough]]; | ||
case 12: | ||
k3 ^= tail[11] << 24; | ||
[[fallthrough]]; | ||
case 11: | ||
k3 ^= tail[10] << 16; | ||
[[fallthrough]]; | ||
case 10: | ||
k3 ^= tail[9] << 8; | ||
[[fallthrough]]; | ||
case 9: | ||
k3 ^= tail[8] << 0; | ||
k3 *= c3; | ||
k3 = ROTL32(k3, 17); | ||
k3 *= c4; | ||
h3 ^= k3; | ||
[[fallthrough]]; | ||
case 8: | ||
k2 ^= tail[7] << 24; | ||
[[fallthrough]]; | ||
case 7: | ||
k2 ^= tail[6] << 16; | ||
[[fallthrough]]; | ||
case 6: | ||
k2 ^= tail[5] << 8; | ||
[[fallthrough]]; | ||
case 5: | ||
k2 ^= tail[4] << 0; | ||
k2 *= c2; | ||
k2 = ROTL32(k2, 16); | ||
k2 *= c3; | ||
h2 ^= k2; | ||
[[fallthrough]]; | ||
case 4: | ||
k1 ^= tail[3] << 24; | ||
[[fallthrough]]; | ||
case 3: | ||
k1 ^= tail[2] << 16; | ||
[[fallthrough]]; | ||
case 2: | ||
k1 ^= tail[1] << 8; | ||
[[fallthrough]]; | ||
case 1: | ||
k1 ^= tail[0] << 0; | ||
k1 *= c1; | ||
k1 = ROTL32(k1, 15); | ||
k1 *= c2; | ||
h1 ^= k1; | ||
}; | ||
|
||
//---------- | ||
// finalization | ||
|
||
h1 ^= len; | ||
h2 ^= len; | ||
h3 ^= len; | ||
h4 ^= len; | ||
|
||
h1 += h2; | ||
h1 += h3; | ||
h1 += h4; | ||
h2 += h1; | ||
h3 += h1; | ||
h4 += h1; | ||
|
||
h1 = fmix32(h1); | ||
h2 = fmix32(h2); | ||
h3 = fmix32(h3); | ||
h4 = fmix32(h4); | ||
|
||
h1 += h2; | ||
h1 += h3; | ||
h1 += h4; | ||
h2 += h1; | ||
h3 += h1; | ||
h4 += h1; | ||
|
||
((uint32_t*)out)[0] = h1; | ||
((uint32_t*)out)[1] = h2; | ||
((uint32_t*)out)[2] = h3; | ||
((uint32_t*)out)[3] = h4; | ||
} | ||
|
||
} // namespace onnxruntime |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
#pragma once | ||
|
||
#include <cstdint> | ||
|
||
namespace onnxruntime {
// Static entry points for the x86 variants of MurmurHash3.
// Both functions hash `len` bytes starting at `key`, using `seed` as the
// initial state, and write the result to the caller-provided buffer `out`.
struct MurmurHash3 {
  // 32-bit hash: writes one 32-bit word (4 bytes) to `out`.
  static void x86_32(const void* key,
                     int len,
                     uint32_t seed,
                     void* out);

  // 128-bit hash: writes four 32-bit words (16 bytes) to `out`.
  static void x86_128(const void* key,
                      int len,
                      uint32_t seed,
                      void* out);
};
}  // namespace onnxruntime
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.