From 1fd320f01878a0986cae7e4a130fcf6c4e66e184 Mon Sep 17 00:00:00 2001 From: Semen Tarlinskii Date: Mon, 12 Feb 2024 16:46:49 +0300 Subject: [PATCH] runtime dispatching (fixed) --- .../AVX2_algo/avx2_algo.cpp | 9 ++ .../runtime_dispatching/AVX2_algo/ya.make | 9 ++ .../Fallback_algo/fallback_algo.cpp | 7 ++ .../runtime_dispatching/Fallback_algo/ya.make | 7 ++ .../SSE42_algo/sse42_algo.cpp | 8 ++ .../runtime_dispatching/SSE42_algo/ya.make | 9 ++ .../simd/exec/runtime_dispatching/algo.h | 108 ++++++++++++++++++ .../exec/runtime_dispatching/main/main.cpp | 30 +++++ .../exec/runtime_dispatching/main/ya.make | 9 ++ .../simd/exec/runtime_dispatching/ya.make | 18 +++ ydb/library/yql/utils/simd/simd_avx2.h | 11 ++ ydb/library/yql/utils/simd/simd_fallback.h | 38 ++++++ ydb/library/yql/utils/simd/simd_sse42.h | 13 ++- 13 files changed, 275 insertions(+), 1 deletion(-) create mode 100644 ydb/library/yql/utils/simd/exec/runtime_dispatching/AVX2_algo/avx2_algo.cpp create mode 100644 ydb/library/yql/utils/simd/exec/runtime_dispatching/AVX2_algo/ya.make create mode 100644 ydb/library/yql/utils/simd/exec/runtime_dispatching/Fallback_algo/fallback_algo.cpp create mode 100644 ydb/library/yql/utils/simd/exec/runtime_dispatching/Fallback_algo/ya.make create mode 100644 ydb/library/yql/utils/simd/exec/runtime_dispatching/SSE42_algo/sse42_algo.cpp create mode 100644 ydb/library/yql/utils/simd/exec/runtime_dispatching/SSE42_algo/ya.make create mode 100644 ydb/library/yql/utils/simd/exec/runtime_dispatching/algo.h create mode 100644 ydb/library/yql/utils/simd/exec/runtime_dispatching/main/main.cpp create mode 100644 ydb/library/yql/utils/simd/exec/runtime_dispatching/main/ya.make create mode 100644 ydb/library/yql/utils/simd/exec/runtime_dispatching/ya.make diff --git a/ydb/library/yql/utils/simd/exec/runtime_dispatching/AVX2_algo/avx2_algo.cpp b/ydb/library/yql/utils/simd/exec/runtime_dispatching/AVX2_algo/avx2_algo.cpp new file mode 100644 index 000000000000..2791eedff8a4 --- /dev/null +++ b/ydb/library/yql/utils/simd/exec/runtime_dispatching/AVX2_algo/avx2_algo.cpp @@ -0,0 +1,9 @@ +#include +#include +#include + +template<> +THolder Perfomancer::Create() { + Cerr << "AVXTrait "; + return MakeHolder>(); +} \ No newline at end of file diff --git a/ydb/library/yql/utils/simd/exec/runtime_dispatching/AVX2_algo/ya.make b/ydb/library/yql/utils/simd/exec/runtime_dispatching/AVX2_algo/ya.make new file mode 100644 index 000000000000..d435fa73030a --- /dev/null +++ b/ydb/library/yql/utils/simd/exec/runtime_dispatching/AVX2_algo/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +OWNER(g:yql) + +CFLAGS(-mavx2) + +SRCS(avx2_algo.cpp) + +END() \ No newline at end of file diff --git a/ydb/library/yql/utils/simd/exec/runtime_dispatching/Fallback_algo/fallback_algo.cpp b/ydb/library/yql/utils/simd/exec/runtime_dispatching/Fallback_algo/fallback_algo.cpp new file mode 100644 index 000000000000..734ee4dced69 --- /dev/null +++ b/ydb/library/yql/utils/simd/exec/runtime_dispatching/Fallback_algo/fallback_algo.cpp @@ -0,0 +1,7 @@ +#include + +template<> +THolder Perfomancer::Create() { + Cerr << "FallbackTrait "; + return MakeHolder>(); +} \ No newline at end of file diff --git a/ydb/library/yql/utils/simd/exec/runtime_dispatching/Fallback_algo/ya.make b/ydb/library/yql/utils/simd/exec/runtime_dispatching/Fallback_algo/ya.make new file mode 100644 index 000000000000..42e95e1c6581 --- /dev/null +++ b/ydb/library/yql/utils/simd/exec/runtime_dispatching/Fallback_algo/ya.make @@ -0,0 +1,7 @@ +LIBRARY() + +OWNER(g:yql) + +SRCS(fallback_algo.cpp) + +END() \ No newline at end of file diff --git a/ydb/library/yql/utils/simd/exec/runtime_dispatching/SSE42_algo/sse42_algo.cpp b/ydb/library/yql/utils/simd/exec/runtime_dispatching/SSE42_algo/sse42_algo.cpp new file mode 100644 index 000000000000..a3eb83a56e0f --- /dev/null +++ b/ydb/library/yql/utils/simd/exec/runtime_dispatching/SSE42_algo/sse42_algo.cpp @@ -0,0 +1,8 @@ +#include +#include + +template<> +THolder Perfomancer::Create() { + Cerr << "SSETrait "; + return MakeHolder>(); +} \ No newline at end of file diff --git a/ydb/library/yql/utils/simd/exec/runtime_dispatching/SSE42_algo/ya.make b/ydb/library/yql/utils/simd/exec/runtime_dispatching/SSE42_algo/ya.make new file mode 100644 index 000000000000..e08bf55a60aa --- /dev/null +++ b/ydb/library/yql/utils/simd/exec/runtime_dispatching/SSE42_algo/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +OWNER(g:yql) + +CFLAGS(-msse4.2) + +SRCS(sse42_algo.cpp) + +END() \ No newline at end of file diff --git a/ydb/library/yql/utils/simd/exec/runtime_dispatching/algo.h b/ydb/library/yql/utils/simd/exec/runtime_dispatching/algo.h new file mode 100644 index 000000000000..81079d82db75 --- /dev/null +++ b/ydb/library/yql/utils/simd/exec/runtime_dispatching/algo.h @@ -0,0 +1,108 @@ +#include "util/system/cpu_id.h" +#include +#include +#include +#include + +using vl = std::vector; +using vvl = std::vector>; + +using AVX2Trait = NSimd::NAVX2::TSimd8; + +using SSE42Trait = NSimd::NSSE42::TSimd8; + +using FallbackTrait = NSimd::NFallback::FallbackTrait; + +struct Perfomancer { + + Perfomancer() = default; + + struct Interface { + + virtual ~Interface() = default; + + inline virtual void Add(vvl& columns, vl& result) { + + // to avoid clang(-Wunused-parameter) + columns[0]; + result[0]; + } + + }; + + + template + struct Algo : Interface { + + Algo() {} + + inline void Add(vvl& columns, vl& result) override { + std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); + + std::vector Registers(columns.size()); + + for (size_t j = 0; j < result.size(); j += Trait::SIZE / sizeof(ui64)) { + + for (size_t i = 0; i < columns.size(); ++i) { + Registers[i] = Trait(&columns[i][j]); + } + + for (size_t i = 1; i < columns.size(); ++i) { + Registers[i] += Registers[i - 1]; + } + + Registers.back().Store(&result[j]); + } + + Cerr << std::chrono::duration_cast(std::chrono::steady_clock::now() - begin).count() << "ms\n"; + } + + ~Algo() = default; + }; + + template + inline THolder Create() { + return MakeHolder(); + } + +}; + +template<> +THolder Perfomancer::Create(); + +template<> +THolder Perfomancer::Create(); + +template<> +THolder Perfomancer::Create(); + +template +auto ChooseTrait(TFactory& factory) { + + if (NX86::HaveAVX2()) { + return factory.template Create(); + + } else if (NX86::HaveSSE42()) { + return factory.template Create(); + + } + + return factory.template Create(); +} + +//this part of code just to compare times of work +//we dont need this functions at all +template +auto ChooseAVX2Trait(TFactory& factory) { + return factory.template Create(); +} + +template +auto ChooseSSE42Trait(TFactory& factory) { + return factory.template Create(); +} + +template +auto ChooseFallbackTrait(TFactory& factory) { + return factory.template Create(); +} \ No newline at end of file diff --git a/ydb/library/yql/utils/simd/exec/runtime_dispatching/main/main.cpp b/ydb/library/yql/utils/simd/exec/runtime_dispatching/main/main.cpp new file mode 100644 index 000000000000..820f19bb1482 --- /dev/null +++ b/ydb/library/yql/utils/simd/exec/runtime_dispatching/main/main.cpp @@ -0,0 +1,30 @@ +#include + +int main() { + + std::vector> columns(10, std::vector(64e6, 1e12 + 7)); + std::vector result(64e6); + + Perfomancer perfomancer; + + Cerr << "Best Trait is: "; + auto worker = ChooseTrait(perfomancer); + + worker->Add(columns, result); + result.assign(64e6, 0); + + if (NX86::HaveSSE42()) { + auto SSE42worker = ChooseSSE42Trait(perfomancer); + SSE42worker->Add(columns, result); + result.assign(64e6, 0); + } + + if (NX86::HaveAVX2()) { + auto AVX2worker = ChooseAVX2Trait(perfomancer); + AVX2worker->Add(columns, result); + result.assign(64e6, 0); + } + + auto Fallbackworker = ChooseFallbackTrait(perfomancer); + Fallbackworker->Add(columns, result); +} \ No newline at end of file diff --git a/ydb/library/yql/utils/simd/exec/runtime_dispatching/main/ya.make b/ydb/library/yql/utils/simd/exec/runtime_dispatching/main/ya.make new file mode 100644 index 000000000000..cdf26a14805d --- /dev/null +++ b/ydb/library/yql/utils/simd/exec/runtime_dispatching/main/ya.make @@ -0,0 +1,9 @@ +OWNER(g:yql) + +PROGRAM() + +SRCS(main.cpp) + +PEERDIR(ydb/library/yql/utils/simd/exec/runtime_dispatching) + +END() \ No newline at end of file diff --git a/ydb/library/yql/utils/simd/exec/runtime_dispatching/ya.make b/ydb/library/yql/utils/simd/exec/runtime_dispatching/ya.make new file mode 100644 index 000000000000..d4ee9c1ce9d6 --- /dev/null +++ b/ydb/library/yql/utils/simd/exec/runtime_dispatching/ya.make @@ -0,0 +1,18 @@ +OWNER(g:yql) + +LIBRARY() + +PEERDIR( + ydb/library/yql/utils/simd/exec/runtime_dispatching/AVX2_algo + ydb/library/yql/utils/simd/exec/runtime_dispatching/SSE42_algo + ydb/library/yql/utils/simd/exec/runtime_dispatching/Fallback_algo +) + +END() + +RECURSE( + AVX2_algo + SSE42_algo + Fallback_algo + main +) \ No newline at end of file diff --git a/ydb/library/yql/utils/simd/simd_avx2.h b/ydb/library/yql/utils/simd/simd_avx2.h index 27cbeb4a50ca..53ea09d46e3a 100644 --- a/ydb/library/yql/utils/simd/simd_avx2.h +++ b/ydb/library/yql/utils/simd/simd_avx2.h @@ -426,6 +426,17 @@ struct TSimd8 { } }; +template<> +inline TSimd8 TSimd8::operator+(const TSimd8& other) const { + return _mm256_add_epi64(Value, other.Value); +} + +template<> +inline TSimd8& TSimd8::operator+=(const TSimd8& other) { + *this = *this + other.Value; + return *this; +} + template<> inline TSimd8 TSimd8::Set(bool value) { return _mm256_set1_epi8(ui8(-value)); diff --git a/ydb/library/yql/utils/simd/simd_fallback.h b/ydb/library/yql/utils/simd/simd_fallback.h index b225fa939d30..afffe0e2ba1c 100644 --- a/ydb/library/yql/utils/simd/simd_fallback.h +++ b/ydb/library/yql/utils/simd/simd_fallback.h @@ -63,6 +63,44 @@ struct TBase { }; }; +template +struct FallbackTrait { + + T Value; + + static const int SIZE = sizeof(T); + + inline FallbackTrait() : Value() {} + + inline FallbackTrait(const FallbackTrait& other) : Value(other.Value) {} + + inline FallbackTrait(const T* ptr) : Value(*ptr) {} + + inline FallbackTrait& operator=(const FallbackTrait& other) { + if (&other == this) return *this; + + Value = other.Value; + return *this; + } + + inline FallbackTrait& operator+=(const FallbackTrait& other) { + Value += other.Value; + return *this; + } + + inline FallbackTrait operator+(const FallbackTrait& other) { + FallbackTrait ans; + + ans += other; + + return ans; + } + + inline void Store(T* ptr) { + *ptr = Value; + } +}; + template> struct TBase8: TBase> { diff --git a/ydb/library/yql/utils/simd/simd_sse42.h b/ydb/library/yql/utils/simd/simd_sse42.h index 854f0018caeb..ffe5801d05d1 100644 --- a/ydb/library/yql/utils/simd/simd_sse42.h +++ b/ydb/library/yql/utils/simd/simd_sse42.h @@ -15,7 +15,7 @@ template struct TSimd8 { __m128i Value; - static const int SIZE = 32; + static const int SIZE = 16; inline TSimd8() : Value{__m128i()} { @@ -339,6 +339,17 @@ struct TSimd8 { } }; +template<> +inline TSimd8 TSimd8::operator+(const TSimd8& other) const { + return _mm_add_epi64(Value, other.Value); +} + +template<> +inline TSimd8& TSimd8::operator+=(const TSimd8& other) { + *this = *this + other.Value; + return *this; +} + template<> inline TSimd8 TSimd8::Set(bool value) { return _mm_set1_epi8(ui8(-value));