Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

runtime dispatching (fixed) #1847

Merged
merged 1 commit into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include <ydb/library/yql/utils/simd/exec/runtime_dispatching/algo.h>
#include <immintrin.h>
#include <avxintrin.h>

// AVX2 specialisation of the factory. It is defined in its own translation
// unit so that only this file has to be compiled with AVX2 code generation.
// Writes the name of the chosen trait to Cerr (diagnostic output consumed by
// the benchmark driver) and returns a worker that sums columns with AVX2Trait.
template<>
THolder<Perfomancer::Interface> Perfomancer::Create<AVX2Trait>() {
    // The label names the trait exactly: AVX and AVX2 are different ISA
    // extensions, so the previous "AVXTrait" label was misleading.
    Cerr << "AVX2Trait ";
    return MakeHolder<Algo<AVX2Trait>>();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Library holding the AVX2 specialisation of the runtime-dispatched algorithm.
LIBRARY()

OWNER(g:yql)

# Only this library is compiled with AVX2 code generation enabled.
CFLAGS(-mavx2)

SRCS(avx2_algo.cpp)

END()
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#include <ydb/library/yql/utils/simd/exec/runtime_dispatching/algo.h>

// Scalar (non-SIMD) specialisation of the factory.
// Reports the selected trait on Cerr, then builds the worker that performs
// the summation with FallbackTrait.
template <>
THolder<Perfomancer::Interface> Perfomancer::Create<FallbackTrait>() {
    Cerr << "FallbackTrait ";

    THolder<Interface> worker = MakeHolder<Algo<FallbackTrait>>();
    return worker;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Library holding the scalar fallback specialisation of the runtime-dispatched
# algorithm. No extra compiler flags are set here.
LIBRARY()

OWNER(g:yql)

SRCS(fallback_algo.cpp)

END()
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#include <ydb/library/yql/utils/simd/exec/runtime_dispatching/algo.h>
#include <immintrin.h>

// SSE4.2 specialisation of the factory. It is defined in its own translation
// unit so that only this file has to be compiled with SSE4.2 code generation.
// Writes the name of the chosen trait to Cerr (diagnostic output consumed by
// the benchmark driver) and returns a worker that sums columns with SSE42Trait.
template<>
THolder<Perfomancer::Interface> Perfomancer::Create<SSE42Trait>() {
    // The label names the trait exactly (SSE4.2, not generic SSE), matching
    // the way the fallback specialisation reports its own trait name.
    Cerr << "SSE42Trait ";
    return MakeHolder<Algo<SSE42Trait>>();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Library holding the SSE4.2 specialisation of the runtime-dispatched algorithm.
LIBRARY()

OWNER(g:yql)

# Only this library is compiled with SSE4.2 code generation enabled.
CFLAGS(-msse4.2)

SRCS(sse42_algo.cpp)

END()
108 changes: 108 additions & 0 deletions ydb/library/yql/utils/simd/exec/runtime_dispatching/algo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#pragma once

#include "util/system/cpu_id.h"
#include <util/generic/ptr.h>
#include <util/stream/output.h>
#include <ydb/library/yql/utils/simd/simd_avx2.h>
#include <ydb/library/yql/utils/simd/simd_fallback.h>
#include <ydb/library/yql/utils/simd/simd_sse42.h>

#include <chrono>
#include <vector>

// One column of 64-bit unsigned values.
using vl = std::vector<ui64>;
// A table stored column-wise: one inner vector per column.
using vvl = std::vector<std::vector<ui64>>;

// Trait backed by the AVX2 SIMD wrapper.
using AVX2Trait = NSimd::NAVX2::TSimd8<ui64>;

// Trait backed by the SSE4.2 SIMD wrapper.
using SSE42Trait = NSimd::NSSE42::TSimd8<ui64>;

// Scalar trait used when no SIMD extension is selected.
using FallbackTrait = NSimd::NFallback::FallbackTrait<ui64>;

struct Perfomancer {

Perfomancer() = default;

struct Interface {

virtual ~Interface() = default;

inline virtual void Add(vvl& columns, vl& result) {

// to avoid clang(-Wunused-parameter)
columns[0];
result[0];
}

};


template <typename Trait>
struct Algo : Interface {

Algo() {}

inline void Add(vvl& columns, vl& result) override {
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();

std::vector<Trait> Registers(columns.size());

for (size_t j = 0; j < result.size(); j += Trait::SIZE / sizeof(ui64)) {

for (size_t i = 0; i < columns.size(); ++i) {
Registers[i] = Trait(&columns[i][j]);
}

for (size_t i = 1; i < columns.size(); ++i) {
Registers[i] += Registers[i - 1];
}

Registers.back().Store(&result[j]);
}

Cerr << std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - begin).count() << "ms\n";
}

~Algo() = default;
};

template <typename Trait>
inline THolder<Interface> Create() {
return MakeHolder<Interface>();
}

};

// Explicit specialisations of Perfomancer::Create are only declared here.
// Their definitions live in separate source files (avx2_algo.cpp,
// sse42_algo.cpp, fallback_algo.cpp), each built as its own library so that
// the SIMD-specific compiler flags apply only to the matching implementation.

// Worker using AVX2Trait.
template<>
THolder<Perfomancer::Interface> Perfomancer::Create<AVX2Trait>();

// Worker using SSE42Trait.
template<>
THolder<Perfomancer::Interface> Perfomancer::Create<SSE42Trait>();

// Worker using FallbackTrait.
template<>
THolder<Perfomancer::Interface> Perfomancer::Create<FallbackTrait>();

// Runtime dispatch: asks the CPU which instruction sets it supports and has
// the factory build the matching worker.
//
// factory must expose a member template Create<Trait>(); all three
// instantiations must return the same type, which is also the return type
// of this function.
template <typename TFactory>
auto ChooseTrait(TFactory& factory) {
    // Preference order: AVX2 first, then SSE4.2, otherwise the scalar fallback.
    if (NX86::HaveAVX2()) {
        return factory.template Create<AVX2Trait>();
    }

    if (NX86::HaveSSE42()) {
        return factory.template Create<SSE42Trait>();
    }

    return factory.template Create<FallbackTrait>();
}

// The helpers below exist only so the benchmark can compare the running time
// of each implementation; the dispatch itself does not need them.
// Each one bypasses the CPU feature check and forces a specific trait, so the
// caller is responsible for making sure the CPU supports it.

// Always requests the AVX2 worker from the factory.
template <typename TFactory>
auto ChooseAVX2Trait(TFactory& factory) {
return factory.template Create<AVX2Trait>();
}

// Always requests the SSE4.2 worker from the factory.
template <typename TFactory>
auto ChooseSSE42Trait(TFactory& factory) {
return factory.template Create<SSE42Trait>();
}

// Always requests the scalar fallback worker from the factory.
template <typename TFactory>
auto ChooseFallbackTrait(TFactory& factory) {
return factory.template Create<FallbackTrait>();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#include <ydb/library/yql/utils/simd/exec/runtime_dispatching/algo.h>

int main() {

std::vector<std::vector<ui64>> columns(10, std::vector<ui64>(64e6, 1e12 + 7));
std::vector<ui64> result(64e6);

Perfomancer perfomancer;

Cerr << "Best Trait is: ";
auto worker = ChooseTrait(perfomancer);

worker->Add(columns, result);
result.assign(64e6, 0);

if (NX86::HaveSSE42()) {
auto SSE42worker = ChooseSSE42Trait(perfomancer);
SSE42worker->Add(columns, result);
result.assign(64e6, 0);
}

if (NX86::HaveAVX2()) {
auto AVX2worker = ChooseAVX2Trait(perfomancer);
AVX2worker->Add(columns, result);
result.assign(64e6, 0);
}

auto Fallbackworker = ChooseFallbackTrait(perfomancer);
Fallbackworker->Add(columns, result);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
OWNER(g:yql)

# Benchmark executable built from main.cpp.
PROGRAM()

SRCS(main.cpp)

# Depends on the umbrella library, which lists the per-ISA implementations.
PEERDIR(ydb/library/yql/utils/simd/exec/runtime_dispatching)

END()
18 changes: 18 additions & 0 deletions ydb/library/yql/utils/simd/exec/runtime_dispatching/ya.make
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
OWNER(g:yql)

# Umbrella library: it has no sources of its own and only depends on the
# three per-ISA implementation libraries listed below.
LIBRARY()

PEERDIR(
ydb/library/yql/utils/simd/exec/runtime_dispatching/AVX2_algo
ydb/library/yql/utils/simd/exec/runtime_dispatching/SSE42_algo
ydb/library/yql/utils/simd/exec/runtime_dispatching/Fallback_algo
)

END()

# Subdirectories listed for recursion: the three implementation libraries and
# the benchmark program.
RECURSE(
AVX2_algo
SSE42_algo
Fallback_algo
main
)
11 changes: 11 additions & 0 deletions ydb/library/yql/utils/simd/simd_avx2.h
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,17 @@ struct TSimd8 {
}
};

// Addition for the ui64 specialisation: adds the packed 64-bit integer lanes
// of Value and other.Value with _mm256_add_epi64 and returns the sum as a new
// object. Neither operand is modified.
template<>
inline TSimd8<ui64> TSimd8<ui64>::operator+(const TSimd8<ui64>& other) const {
return _mm256_add_epi64(Value, other.Value);
}

// In-place addition for the ui64 specialisation, expressed through operator+.
// Returns *this after the update.
template<>
inline TSimd8<ui64>& TSimd8<ui64>::operator+=(const TSimd8<ui64>& other) {
// NOTE(review): the raw register other.Value is passed rather than `other`;
// this presumably relies on a conversion from the register type declared in
// the part of the struct not shown here - confirm.
*this = *this + other.Value;
return *this;
}

template<>
inline TSimd8<bool> TSimd8<bool>::Set(bool value) {
return _mm256_set1_epi8(ui8(-value));
Expand Down
38 changes: 38 additions & 0 deletions ydb/library/yql/utils/simd/simd_fallback.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,44 @@ struct TBase {
};
};

// Scalar stand-in for a SIMD register wrapper: it holds exactly one value of
// type T and offers the same load / add / store operations, so generic code
// written against the SIMD traits also works without any SIMD extension.
template <typename T>
struct FallbackTrait {

    // The single "lane" held by this trait.
    T Value;

    // Width of the emulated register in bytes (one element of T).
    static const int SIZE = sizeof(T);

    // Value-initialises the held element (zero for arithmetic T).
    inline FallbackTrait() : Value() {}

    inline FallbackTrait(const FallbackTrait& other) : Value(other.Value) {}

    // Load: copies the element ptr points to. ptr must be dereferenceable.
    inline FallbackTrait(const T* ptr) : Value(*ptr) {}

    inline FallbackTrait& operator=(const FallbackTrait& other) {
        if (&other == this) return *this;

        Value = other.Value;
        return *this;
    }

    // Adds other's value to this one in place and returns *this.
    inline FallbackTrait& operator+=(const FallbackTrait& other) {
        Value += other.Value;
        return *this;
    }

    // Returns the sum of this value and other's without modifying either.
    // The accumulator starts from a copy of *this; starting from a
    // default-constructed object would drop the left operand and return
    // just `other`.
    inline FallbackTrait operator+(const FallbackTrait& other) const {
        FallbackTrait ans(*this);

        ans += other;

        return ans;
    }

    // Store: writes the held value to *ptr. ptr must be dereferenceable.
    inline void Store(T* ptr) const {
        *ptr = Value;
    }
};

template<typename T, typename Mask=TSimd8<bool>>
struct TBase8: TBase<TSimd8<T>> {

Expand Down
13 changes: 12 additions & 1 deletion ydb/library/yql/utils/simd/simd_sse42.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ template<typename T>
struct TSimd8 {
__m128i Value;

static const int SIZE = 32;
static const int SIZE = 16;

inline TSimd8()
: Value{__m128i()} {
Expand Down Expand Up @@ -339,6 +339,17 @@ struct TSimd8 {
}
};

// Addition for the ui64 specialisation: adds the packed 64-bit integer lanes
// of the two 128-bit registers with _mm_add_epi64 and returns the sum as a
// new object. Neither operand is modified.
template<>
inline TSimd8<ui64> TSimd8<ui64>::operator+(const TSimd8<ui64>& other) const {
return _mm_add_epi64(Value, other.Value);
}

// In-place addition for the ui64 specialisation, expressed through operator+.
// Returns *this after the update.
template<>
inline TSimd8<ui64>& TSimd8<ui64>::operator+=(const TSimd8<ui64>& other) {
// NOTE(review): the raw register other.Value is passed rather than `other`;
// this presumably relies on a conversion from __m128i declared in the part
// of the struct not shown here - confirm.
*this = *this + other.Value;
return *this;
}

template<>
inline TSimd8<bool> TSimd8<bool>::Set(bool value) {
return _mm_set1_epi8(ui8(-value));
Expand Down
Loading