Skip to content

Commit

Permalink
rocr: Skip uSleep for non-interrupt signals
Browse files Browse the repository at this point in the history
- When waiting on non-interrupt signals, do not call uSleep; sleeping causes
  regressions compared to interrupt signal usage.
- Clean up code.

Change-Id: I706bda0b13e64ffec0b607c1915d8380a2ce0dea
  • Loading branch information
saleelk authored and dayatsin-amd committed Feb 7, 2025
1 parent 166b083 commit 890399a
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 148 deletions.
52 changes: 52 additions & 0 deletions runtime/hsa-runtime/core/inc/signal.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,17 @@

#include "core/util/utils.h"
#include "core/util/locks.h"
#include "core/util/timer.h"

#include "inc/amd_hsa_signal.h"

#if defined(__i386__) || defined(__x86_64__)
#include <mwaitxintrin.h>
#ifndef MWAITX_ECX_TIMER_ENABLE
#define MWAITX_ECX_TIMER_ENABLE 0x2 // BIT(1)
#endif
#endif

// Allow hsa_signal_t to be keys in STL structures.
namespace std {
template <> struct less<hsa_signal_t> {
Expand All @@ -76,6 +84,50 @@ template <> struct less<hsa_signal_t> {
}

namespace rocr {
namespace timer {
/// Converts a timeout expressed in HSA timestamp ticks into a fast_clock duration.
///
/// The tick rate is queried through HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY on every
/// call; the tick count is divided by that rate to obtain seconds, which are then
/// handed to timer::duration_from_seconds.
///
/// @param timeout  Timeout in units of the HSA timestamp frequency.
/// @return The same interval as a timer::fast_clock::duration.
///
/// NOTE(review): the status returned by hsa_system_get_info is not checked, so a
/// failed query leaves the frequency at 0 — confirm how duration_from_seconds
/// treats the resulting non-finite value.
inline timer::fast_clock::duration GetFastTimeout(uint64_t timeout) {
  using FastDuration = timer::fast_clock::duration;

  uint64_t ticks_per_second = 0;
  HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &ticks_per_second);

  const double seconds =
      static_cast<double>(timeout) / static_cast<double>(ticks_per_second);
  return timer::duration_from_seconds<FastDuration>(seconds);
}

/// Throws once a wait has been running longer than the configured abort limit.
///
/// @param start_time            fast_clock time point at which the wait began.
/// @param signal_abort_timeout  Abort limit in seconds; 0 disables the check.
/// @throws AMD::hsa_exception (HSA_STATUS_ERROR_FATAL) when the time elapsed
///         since start_time exceeds the limit.
inline void CheckAbortTimeout(const timer::fast_clock::time_point& start_time,
                              uint32_t signal_abort_timeout) {
  // Nothing to enforce when no abort limit is configured.
  if (signal_abort_timeout == 0) return;

  const timer::fast_clock::duration limit =
      std::chrono::seconds(signal_abort_timeout);
  const auto elapsed = timer::fast_clock::now() - start_time;

  if (elapsed > limit) {
    throw AMD::hsa_exception(HSA_STATUS_ERROR_FATAL,
                             "Signal wait abort timeout.\n");
  }
}

/// Arms a monitor on `addr` and waits with MWAITX until the monitored line is
/// written, the optional timer expires, or the CPU wakes for another reason.
///
/// On targets other than x86/x86-64 this is a no-op.
///
/// @param addr          Address to monitor (the signal value being polled).
/// @param timeout       Timer count loaded into EBX. It is only honored when
///                      `timer_enable` is true; otherwise the wait has no timer.
/// @param timer_enable  When true, sets the MWAITX timer-enable extension bit so
///                      that `timeout` bounds the wait.
///
/// The intrinsic's parameter order is (extensions -> ECX, hints -> EAX,
/// clock -> EBX). The timer-enable flag therefore goes in the first argument and
/// the timeout in the third; passing them in (EAX, EBX, ECX) order would leave
/// the timer disabled and place the timeout in the hints register.
inline void DoMwaitx(int64_t* addr, uint32_t timeout, bool timer_enable = false) {
#if defined(__i386__) || defined(__x86_64__)
  _mm_monitorx(addr, 0, 0);
  const unsigned extensions = timer_enable ? MWAITX_ECX_TIMER_ENABLE : 0u;
  _mm_mwaitx(extensions, /*hints=*/0, /*clock=*/timeout);
#else
  // No MWAITX on this architecture; keep the parameters "used" to avoid warnings.
  (void)addr;
  (void)timeout;
  (void)timer_enable;
#endif
}
} // namespace timer

/// Evaluates an HSA signal wait condition against an observed signal value.
///
/// @param value          Signal value that was read.
/// @param condition      Comparison to apply (EQ, NE, GTE or LT).
/// @param compare_value  Right-hand operand of the comparison.
/// @return true when `value <condition> compare_value` holds; false when it does
///         not, or when `condition` is not one of the four handled enumerators.
inline bool CheckSignalCondition(int64_t value, hsa_signal_condition_t condition,
                                 hsa_signal_value_t compare_value) {
  if (condition == HSA_SIGNAL_CONDITION_EQ) return value == compare_value;
  if (condition == HSA_SIGNAL_CONDITION_NE) return value != compare_value;
  if (condition == HSA_SIGNAL_CONDITION_GTE) return value >= compare_value;
  if (condition == HSA_SIGNAL_CONDITION_LT) return value < compare_value;

  // Unrecognized condition: never report the wait as satisfied.
  return false;
}

namespace core {
class Agent;
class Signal;
Expand Down
72 changes: 12 additions & 60 deletions runtime/hsa-runtime/core/runtime/default_signal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/default_signal.h"
#include "core/util/timer.h"

#if defined(__i386__) || defined(__x86_64__)
#include <mwaitxintrin.h>
Expand Down Expand Up @@ -83,78 +82,31 @@ hsa_signal_value_t BusyWaitSignal::WaitRelaxed(hsa_signal_condition_t condition,

waiting_++;
MAKE_SCOPE_GUARD([&]() { waiting_--; });
bool condition_met = false;
int64_t value;

const uint32_t &signal_abort_timeout =
core::Runtime::runtime_singleton_->flag().signal_abort_timeout();

timer::fast_clock::time_point start_time, time;
start_time = timer::fast_clock::now();

// Set a polling timeout value
// Should be a few times bigger than null kernel latency
const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200);

uint64_t hsa_freq = 0;
HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
const timer::fast_clock::duration fast_timeout =
timer::duration_from_seconds<timer::fast_clock::duration>(
double(timeout) / double(hsa_freq));

#if defined(__i386__) || defined(__x86_64__)
if (g_use_mwaitx) _mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
#endif
const timer::fast_clock::time_point start_time = timer::fast_clock::now();
const timer::fast_clock::duration fast_timeout = timer::GetFastTimeout(timeout);

while (true) {
if (!IsValid()) return 0;

value = atomic::Load(&signal_.value, std::memory_order_relaxed);

switch (condition) {
case HSA_SIGNAL_CONDITION_EQ: {
condition_met = (value == compare_value);
break;
}
case HSA_SIGNAL_CONDITION_NE: {
condition_met = (value != compare_value);
break;
}
case HSA_SIGNAL_CONDITION_GTE: {
condition_met = (value >= compare_value);
break;
}
case HSA_SIGNAL_CONDITION_LT: {
condition_met = (value < compare_value);
break;
}
default:
return 0;
}
if (condition_met) return hsa_signal_value_t(value);
int64_t value = atomic::Load(&signal_.value, std::memory_order_relaxed);

time = timer::fast_clock::now();
if (time - start_time > fast_timeout) {
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
return hsa_signal_value_t(value);
if (CheckSignalCondition(value, condition, compare_value)) {
return value;
}

if (signal_abort_timeout) {
const timer::fast_clock::duration abort_timeout =
std::chrono::seconds(signal_abort_timeout);

if(time - start_time > abort_timeout)
throw AMD::hsa_exception(HSA_STATUS_ERROR_FATAL,
"Signal wait abort timeout.\n");
if (timer::fast_clock::now() - start_time > fast_timeout) {
return value;
}

if (time - start_time > kMaxElapsed) {
os::uSleep(20);
#if defined(__i386__) || defined(__x86_64__)
} else if (g_use_mwaitx) {
_mm_mwaitx(0, 60000, MWAITX_ECX_TIMER_ENABLE); // 60000 ~20us on a 1.5Ghz CPU
_mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
#endif
timer::CheckAbortTimeout(start_time, signal_abort_timeout);

if (g_use_mwaitx) {
// Use timer-enabled mwaitx for busy waiting
timer::DoMwaitx(const_cast<int64_t*>(&signal_.value), 60000, true);
}
}
}
Expand Down
114 changes: 27 additions & 87 deletions runtime/hsa-runtime/core/runtime/interrupt_signal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,8 @@

#include "core/inc/interrupt_signal.h"
#include "core/inc/runtime.h"
#include "core/util/timer.h"
#include "core/util/locks.h"

#if defined(__i386__) || defined(__x86_64__)
#include <mwaitxintrin.h>
#define MWAITX_ECX_TIMER_ENABLE 0x2 // BIT(1)
#endif

namespace rocr {
namespace core {

Expand Down Expand Up @@ -141,118 +135,64 @@ void InterruptSignal::StoreRelease(hsa_signal_value_t value) {
SetEvent();
}

hsa_signal_value_t InterruptSignal::WaitRelaxed(
hsa_signal_condition_t condition, hsa_signal_value_t compare_value,
uint64_t timeout, hsa_wait_state_t wait_hint) {
hsa_signal_value_t InterruptSignal::WaitRelaxed(hsa_signal_condition_t condition,
hsa_signal_value_t compare_value,
uint64_t timeout,
hsa_wait_state_t wait_hint) {
Retain();
MAKE_SCOPE_GUARD([&]() { Release(); });

uint32_t prior = waiting_++;
MAKE_SCOPE_GUARD([&]() { waiting_--; });

uint64_t event_age = 1;
uint64_t event_age = core::Runtime::runtime_singleton_->KfdVersion().supports_event_age ? 1 : 0;
if (!event_age && prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE;

const timer::fast_clock::time_point start_time = timer::fast_clock::now();
const timer::fast_clock::duration fast_timeout = timer::GetFastTimeout(timeout);
const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200);
const uint32_t &signal_abort_timeout =
core::Runtime::runtime_singleton_->flag().signal_abort_timeout();

if (!core::Runtime::runtime_singleton_->KfdVersion().supports_event_age) {
event_age = 0;
// Allow only the first waiter to sleep. Without event age tracking,
// race condition can cause some threads to sleep without wakeup since missing interrupt.
if (prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE;
}

int64_t value;

timer::fast_clock::time_point start_time = timer::fast_clock::now();

// Set a polling timeout value
// Should be a few times bigger than null kernel latency
const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200);

uint64_t hsa_freq = 0;
HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
const timer::fast_clock::duration fast_timeout =
timer::duration_from_seconds<timer::fast_clock::duration>(
double(timeout) / double(hsa_freq));

bool condition_met = false;

#if defined(__i386__) || defined(__x86_64__)
if (g_use_mwaitx) _mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
#endif

while (true) {
if (!IsValid()) return 0;

value = atomic::Load(&signal_.value, std::memory_order_relaxed);
int64_t value = atomic::Load(&signal_.value, std::memory_order_relaxed);

switch (condition) {
case HSA_SIGNAL_CONDITION_EQ: {
condition_met = (value == compare_value);
break;
}
case HSA_SIGNAL_CONDITION_NE: {
condition_met = (value != compare_value);
break;
}
case HSA_SIGNAL_CONDITION_GTE: {
condition_met = (value >= compare_value);
break;
}
case HSA_SIGNAL_CONDITION_LT: {
condition_met = (value < compare_value);
break;
}
default:
return 0;
if (CheckSignalCondition(value, condition, compare_value)) {
return value;
}
if (condition_met) return hsa_signal_value_t(value);

timer::fast_clock::time_point time = timer::fast_clock::now();
if (time - start_time > fast_timeout) {
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
return hsa_signal_value_t(value);
auto now = timer::fast_clock::now();
if (now - start_time > fast_timeout) {
return value;
}

if (signal_abort_timeout) {
const timer::fast_clock::duration abort_timeout =
std::chrono::seconds(signal_abort_timeout);

if(time - start_time > abort_timeout)
throw AMD::hsa_exception(HSA_STATUS_ERROR_FATAL,
"Signal wait abort timeout.\n");
}
timer::CheckAbortTimeout(start_time, signal_abort_timeout);

if (wait_hint == HSA_WAIT_STATE_ACTIVE) {
#if defined(__i386__) || defined(__x86_64__)
if (g_use_mwaitx) {
_mm_mwaitx(0, 0, 0);
_mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
// Short timeout for active waiting
timer::DoMwaitx(const_cast<int64_t*>(&signal_.value), 1000);
}
#endif
continue;
}

if (time - start_time < kMaxElapsed) {
// os::uSleep(20);
#if defined(__i386__) || defined(__x86_64__)
if (now - start_time < kMaxElapsed) {
if (g_use_mwaitx) {
_mm_mwaitx(0, 60000, MWAITX_ECX_TIMER_ENABLE);
_mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
// Longer timeout with timer for passive waiting
timer::DoMwaitx(const_cast<int64_t*>(&signal_.value), 60000, true);
}
#endif
continue;
}

uint32_t wait_ms;
auto time_remaining = fast_timeout - (time - start_time);
uint64_t ct = timer::duration_cast<std::chrono::milliseconds>(
time_remaining).count();
auto remaining_ms = timer::duration_cast<std::chrono::milliseconds>(
fast_timeout - (now - start_time)).count();

wait_ms = static_cast<uint32_t>(std::min(ct, 0xFFFFFFFEUL));
if (signal_abort_timeout)
wait_ms = std::min(wait_ms, signal_abort_timeout * 1000);
uint32_t wait_ms = std::min<uint32_t>(
static_cast<uint32_t>(std::min<uint64_t>(remaining_ms, 0xFFFFFFFEUL)),
static_cast<uint32_t>(signal_abort_timeout ? signal_abort_timeout * 1000 : 0xFFFFFFFFUL)
);

hsaKmtWaitOnEvent_Ext(event_, wait_ms, &event_age);
}
Expand Down
2 changes: 1 addition & 1 deletion runtime/hsa-runtime/core/runtime/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1659,7 +1659,7 @@ void Runtime::AsyncEventsLoop(void* _eventsInfo) {
for (size_t i = index; i < async_events_.Size(); i++) {
hsa_signal_handle sig(async_events_.signal_[i]);
value[0] = atomic::Load(&sig->signal_.value, std::memory_order_relaxed);
if (checkCondition(async_events_.cond_[i], value[0], async_events_.value_[i])) {
if (CheckSignalCondition(value[0], async_events_.cond_[i], async_events_.value_[i])) {
if (i == 0) {
hsa_signal_handle(async_events_control_.wake)->StoreRelaxed(0);
} else {
Expand Down

0 comments on commit 890399a

Please sign in to comment.