diff --git a/libkineto/src/RoctracerActivityApi.cpp b/libkineto/src/RoctracerActivityApi.cpp index 7b9740f6e..761fb6929 100644 --- a/libkineto/src/RoctracerActivityApi.cpp +++ b/libkineto/src/RoctracerActivityApi.cpp @@ -367,6 +367,7 @@ void RoctracerActivityApi::clearActivities() { kernelNames_.clear(); } + void RoctracerActivityApi::enableActivities( const std::set& selected_activities) { #ifdef HAS_ROCTRACER diff --git a/libkineto/src/RoctracerLogger.cpp b/libkineto/src/RoctracerLogger.cpp index a7b261bc0..b2f7807f1 100644 --- a/libkineto/src/RoctracerLogger.cpp +++ b/libkineto/src/RoctracerLogger.cpp @@ -11,8 +11,6 @@ #include #include #include -#include -#include #include "ThreadUtil.h" @@ -26,21 +24,6 @@ using namespace std::chrono; constexpr size_t kBufSize(2 * 1024 * 1024); -class Flush -{ -public: - std::atomic doFlush_ {false}; - std::mutex mutex_; - std::atomic maxCorrelationId_; - uint64_t correlationId_ {0}; - void reportCorrelation(const uint64_t &cid) { - uint64_t prev = maxCorrelationId_; - while (prev < cid && !maxCorrelationId_.compare_exchange_weak(prev, cid)) - {} - } -}; -static Flush s_flush; - RoctracerLogger& RoctracerLogger::singleton() { static RoctracerLogger instance; return instance; @@ -108,7 +91,6 @@ void RoctracerLogger::api_callback(uint32_t domain, uint32_t cid, const void* ca case HIP_API_ID_hipExtLaunchKernel: case HIP_API_ID_hipLaunchCooperativeKernel: // Should work here { - s_flush.reportCorrelation(data->correlation_id); auto &args = data->args.hipLaunchKernel; dis->kernelRows_.emplace_back(data->correlation_id, domain, @@ -134,7 +116,6 @@ void RoctracerLogger::api_callback(uint32_t domain, uint32_t cid, const void* ca case HIP_API_ID_hipModuleLaunchKernel: case HIP_API_ID_hipExtModuleLaunchKernel: { - s_flush.reportCorrelation(data->correlation_id); auto &args = data->args.hipModuleLaunchKernel; dis->kernelRows_.emplace_back(data->correlation_id, domain, @@ -227,7 +208,6 @@ void RoctracerLogger::api_callback(uint32_t domain, uint32_t cid, const void* ca case HIP_API_ID_hipMemcpyAsync: case HIP_API_ID_hipMemcpyWithStream: { - s_flush.reportCorrelation(data->correlation_id); auto &args = data->args.hipMemcpyAsync; dis->copyRows_.emplace_back(data->correlation_id, domain, @@ -272,22 +252,6 @@ void RoctracerLogger::activity_callback(const char* begin, const char* end, void auto &gpuTraceBuffers = singleton().gpuTraceBuffers_; memcpy(buffer, begin, size); gpuTraceBuffers->emplace_back(buffer, size); - - // If we are stopping the tracer, implement reliable flushing - if (s_flush.doFlush_) { - std::unique_lock lock(s_flush.mutex_); - // scan the records looking for the final correlation id - const roctracer_record_t* record = (const roctracer_record_t*)(begin); - const roctracer_record_t* end_record = (const roctracer_record_t*)(end); - - while (record < end_record) { - if (record->correlation_id == s_flush.correlationId_) { - s_flush.correlationId_ = 0; // Clear id to signal we found it - break; - } - roctracer_next_record(record, &record); - } - } } void RoctracerLogger::startLogging() { @@ -348,32 +312,12 @@ void RoctracerLogger::startLogging() { } externalCorrelationEnabled_ = true; - logging_ = true; roctracer_start(); } void RoctracerLogger::stopLogging() { - if (logging_ == false) - return; - - // If we are stopping the tracer, implement reliable flushing - std::unique_lock lock(s_flush.mutex_); - - s_flush.doFlush_ = true; - s_flush.correlationId_ = s_flush.maxCorrelationId_; // load ending id from the running max - - // Poll on the worker finding the record and clearing s_flush.correlationId_ - while (s_flush.correlationId_ != 0) { - lock.unlock(); - roctracer_flush_activity_expl(hccPool_); - usleep(1000); - lock.lock(); - } - - s_flush.doFlush_ = false; - roctracer_stop(); - logging_ = false; + roctracer_flush_activity_expl(hccPool_); } void RoctracerLogger::endTracing() { diff --git a/libkineto/src/RoctracerLogger.h b/libkineto/src/RoctracerLogger.h index 214d609d1..b1ab448de 100644 --- a/libkineto/src/RoctracerLogger.h +++ b/libkineto/src/RoctracerLogger.h @@ -176,7 +176,6 @@ class RoctracerLogger { std::unique_ptr> gpuTraceBuffers_; bool externalCorrelationEnabled_{true}; - bool logging_{false}; friend class onnxruntime::profiling::RocmProfiler; friend class libkineto::RoctracerActivityApi;