Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DeviceMSAN] Fix gpu crashed on device global variable #16566

Merged
merged 13 commits into from
Jan 15, 2025
136 changes: 97 additions & 39 deletions llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,50 +767,108 @@ Constant *getOrCreateGlobalString(Module &M, StringRef Name, StringRef Value,
});
}

static void extendSpirKernelArgs(Module &M) {
SmallVector<Constant *, 8> SpirKernelsMetadata;
static bool isUnsupportedDeviceGlobal(const GlobalVariable *G) {
// Skip instrumenting on "__MsanKernelMetadata" etc.
if (G->getName().starts_with("__Msan"))
return true;
if (G->getName().starts_with("__spirv_BuiltIn"))
return true;
if (G->getAddressSpace() == kSpirOffloadLocalAS ||
G->getAddressSpace() == kSpirOffloadConstantAS)
return true;
return false;
}

static void instrumentSPIRModule(Module &M) {

const auto &DL = M.getDataLayout();
Type *IntptrTy = DL.getIntPtrType(M.getContext());

// SpirKernelsMetadata only saves fixed kernels, and is described by
// following structure:
// uptr unmangled_kernel_name
// uptr unmangled_kernel_name_size
StructType *StructTy = StructType::get(IntptrTy, IntptrTy);
for (Function &F : M) {
if (F.getCallingConv() != CallingConv::SPIR_KERNEL)
continue;
// Instrument __MsanKernelMetadata, which records information of sanitized
// kernel
{
SmallVector<Constant *, 8> SpirKernelsMetadata;

// SpirKernelsMetadata only saves fixed kernels, and is described by
// following structure:
// uptr unmangled_kernel_name
// uptr unmangled_kernel_name_size
StructType *StructTy = StructType::get(IntptrTy, IntptrTy);
for (Function &F : M) {
if (F.getCallingConv() != CallingConv::SPIR_KERNEL)
continue;

if (!F.hasFnAttribute(Attribute::SanitizeMemory) ||
F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
continue;
if (!F.hasFnAttribute(Attribute::SanitizeMemory) ||
F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
continue;

auto KernelName = F.getName();
auto *KernelNameGV = getOrCreateGlobalString(M, "__msan_kernel", KernelName,
kSpirOffloadConstantAS);
SpirKernelsMetadata.emplace_back(ConstantStruct::get(
StructTy, ConstantExpr::getPointerCast(KernelNameGV, IntptrTy),
ConstantInt::get(IntptrTy, KernelName.size())));
}

// Create global variable to record spirv kernels' information
ArrayType *ArrayTy = ArrayType::get(StructTy, SpirKernelsMetadata.size());
Constant *MetadataInitializer =
ConstantArray::get(ArrayTy, SpirKernelsMetadata);
GlobalVariable *MsanSpirKernelMetadata = new GlobalVariable(
M, MetadataInitializer->getType(), false, GlobalValue::AppendingLinkage,
MetadataInitializer, "__MsanKernelMetadata", nullptr,
GlobalValue::NotThreadLocal, 1);
MsanSpirKernelMetadata->setUnnamedAddr(GlobalValue::UnnamedAddr::Local);
// Add device global attributes
MsanSpirKernelMetadata->addAttribute(
"sycl-device-global-size", std::to_string(DL.getTypeAllocSize(ArrayTy)));
MsanSpirKernelMetadata->addAttribute("sycl-device-image-scope");
MsanSpirKernelMetadata->addAttribute("sycl-host-access", "0"); // read only
MsanSpirKernelMetadata->addAttribute("sycl-unique-id",
"_Z20__MsanKernelMetadata");
MsanSpirKernelMetadata->setDSOLocal(true);
auto KernelName = F.getName();
auto *KernelNameGV = getOrCreateGlobalString(
M, "__msan_kernel", KernelName, kSpirOffloadConstantAS);
SpirKernelsMetadata.emplace_back(ConstantStruct::get(
StructTy, ConstantExpr::getPointerCast(KernelNameGV, IntptrTy),
ConstantInt::get(IntptrTy, KernelName.size())));
}

// Create global variable to record spirv kernels' information
ArrayType *ArrayTy = ArrayType::get(StructTy, SpirKernelsMetadata.size());
Constant *MetadataInitializer =
ConstantArray::get(ArrayTy, SpirKernelsMetadata);
GlobalVariable *MsanSpirKernelMetadata = new GlobalVariable(
M, MetadataInitializer->getType(), false, GlobalValue::AppendingLinkage,
MetadataInitializer, "__MsanKernelMetadata", nullptr,
GlobalValue::NotThreadLocal, 1);
MsanSpirKernelMetadata->setUnnamedAddr(GlobalValue::UnnamedAddr::Local);
// Add device global attributes
MsanSpirKernelMetadata->addAttribute(
"sycl-device-global-size",
std::to_string(DL.getTypeAllocSize(ArrayTy)));
MsanSpirKernelMetadata->addAttribute("sycl-device-image-scope");
MsanSpirKernelMetadata->addAttribute("sycl-host-access",
"0"); // read only
MsanSpirKernelMetadata->addAttribute("sycl-unique-id",
"_Z20__MsanKernelMetadata");
MsanSpirKernelMetadata->setDSOLocal(true);
}

// Handle global variables:
// - Skip sanitizing unsupported variables
// - Instrument __MsanDeviceGlobalMetadata for device globals
do {
SmallVector<Constant *, 8> DeviceGlobalMetadata;

// Device global meta data is described by a structure
// size_t device_global_size
// size_t beginning address of the device global
StructType *StructTy = StructType::get(IntptrTy, IntptrTy);

for (auto &G : M.globals()) {
if (isUnsupportedDeviceGlobal(&G)) {
for (auto *User : G.users())
if (auto *Inst = dyn_cast<Instruction>(User))
Inst->setNoSanitizeMetadata();
continue;
}

DeviceGlobalMetadata.push_back(ConstantStruct::get(
StructTy,
ConstantInt::get(IntptrTy, DL.getTypeAllocSize(G.getValueType())),
ConstantExpr::getPointerCast(&G, IntptrTy)));
}

if (DeviceGlobalMetadata.empty())
break;

// Create meta data global to record device globals' information
ArrayType *ArrayTy = ArrayType::get(StructTy, DeviceGlobalMetadata.size());
Constant *MetadataInitializer =
ConstantArray::get(ArrayTy, DeviceGlobalMetadata);
GlobalVariable *MsanDeviceGlobalMetadata = new GlobalVariable(
M, MetadataInitializer->getType(), false, GlobalValue::AppendingLinkage,
MetadataInitializer, "__MsanDeviceGlobalMetadata", nullptr,
GlobalValue::NotThreadLocal, 1);
MsanDeviceGlobalMetadata->setUnnamedAddr(GlobalValue::UnnamedAddr::Local);
} while (false);
}

PreservedAnalyses MemorySanitizerPass::run(Module &M,
Expand All @@ -827,7 +885,7 @@ PreservedAnalyses MemorySanitizerPass::run(Module &M,
}

if (TargetTriple.isSPIROrSPIRV()) {
extendSpirKernelArgs(M);
instrumentSPIRModule(M);
Modified = true;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
; RUN: opt < %s -passes=msan -msan-instrumentation-with-call-threshold=0 -msan-eager-checks=1 -S | FileCheck %s
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
target triple = "spir64-unknown-unknown"

@.str = external addrspace(1) constant [59 x i8]
@__spirv_BuiltInGlobalInvocationId = external addrspace(1) constant <3 x i64>

; CHECK: @__MsanDeviceGlobalMetadata
; CHECK-NOT: @__spirv_BuiltInGlobalInvocationId
; CHECK-SAME: @.str
2 changes: 1 addition & 1 deletion sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
set(UNIFIED_RUNTIME_REPO "https://github.com/AllanZyne/unified-runtime.git")
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
Expand Down
15 changes: 7 additions & 8 deletions sycl/cmake/modules/UnifiedRuntimeTag.cmake
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# commit 7eae5c80a9e969bc12fda57c9cc0a0970f0cd17f
# Merge: 9c652ffb b78cfa71
# Author: Ross Brunton <ross@codeplay.com>
# Date: Thu Jan 9 17:28:00 2025 +0000
# Merge pull request #2048 from RossBrunton/ross/refc
#
# Use reference counting on factories
set(UNIFIED_RUNTIME_TAG 7eae5c80a9e969bc12fda57c9cc0a0970f0cd17f)
# commit b2ac58f27c63b8ff714e8b0c39b79aaab05a3faf
# Merge: 3472b5bd ead3d07d
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
# Date: Tue Jan 7 10:40:35 2025 +0000
# Merge pull request #2524 from nrspruit/fix_enqueue_wait_out_event
# [L0]: Fix Out Event in Enqueue Wait Events to prevent reuse
set(UNIFIED_RUNTIME_TAG review/yang/msan_device_global)
58 changes: 58 additions & 0 deletions sycl/test-e2e/MemorySanitizer/check_device_global.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// REQUIRES: linux, cpu || (gpu && level_zero)
// RUN: %{build} %device_msan_flags -O0 -g -o %t1.out
// RUN: %{run} not %t1.out 2>&1 | FileCheck %s
// RUN: %{build} %device_msan_flags -O1 -g -o %t2.out
// RUN: %{run} not %t2.out 2>&1 | FileCheck %s
// RUN: %{build} %device_msan_flags -O2 -g -o %t3.out
// RUN: %{run} not %t3.out 2>&1 | FileCheck %s

#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/device_global/device_global.hpp>
#include <sycl/usm.hpp>

using namespace sycl;
using namespace sycl::ext::oneapi;
using namespace sycl::ext::oneapi::experimental;

sycl::ext::oneapi::experimental::device_global<
int[4], decltype(properties(device_image_scope, host_access_read_write))>
dev_global;

__attribute__((noinline)) int check(int data) { return data + 1; }

int main() {
sycl::queue Q;
int *array = sycl::malloc_device<int>(4, Q);

Q.submit([&](sycl::handler &h) {
h.single_task<class Test1>([=]() {
dev_global[0] = 42;
array[0] = check(dev_global[1]);
array[1] = dev_global[1];
});
}).wait();

int val[4];
Q.copy(dev_global, val).wait();
assert(val[0] == 42);

Q.submit([&](sycl::handler &h) {
h.single_task<class Test2>([=]() {
array[0] = check(array[1]);
dev_global[1] = array[2]; // uninitialzed value
});
}).wait();

Q.submit([&](sycl::handler &h) {
h.single_task<class Test3>([=]() {
array[0] = dev_global[1];
check(array[0]);
});
}).wait();
// CHECK: use-of-uninitialized-value
// CHECK-NEXT: kernel <{{.*Test3}}>

sycl::free(array, Q);

return 0;
}
Loading