HIP: disable VMM on HIP as it seems that it doesn't work in some configurations #11420

Merged 1 commit on Jan 25, 2025.
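For context, a minimal sketch (not part of this PR) of how a build could opt back into the HIP VMM pool once this change lands. It assumes a standard CMake initial-cache file; the option names GGML_HIP and GGML_HIP_NO_VMM come from the diff below, while the file name is made up:

```cmake
# hip-vmm.cmake — hypothetical initial-cache file, passed to cmake via -C
# (the same two settings can also be given on the command line with -D).
set(GGML_HIP        ON  CACHE BOOL "ggml: use HIP")
set(GGML_HIP_NO_VMM OFF CACHE BOOL "ggml: do not try to use HIP VMM")
```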
1 change: 1 addition & 0 deletions ggml/CMakeLists.txt
@@ -155,6 +155,7 @@ option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp on

option(GGML_HIP "ggml: use HIP" OFF)
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
Collaborator comment:
At some point we should rename all the negative options (e.g. GGML_CUDA_NO_VMM -> GGML_CUDA_VMM) and set the default value accordingly instead. This is mostly an artifact from the time Makefile was used and didn't have a good way to set defaults.

option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
option(GGML_VULKAN "ggml: use Vulkan" OFF)
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
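A minimal sketch of what the rename suggested in the comment above could look like; these positive-form options do not exist in the tree, and the defaults are chosen so behavior matches the current negative options (VMM on for CUDA, off for HIP after this PR):

```cmake
# Hypothetical positive-form replacements for the GGML_*_NO_VMM flags.
option(GGML_CUDA_VMM "ggml: use CUDA VMM" ON)
option(GGML_HIP_VMM  "ggml: use HIP VMM"  OFF)
```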
4 changes: 4 additions & 0 deletions ggml/src/ggml-cuda/common.cuh
@@ -131,6 +131,10 @@ typedef float dfloat; // dequantize float
typedef float2 dfloat2;
#endif // GGML_CUDA_F16

#if (!defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)) || (defined(GGML_USE_HIP) && !defined(GGML_HIP_NO_VMM))
#define GGML_USE_VMM
#endif // (!defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)) || (defined(GGML_USE_HIP) && !defined(GGML_HIP_NO_VMM))

#if (defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
#define FP16_AVAILABLE
#endif // (defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
14 changes: 7 additions & 7 deletions ggml/src/ggml-cuda/ggml-cuda.cu
@@ -152,7 +152,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
for (int id = 0; id < info.device_count; ++id) {
int device_vmm = 0;

#if !defined(GGML_CUDA_NO_VMM)
#if defined(GGML_USE_VMM)
CUdevice device;
CU_CHECK(cuDeviceGet(&device, id));
CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
@@ -164,7 +164,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
alloc_prop.location.id = id;
CU_CHECK(cuMemGetAllocationGranularity(&info.devices[id].vmm_granularity, &alloc_prop, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));
}
#endif // !defined(GGML_CUDA_NO_VMM)
#endif // defined(GGML_USE_VMM)
info.devices[id].vmm = !!device_vmm;

cudaDeviceProp prop;
@@ -300,7 +300,7 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
};

// pool with virtual memory
#if !defined(GGML_CUDA_NO_VMM)
#if defined(GGML_USE_VMM)
struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB

@@ -408,14 +408,14 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
GGML_ASSERT(ptr == (void *) ((char *)(pool_addr) + pool_used));
}
};
#endif // !defined(GGML_CUDA_NO_VMM)
#endif // defined(GGML_USE_VMM)

std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
#if !defined(GGML_CUDA_NO_VMM)
#if defined(GGML_USE_VMM)
if (ggml_cuda_info().devices[device].vmm) {
return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
}
#endif // !defined(GGML_CUDA_NO_VMM)
#endif // defined(GGML_USE_VMM)
return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_leg(device));
}

@@ -3250,7 +3250,7 @@ static ggml_backend_feature * ggml_backend_cuda_get_features(ggml_backend_reg_t
features.push_back({ "FORCE_CUBLAS", "1" });
#endif

#ifdef GGML_CUDA_NO_VMM
#ifndef GGML_USE_VMM
features.push_back({ "NO_VMM", "1" });
#endif

4 changes: 2 additions & 2 deletions ggml/src/ggml-hip/CMakeLists.txt
@@ -96,8 +96,8 @@ if (GGML_HIP_GRAPHS)
add_compile_definitions(GGML_HIP_GRAPHS)
endif()

if (GGML_CUDA_NO_VMM)
add_compile_definitions(GGML_CUDA_NO_VMM)
if (GGML_HIP_NO_VMM)
add_compile_definitions(GGML_HIP_NO_VMM)
endif()

if (CXX_IS_HIPCC)
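Tying the pieces together: with GGML_HIP_NO_VMM defaulting to ON, the hunk above defines GGML_HIP_NO_VMM for HIP builds, so common.cuh never defines GGML_USE_VMM and new_pool_for_device falls back to the legacy pool (ggml_cuda_pool_leg). A hypothetical configure-time summary of that effective setting, illustrative only and not present in the tree:

```cmake
# Hypothetical status message reporting the effective VMM setting for the HIP backend.
if (GGML_HIP AND NOT GGML_HIP_NO_VMM)
    message(STATUS "ggml-hip: VMM pool requested (GGML_HIP_NO_VMM=OFF)")
else()
    message(STATUS "ggml-hip: VMM pool disabled, legacy pool will be used")
endif()
```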