Extend the HIP compatibility macros in ggml-cuda.h.

Adds HIP aliases for the CUDA event API (cudaEvent_t, cudaEventCreateWithFlags,
cudaEventDisableTiming, cudaEventRecord, cudaStreamWaitEvent), for
cudaMallocHost / cudaFreeHost and for cudaDeviceSynchronize. Also defines
CUBLAS_TF32_TENSOR_OP_MATH as 0 and turns cublasSetMathMode(h, m) into a macro
that discards its arguments and expands to HIPBLAS_STATUS_SUCCESS.

diff --git a/ggml-cuda.h b/ggml-cuda.h
index c00d83ba64748..6ab5b3944301d 100644
--- a/ggml-cuda.h
+++ b/ggml-cuda.h
@@ -6,19 +6,28 @@
 #define CUBLAS_OP_N HIPBLAS_OP_N
 #define CUBLAS_OP_T HIPBLAS_OP_T
 #define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
+#define CUBLAS_TF32_TENSOR_OP_MATH 0
 #define cublasCreate hipblasCreate
 #define cublasGemmEx hipblasGemmEx
 #define cublasHandle_t hipblasHandle_t
+#define cublasSetMathMode(h, m) HIPBLAS_STATUS_SUCCESS
 #define cublasSetStream hipblasSetStream
 #define cublasSgemm hipblasSgemm
 #define cublasStatus_t hipblasStatus_t
 #define CUDA_R_16F HIPBLAS_R_16F
 #define CUDA_R_32F HIPBLAS_R_32F
+#define cudaDeviceSynchronize hipDeviceSynchronize
 #define cudaError_t hipError_t
+#define cudaEvent_t hipEvent_t
+#define cudaEventCreateWithFlags hipEventCreateWithFlags
+#define cudaEventDisableTiming hipEventDisableTiming
+#define cudaEventRecord hipEventRecord
 #define cudaFree hipFree
+#define cudaFreeHost hipFreeHost
 #define cudaGetErrorString hipGetErrorString
 #define cudaGetLastError hipGetLastError
 #define cudaMalloc hipMalloc
+#define cudaMallocHost hipMallocHost
 #define cudaMemcpyAsync hipMemcpyAsync
 #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
 #define cudaMemcpyHostToDevice hipMemcpyHostToDevice
@@ -26,6 +35,7 @@
 #define cudaStreamCreateWithFlags hipStreamCreateWithFlags
 #define cudaStreamNonBlocking hipStreamNonBlocking
 #define cudaStreamSynchronize hipStreamSynchronize
+#define cudaStreamWaitEvent hipStreamWaitEvent
 #define cudaSuccess hipSuccess
 #define GGML_USE_CUBLAS
 #else