From 498a969981065b00e3df6590cecd50bf91d37aa0 Mon Sep 17 00:00:00 2001 From: Mike Wall Date: Thu, 23 Jun 2022 10:42:04 -0400 Subject: [PATCH] Offload mods for crusher development o Modified CMake scripts: - Use BML_OMP_OFFLOAD for both NVIDIA and AMD, needed due to commit #8a7df493 - Use FindCUDAToolkit module instead of depracated FindCUDA - Update to CMake 3.17 version, to support FindCUDAToolkit - Consolidated the logic for CUDA, HIP, and associated libraries for various types of device builds under control of BML_USE_DEVICE - Added BML_OFFLOAD_ARCH with options NVIDIA and AMD o Change crusher and spock build scripts accordingly o Modified offload regions to address bml_multiply_x2() fortran test failure (hang) - Move temporary working arrays all_ix, all_jx, and all_x from stack to heap - This eliminated the hang, although it's not really clear why - Similar changes made to other add, multiply offload regions --- CMakeLists.txt | 164 +++++++++--------- build.sh | 3 + scripts/build_crusher_offload_cce.sh | 4 +- scripts/build_spock_offload_cce.sh | 8 +- scripts/setenv_spock_offload.sh | 4 +- .../ellpack/bml_add_ellpack_typed.c | 22 +-- .../ellpack/bml_multiply_ellpack_typed.c | 20 +-- 7 files changed, 118 insertions(+), 107 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 932c182e1..40a00c9d0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.10) +cmake_minimum_required(VERSION 3.17) message(STATUS "CMake version ${CMAKE_VERSION}") @@ -80,6 +80,11 @@ set(Clang_C_FLAGS_DEBUG -O0 -g -save-temps -std=c99) set(Clang_C_FLAGS_RELEASE -O2 -g -std=c99 -DNDEBUG) set(Clang_C_FLAGS_RELWITHDEBINFO -O2 -g -std=c99 -DNDEBUG) +if(DEFINED ENV{CRAYPE_VERSION}) + set(EXTRA_CFLAGS "-DCRAY_SDK ${EXTRA_CFLAGS}") + set(EXTRA_FFLAGS "-ef -DCRAY_SDK ${EXTRA_FFLAGS}") +endif() + set(MALLOC_ALIGNMENT 64 CACHE INT "Alignment boundary for memory allocations") add_definitions(-DMALLOC_ALIGNMENT=${MALLOC_ALIGNMENT}) @@ -92,6 +97,7 @@ if(CMAKE_BUILD_TYPE_UPPER AND NOT DONT_TOUCH_MY_FLAGS) if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Intel" OR CMAKE_C_COMPILER_ID STREQUAL "Clang" + OR CMAKE_C_COMPILER_ID STREQUAL "Cray" OR CMAKE_C_COMPILER_ID STREQUAL "XL") set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} ${${CMAKE_C_COMPILER_ID}_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}) @@ -116,6 +122,8 @@ if(CMAKE_BUILD_TYPE_UPPER AND NOT DONT_TOUCH_MY_FLAGS) if(CMAKE_Fortran_FLAGS STREQUAL "") if(CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" + OR CMAKE_Fortran_COMPILER_ID STREQUAL "Clang" + OR CMAKE_Fortran_COMPILER_ID STREQUAL "Cray" OR CMAKE_Fortran_COMPILER_ID STREQUAL "XL") set(CMAKE_Fortran_FLAGS_${CMAKE_BUILD_TYPE_UPPER} ${${CMAKE_Fortran_COMPILER_ID}_Fortran_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}) @@ -205,20 +213,50 @@ if(BML_MPI) add_definitions(-DDO_MPI) endif() -option(BML_OMP_OFFLOAD "Compile with OpenMP GPU Offload support" FALSE) +include(FindCUDAToolkit) +if(CUDAToolkit_FOUND) + message("Found CUDAToolkit: ${CUDAToolkit_TARGET_DIR}") +endif() + +find_package(hip QUIET) + +set(BML_OMP_OFFLOAD OFF CACHE BOOL "Compile with OpenMP GPU Offload support") +set(BML_OFFLOAD_ARCH "NVIDIA" CACHE STRING "Offload architecture") +set_property(CACHE BML_OFFLOAD_ARCH PROPERTY STRINGS "NVIDIA" "AMD") +if(BML_OFFLOAD_ARCH STREQUAL "NVIDIA" AND NOT CUDAToolkit_FOUND) + message("Offload to NVIDIA selected but CUDA not found. Disabling offload.") + set(BML_OMP_OFFLOAD OFF CACHE BOOL "Compile with OpenMP GPU Offload support" FORCE) +endif() +if(BML_OFFLOAD_ARCH STREQUAL "AMD" AND NOT hip_FOUND) + message("Offload to AMD selected but HIP not found. Disabling offload.") + set(BML_OMP_OFFLOAD OFF CACHE BOOL "Compile with OpenMP GPU Offload support" FORCE) +endif() + if(BML_OMP_OFFLOAD) message(STATUS "Will build with OpenMP GPU Offload support") + add_definitions(-DUSE_OMP_OFFLOAD) find_package(OpenMP 4.5 REQUIRED) include(CheckCXXCompilerFlag) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set(OpenMP_CXX_OFFLOAD_FLAG "-foffload=-misa=sm_35 -foffload=nvptx-none -foffload='-lm'") + if(BML_OFFLOAD_ARCH STREQUAL "NVIDIA") + set(OpenMP_CXX_OFFLOAD_FLAG "-foffload=-misa=sm_35 -foffload=nvptx-none -foffload='-lm'") + endif() elseif(MAKE_CXX_COMPILER_ID STREQUAL "Intel") set(OpenMP_CXX_OFFLOAD_FLAG "-foffload='-lm'") elseif(MAKE_CXX_COMPILER_ID STREQUAL "Clang") - set(OpenMP_CXX_OFFLOAD_FLAG "-fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}") + if(BML_OFFLOAD_ARCH STREQUAL "NVIDIA") + set(OpenMP_CXX_OFFLOAD_FLAG "-O2 -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=${CUDAToolkit_TARGET_DIR}") + elseif(BML_OFFLOAD_ARCH STREQUAL "AMD") + if(DEFINED ENV{CRAYPE_VERSION}) + set(OpenMP_CXX_OFFLOAD_FLAG "") + else() + message("BML_OFFLOAD_ARCH is AMD but Cray compiler not detected") + message(" ==> No offload flags added") + endif() + endif() elseif(MAKE_CXX_COMPILER_ID STREQUAL "XL") set(OpenMP_CXX_OFFLOAD_FLAG "-qsmp=omp -qoffload") endif() @@ -232,60 +270,50 @@ if(BML_OMP_OFFLOAD) # list(APPEND LANGUAGES CUDA) - set(CUDA_FIND_REQUIRED TRUE) - include(FindCUDA) + if(BML_OFFLOAD_ARCH EQUAL "NVIDIA") - set(GPU_ARCH "sm_60" CACHE STRING "LAMMPS GPU CUDA SM architecture") - set_property(CACHE GPU_ARCH PROPERTY STRINGS sm_50 sm_60 sm_70) + set(GPU_ARCH "sm_60" CACHE STRING "LAMMPS GPU CUDA SM architecture") + set_property(CACHE GPU_ARCH PROPERTY STRINGS sm_50 sm_60 sm_70) - message("CUDA libraries: ${CUDA_LIBRARIES}") - message("CUDA CUBLAS libraries: ${CUDA_CUBLAS_LIBRARIES}") + endif() endif() set(BML_CUSPARSE FALSE CACHE BOOL "Whether to compile with CuSPARSE support") set(BML_CUDA FALSE CACHE BOOL "Whether to compile with CUDA support") -if(BML_CUDA OR BML_CUSOLVER OR BML_CUSPARSE) - include(FindCUDA) -endif() -if (CUDA_FOUND) - message("CUDA CUBLAS libraries: ${CUDA_CUBLAS_LIBRARIES}") - message("CUDA libraries: ${CUDA_LIBRARIES}") - list(APPEND LINK_LIBRARIES ${CUDA_LIBRARIES}) - include_directories(${CUDA_INCLUDE_DIRS}) -endif() - -if(BML_CUSPARSE) - message(STATUS "Will use cuSPARSE") - add_definitions(-DBML_USE_CUSPARSE) - message("cuSPARSE library: ${CUDA_cusparse_LIBRARY}") - list(APPEND LINK_LIBRARIES ${CUDA_cusparse_LIBRARY}) -endif() - -if(BML_MAGMA) - find_package(hip) -endif() - -if(BML_ROCSOLVER) - find_package(rocblas REQUIRED) - find_package(rocsolver REQUIRED) -endif() - -if (hip_FOUND) - message("HIP libraries: ${hip_LIBRARIES}") - list(APPEND LINK_LIBRARIES ${hip_LIBRARIES}) - include_directories(${hip_INCLUDE_DIRS}) -endif() - -if (rocblas_FOUND) - message("ROCBLAS libraries: ${rocblas_LIBRARIES}") - list(APPEND LINK_LIBRARIES ${rocblas_LIBRARIES}) - include_directories(${rocblas_INCLUDE_DIRS}) -endif() - -if (rocsolver_FOUND) - message("ROCSOLVER libraries: ${rocsolver_LIBRARIES}") - list(APPEND LINK_LIBRARIES ${rocsolver_LIBRARIES}) - include_directories(${rocsolver_INCLUDE_DIRS}) +set(BML_MAGMA FALSE CACHE BOOL "Whether to use MAGMA library") +set(BML_USE_DEVICE (BML_CUDA OR BML_MAGMA OR BML_CUSOLVER OR BML_ROCSOLVER OR BML_CUSPARSE OR BML_ROCSPARSE)) +if(BML_USE_DEVICE) + if (CUDAToolkit_FOUND) + message("CUDA CUBLAS libraries: ${CUDA_cublas_LIBRARY}") + list(APPEND LINK_LIBRARIES ${CUDA_cublas_LIBRARY}) + list(APPEND LINK_LIBRARIES ${CUDA_cudart_LIBRARY}) + include_directories(${CUDAToolkit_INCLUDE_DIR}) + if(BML_CUSOLVER) + message(STATUS "Will use cuSOLVER") + message("CUDA cuSOLVER library: ${CUDA_cusolver_LIBRARY}") + endif() + if(BML_CUSPARSE) + message(STATUS "Will use cuSPARSE") + add_definitions(-DBML_USE_CUSPARSE) + message("CUDA cuSPARSE library: ${CUDA_cusparse_LIBRARY}") + list(APPEND LINK_LIBRARIES ${CUDA_cusparse_LIBRARY}) + endif() + elseif(hip_FOUND) + message("HIP libraries: ${hip_LIBRARIES}") + include_directories(${hip_INCLUDE_DIRS}) + if(BML_ROCSOLVER) + message(STATUS "Will use rocSOLVER") + find_package(rocblas REQUIRED) + message("HIP rocBLAS libraries: ${rocblas_LIBRARIES}") + list(APPEND LINK_LIBRARIES ${rocblas_LIBRARIES}) + include_directories(${rocblas_INCLUDE_DIRS}) + + find_package(rocsolver REQUIRED) + message("HIP rocSOLVER libraries: ${rocsolver_LIBRARIES}") + include_directories(${rocsolver_INCLUDE_DIRS}) + endif() + list(APPEND LINK_LIBRARIES ${hip_LIBRARIES}) + endif() endif() set(BML_OPENCL FALSE CACHE BOOL "Whether to compiler with OpenCL support") @@ -295,7 +323,6 @@ endif() SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") -set(BML_MAGMA FALSE CACHE BOOL "Whether to use MAGMA library") if(BML_MAGMA) FIND_PACKAGE(MAGMA) @@ -310,29 +337,16 @@ if (MAGMA_FOUND) " MAGMA_LIBRARIES: ${MAGMA_LIBRARIES}" ) include_directories(${MAGMA_INCLUDE_DIRS}) -else() - message(STATUS - "MAGMA not found" - ) -endif() - -if (MAGMA_FOUND) if(BML_CUSOLVER) - message(STATUS "Will use cuSOLVER") add_definitions(-DBML_USE_CUSOLVER) endif() if(BML_ROCSOLVER) - message(STATUS "Will use rocSOLVER") add_definitions(-DBML_USE_ROCSOLVER) endif() -endif() - -if (CUBLAS_FOUND) - list(APPEND LINK_LIBRARIES ${CUBLAS_LIBRARIES}) -endif() - -if (CUDA_FOUND) - list(APPEND LINK_LIBRARIES ${CUDA_LIBRARIES}) +else() + message(STATUS + "MAGMA not found" + ) endif() set(BML_SCALAPACK FALSE CACHE BOOL "Whether to use ScaLAPACK library") @@ -381,14 +395,6 @@ if(NOT (LAPACK_FOUND OR NOBLAS OR MAGMA_FOUND)) endif() endif() message(STATUS "LAPACK_LIBRARIES=${LAPACK_LIBRARIES}") -if(MAGMA_FOUND) - if(BML_CUSOLVER) - message(STATUS "cuSOLVER: ${CUDA_cusolver_LIBRARY}") - endif() - if(BML_ROCSOLVER) - message(STATUS "rocSOLVER: ${ROCM_rocsolver_LIBRARIES}") - endif() -endif() #check existence of required math and linear algebra functions #in third party libraries @@ -497,6 +503,8 @@ if(MAGMA_FOUND) list(APPEND LINK_LIBRARIES ${MAGMA_LIBRARIES}) if(BML_CUSOLVER) list(APPEND LINK_LIBRARIES ${CUDA_cusolver_LIBRARY}) + elseif(BML_ROCSOLVER) + list(APPEND LINK_LIBRARIES ${rocsolver_LIBRARIES}) endif() endif() if(BML_SCALAPACK) diff --git a/build.sh b/build.sh index c30e4e551..3968918ef 100755 --- a/build.sh +++ b/build.sh @@ -79,6 +79,7 @@ EOF echo "EXTRA_FFLAGS Extra fortran flags (default is '${EXTRA_FFLAGS}')" echo "EXTRA_LINK_FLAGS Add extra link flags (default is '${EXTRA_LINK_FLAGS}')" echo "BML_OMP_OFFLOAD {yes,no} (default is ${BML_OMP_OFFLOAD})" + echo "BML_OFFLOAD_ARCH {NVIDIA, AMD} (default is ${BML_OFFLOAD_ARCH})" echo "GPU_ARCH GPU architecture (default is ${GPU_ARCH})" echo "BML_CUDA Build with CUDA (default is ${BML_CUDA})" echo "BML_MAGMA Build with MAGMA (default is ${BML_MAGMA})" @@ -127,6 +128,7 @@ set_defaults() { : ${FORTRAN_FLAGS:=} : ${EXTRA_LINK_FLAGS:=} : ${BML_OMP_OFFLOAD:=no} + : ${BML_OFFLOAD_ARCH:=NVIDIA} : ${GPU_ARCH:=} : ${BML_CUDA:=no} : ${BML_MAGMA:=no} @@ -234,6 +236,7 @@ configure() { ${EXTRA_LINK_FLAGS:+-DBML_LINK_FLAGS="${EXTRA_LINK_FLAGS}"} \ -DCMAKE_VERBOSE_MAKEFILE=${VERBOSE_MAKEFILE} \ -DBML_OMP_OFFLOAD="${BML_OMP_OFFLOAD}" \ + -DBML_OFFLOAD_ARCH="${BML_OFFLOAD_ARCH}" \ -DGPU_ARCH="${GPU_ARCH}" \ -DBML_CUDA="${BML_CUDA}" \ -DBML_MAGMA="${BML_MAGMA}" \ diff --git a/scripts/build_crusher_offload_cce.sh b/scripts/build_crusher_offload_cce.sh index e28ad5955..7030fe1c5 100644 --- a/scripts/build_crusher_offload_cce.sh +++ b/scripts/build_crusher_offload_cce.sh @@ -12,12 +12,12 @@ export FC=${FC:=ftn} export CXX=${CXX:=CC} export BLA_VENDOR=${BLA_VENDOR:=OpenBLAS} export BML_OPENMP=${BML_OPENMP:=yes} +export BML_OMP_OFFLOAD=${BML_OMP_OFFLOAD:=yes} +export BML_OFFLOAD_ARCH=${BML_OFFLOAD_ARCH:=AMD} export BUILD_DIR=${BUILD_DIR:="${MY_PATH}/build"} export INSTALL_DIR=${INSTALL_DIR:="${MY_PATH}/install"} export BML_TESTING=${BML_TESTING:=yes} export CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:=Release} -export CMAKE_Fortran_FLAGS=${CMAKE_Fortran_FLAGS:="-ef -DCRAY_SDK"} -export CMAKE_C_FLAGS=${CMAKE_C_FLAGS:="-Ofast -DUSE_OMP_OFFLOAD -DCRAY_SDK"} ./build.sh configure diff --git a/scripts/build_spock_offload_cce.sh b/scripts/build_spock_offload_cce.sh index e2345e47d..d9f851906 100644 --- a/scripts/build_spock_offload_cce.sh +++ b/scripts/build_spock_offload_cce.sh @@ -10,16 +10,16 @@ MY_PATH=$(pwd) export CC=${CC:=cc} export FC=${FC:=ftn} export CXX=${CXX:=CC} -export BLAS_VENDOR=${BLAS_VENDOR:=Auto} +export BLA_VENDOR=${BLA_VENDOR:=OpenBLAS} export BML_OPENMP=${BML_OPENMP:=yes} +export BML_OMP_OFFLOAD=${BML_OMP_OFFLOAD:=yes} +export BML_OFFLOAD_ARCH=${BML_OFFLOAD_ARCH:=AMD} +export BML_COMPLEX=${BML_COMPLEX:=no} export INSTALL_DIR=${INSTALL_DIR:="${MY_PATH}/install"} export BML_MAGMA=${BML_MAGMA:=no} export MAGMA_ROOT=${MAGMA_HOME} export BML_TESTING=${BML_TESTING:=yes} export CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:=Release} -export CMAKE_Fortran_FLAGS=${CMAKE_Fortran_FLAGS:="-ef -DCRAY_SDK"} -export CMAKE_C_FLAGS=${CMAKE_C_FLAGS:="-Ofast -DUSE_OMP_OFFLOAD -DCRAY_SDK"} -export EXTRA_LINK_FLAGS=${EXTRA_LINK_FLAGS:="-L${LIBSCI_BASE_DIR}/cray/9.0/x86_64/lib"} ./build.sh configure diff --git a/scripts/setenv_spock_offload.sh b/scripts/setenv_spock_offload.sh index 5a8086356..883311fd2 100644 --- a/scripts/setenv_spock_offload.sh +++ b/scripts/setenv_spock_offload.sh @@ -1,7 +1,7 @@ #!/bin/bash module load craype-accel-amd-gfx908 -module load rocm/4.1.0 +module load rocm module load cmake -export LD_LIBRARY_PATH="$CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH" +module load openblas diff --git a/src/C-interface/ellpack/bml_add_ellpack_typed.c b/src/C-interface/ellpack/bml_add_ellpack_typed.c index 0b862c5c0..87f0b5c37 100644 --- a/src/C-interface/ellpack/bml_add_ellpack_typed.c +++ b/src/C-interface/ellpack/bml_add_ellpack_typed.c @@ -77,12 +77,12 @@ void TYPED_FUNC( #if defined(USE_OMP_OFFLOAD) && (defined(INTEL_SDK) || defined(CRAY_SDK) || defined(__IBMC__) || defined(__ibmxl__)) int num_chunks = MIN(OFFLOAD_NUM_CHUNKS, rowMax - rowMin + 1); - int all_ix[N * num_chunks], all_jx[N * num_chunks]; - REAL_T all_x[N * num_chunks]; + int *all_ix, *all_jx; + REAL_T *all_x; - memset(all_ix, 0, N * num_chunks * sizeof(int)); - memset(all_jx, 0, N * num_chunks * sizeof(int)); - memset(all_x, 0.0, N * num_chunks * sizeof(REAL_T)); + all_ix = calloc(N * num_chunks, sizeof(int)); + all_jx = calloc(N * num_chunks, sizeof(int)); + all_x = calloc(N * num_chunks, sizeof(REAL_T)); #pragma omp target map(to:all_ix[0:N*num_chunks],all_jx[0:N*num_chunks],all_x[0:N*num_chunks]) @@ -248,13 +248,13 @@ double TYPED_FUNC( #if defined(USE_OMP_OFFLOAD) && (defined(INTEL_SDK) || defined(CRAY_SDK) || defined(__IBMC__) || defined(__ibmxl__)) int num_chunks = MIN(OFFLOAD_NUM_CHUNKS, rowMax - rowMin + 1); - int all_ix[N * num_chunks], all_jx[N * num_chunks]; - REAL_T all_x[N * num_chunks], all_y[N * num_chunks]; + int *all_ix, *all_jx; + REAL_T *all_x, *all_y; - memset(all_ix, 0, N * num_chunks * sizeof(int)); - memset(all_jx, 0, N * num_chunks * sizeof(int)); - memset(all_x, 0.0, N * num_chunks * sizeof(REAL_T)); - memset(all_y, 0.0, N * num_chunks * sizeof(REAL_T)); + all_ix = calloc(N * num_chunks, sizeof(int)); + all_jx = calloc(N * num_chunks, sizeof(int)); + all_x = calloc(N * num_chunks, sizeof(REAL_T)); + all_y = calloc(N * num_chunks, sizeof(REAL_T)); #pragma omp target map(to:all_ix[0:N*num_chunks],all_jx[0:N*num_chunks],all_x[0:N*num_chunks],all_y[0:N*num_chunks]) map(tofrom:trnorm) diff --git a/src/C-interface/ellpack/bml_multiply_ellpack_typed.c b/src/C-interface/ellpack/bml_multiply_ellpack_typed.c index 32ab1629a..b27db8b0c 100644 --- a/src/C-interface/ellpack/bml_multiply_ellpack_typed.c +++ b/src/C-interface/ellpack/bml_multiply_ellpack_typed.c @@ -163,12 +163,12 @@ void *TYPED_FUNC( #if defined(USE_OMP_OFFLOAD) && (defined(INTEL_SDK) || defined(CRAY_SDK) || defined(__IBMC__) || defined(__ibmxl__)) int num_chunks = MIN(OFFLOAD_NUM_CHUNKS, rowMax - rowMin + 1); - int all_ix[X_N * num_chunks], all_jx[X_N * num_chunks]; - REAL_T all_x[X_N * num_chunks]; + int *all_ix, *all_jx; + REAL_T *all_x; - memset(all_ix, 0, X_N * num_chunks * sizeof(int)); - memset(all_jx, 0, X_N * num_chunks * sizeof(int)); - memset(all_x, 0.0, X_N * num_chunks * sizeof(REAL_T)); + all_ix = calloc(X_N * num_chunks, sizeof(int)); + all_jx = calloc(X_N * num_chunks, sizeof(int)); + all_x = calloc(X_N * num_chunks, sizeof(REAL_T)); #pragma omp target map(to:all_ix[0:X_N*num_chunks],all_jx[0:X_N*num_chunks],all_x[0:X_N*num_chunks]) @@ -374,12 +374,12 @@ void TYPED_FUNC( #if defined(USE_OMP_OFFLOAD) && (defined(INTEL_SDK) || defined(CRAY_SDK) || defined(__IBMC__) || defined(__ibmxl__)) int num_chunks = MIN(OFFLOAD_NUM_CHUNKS, rowMax - rowMin + 1); - int all_ix[C_N * num_chunks], all_jx[C_N * num_chunks]; - REAL_T all_x[C_N * num_chunks]; + int *all_ix, *all_jx; + REAL_T *all_x; - memset(all_ix, 0, C_N * num_chunks * sizeof(int)); - memset(all_jx, 0, C_N * num_chunks * sizeof(int)); - memset(all_x, 0.0, C_N * num_chunks * sizeof(REAL_T)); + all_ix = calloc(C_N * num_chunks, sizeof(int)); + all_jx = calloc(C_N * num_chunks, sizeof(int)); + all_x = calloc(C_N * num_chunks, sizeof(REAL_T)); #pragma omp target map(to:all_ix[0:C_N*num_chunks],all_jx[0:C_N*num_chunks],all_x[0:C_N*num_chunks])