Skip to content

Commit

Permalink
Merge branch 'master' into cuda_12.4_dep
Browse files Browse the repository at this point in the history
  • Loading branch information
jaycedowell committed Sep 5, 2024
2 parents 0b219b6 + d6d8295 commit 790c8f5
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
- name: "Software Install - Python"
run: python -m pip install \
setuptools \
numpy \
"numpy<2" \
matplotlib \
contextlib2 \
simplejson \
Expand Down
14 changes: 13 additions & 1 deletion config/cuda.m4
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
ac_ext_save="$ac_ext"
ac_compile='$NVCC -c $NVCCFLAGS conftest.$ac_ext >&5'
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="$LIBS -lcuda -lcudart"
ac_ext="cu"
ac_link='$NVCC -o conftest$ac_exeext $NVCCFLAGS $LDFLAGS $LIBS conftest.$ac_ext >&5'
AC_LINK_IFELSE([
Expand All @@ -63,6 +65,7 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
ac_ext="$ac_ext_save"
fi
if test "$HAVE_CUDA" = "1"; then
Expand Down Expand Up @@ -151,9 +154,11 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
ac_ext_save="$ac_ext"
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="-lcuda -lcudart"
ac_ext="cu"
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS conftest.$ac_ext>&5'
AC_RUN_IFELSE([
AC_LANG_PROGRAM([[
Expand Down Expand Up @@ -205,6 +210,7 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
ac_ext="$ac_ext_save"
else
AC_SUBST([GPU_ARCHS], [$with_gpu_archs])
fi
Expand All @@ -229,15 +235,17 @@ AC_DEFUN([AX_CHECK_CUDA],
[default GPU shared memory per block in bytes (default=detect)])],
[],
[with_shared_mem='auto'])
if test "$with_gpu_archs" = "auto"; then
if test "$with_shared_mem" = "auto"; then
AC_MSG_CHECKING([for minimum shared memory per block])
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
ac_ext_save="$ac_ext"
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="-lcuda -lcudart"
ac_ext="cu"
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS conftest.$ac_ext>&5'
AC_RUN_IFELSE([
AC_LANG_PROGRAM([[
Expand Down Expand Up @@ -276,6 +284,7 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
ac_ext="$ac_ext_save"
else
AC_SUBST([GPU_SHAREDMEM], [$with_shared_mem])
fi
Expand All @@ -294,9 +303,11 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
ac_ext_save="$ac_ext"
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="-lcuda -lcudart"
ac_ext="cu"
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS conftest.$ac_ext>&5'
AC_RUN_IFELSE([
AC_LANG_PROGRAM([[
Expand All @@ -312,5 +323,6 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
ac_ext="$ac_ext_save"
fi
])
14 changes: 13 additions & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -21435,10 +21435,12 @@ printf %s "checking for a working CUDA 10+ installation... " >&6; }
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
ac_ext_save="$ac_ext"

ac_compile='$NVCC -c $NVCCFLAGS conftest.$ac_ext >&5'
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="$LIBS -lcuda -lcudart"
ac_ext="cu"

ac_link='$NVCC -o conftest$ac_exeext $NVCCFLAGS $LDFLAGS $LIBS conftest.$ac_ext >&5'
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
Expand Down Expand Up @@ -21478,6 +21480,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam \
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
ac_ext="$ac_ext_save"
fi

if test "$HAVE_CUDA" = "1"; then
Expand Down Expand Up @@ -21595,9 +21598,11 @@ printf %s "checking which CUDA architectures to target... " >&6; }
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
ac_ext_save="$ac_ext"

LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="-lcuda -lcudart"
ac_ext="cu"
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS conftest.$ac_ext>&5'
if test "$cross_compiling" = yes
then :
Expand Down Expand Up @@ -21676,6 +21681,7 @@ fi
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
ac_ext="$ac_ext_save"
else
GPU_ARCHS=$with_gpu_archs

Expand Down Expand Up @@ -21709,16 +21715,18 @@ else $as_nop
with_shared_mem='auto'
fi

if test "$with_gpu_archs" = "auto"; then
if test "$with_shared_mem" = "auto"; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for minimum shared memory per block" >&5
printf %s "checking for minimum shared memory per block... " >&6; }

CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
ac_ext_save="$ac_ext"

LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="-lcuda -lcudart"
ac_ext="cu"
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS conftest.$ac_ext>&5'
if test "$cross_compiling" = yes
then :
Expand Down Expand Up @@ -21783,6 +21791,7 @@ fi
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
ac_ext="$ac_ext_save"
else
GPU_SHAREDMEM=$with_shared_mem

Expand All @@ -21808,9 +21817,11 @@ printf %s "checking for thrust pinned allocated support... " >&6; }
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
ac_ext_save="$ac_ext"

LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="-lcuda -lcudart"
ac_ext="cu"
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS conftest.$ac_ext>&5'
if test "$cross_compiling" = yes
then :
Expand Down Expand Up @@ -21854,6 +21865,7 @@ fi
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
ac_ext="$ac_ext_save"
fi


Expand Down
2 changes: 1 addition & 1 deletion src/fft.cu
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ BFstatus BFfft_impl::execute_impl(BFarray const* in,
// We could potentially use a CUDA event as a lighter-weight
// solution.
cudaStreamSynchronize(g_cuda_stream);
CallbackData* h_callback_data = &_hv_callback_data[0];
CallbackData* h_callback_data = thrust::raw_pointer_cast(&_hv_callback_data[0]);
// WAR for CUFFT insisting that pointer be aligned to sizeof(cufftComplex)
int alignment = (_nbit == 32 ?
sizeof(cufftComplex) :
Expand Down

0 comments on commit 790c8f5

Please sign in to comment.