Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamically detect avx2 capabilities #877

Merged
merged 3 commits into from
Jun 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 26 additions & 11 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ matrix:
packages:
- libblas-dev
- liblapack-dev
- python-numpy
- python-scipy
- python-dev
# - swig3.0
env:
Expand All @@ -26,8 +24,6 @@ matrix:
packages:
- libatlas-base-dev
- liblapack-dev
- python-numpy
- python-scipy
- python-dev
# - swig3.0
env:
Expand All @@ -39,42 +35,61 @@ matrix:
packages:
- libopenblas-dev
- liblapack-dev
- python-numpy
- python-scipy
- python-dev
# - swig3.0
env:
- PYTHON_CFLAGS="-I/usr/include/python2.7"
- os: linux
dist: xenial # To ensure clang 7 for __builtin_cpu_init().
compiler: clang
addons:
apt:
packages:
- libopenblas-dev
- liblapack-dev
- python-numpy
- python-scipy
- python-dev
# - swig3.0
env:
- PYTHON_CFLAGS="-I/usr/include/python2.7"
# NOTE: Hack, c.f. https://github.com/travis-ci/travis-ci/issues/8613
- LD_LIBRARY_PATH="/usr/local/clang/lib"
- os: osx
compiler: gcc
addons:
homebrew:
packages:
- gcc
- swig
update: true
env:
- MATRIX_EVAL="brew update && brew install gcc@6 numpy scipy swig; brew link --overwrite gcc@6; export CC=gcc-6 CXX=g++-6"
- MATRIX_EVAL="which gcc; export CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++"
- os: osx
compiler: clang
addons:
homebrew:
packages:
- llvm
- swig
update: true
env:
- MATRIX_EVAL="brew update && brew install llvm numpy scipy swig; brew link --overwrite llvm; export CC=/usr/local/opt/llvm/bin/clang CXX=/usr/local/opt/llvm/bin/clang++"
- MATRIX_EVAL="brew link --overwrite llvm; export CC=/usr/local/opt/llvm/bin/clang CXX=/usr/local/opt/llvm/bin/clang++"
- LDFLAGS="-L/usr/local/opt/llvm/lib"
- CPPFLAGS="-I/usr/local/opt/llvm/include"
allow_failures:
- os: osx
compiler: gcc
addons:
homebrew:
packages:
- gcc
- swig
update: true
env:
- MATRIX_EVAL="brew update && brew install gcc@6 numpy scipy swig; brew link --overwrite gcc@6; export CC=gcc-6 CXX=g++-6"
- MATRIX_EVAL="which gcc; export CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++"

before_install:
- eval "$MATRIX_EVAL"
- pip install --user scipy

install:
- ./.travis/install.sh
Expand Down
7 changes: 5 additions & 2 deletions acinclude/ax_check_cpu.m4
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# serial 1

AC_DEFUN([AX_CPU_ARCH], [
Expand All @@ -8,8 +7,12 @@ AC_CANONICAL_TARGET

case $target in
amd64-* | x86_64-*)
ARCH_CPUFLAGS="-msse4 -mpopcnt"
ARCH_CPUFLAGS="-mpopcnt"
ARCH_CXXFLAGS="-m64"

dnl Select the SIMD flags from the avx2 run-time probe. Use a plain
dnl assignment rather than "+=" because the append operator is a bash/ksh
dnl extension that POSIX shells such as dash do not understand.
AX_GCC_X86_CPU_SUPPORTS(avx2,
[ARCH_CPUFLAGS="$ARCH_CPUFLAGS -mavx2 -mf16c"],
[ARCH_CPUFLAGS="$ARCH_CPUFLAGS -msse4"])
;;
aarch64*-*)
dnl This is an arch for Nvidia Xavier a proper detection would be nice.
Expand Down
104 changes: 104 additions & 0 deletions acinclude/ax_gcc_x86_cpu_supports.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# ============================================================================
# https://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpu_supports.html
# ============================================================================
#
# SYNOPSIS
#
# AX_GCC_X86_CPU_SUPPORTS(X86-INSTRUCTION-SET,
# [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND])
#
# DESCRIPTION
#
# Checks if the host cpu supports X86-INSTRUCTION-SET. The instruction
# sets that can be tested are "mmx, popcnt, sse, sse2, sse3, sse4.1,
# sse4.2, sse4a, avx, avx2, avx512f, fma, fma4, bmi, bmi2". If the
# instruction set is supported by the host cpu, the C preprocessor macro
# HAVE_XXX_INSTRUCTIONS is set to 1, where XXX is the upper-cased
# instruction set name with dots replaced by underscores. For example,
# the test for "sse4.2" would define HAVE_SSE4_2_INSTRUCTIONS=1. This
# macro requires the gcc extended builtin functions "__builtin_cpu_init"
# and "__builtin_cpu_supports" to detect the cpu features. It will error
# out if the compiler doesn't have these builtins.
#
# If the test for the instruction set succeeded, the hook ACTION-IF-FOUND
# would run. Otherwise the hook ACTION-IF-NOT-FOUND would run if
# specified.
#
# See also AX_CHECK_X86_FEATURES, which checks all the possible
# instruction sets and exports the corresponding CFLAGS.
#
# LICENSE
#
# Copyright (c) 2016 Felix Chern <idryman@gmail.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.

#serial 3

# _AX_GCC_X86_CPU_INIT
# --------------------
# Internal helper, expanded at most once (AC_DEFUN_ONCE). Builds and runs a
# small C program that calls __builtin_cpu_init and caches the outcome in
# ax_cv_gcc_check_x86_cpu_init (yes/no). Aborts configure with an error
# when the cached result is "no".
AC_DEFUN_ONCE([_AX_GCC_X86_CPU_INIT],
[AC_LANG_PUSH([C])
 AC_CACHE_CHECK([for gcc __builtin_cpu_init function],
   [ax_cv_gcc_check_x86_cpu_init],
   [AC_RUN_IFELSE(
     [AC_LANG_PROGRAM([#include <stdlib.h>],
       [__builtin_cpu_init ();])
     ],
     [ax_cv_gcc_check_x86_cpu_init=yes],
     [ax_cv_gcc_check_x86_cpu_init=no])])
 dnl Balance the language push above so that checks expanded after this
 dnl macro run in whatever language was current before it.
 AC_LANG_POP([C])
 AS_IF([test "X$ax_cv_gcc_check_x86_cpu_init" = "Xno"],
   [AC_MSG_ERROR([Need GCC to support X86 CPU features tests])])
])

# AX_GCC_X86_CPU_SUPPORTS(X86-INSTRUCTION-SET,
#                         [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
# -----------------------------------------------------------------
# Probes whether the CPU running configure supports the instruction set
# named by the first argument.
#
# The probe is a C program, compiled and executed via AC_RUN_IFELSE, that
# calls __builtin_cpu_init and exits with status 0 when
# __builtin_cpu_supports reports the feature, 1 otherwise. No
# cross-compilation fallback is passed to AC_RUN_IFELSE, so the check
# cannot be answered when cross compiling.
#
# The result (yes/no) is cached in ax_cv_gcc_x86_cpu_supports_<feature>,
# with the feature name sanitised by AS_TR_SH. On "yes" the preprocessor
# symbol HAVE_<FEATURE>_INSTRUCTIONS (name transformed by AS_TR_CPP) is
# defined to 1 and ACTION-IF-FOUND runs; otherwise ACTION-IF-NOT-FOUND runs.
#
# Requires AC_PROG_CC and _AX_GCC_X86_CPU_INIT.
AC_DEFUN([AX_GCC_X86_CPU_SUPPORTS],
[AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([_AX_GCC_X86_CPU_INIT])
AC_LANG_PUSH([C])
AS_VAR_PUSHDEF([gcc_x86_feature], [AS_TR_SH([ax_cv_gcc_x86_cpu_supports_$1])])
AC_CACHE_CHECK([for x86 $1 instruction support],
[gcc_x86_feature],
[AC_RUN_IFELSE(
[AC_LANG_PROGRAM( [#include <stdlib.h> ],
[ __builtin_cpu_init ();
if (__builtin_cpu_supports("$1"))
return 0;
return 1;
])],
[gcc_x86_feature=yes],
[gcc_x86_feature=no]
)]
)
AC_LANG_POP([C])
AS_VAR_IF([gcc_x86_feature],[yes],
[AC_DEFINE(
AS_TR_CPP([HAVE_$1_INSTRUCTIONS]),
[1],
[Define if $1 instructions are supported])
$2],
[$3]
)
AS_VAR_POPDEF([gcc_x86_feature])
])
157 changes: 156 additions & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -2065,6 +2065,48 @@ $as_echo "$ac_res" >&6; }
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno

} # ac_fn_cxx_check_func

# ac_fn_c_try_run LINENO
# ----------------------
# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
# that executables *can* be run.
ac_fn_c_try_run ()
{
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
if { { ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
(eval "$ac_link") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
{ { case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
(eval "$ac_try") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
ac_retval=0
else
$as_echo "$as_me: program exited with status $ac_status" >&5
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

ac_retval=$ac_status
fi
rm -rf conftest.dSYM conftest_ipa8_conftest.oo
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval

} # ac_fn_c_try_run
cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
Expand Down Expand Up @@ -6807,15 +6849,128 @@ test -n "$target_alias" &&
NONENONEs,x,x, &&
program_prefix=${target_alias}-

ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for gcc __builtin_cpu_init function" >&5
$as_echo_n "checking for gcc __builtin_cpu_init function... " >&6; }
if ${ax_cv_gcc_check_x86_cpu_init+:} false; then :
$as_echo_n "(cached) " >&6
else
if test "$cross_compiling" = yes; then :
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
as_fn_error $? "cannot run test program while cross compiling
See \`config.log' for more details" "$LINENO" 5; }
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <stdlib.h>
int
main ()
{
__builtin_cpu_init ();
;
return 0;
}

_ACEOF
if ac_fn_c_try_run "$LINENO"; then :
ax_cv_gcc_check_x86_cpu_init=yes
else
ax_cv_gcc_check_x86_cpu_init=no
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
conftest.$ac_objext conftest.beam conftest.$ac_ext
fi

fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_check_x86_cpu_init" >&5
$as_echo "$ax_cv_gcc_check_x86_cpu_init" >&6; }
if test "X$ax_cv_gcc_check_x86_cpu_init" = "Xno"; then :
as_fn_error $? "Need GCC to support X86 CPU features tests" "$LINENO" 5
fi



{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for cpu arch" >&5
$as_echo_n "checking for cpu arch... " >&6; }


case $target in
amd64-* | x86_64-*)
ARCH_CPUFLAGS="-msse4 -mpopcnt"
ARCH_CPUFLAGS="-mpopcnt"
ARCH_CXXFLAGS="-m64"



ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu


{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for x86 avx2 instruction support" >&5
$as_echo_n "checking for x86 avx2 instruction support... " >&6; }
if ${ax_cv_gcc_x86_cpu_supports_avx2+:} false; then :
$as_echo_n "(cached) " >&6
else
if test "$cross_compiling" = yes; then :
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
as_fn_error $? "cannot run test program while cross compiling
See \`config.log' for more details" "$LINENO" 5; }
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <stdlib.h>
int
main ()
{
__builtin_cpu_init ();
if (__builtin_cpu_supports("avx2"))
return 0;
return 1;

;
return 0;
}
_ACEOF
if ac_fn_c_try_run "$LINENO"; then :
ax_cv_gcc_x86_cpu_supports_avx2=yes
else
ax_cv_gcc_x86_cpu_supports_avx2=no

fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
conftest.$ac_objext conftest.beam conftest.$ac_ext
fi


fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_x86_cpu_supports_avx2" >&5
$as_echo "$ax_cv_gcc_x86_cpu_supports_avx2" >&6; }
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

# Pick the SIMD flags according to the cached avx2 probe result.
# Plain assignments are used instead of "+=", which is a bash/ksh extension
# and fails under POSIX shells such as dash (leaving the flags unset).
if test "x$ax_cv_gcc_x86_cpu_supports_avx2" = xyes; then :

$as_echo "#define HAVE_AVX2_INSTRUCTIONS 1" >>confdefs.h

ARCH_CPUFLAGS="$ARCH_CPUFLAGS -mavx2 -mf16c"
else
ARCH_CPUFLAGS="$ARCH_CPUFLAGS -msse4"

fi


;;
aarch64*-*)
ARCH_CPUFLAGS="-march=armv8.2-a"
Expand Down
2 changes: 1 addition & 1 deletion makefile.inc.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ CXX = @CXX@
CXXCPP = @CXXCPP@
CPPFLAGS = -DFINTEGER=int @CPPFLAGS@ @OPENMP_CXXFLAGS@ @NVCC_CPPFLAGS@
CXXFLAGS = -fPIC @ARCH_CXXFLAGS@ -Wno-sign-compare @CXXFLAGS@
CPUFLAGS = -mavx2 -mf16c @ARCH_CPUFLAGS@
CPUFLAGS = @ARCH_CPUFLAGS@
LDFLAGS = @OPENMP_CXXFLAGS@ @LDFLAGS@ @NVCC_LDFLAGS@
LIBS = @BLAS_LIBS@ @LAPACK_LIBS@ @LIBS@ @NVCC_LIBS@
PYTHONCFLAGS = @PYTHON_CFLAGS@ -I@NUMPY_INCLUDE@
Expand Down