Skip to content

Commit

Permalink
Dynamically detect avx2 capabilities (#877)
Browse files Browse the repository at this point in the history
* Install scipy via pip rather than system package manager

* Dynamically detect AVX2 capability

* Simplify build on TravisCI
  • Loading branch information
Lucas Hosseini authored Jun 28, 2019
1 parent 70f5b04 commit 656368b
Show file tree
Hide file tree
Showing 5 changed files with 292 additions and 15 deletions.
37 changes: 26 additions & 11 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ matrix:
packages:
- libblas-dev
- liblapack-dev
- python-numpy
- python-scipy
- python-dev
# - swig3.0
env:
Expand All @@ -26,8 +24,6 @@ matrix:
packages:
- libatlas-base-dev
- liblapack-dev
- python-numpy
- python-scipy
- python-dev
# - swig3.0
env:
Expand All @@ -39,42 +35,61 @@ matrix:
packages:
- libopenblas-dev
- liblapack-dev
- python-numpy
- python-scipy
- python-dev
# - swig3.0
env:
- PYTHON_CFLAGS="-I/usr/include/python2.7"
- os: linux
dist: xenial # To ensure clang 7 for __builtin_cpu_init().
compiler: clang
addons:
apt:
packages:
- libopenblas-dev
- liblapack-dev
- python-numpy
- python-scipy
- python-dev
# - swig3.0
env:
- PYTHON_CFLAGS="-I/usr/include/python2.7"
# NOTE: Hack, c.f. https://github.com/travis-ci/travis-ci/issues/8613
- LD_LIBRARY_PATH="/usr/local/clang/lib"
- os: osx
compiler: gcc
addons:
homebrew:
packages:
- gcc
- swig
update: true
env:
- MATRIX_EVAL="brew update && brew install gcc@6 numpy scipy swig; brew link --overwrite gcc@6; export CC=gcc-6 CXX=g++-6"
- MATRIX_EVAL="which gcc; export CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++"
- os: osx
compiler: clang
addons:
homebrew:
packages:
- llvm
- swig
update: true
env:
- MATRIX_EVAL="brew update && brew install llvm numpy scipy swig; brew link --overwrite llvm; export CC=/usr/local/opt/llvm/bin/clang CXX=/usr/local/opt/llvm/bin/clang++"
- MATRIX_EVAL="brew link --overwrite llvm; export CC=/usr/local/opt/llvm/bin/clang CXX=/usr/local/opt/llvm/bin/clang++"
- LDFLAGS="-L/usr/local/opt/llvm/lib"
- CPPFLAGS="-I/usr/local/opt/llvm/include"
allow_failures:
- os: osx
compiler: gcc
addons:
homebrew:
packages:
- gcc
- swig
update: true
env:
- MATRIX_EVAL="brew update && brew install gcc@6 numpy scipy swig; brew link --overwrite gcc@6; export CC=gcc-6 CXX=g++-6"
- MATRIX_EVAL="which gcc; export CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++"

before_install:
- eval "$MATRIX_EVAL"
- pip install --user scipy

install:
- ./.travis/install.sh
Expand Down
7 changes: 5 additions & 2 deletions acinclude/ax_check_cpu.m4
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# serial 1

AC_DEFUN([AX_CPU_ARCH], [
Expand All @@ -8,8 +7,12 @@ AC_CANONICAL_TARGET
case $target in
amd64-* | x86_64-*)
ARCH_CPUFLAGS="-msse4 -mpopcnt"
ARCH_CPUFLAGS="-mpopcnt"
ARCH_CXXFLAGS="-m64"
AX_GCC_X86_CPU_SUPPORTS(avx2,
[ARCH_CPUFLAGS+=" -mavx2 -mf16c"],
[ARCH_CPUFLAGS+=" -msse4"])
;;
aarch64*-*)
dnl This is an arch for Nvidia Xavier; a proper detection would be nice.
Expand Down
104 changes: 104 additions & 0 deletions acinclude/ax_gcc_x86_cpu_supports.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# ============================================================================
# https://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpu_supports.html
# ============================================================================
#
# SYNOPSIS
#
# AX_GCC_X86_CPU_SUPPORTS(X86-INSTRUCTION-SET,
# [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND])
#
# DESCRIPTION
#
# Checks if the host cpu supports X86-INSTRUCTION-SET. The instruction sets
# that can be tested are "mmx, popcnt, sse, sse2, sse3, sse4.1, sse4.2,
# sse4a, avx, avx2, avx512f, fma, fma4, bmi, bmi2". If the instruction set
# is supported by the host cpu, the C preprocessor macro
# HAVE_XXX_INSTRUCTIONS is set to 1. The XXX is the up-cased instruction
# set name with dots replaced by underscores. For example, the test for
# "sse4.2" would export HAVE_SSE4_2_INSTRUCTIONS=1. This macro requires the
# gcc extended builtin functions "__builtin_cpu_init" and
# "__builtin_cpu_supports" to detect the cpu features. It will error out
# if the compiler doesn't have these builtins.
#
# If the test for the instruction set succeeded, the hook ACTION-IF-FOUND
# would run. Otherwise the hook ACTION-IF-NOT-FOUND would run if
# specified.
#
# See also AX_CHECK_X86_FEATURES, which checks all the possible
# instruction sets and exports the corresponding CFLAGS.
#
# LICENSE
#
# Copyright (c) 2016 Felix Chern <idryman@gmail.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.

#serial 3

dnl _AX_GCC_X86_CPU_INIT
dnl --------------------
dnl Internal helper, expanded at most once (AC_DEFUN_ONCE).
dnl
dnl Compiles, links and runs a small C program that calls
dnl __builtin_cpu_init, and caches the outcome (yes/no) in
dnl ax_cv_gcc_check_x86_cpu_init.  When the result is "no", configure is
dnl aborted with AC_MSG_ERROR.
dnl
dnl No cross-compiling action is passed to AC_RUN_IFELSE, so the probe has
dnl to be runnable on the machine executing configure.
AC_DEFUN_ONCE([_AX_GCC_X86_CPU_INIT],
 [AC_LANG_PUSH([C])
  AC_CACHE_CHECK([for gcc __builtin_cpu_init function],
    [ax_cv_gcc_check_x86_cpu_init],
    [AC_RUN_IFELSE(
      [AC_LANG_PROGRAM([#include <stdlib.h>],
        [__builtin_cpu_init ();])
      ],
      [ax_cv_gcc_check_x86_cpu_init=yes],
      [ax_cv_gcc_check_x86_cpu_init=no])])
dnl Balance the push above: without this pop, C stays the current language
dnl for every check expanded after this macro.
  AC_LANG_POP([C])
  AS_IF([test "X$ax_cv_gcc_check_x86_cpu_init" = "Xno"],
    [AC_MSG_ERROR([Need GCC to support X86 CPU features tests])])
])

dnl AX_GCC_X86_CPU_SUPPORTS(X86-INSTRUCTION-SET,
dnl                         ACTION-IF-FOUND, ACTION-IF-NOT-FOUND)
dnl ---------------------------------------------------------------
dnl Run-time probe for one x86 instruction set.
dnl
dnl   argument 1: instruction set name, passed verbatim as the string
dnl               argument of __builtin_cpu_supports (e.g. avx2).
dnl   argument 2: shell code expanded when the probe program exits with 0.
dnl   argument 3: shell code expanded otherwise.
dnl
dnl Requires AC_PROG_CC and _AX_GCC_X86_CPU_INIT.  The result (yes/no) is
dnl cached in ax_cv_gcc_x86_cpu_supports_<name>, where the name is made
dnl shell-safe with AS_TR_SH.  On success HAVE_<NAME>_INSTRUCTIONS (name
dnl converted with AS_TR_CPP) is AC_DEFINE'd to 1 before the found action
dnl is expanded.
dnl
dnl AC_RUN_IFELSE is given no cross-compiling action, so the probe program
dnl has to be runnable on the machine executing configure.
AC_DEFUN([AX_GCC_X86_CPU_SUPPORTS],
[AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([_AX_GCC_X86_CPU_INIT])
AC_LANG_PUSH([C])
dnl Alias a fixed local name to the per-feature cache variable.
AS_VAR_PUSHDEF([gcc_x86_feature], [AS_TR_SH([ax_cv_gcc_x86_cpu_supports_$1])])
AC_CACHE_CHECK([for x86 $1 instruction support],
[gcc_x86_feature],
[AC_RUN_IFELSE(
[AC_LANG_PROGRAM( [#include <stdlib.h> ],
[ __builtin_cpu_init ();
if (__builtin_cpu_supports("$1"))
return 0;
return 1;
])],
[gcc_x86_feature=yes],
[gcc_x86_feature=no]
)]
)
dnl Matches the AC_LANG_PUSH at the top of this macro.
AC_LANG_POP([C])
dnl Publish the preprocessor define and dispatch to the caller's actions.
AS_VAR_IF([gcc_x86_feature],[yes],
[AC_DEFINE(
AS_TR_CPP([HAVE_$1_INSTRUCTIONS]),
[1],
[Define if $1 instructions are supported])
$2],
[$3]
)
AS_VAR_POPDEF([gcc_x86_feature])
])
157 changes: 156 additions & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -2065,6 +2065,48 @@ $as_echo "$ac_res" >&6; }
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_cxx_check_func
# ac_fn_c_try_run LINENO
# ----------------------
# Try to link conftest.$ac_ext and, if that succeeds, run the resulting
# ./conftest$ac_exeext. The status handed to as_fn_set_status is 0 when both
# the link and the run succeed; otherwise it is the exit status of the step
# that failed, and the test program source is copied to file descriptor 5.
# Assumes that executables *can* be run.
ac_fn_c_try_run ()
{
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
# Step 1: echo the link command to fd 5 and evaluate it. Step 2 (after the
# `&&`): echo and evaluate the produced executable in the same way.
if { { ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
(eval "$ac_link") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
{ { case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
(eval "$ac_try") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
ac_retval=0
else
# Failure path: record the exit status and dump the failed program,
# each line prefixed with "| ", to fd 5.
$as_echo "$as_me: program exited with status $ac_status" >&5
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
ac_retval=$ac_status
fi
# Remove by-products left next to the test program.
rm -rf conftest.dSYM conftest_ipa8_conftest.oo
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_c_try_run
cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
Expand Down Expand Up @@ -6807,15 +6849,128 @@ test -n "$target_alias" &&
NONENONEs,x,x, &&
program_prefix=${target_alias}-

ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for gcc __builtin_cpu_init function" >&5
$as_echo_n "checking for gcc __builtin_cpu_init function... " >&6; }
if ${ax_cv_gcc_check_x86_cpu_init+:} false; then :
$as_echo_n "(cached) " >&6
else
if test "$cross_compiling" = yes; then :
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
as_fn_error $? "cannot run test program while cross compiling
See \`config.log' for more details" "$LINENO" 5; }
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <stdlib.h>
int
main ()
{
__builtin_cpu_init ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_run "$LINENO"; then :
ax_cv_gcc_check_x86_cpu_init=yes
else
ax_cv_gcc_check_x86_cpu_init=no
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
conftest.$ac_objext conftest.beam conftest.$ac_ext
fi

fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_check_x86_cpu_init" >&5
$as_echo "$ax_cv_gcc_check_x86_cpu_init" >&6; }
if test "X$ax_cv_gcc_check_x86_cpu_init" = "Xno"; then :
as_fn_error $? "Need GCC to support X86 CPU features tests" "$LINENO" 5
fi



{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for cpu arch" >&5
$as_echo_n "checking for cpu arch... " >&6; }


case $target in
amd64-* | x86_64-*)
ARCH_CPUFLAGS="-msse4 -mpopcnt"
ARCH_CPUFLAGS="-mpopcnt"
ARCH_CXXFLAGS="-m64"



ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu


{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for x86 avx2 instruction support" >&5
$as_echo_n "checking for x86 avx2 instruction support... " >&6; }
if ${ax_cv_gcc_x86_cpu_supports_avx2+:} false; then :
$as_echo_n "(cached) " >&6
else
if test "$cross_compiling" = yes; then :
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
as_fn_error $? "cannot run test program while cross compiling
See \`config.log' for more details" "$LINENO" 5; }
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <stdlib.h>
int
main ()
{
__builtin_cpu_init ();
if (__builtin_cpu_supports("avx2"))
return 0;
return 1;
;
return 0;
}
_ACEOF
if ac_fn_c_try_run "$LINENO"; then :
ax_cv_gcc_x86_cpu_supports_avx2=yes
else
ax_cv_gcc_x86_cpu_supports_avx2=no

fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
conftest.$ac_objext conftest.beam conftest.$ac_ext
fi


fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_x86_cpu_supports_avx2" >&5
$as_echo "$ax_cv_gcc_x86_cpu_supports_avx2" >&6; }
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

if test "x$ax_cv_gcc_x86_cpu_supports_avx2" = xyes; then :

$as_echo "#define HAVE_AVX2_INSTRUCTIONS 1" >>confdefs.h

ARCH_CPUFLAGS+=" -mavx2 -mf16c"
else
ARCH_CPUFLAGS+=" -msse4"

fi


;;
aarch64*-*)
ARCH_CPUFLAGS="-march=armv8.2-a"
Expand Down
2 changes: 1 addition & 1 deletion makefile.inc.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ CXX = @CXX@
CXXCPP = @CXXCPP@
CPPFLAGS = -DFINTEGER=int @CPPFLAGS@ @OPENMP_CXXFLAGS@ @NVCC_CPPFLAGS@
CXXFLAGS = -fPIC @ARCH_CXXFLAGS@ -Wno-sign-compare @CXXFLAGS@
CPUFLAGS = -mavx2 -mf16c @ARCH_CPUFLAGS@
CPUFLAGS = @ARCH_CPUFLAGS@
LDFLAGS = @OPENMP_CXXFLAGS@ @LDFLAGS@ @NVCC_LDFLAGS@
LIBS = @BLAS_LIBS@ @LAPACK_LIBS@ @LIBS@ @NVCC_LIBS@
PYTHONCFLAGS = @PYTHON_CFLAGS@ -I@NUMPY_INCLUDE@
Expand Down

0 comments on commit 656368b

Please sign in to comment.