From 656368b5eda4d376177a3355673d217fa95000b6 Mon Sep 17 00:00:00 2001 From: Lucas Hosseini Date: Fri, 28 Jun 2019 13:50:46 +0200 Subject: [PATCH] Dynamically detect avx2 capabilities (#877) * Install scipy via pip rather than system package manager * Dynamically detect AVX2 capability * Simplify build on TravisCI --- .travis.yml | 37 +++++-- acinclude/ax_check_cpu.m4 | 7 +- acinclude/ax_gcc_x86_cpu_supports.m4 | 104 ++++++++++++++++++ configure | 157 ++++++++++++++++++++++++++- makefile.inc.in | 2 +- 5 files changed, 292 insertions(+), 15 deletions(-) create mode 100644 acinclude/ax_gcc_x86_cpu_supports.m4 diff --git a/.travis.yml b/.travis.yml index 6fe60037d7..7fc1de5daf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,8 +13,6 @@ matrix: packages: - libblas-dev - liblapack-dev - - python-numpy - - python-scipy - python-dev # - swig3.0 env: @@ -26,8 +24,6 @@ matrix: packages: - libatlas-base-dev - liblapack-dev - - python-numpy - - python-scipy - python-dev # - swig3.0 env: @@ -39,21 +35,18 @@ matrix: packages: - libopenblas-dev - liblapack-dev - - python-numpy - - python-scipy - python-dev # - swig3.0 env: - PYTHON_CFLAGS="-I/usr/include/python2.7" - os: linux + dist: xenial # To ensure clang 7 for __builtin_cpu_init(). compiler: clang addons: apt: packages: - libopenblas-dev - liblapack-dev - - python-numpy - - python-scipy - python-dev # - swig3.0 env: @@ -61,20 +54,42 @@ matrix: # NOTE: Hack, c.f.
https://github.com/travis-ci/travis-ci/issues/8613 - LD_LIBRARY_PATH="/usr/local/clang/lib" - os: osx + compiler: gcc + addons: + homebrew: + packages: + - gcc + - swig + update: true env: - - MATRIX_EVAL="brew update && brew install gcc@6 numpy scipy swig; brew link --overwrite gcc@6; export CC=gcc-6 CXX=g++-6" + - MATRIX_EVAL="which gcc; export CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++" - os: osx + compiler: clang + addons: + homebrew: + packages: + - llvm + - swig + update: true env: - - MATRIX_EVAL="brew update && brew install llvm numpy scipy swig; brew link --overwrite llvm; export CC=/usr/local/opt/llvm/bin/clang CXX=/usr/local/opt/llvm/bin/clang++" + - MATRIX_EVAL="brew link --overwrite llvm; export CC=/usr/local/opt/llvm/bin/clang CXX=/usr/local/opt/llvm/bin/clang++" - LDFLAGS="-L/usr/local/opt/llvm/lib" - CPPFLAGS="-I/usr/local/opt/llvm/include" allow_failures: - os: osx + compiler: gcc + addons: + homebrew: + packages: + - gcc + - swig + update: true env: - - MATRIX_EVAL="brew update && brew install gcc@6 numpy scipy swig; brew link --overwrite gcc@6; export CC=gcc-6 CXX=g++-6" + - MATRIX_EVAL="which gcc; export CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++" before_install: - eval "$MATRIX_EVAL" + - pip install --user scipy install: - ./.travis/install.sh diff --git a/acinclude/ax_check_cpu.m4 b/acinclude/ax_check_cpu.m4 index 0f8155afd2..d0a04a770d 100644 --- a/acinclude/ax_check_cpu.m4 +++ b/acinclude/ax_check_cpu.m4 @@ -1,4 +1,3 @@ - # serial 1 AC_DEFUN([AX_CPU_ARCH], [ @@ -8,8 +7,12 @@ AC_CANONICAL_TARGET case $target in amd64-* | x86_64-*) - ARCH_CPUFLAGS="-msse4 -mpopcnt" + ARCH_CPUFLAGS="-mpopcnt" ARCH_CXXFLAGS="-m64" + + AX_GCC_X86_CPU_SUPPORTS(avx2, + [ARCH_CPUFLAGS="$ARCH_CPUFLAGS -mavx2 -mf16c"], + [ARCH_CPUFLAGS="$ARCH_CPUFLAGS -msse4"]) ;; aarch64*-*) dnl This is an arch for Nvidia Xavier a proper detection would be nice.
diff --git a/acinclude/ax_gcc_x86_cpu_supports.m4 b/acinclude/ax_gcc_x86_cpu_supports.m4 new file mode 100644 index 0000000000..fd37b4fa14 --- /dev/null +++ b/acinclude/ax_gcc_x86_cpu_supports.m4 @@ -0,0 +1,104 @@ +# ============================================================================ +# https://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpu_supports.html +# ============================================================================ +# +# SYNOPSIS +# +# AX_GCC_X86_CPU_SUPPORTS(X86-INSTRUCTION-SET, +# [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND]) +# +# DESCRIPTION +# +# Checks if the host cpu supports X86-INSTRUCTION-SET. The instruction sets +# that can be tested are "mmx, popcnt, sse, sse2, sse3, sse4.1, sse4.2, +# sse4a, avx, avx2, avx512f, fma, fma4, bmi, bmi2". If the instruction set +# is supported by the host cpu, the C preprocessor macro +# HAVE_XXX_INSTRUCTIONS is set to 1. The XXX is up-cased instruction case +# with dot replaced by underscore. For example, the test for "sse4.2" +# would export HAVE_SSE4_2_INSTRUCTIONS=1. This macro requires gcc +# extended builtin function "__builtin_cpu_init" and +# "__builtin_cpu_supports" to detect the cpu features. It will error out +# if the compiler doesn't have these builtins. +# +# If the test for the instruction set succeeded, the hook ACTION-IF-FOUND +# would run. Otherwise the hook ACTION-IF-NOT-FOUND would run if +# specified. +# +# See also AX_CHECK_X86_FEATURES, which checks all the possible +# instruction sets and exports the corresponding CFLAGS. +# +# LICENSE +# +# Copyright (c) 2016 Felix Chern +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version.
+# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well.
+ +#serial 3 + +AC_DEFUN_ONCE([_AX_GCC_X86_CPU_INIT], + [AC_LANG_PUSH([C]) + AC_CACHE_CHECK([for gcc __builtin_cpu_init function], + [ax_cv_gcc_check_x86_cpu_init], + [AC_RUN_IFELSE( + [AC_LANG_PROGRAM([#include <stdlib.h>], + [__builtin_cpu_init ();]) + ], + [ax_cv_gcc_check_x86_cpu_init=yes], + [ax_cv_gcc_check_x86_cpu_init=no])]) + AS_IF([test "X$ax_cv_gcc_check_x86_cpu_init" = "Xno"], + [AC_MSG_ERROR([Need GCC to support X86 CPU features tests])]) +]) + +AC_DEFUN([AX_GCC_X86_CPU_SUPPORTS], + [AC_REQUIRE([AC_PROG_CC]) + AC_REQUIRE([_AX_GCC_X86_CPU_INIT]) + AC_LANG_PUSH([C]) + AS_VAR_PUSHDEF([gcc_x86_feature], [AS_TR_SH([ax_cv_gcc_x86_cpu_supports_$1])]) + AC_CACHE_CHECK([for x86 $1 instruction support], + [gcc_x86_feature], + [AC_RUN_IFELSE( + [AC_LANG_PROGRAM( [#include <stdlib.h>], + [ __builtin_cpu_init (); + if (__builtin_cpu_supports("$1")) + return 0; + return 1; + ])], + [gcc_x86_feature=yes], + [gcc_x86_feature=no] + )] + ) + AC_LANG_POP([C]) + AS_VAR_IF([gcc_x86_feature],[yes], + [AC_DEFINE( + AS_TR_CPP([HAVE_$1_INSTRUCTIONS]), + [1], + [Define if $1 instructions are supported]) + $2], + [$3] + ) + AS_VAR_POPDEF([gcc_x86_feature]) +]) diff --git a/configure b/configure index 720edffbc3..d5968a5016 100755 --- a/configure +++ b/configure @@ -2065,6 +2065,48 @@ $as_echo "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_cxx_check_func + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$?
= $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. @@ -6807,6 +6849,52 @@ test -n "$target_alias" && NONENONEs,x,x, && program_prefix=${target_alias}- +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gcc __builtin_cpu_init function" >&5 +$as_echo_n "checking for gcc __builtin_cpu_init function... " >&6; } +if ${ax_cv_gcc_check_x86_cpu_init+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h.
*/ +#include <stdlib.h> +int +main () +{ +__builtin_cpu_init (); + ; + return 0; +} + +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ax_cv_gcc_check_x86_cpu_init=yes +else + ax_cv_gcc_check_x86_cpu_init=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_check_x86_cpu_init" >&5 +$as_echo "$ax_cv_gcc_check_x86_cpu_init" >&6; } + if test "X$ax_cv_gcc_check_x86_cpu_init" = "Xno"; then : + as_fn_error $? "Need GCC to support X86 CPU features tests" "$LINENO" 5 +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cpu arch" >&5 $as_echo_n "checking for cpu arch... " >&6; } @@ -6814,8 +6902,75 @@ $as_echo_n "checking for cpu arch... " >&6; } case $target in amd64-* | x86_64-*) - ARCH_CPUFLAGS="-msse4 -mpopcnt" + ARCH_CPUFLAGS="-mpopcnt" ARCH_CXXFLAGS="-m64" + + + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for x86 avx2 instruction support" >&5 +$as_echo_n "checking for x86 avx2 instruction support... " >&6; } +if ${ax_cv_gcc_x86_cpu_supports_avx2+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h.
*/ +#include <stdlib.h> +int +main () +{ + __builtin_cpu_init (); + if (__builtin_cpu_supports("avx2")) + return 0; + return 1; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ax_cv_gcc_x86_cpu_supports_avx2=yes +else + ax_cv_gcc_x86_cpu_supports_avx2=no + +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_x86_cpu_supports_avx2" >&5 +$as_echo "$ax_cv_gcc_x86_cpu_supports_avx2" >&6; } + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test "x$ax_cv_gcc_x86_cpu_supports_avx2" = xyes; then : + +$as_echo "#define HAVE_AVX2_INSTRUCTIONS 1" >>confdefs.h + + ARCH_CPUFLAGS="$ARCH_CPUFLAGS -mavx2 -mf16c" +else + ARCH_CPUFLAGS="$ARCH_CPUFLAGS -msse4" + +fi + + ;; aarch64*-*) ARCH_CPUFLAGS="-march=armv8.2-a" diff --git a/makefile.inc.in b/makefile.inc.in index 86d704ad22..bc46e879b4 100644 --- a/makefile.inc.in +++ b/makefile.inc.in @@ -7,7 +7,7 @@ CXX = @CXX@ CXXCPP = @CXXCPP@ CPPFLAGS = -DFINTEGER=int @CPPFLAGS@ @OPENMP_CXXFLAGS@ @NVCC_CPPFLAGS@ CXXFLAGS = -fPIC @ARCH_CXXFLAGS@ -Wno-sign-compare @CXXFLAGS@ -CPUFLAGS = -mavx2 -mf16c @ARCH_CPUFLAGS@ +CPUFLAGS = @ARCH_CPUFLAGS@ LDFLAGS = @OPENMP_CXXFLAGS@ @LDFLAGS@ @NVCC_LDFLAGS@ LIBS = @BLAS_LIBS@ @LAPACK_LIBS@ @LIBS@ @NVCC_LIBS@ PYTHONCFLAGS = @PYTHON_CFLAGS@ -I@NUMPY_INCLUDE@