Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Auto-detect CPU capabilities at runtime in Linux #42

Merged
merged 20 commits into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 5 additions & 13 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, linux-gpu-cuda]
fullopt: ["True", "False"]
exclude:
- os: linux-gpu-cuda
fullopt: "False"
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -49,23 +45,19 @@ jobs:
clang -v
fi
which h5c++
if [[ "$(uname -s)" == "Linux" && "${{ matrix.fullopt }}" != "False" ]];
if [[ "$(uname -s)" == "Linux" ]];
then
# always use PGI when fully optimizing
# but never when in compatible/low-optimization mode
# install PGI but do not source it
# the makefile will do it automatically
./scripts/install_hpc_sdk.sh </dev/null
source setup_nv_h5.sh
fi
df -h .
export PERFORMING_CONDA_BUILD=True
export BUILD_FULL_OPTIMIZATION=${{ matrix.fullopt }}
echo "======= begin env ====="
env
echo "======= end env ====="
make api && \
make main && \
make install && \
make test_binaries
# all == build (shlib,bins,tests) and install
make all
df -h .
pushd src
if [[ "$(uname -s)" == "Linux" ]];
Expand Down
89 changes: 79 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,24 +1,93 @@
.PHONY: all main api test_binaries test install clean
.PHONY: test clean all

all: api main install
# Note: This Makefile will NOT properly work with the -j option

main:
cd src && make main
PLATFORM := $(shell uname -s)
COMPILER := $(shell ($(CXX) -v 2>&1) | tr A-Z a-z )

# The `all` chains below rely on their prerequisites being processed strictly
# left to right: every variant is built inside the same src directory (with a
# `make clean` in between), so running them concurrently would corrupt the
# build. Force serial execution even when the user passes -j.
.NOTPARALLEL:

# Aggregate targets are commands, not files.
.PHONY: all_cpu_basic all_nv all_nv_avx2 all_combined

ifeq ($(PLATFORM),Darwin)
# macOS: a single build with the default toolchain.
all: api main install test_binaries

else

# Any other platform: build and install every variant in turn, then the
# combined library, and finally the test binaries.
# Note: important that all_nv is after all_cpu_basic and all_nv_avx2 for tests to work
all: all_cpu_basic all_nv_avx2 all_nv all_combined test_binaries_nv

all_cpu_basic: api_cpu_basic main_cpu_basic install_cpu_basic

all_nv: api_nv main_nv install_nv

all_nv_avx2: api_nv_avx2 main_nv_avx2 install_nv_avx2

all_combined: api_combined install_combined

endif

# Remove build products in every sub-project. The leading '-' keeps going
# when one of the sub-directories fails to clean.
clean:
	-make -C test clean
	-make -C src clean
	-make -C combined clean

########### api

# These targets are commands, not files.
.PHONY: api api_cpu_basic api_nv api_nv_avx2 api_combined

# The *_nv* recipes use the bash-only `source` builtin. make runs recipes with
# /bin/sh by default, which on dash-based systems has no `source`, so the
# environment script would be skipped silently. Run these recipes with bash.
api_nv api_nv_avx2: SHELL := bash

# Build the api target in src with whatever environment is currently active.
api:
	cd src && make api

# Variant builds: each one selects BUILD_VARIANT / BUILD_FULL_OPTIMIZATION and
# cleans src first (presumably so objects from a previous variant are not reused).
api_cpu_basic:
	export BUILD_VARIANT=cpu_basic ; export BUILD_FULL_OPTIMIZATION=False ; cd src && make clean && make api

api_nv:
	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv ; export BUILD_FULL_OPTIMIZATION=False ; cd src && make clean && make api

api_nv_avx2:
	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv_avx2 ; export BUILD_FULL_OPTIMIZATION=True ; cd src && make clean && make api

# Build the api target of the combined sub-project.
api_combined:
	cd combined && make api

########### main

# These targets are commands, not files.
.PHONY: main main_cpu_basic main_nv main_nv_avx2

# The *_nv* recipes use the bash-only `source` builtin. make runs recipes with
# /bin/sh by default, which on dash-based systems has no `source`, so the
# environment script would be skipped silently. Run these recipes with bash.
main_nv main_nv_avx2: SHELL := bash

# Build the main target in src with whatever environment is currently active.
main:
	cd src && make main

# Variant builds: same BUILD_VARIANT / BUILD_FULL_OPTIMIZATION settings as the
# matching api_* targets, without cleaning src.
main_cpu_basic:
	export BUILD_VARIANT=cpu_basic ; export BUILD_FULL_OPTIMIZATION=False ; cd src && make main

main_nv:
	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv ; export BUILD_FULL_OPTIMIZATION=False ; cd src && make main

main_nv_avx2:
	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv_avx2 ; export BUILD_FULL_OPTIMIZATION=True ; cd src && make main

########### install

# These targets are commands, not files.
.PHONY: install install_cpu_basic install_nv install_nv_avx2 install_combined

# The *_nv* recipes use the bash-only `source` builtin. make runs recipes with
# /bin/sh by default, which on dash-based systems has no `source`, so the
# environment script would be skipped silently. Run these recipes with bash.
install_nv install_nv_avx2: SHELL := bash

# Run the install target in src with whatever environment is currently active.
install:
	cd src && make install

# Variant installs: same BUILD_VARIANT / BUILD_FULL_OPTIMIZATION settings as
# the matching api_* / main_* targets.
install_cpu_basic:
	export BUILD_VARIANT=cpu_basic ; export BUILD_FULL_OPTIMIZATION=False ; cd src && make install

install_nv:
	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv ; export BUILD_FULL_OPTIMIZATION=False ; cd src && make install

install_nv_avx2:
	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv_avx2 ; export BUILD_FULL_OPTIMIZATION=True ; cd src && make install

# Run the install target of the combined sub-project.
install_combined:
	cd combined && make install

########### test

# These targets are commands, not files (`test` is also a directory name).
.PHONY: test_binaries test_binaries_nv test

# test_binaries_nv uses the bash-only `source` builtin. make runs recipes with
# /bin/sh by default, which on dash-based systems has no `source`, so the
# environment script would be skipped silently. Run this recipe with bash.
test_binaries_nv: SHELL := bash

# Build the test binaries in src and in test with the current environment.
test_binaries:
	cd src && make test_binaries
	cd test && make test_binaries

# Build the src test binaries as the "nv" variant.
# use the default compiler for the test subdir as it tests the combined shlib
# (each recipe line runs in its own shell, so the environment set up on the
# first line does not carry over to the second one)
test_binaries_nv:
	source ./setup_nv_h5.sh; export BUILD_VARIANT=nv ; export BUILD_FULL_OPTIMIZATION=False ; cd src && make test_binaries
	cd test && make test_binaries

# Run the test targets of src and test.
test:
	cd src && make test
	cd test && make test

install:
cd src && make install

clean:
-cd test && make clean
-cd src && make clean
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,17 @@ To restrict the number of cores used, set:

export OMP_NUM_THREADS=nthreads

## Older CPU support

On Linux platforms, Unifrac will auto-detect the CPU generation, i.e., whether it supports AVX or AVX2 vector instructions.
To force the most compatible binary variant, one can set:

export UNIFRAC_MAX_CPU=basic

To check which binary is used (Unifrac will print it to standard output at runtime), set:

export UNIFRAC_CPU_INFO=Y

## GPU support

On Linux platforms, Unifrac will run on a GPU, if one is found.
Expand Down
27 changes: 27 additions & 0 deletions combined/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# All of these targets are commands, not files.
.PHONY: all api main install clean

# Default goal: build the shared library, then install it.
all: api install

# "api" is an alias for building the shared library.
api: libssu.so

# no-op, just for completeness
main:


# Installation root: fall back to the active conda environment when PREFIX
# is unset or empty.
ifeq ($(PREFIX),)
PREFIX := $(CONDA_PREFIX)
endif

# Compile the C source as position-independent code.
libssu.o: libssu.c
	$(CC) -fPIC -c $< -o $@

# Link the object into the shared library, linking against libdl
# (presumably for dlopen-style loading of the real implementation -- confirm in libssu.c).
libssu.so: libssu.o
	$(CC) -shared -fPIC -o $@ $^ -ldl

# Install the shared library into $(PREFIX)/lib and the `ssu` and `faithpd`
# files from this directory into $(PREFIX)/bin.
# - Destination directories are created if missing.
# - Paths are quoted so a PREFIX containing whitespace works.
# - Any existing file is removed before copying, so the old file is replaced
#   rather than overwritten in place; the copy only runs if the removal succeeded.
install: libssu.so
	mkdir -p "${PREFIX}/lib" "${PREFIX}/bin"
	rm -f "${PREFIX}/lib/libssu.so" && cp libssu.so "${PREFIX}/lib/"
	rm -f "${PREFIX}/bin/ssu" && cp ssu "${PREFIX}/bin/"
	rm -f "${PREFIX}/bin/faithpd" && cp faithpd "${PREFIX}/bin/"

# Delete the build products of this directory ($(RM) is make's built-in `rm -f`).
clean:
	$(RM) libssu.so libssu.o
44 changes: 44 additions & 0 deletions combined/faithpd
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
#
# Launcher: picks the faithpd executable variant matching this CPU and
# replaces itself with it, forwarding all command-line arguments unchanged.
#
# Selection (first "flags" line of /proc/cpuinfo):
#   no avx             -> faithpd_cpu_basic
#   avx but no avx2    -> faithpd_nv
#   avx2               -> faithpd_nv_avx2
#
# Environment:
#   UNIFRAC_MAX_CPU=basic  force faithpd_cpu_basic (CPU flags are not inspected)
#   UNIFRAC_MAX_CPU=avx    never pick the avx2 variant
#   UNIFRAC_CPU_INFO=Y     print the chosen executable name to standard output
#
# The selected executable is looked up in the directory of this script.

#default
FPD=faithpd_nv

# Need at least AVX to support GPUs
if [ "${UNIFRAC_MAX_CPU}" == "basic" ]; then
  has_no_avx=1
else
  # grep -q exits 0 when the flag is present and 1 when it is absent
  grep flags /proc/cpuinfo | head -1 | grep -q avx
  has_no_avx=$?
fi

if [ "${has_no_avx}" -eq 1 ]; then
  FPD=faithpd_cpu_basic
else

  if [ "${UNIFRAC_MAX_CPU}" == "avx" ]; then
    has_no_avx2=1
  else
    grep flags /proc/cpuinfo | head -1 | grep -q avx2
    has_no_avx2=$?
  fi

  if [ "${has_no_avx2}" -eq 1 ]; then
    FPD=faithpd_nv
  else
    FPD=faithpd_nv_avx2
  fi # if "${has_no_avx2}" -eq 1

fi # if "${has_no_avx}" -eq 1

if [ "${UNIFRAC_CPU_INFO}" == "Y" ]; then
  echo "INFO (unifrac): Using executable ${FPD}"
fi

# Directory this script was invoked from; the variants are expected next to it
BASEDIR=$(dirname "$0")

# Quote the path: an install location containing whitespace would otherwise be
# split into several words and exec would fail.
exec "${BASEDIR}/${FPD}" "$@"

Loading