Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update CI to test with Vc, alpaka and CUDA #125

Merged
merged 10 commits into from
Nov 14, 2020
125 changes: 106 additions & 19 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -1,24 +1,15 @@
name: CI
on: [push, pull_request]

env:
THREADS: 4
CONFIG: RelWithDebInfo

jobs:
build-ubuntu:
amalgamation:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: vcpkg install dependencies
run: |
vcpkg install catch2 fmt
- name: cmake
run: |
mkdir build
cd build
export BOOST_ROOT=$BOOST_ROOT_1_72_0
CXX=g++-10 cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DASAN_FOR_TESTS=ON -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake
- name: build
run: cmake --build build
- name: tests
run: build/tests
- name: clone amalgamate
uses: actions/checkout@v2
with:
Expand All @@ -34,6 +25,85 @@ jobs:
name: llama.hpp
path: build/llama-amalgamated/llama.hpp

# CI job: builds and tests with g++-9 and the alpaka CUDA backend enabled
# (-DALPAKA_ACC_GPU_CUDA_ENABLE=ON) on ubuntu-latest.
# $CONFIG and $THREADS are taken from the workflow-level `env` block.
build-ubuntu-gcc9-cuda:
  env:
    # File name of the CUDA runfile installer; also used as the cache path and cache key.
    CUDA_INSTALLER: cuda_11.1.1_455.32.00_linux.run
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v2
    - name: vcpkg install dependencies
      run: |
        vcpkg install catch2 fmt tbb
    # Only the downloaded installer file is cached, not the installed toolkit.
    # The step id must match the `steps.<id>` reference in the download step below;
    # a mismatched id makes the cache-hit check always evaluate as a miss.
    - name: cache CUDA 11
      id: cache-cuda
      uses: actions/cache@v2
      with:
        path: ${{env.CUDA_INSTALLER}}
        key: ${{env.CUDA_INSTALLER}}
    # Download the installer only when it was not restored from the cache.
    - name: download CUDA 11
      if: steps.cache-cuda.outputs.cache-hit != 'true'
      run: |
        wget --no-verbose https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/$CUDA_INSTALLER
    # The toolkit itself is never cached, so the installer has to run on every job,
    # regardless of whether the installer file came from the cache or from the download.
    - name: install CUDA 11
      run: |
        sudo sh $CUDA_INSTALLER --silent --toolkit
    # alpaka is cloned without pinning a tag or commit, configured through the
    # vcpkg toolchain file and installed via the `install` target.
    - name: install alpaka
      run: |
        git clone https://github.com/alpaka-group/alpaka.git
        mkdir alpaka/build
        cd alpaka/build
        export BOOST_ROOT=$BOOST_ROOT_1_72_0
        cmake .. -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake
        sudo cmake --build . --target install
    # Vc is built with its own tests disabled (-DBUILD_TESTING=OFF) and installed the same way.
    - name: install Vc
      run: |
        git clone https://github.com/VcDevel/Vc.git
        mkdir Vc/build
        cd Vc/build
        cmake .. -DBUILD_TESTING=OFF -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake
        sudo cmake --build . --target install
    - name: cmake
      run: |
        mkdir build
        cd build
        export BOOST_ROOT=$BOOST_ROOT_1_72_0
        CXX=g++-9 cmake .. -DCMAKE_BUILD_TYPE=$CONFIG -DASAN_FOR_TESTS=ON -DALPAKA_CXX_STANDARD=17 -DALPAKA_ACC_GPU_CUDA_ENABLE=ON -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake
    - name: build tests + examples
      run: cmake --build build -j $THREADS
    - name: run tests
      run: build/tests

# CI job: configures, builds and tests the project with g++-10 on ubuntu-latest.
# $CONFIG and $THREADS are taken from the workflow-level `env` block.
build-ubuntu-gcc10:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: vcpkg install dependencies
run: |
vcpkg install catch2 fmt
# alpaka is cloned without pinning a tag or commit, configured through the
# vcpkg toolchain file (with BOOST_ROOT set from BOOST_ROOT_1_72_0) and
# installed via the `install` target under sudo.
- name: install alpaka
run: |
git clone https://github.com/alpaka-group/alpaka.git
mkdir alpaka/build
cd alpaka/build
export BOOST_ROOT=$BOOST_ROOT_1_72_0
cmake .. -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake
sudo cmake --build . --target install
# Vc is likewise cloned unpinned, built with its own tests disabled
# (-DBUILD_TESTING=OFF) and installed via the `install` target under sudo.
- name: install Vc
run: |
git clone https://github.com/VcDevel/Vc.git
mkdir Vc/build
cd Vc/build
cmake .. -DBUILD_TESTING=OFF -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake
sudo cmake --build . --target install
# Project configure step: uses g++-10, enables ASAN_FOR_TESTS and turns on
# the alpaka option ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE.
- name: cmake
run: |
mkdir build
cd build
export BOOST_ROOT=$BOOST_ROOT_1_72_0
CXX=g++-10 cmake .. -DCMAKE_BUILD_TYPE=$CONFIG -DASAN_FOR_TESTS=ON -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE=ON -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake
# Builds every default target in the build tree with $THREADS parallel jobs.
- name: build tests + examples
run: cmake --build build -j $THREADS
# Runs the test executable produced at build/tests.
- name: run tests
run: build/tests

build-windows:
runs-on: windows-latest
env:
Expand All @@ -48,13 +118,30 @@ jobs:
- name: vcpkg install dependencies
run: |
& $env:VCPKG_ROOT/vcpkg install catch2 fmt
- name: install alpaka
run: |
git clone https://github.com/alpaka-group/alpaka.git
mkdir alpaka/build
cd alpaka/build
$env:BOOST_ROOT = $env:BOOST_ROOT_1_72_0
cmake .. "-DCMAKE_TOOLCHAIN_FILE=$env:VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake"
cmake --build . --target install --config $env:CONFIG
# Vc does not currently compile with MSVC
# - name: install Vc
# run: |
# git clone https://github.com/VcDevel/Vc.git
# mkdir Vc/build
# cd Vc/build
# cmake .. -DBUILD_TESTING=OFF "-DCMAKE_TOOLCHAIN_FILE=$env:VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake"
# cmake --build . --target install --config $env:CONFIG
- name: cmake
run: |
mkdir build
cd build
$env:BOOST_ROOT = $env:BOOST_ROOT_1_72_0
cmake .. "-DCMAKE_TOOLCHAIN_FILE=$env:VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake"
- name: build
run: cmake --build build --config RelWithDebInfo
- name: tests
run: build/RelWithDebInfo/tests
cmake .. -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE=ON "-DCMAKE_TOOLCHAIN_FILE=$env:VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake"
- name: build tests + examples
run: cmake --build build -j $env:THREADS --config $env:CONFIG
- name: run tests
run: |
& build/$env:CONFIG/tests
5 changes: 2 additions & 3 deletions examples/alpaka/asyncblur/asyncblur.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,12 @@ struct BlurKernel
{
// Using SoA for the shared memory
constexpr auto sharedChunkSize = ElemsPerBlock + 2 * KernelSize;
const auto sharedMapping = llama::mapping::tree::Mapping(
const auto sharedMapping = llama::mapping::SoA(
typename View::ArrayDomain{sharedChunkSize, sharedChunkSize},
llama::Tuple{llama::mapping::tree::functor::LeafOnlyRT()},
typename View::DatumDomain{});
constexpr auto sharedMemSize = llama::sizeOf<PixelOnAcc> * sharedChunkSize * sharedChunkSize;
auto& sharedMem = alpaka::allocVar<std::byte[sharedMemSize], __COUNTER__>(acc);
return llama::View{sharedMapping, llama::Array{&sharedMem[0]}};
return llama::View(sharedMapping, llama::Array<std::byte*, 1>{&sharedMem[0]});
}
else
return int{}; // dummy
Expand Down
36 changes: 18 additions & 18 deletions examples/alpaka/nbody/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,32 +150,32 @@ struct MoveKernel
}
};

using Dim = alpaka::DimInt<1>;
using Size = std::size_t;

using Acc = alpaka::ExampleDefaultAcc<Dim, Size>;
// using Acc = alpaka::AccGpuCudaRt<Dim, Size>;
// using Acc = alpaka::AccCpuSerial<Dim, Size>;

using DevHost = alpaka::DevCpu;
using DevAcc = alpaka::Dev<Acc>;
using PltfHost = alpaka::Pltf<DevHost>;
using PltfAcc = alpaka::Pltf<DevAcc>;
using Queue = alpaka::Queue<DevAcc, alpaka::Blocking>;

constexpr std::size_t hardwareThreads = 2; // relevant for OpenMP2Threads
using Distribution = common::ThreadsElemsDistribution<Acc, BLOCK_SIZE, hardwareThreads>;
constexpr std::size_t elemCount = Distribution::elemCount;
constexpr std::size_t threadCount = Distribution::threadCount;

int main()
{
using Dim = alpaka::DimInt<1>;
using Size = std::size_t;

using Acc = alpaka::ExampleDefaultAcc<Dim, Size>;
// using Acc = alpaka::AccGpuCudaRt<Dim, Size>;
// using Acc = alpaka::AccCpuSerial<Dim, Size>;

using DevHost = alpaka::DevCpu;
using DevAcc = alpaka::Dev<Acc>;
using PltfHost = alpaka::Pltf<DevHost>;
using PltfAcc = alpaka::Pltf<DevAcc>;
using Queue = alpaka::Queue<DevAcc, alpaka::Blocking>;
const DevAcc devAcc(alpaka::getDevByIdx<PltfAcc>(0u));
const DevHost devHost(alpaka::getDevByIdx<PltfHost>(0u));
Queue queue(devAcc);

// NBODY
constexpr std::size_t hardwareThreads = 2; // relevant for OpenMP2Threads
using Distribution = common::ThreadsElemsDistribution<Acc, BLOCK_SIZE, hardwareThreads>;
constexpr std::size_t elemCount = Distribution::elemCount;
constexpr std::size_t threadCount = Distribution::threadCount;
constexpr FP ts = 0.0001;

// LLAMA
const auto arrayDomain = llama::ArrayDomain{PROBLEM_SIZE};

const auto mapping = [&] {
Expand Down
2 changes: 1 addition & 1 deletion include/llama/Array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ namespace llama

LLAMA_FN_HOST_ACC_INLINE constexpr friend auto operator+(const Array& a, const Array& b) -> Array
{
Array temp;
Array temp{};
for (std::size_t i = 0; i < N; ++i)
temp[i] = a[i] + b[i];
return temp;
Expand Down