diff --git a/CMakeLists.txt b/CMakeLists.txt index 607691fa0e..0a3de3831a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,6 +156,7 @@ if (LLAMA_BUILD_EXAMPLES) # general examples add_subdirectory("examples/vectoradd") add_subdirectory("examples/nbody") + add_subdirectory("examples/nbody_code_comp") add_subdirectory("examples/heatequation") add_subdirectory("examples/viewcopy") add_subdirectory("examples/bufferguard") diff --git a/examples/nbody_code_comp/CMakeLists.txt b/examples/nbody_code_comp/CMakeLists.txt new file mode 100644 index 0000000000..3dead4d77b --- /dev/null +++ b/examples/nbody_code_comp/CMakeLists.txt @@ -0,0 +1,16 @@ +# Copyright 2024 Bernhard Manfred Gruber +# SPDX-License-Identifier: MPL-2.0 + +cmake_minimum_required (VERSION 3.18.3) + +project(llama-nbody-baseline CXX) +add_executable(${PROJECT_NAME} nbody_baseline.cpp) +target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20) + +project(llama-nbody-ported CXX) +if (NOT TARGET llama::llama) + find_package(llama CONFIG REQUIRED) +endif() +add_executable(${PROJECT_NAME} nbody_ported.cpp) +target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20) +target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama) diff --git a/examples/nbody_code_comp/README.md b/examples/nbody_code_comp/README.md new file mode 100644 index 0000000000..f3b98a851b --- /dev/null +++ b/examples/nbody_code_comp/README.md @@ -0,0 +1,3 @@ +These code examples are intended to show the difficulty of porting a simple code to LLAMA. +`nbody_baseline.cpp` contains a simple n-body simulation using an AoS layout. +`nbody_ported.cpp` is the corresponding LLAMA port with the same layout. diff --git a/examples/nbody_code_comp/nbody_baseline.cpp b/examples/nbody_code_comp/nbody_baseline.cpp new file mode 100644 index 0000000000..fdf742ba6d --- /dev/null +++ b/examples/nbody_code_comp/nbody_baseline.cpp @@ -0,0 +1,90 @@ +// Copyright 2024 Bernhard Manfred Gruber +// SPDX-License-Identifier: MPL-2.0 + +// clang-format off + +#include +#include +#include + +using FP = float; +constexpr FP timestep = 0.0001f, eps2 = 0.01f; +constexpr int steps = 5, problemSize = 64 * 1024; + +struct Vec3 { + FP x, y, z; + + auto operator*=(Vec3 v) -> Vec3& { + x *= v.x; + y *= v.y; + z *= v.z; + return *this; + } + + auto operator+=(Vec3 v) -> Vec3& { + x += v.x; + y += v.y; + z += v.z; + return *this; + } + + friend auto operator-(Vec3 a, Vec3 b) -> Vec3 { + return Vec3{a.x - b.x, a.y - b.y, a.z - b.z}; + } + + friend auto operator*(Vec3 a, FP s) -> Vec3 { + return Vec3{a.x * s, a.y * s, a.z * s}; + } +}; + +struct Particle { + Vec3 pos, vel; + FP mass; +}; + +inline void pPInteraction(Particle& pi, const Particle& pj) { + auto dist = pi.pos - pj.pos; + dist *= dist; + const auto distSqr = eps2 + dist.x + dist.y + dist.z; + const auto distSixth = distSqr * distSqr * distSqr; + const auto invDistCube = FP{1} / std::sqrt(distSixth); + const auto sts = pj.mass * timestep * invDistCube; + pi.vel += dist * sts; +} + +void update(std::span particles) { +#pragma GCC ivdep + for(int i = 0; i < problemSize; i++) { + Particle pi = particles[i]; + for(std::size_t j = 0; j < problemSize; ++j) + pPInteraction(pi, particles[j]); + particles[i].vel = pi.vel; + } +} + +void move(std::span particles) { +#pragma GCC ivdep + for(int i = 0; i < problemSize; i++) + particles[i].pos += particles[i].vel * timestep; +} + +auto main() -> int { + auto particles = std::vector(problemSize); + std::default_random_engine engine; + std::normal_distribution dist(FP{0}, FP{1}); + for(auto& p : particles) { + p.pos.x = dist(engine); + p.pos.y = dist(engine); + p.pos.z = dist(engine); + p.vel.x = dist(engine) / FP{10}; + p.vel.y = dist(engine) / FP{10}; + p.vel.z = dist(engine) / FP{10}; + p.mass = dist(engine) / FP{100}; + } + + for(int s = 0; s < steps + 1; ++s) { + update(particles); + ::move(particles); + } + return 0; +} diff --git a/examples/nbody_code_comp/nbody_ported.cpp b/examples/nbody_code_comp/nbody_ported.cpp new file mode 100644 index 0000000000..4dab1925c6 --- /dev/null +++ b/examples/nbody_code_comp/nbody_ported.cpp @@ -0,0 +1,67 @@ +// Copyright 2024 Bernhard Manfred Gruber +// SPDX-License-Identifier: MPL-2.0 + +// clang-format off + +#include "llama/llama.hpp" + +#include +#include + +using FP = float; +constexpr FP timestep = 0.0001f, eps2 = 0.01f; +constexpr int steps = 5, problemSize = 64 * 1024; + +struct Pos{}; struct Vel{}; struct X{}; struct Y{}; struct Z{}; struct Mass{}; +using V3 = llama::Record, llama::Field, llama::Field>; +using Particle = llama::Record, llama::Field, llama::Field>; + +LLAMA_FN_HOST_ACC_INLINE void pPInteraction(auto&& pi, auto&& pj) { + auto dist = pi(Pos{}) - pj(Pos{}); + dist *= dist; + const auto distSqr = eps2 + dist(X{}) + dist(Y{}) + dist(Z{}); + const auto distSixth = distSqr * distSqr * distSqr; + const auto invDistCube = FP{1} / std::sqrt(distSixth); + const auto sts = pj(Mass{}) * timestep * invDistCube; + pi(Vel{}) += dist * sts; +} + +void update(auto&& particles) { + LLAMA_INDEPENDENT_DATA + for(int i = 0; i < problemSize; i++) { + llama::One pi = particles[i]; + for(std::size_t j = 0; j < problemSize; ++j) + pPInteraction(pi, particles[j]); + particles[i](Vel{}) = pi(Vel{}); + } +} + +void move(auto&& particles) { + LLAMA_INDEPENDENT_DATA + for(int i = 0; i < problemSize; i++) + particles[i](Pos{}) += particles[i](Vel{}) * timestep; +} + +auto main() -> int { + using ArrayExtents = llama::ArrayExtentsDynamic; + const auto extents = ArrayExtents{problemSize}; + auto mapping = llama::mapping::AoS{extents}; + auto particles = llama::allocViewUninitialized(std::move(mapping)); + std::default_random_engine engine; + std::normal_distribution dist(FP{0}, FP{1}); + for(auto&& p : particles) { + p(Pos{}, X{}) = dist(engine); + p(Pos{}, Y{}) = dist(engine); + p(Pos{}, Z{}) = dist(engine); + p(Vel{}, X{}) = dist(engine) / FP{10}; + p(Vel{}, Y{}) = dist(engine) / FP{10}; + p(Vel{}, Z{}) = dist(engine) / FP{10}; + p(Mass{}) = dist(engine) / FP{100}; + } + + for(int s = 0; s < steps + 1; ++s) { + update(particles); + ::move(particles); + } + return 0; +}