Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to clang-format 12 #202

Merged
merged 2 commits into from
Apr 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@ DisableFormat: false
AccessModifierOffset: -4
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
#AlignConsecutiveBitFields: false
AlignConsecutiveBitFields: false
AlignConsecutiveDeclarations: false
AlignConsecutiveMacros: false
AlignEscapedNewlines: Right
AlignOperands: false #DontAlign
AlignOperands: DontAlign
AlignTrailingComments: false
AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
#AllowShortEnumsOnASingleLine: false
AllowShortEnumsOnASingleLine: false
AllowShortFunctionsOnASingleLine: None
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: All
Expand All @@ -26,7 +26,7 @@ AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: false
BinPackParameters: false
#BitFieldColonSpacing: Both
BitFieldColonSpacing: Both
BreakBeforeBinaryOperators: All
BreakBeforeBraces: Allman
BreakBeforeTernaryOperators: true
Expand All @@ -48,9 +48,9 @@ IncludeBlocks: Regroup
IncludeCategories:
IncludeIsMainRegex: '(Test)?$'
IncludeIsMainSourceRegex: ''
#IndentCaseBlocks: true
IndentCaseBlocks: true
IndentCaseLabels: false
#IndentExternBlock: AfterExternBlock
IndentExternBlock: AfterExternBlock
IndentGotoLabels: true
IndentPPDirectives: AfterHash
IndentWidth: 4
Expand All @@ -60,7 +60,6 @@ MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 2
NamespaceIndentation: All
#OperandAlignmentStyle: Align
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: DoozyX/clang-format-lint-action@v0.11
- uses: DoozyX/clang-format-lint-action@v0.12
with:
exclude: './thirdparty'
clangFormatVersion: 11
clangFormatVersion: 12

amalgamation:
runs-on: ubuntu-latest
Expand Down
3 changes: 2 additions & 1 deletion examples/alpaka/asyncblur/asyncblur.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ struct BlurKernel
{
const auto ti = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);

[[maybe_unused]] auto sharedView = [&] {
[[maybe_unused]] auto sharedView = [&]
{
if constexpr (SHARED)
{
// Using SoA for the shared memory
Expand Down
15 changes: 10 additions & 5 deletions examples/alpaka/nbody/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,15 @@ struct UpdateKernel
template <typename Acc, typename View>
LLAMA_FN_HOST_ACC_INLINE void operator()(const Acc& acc, View particles) const
{
auto sharedView = [&] {
auto sharedView = [&]
{
// if there is only 1 thread per block, use stack instead of shared memory
if constexpr (BlockSize == 1)
return llama::allocViewStack<View::ArrayDomain::rank, typename View::DatumDomain>();
else
{
constexpr auto sharedMapping = [] {
constexpr auto sharedMapping = []
{
constexpr auto arrayDomain = llama::ArrayDomain{BlockSize};
if constexpr (MappingSM == AoS)
return llama::mapping::AoS{arrayDomain, Particle{}};
Expand All @@ -176,7 +178,8 @@ struct UpdateKernel
const auto tbi = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0];

// TODO: we could optimize here, because only velocity is ever updated
auto pi = [&] {
auto pi = [&]
{
constexpr auto arrayDomain = llama::ArrayDomain{Elems};
constexpr auto mapping
= llama::mapping::SoA<typename View::ArrayDomain, typename View::DatumDomain, false>{arrayDomain};
Expand Down Expand Up @@ -242,7 +245,8 @@ void run(std::ostream& plotFile)
using PltfAcc = alpaka::Pltf<DevAcc>;
using Queue = alpaka::Queue<DevAcc, alpaka::Blocking>;

auto mappingName = [](int m) -> std::string {
auto mappingName = [](int m) -> std::string
{
if (m == 0)
return "AoS";
if (m == 1)
Expand All @@ -258,7 +262,8 @@ void run(std::ostream& plotFile)
const DevHost devHost(alpaka::getDevByIdx<PltfHost>(0u));
Queue queue(devAcc);

auto mapping = [] {
auto mapping = []
{
const auto arrayDomain = llama::ArrayDomain{PROBLEM_SIZE};
if constexpr (MappingGM == AoS)
return llama::mapping::AoS{arrayDomain, Particle{}};
Expand Down
3 changes: 2 additions & 1 deletion examples/alpaka/vectoradd/vectoradd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ try
// LLAMA
const auto arrayDomain = llama::ArrayDomain{PROBLEM_SIZE};

const auto mapping = [&] {
const auto mapping = [&]
{
if constexpr (MAPPING == 0)
return llama::mapping::AoS{arrayDomain, Vector{}};
if constexpr (MAPPING == 1)
Expand Down
3 changes: 2 additions & 1 deletion examples/bufferguard/bufferguard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ void run(const std::string& mappingName)
std::cout << "\nView 2:\n";
printView(view2, rows, cols);

auto copyBlobs = [&](auto& srcView, auto& dstView, auto srcBlobs, auto dstBlobs) {
auto copyBlobs = [&](auto& srcView, auto& dstView, auto srcBlobs, auto dstBlobs)
{
static_assert(srcBlobs.size() == dstBlobs.size());
for (auto i = 0; i < srcBlobs.size(); i++)
{
Expand Down
3 changes: 2 additions & 1 deletion examples/heatequation/heatequation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ try
auto uNext = llama::allocView(mapping);
auto uCurr = llama::allocView(mapping);

auto run = [&](std::string_view updateName, auto update) {
auto run = [&](std::string_view updateName, auto update)
{
// init
for (uint32_t i = 0; i < extent; i++)
uCurr[i] = exactSolution(i * dx, 0.0);
Expand Down
65 changes: 42 additions & 23 deletions examples/nbody/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ namespace usellama
template <int Mapping, bool UseAccumulator, std::size_t AoSoALanes = 8 /*AVX2*/>
auto main(std::ostream& plotFile) -> int
{
auto mappingName = [](int m) -> std::string {
auto mappingName = [](int m) -> std::string
{
if (m == 0)
return "AoS";
if (m == 1)
Expand All @@ -127,7 +128,8 @@ namespace usellama
title += " Acc";
std::cout << title << "\n";
Stopwatch watch;
auto mapping = [&] {
auto mapping = [&]
{
const auto arrayDomain = llama::ArrayDomain{PROBLEM_SIZE};
if constexpr (Mapping == 0)
return llama::mapping::AoS{arrayDomain, Particle{}};
Expand All @@ -149,14 +151,16 @@ namespace usellama
if constexpr (DUMP_MAPPING)
std::ofstream(title + ".svg") << llama::toSvg(mapping);

auto tmapping = [&] {
auto tmapping = [&]
{
if constexpr (TRACE)
return llama::mapping::Trace{std::move(mapping)};
else
return std::move(mapping);
}();

auto hmapping = [&] {
auto hmapping = [&]
{
if constexpr (HEATMAP)
return llama::mapping::Heatmap{std::move(tmapping)};
else
Expand Down Expand Up @@ -730,7 +734,8 @@ namespace manualAoSoA_manualAVX
const __m256 distSqr
= _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(vEPS2, xdistanceSqr), ydistanceSqr), zdistanceSqr);
const __m256 distSixth = _mm256_mul_ps(_mm256_mul_ps(distSqr, distSqr), distSqr);
const __m256 invDistCube = [&] {
const __m256 invDistCube = [&]
{
if constexpr (ALLOW_RSQRT)
{
const __m256 r = _mm256_rsqrt_ps(distSixth);
Expand Down Expand Up @@ -1019,7 +1024,8 @@ namespace manualAoSoA_Vc
const vec zdistanceSqr = zdistance * zdistance;
const vec distSqr = EPS2 + xdistanceSqr + ydistanceSqr + zdistanceSqr;
const vec distSixth = distSqr * distSqr * distSqr;
const vec invDistCube = [&] {
const vec invDistCube = [&]
{
if constexpr (ALLOW_RSQRT)
{
const vec r = Vc::rsqrt(distSixth);
Expand Down Expand Up @@ -1383,25 +1389,33 @@ try

int r = 0;
using namespace boost::mp11;
mp_for_each<mp_iota_c<5>>([&](auto i) {
// only AoSoA (3) needs lanes
using Lanes
= std::conditional_t<decltype(i)::value == 3, mp_list_c<std::size_t, 8, 16>, mp_list_c<std::size_t, 0>>;
mp_for_each<Lanes>([&, i](auto lanes) {
mp_for_each<mp_list_c<bool, false, true>>([&, i](auto useAccumulator) {
r += usellama::main<decltype(i)::value, decltype(useAccumulator)::value, decltype(lanes)::value>(
plotFile);
});
mp_for_each<mp_iota_c<5>>(
[&](auto i)
{
// only AoSoA (3) needs lanes
using Lanes
= std::conditional_t<decltype(i)::value == 3, mp_list_c<std::size_t, 8, 16>, mp_list_c<std::size_t, 0>>;
mp_for_each<Lanes>(
[&, i](auto lanes)
{
mp_for_each<mp_list_c<bool, false, true>>(
[&, i](auto useAccumulator) {
r += usellama::
main<decltype(i)::value, decltype(useAccumulator)::value, decltype(lanes)::value>(
plotFile);
});
});
});
});
r += manualAoS::main<false>(plotFile);
r += manualAoS::main<true>(plotFile);
r += manualSoA::main<false>(plotFile);
r += manualSoA::main<true>(plotFile);
mp_for_each<mp_list_c<std::size_t, 8, 16>>([&](auto lanes) {
r += manualAoSoA::main<false, false, decltype(lanes)::value>(plotFile);
r += manualAoSoA::main<true, false, decltype(lanes)::value>(plotFile);
});
mp_for_each<mp_list_c<std::size_t, 8, 16>>(
[&](auto lanes)
{
r += manualAoSoA::main<false, false, decltype(lanes)::value>(plotFile);
r += manualAoSoA::main<true, false, decltype(lanes)::value>(plotFile);
});
// r += manualAoSoA::main<false, true>(plotFile);
// r += manualAoSoA::main<true, true>(plotFile);
#ifdef __AVX2__
Expand All @@ -1417,9 +1431,14 @@ try
{
if (useUpdate1 && tiled)
continue;
mp_for_each<mp_list_c<bool, false, true>>([&](auto useAccumulator) {
r += manualAoSoA_Vc::main<decltype(useAccumulator)::value>(plotFile, threads, useUpdate1, tiled);
});
mp_for_each<mp_list_c<bool, false, true>>(
[&](auto useAccumulator) {
r += manualAoSoA_Vc::main<decltype(useAccumulator)::value>(
plotFile,
threads,
useUpdate1,
tiled);
});
}
#endif

Expand Down
19 changes: 12 additions & 7 deletions examples/nbody_benchmark/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ void run(std::ostream& plotFile)

constexpr FP ts = 0.0001f;

auto mapping = [&] {
auto mapping = [&]
{
const auto arrayDomain = llama::ArrayDomain{PROBLEM_SIZE};
if constexpr (Mapping == 0)
return llama::mapping::AoS{arrayDomain, Particle{}};
Expand Down Expand Up @@ -155,13 +156,17 @@ try
plotFile.exceptions(std::ios::badbit | std::ios::failbit);
plotFile << "\"alignment\"\t\"AoS\"\t\"SoA\"\t\"SoA MB\"\n";

mp_for_each<mp_iota_c<28>>([&](auto ae) {
mp_for_each<mp_list_c<std::size_t, 0, 1, 2>>([&](auto m) {
constexpr auto mapping = decltype(m)::value;
constexpr auto alignment = std::size_t{1} << decltype(ae)::value;
run<mapping, alignment>(plotFile);
mp_for_each<mp_iota_c<28>>(
[&](auto ae)
{
mp_for_each<mp_list_c<std::size_t, 0, 1, 2>>(
[&](auto m)
{
constexpr auto mapping = decltype(m)::value;
constexpr auto alignment = std::size_t{1} << decltype(ae)::value;
run<mapping, alignment>(plotFile);
});
});
});

std::cout << "Plot with: ./nbody.sh\n";
std::ofstream{"nbody.sh"} << fmt::format(
Expand Down
3 changes: 2 additions & 1 deletion examples/vectoradd/vectoradd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ namespace usellama
std::cout << "\nLLAMA\n";
Stopwatch watch;

const auto mapping = [&] {
const auto mapping = [&]
{
const auto arrayDomain = llama::ArrayDomain{PROBLEM_SIZE};
if constexpr (MAPPING == 0)
return llama::mapping::AoS{arrayDomain, Vector{}};
Expand Down
Loading