Skip to content

Commit

Permalink
Fix OpenMP partition kernels
Browse files: browse the repository at this point in the history
  • Loading branch information
tcojean committed May 16, 2023
1 parent 498f5cc commit b37bf44
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions omp/distributed/partition_kernels.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -60,6 +60,7 @@ void build_starting_indices(std::shared_ptr<const DefaultExecutor> exec,
auto size_per_thread =
static_cast<size_type>(ceildiv(num_ranges, num_threads));
vector<LocalIndexType> local_sizes(num_parts * num_threads, 0, {exec});
int tmp = num_empty_parts;
#pragma omp parallel
{
auto thread_id = static_cast<size_type>(omp_get_thread_num());
Expand All @@ -77,7 +78,7 @@ void build_starting_indices(std::shared_ptr<const DefaultExecutor> exec,
#pragma omp barrier
// exclusive prefix sum over local sizes
// FIXME: PGI/NVHPC(22.7) doesn't like reduction with references
#pragma omp for reduction(+ : num_empty_parts)
#pragma omp for reduction(+ : tmp)
for (comm_index_type part = 0; part < num_parts; ++part) {
LocalIndexType size{};
for (size_type thread = 0; thread < num_threads; ++thread) {
Expand All @@ -87,14 +88,15 @@ void build_starting_indices(std::shared_ptr<const DefaultExecutor> exec,
size += local_size;
}
sizes[part] = size;
num_empty_parts += size == 0 ? 1 : 0;
tmp += size == 0 ? 1 : 0;
}
// add global baselines to local ranks
for (auto range = thread_begin; range < thread_end; range++) {
auto part = range_parts[range];
ranks[range] += local_sizes[part + base];
}
}
num_empty_parts = tmp;
}

GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
Expand Down

0 comments on commit b37bf44

Please sign in to comment.