From f1fa71025c30fbf02c31cc1390ae2a8f1be5aeeb Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 2 Dec 2023 21:36:03 +0100 Subject: [PATCH 01/29] remove get_node_degrees kernel --- core/device_hooks/common_kernels.inc.cpp | 1 - core/reorder/rcm.cpp | 10 +++------- core/reorder/rcm_kernels.hpp | 21 +++++++-------------- cuda/reorder/rcm_kernels.cu | 12 +----------- dpcpp/reorder/rcm_kernels.dp.cpp | 12 +----------- hip/reorder/rcm_kernels.hip.cpp | 12 +----------- omp/reorder/rcm_kernels.cpp | 24 ++++++++---------------- reference/reorder/rcm_kernels.cpp | 21 ++++++--------------- 8 files changed, 27 insertions(+), 86 deletions(-) diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index a4a58feba78..9fa01126ce4 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -898,7 +898,6 @@ namespace rcm { GKO_STUB_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); -GKO_STUB_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); } // namespace rcm diff --git a/core/reorder/rcm.cpp b/core/reorder/rcm.cpp index 42bfff334b2..0131b49a363 100644 --- a/core/reorder/rcm.cpp +++ b/core/reorder/rcm.cpp @@ -30,7 +30,6 @@ namespace { GKO_REGISTER_OPERATION(get_permutation, rcm::get_permutation); -GKO_REGISTER_OPERATION(get_degree_of_nodes, rcm::get_degree_of_nodes); } // anonymous namespace @@ -44,12 +43,9 @@ void rcm_reorder(const matrix::SparsityCsr* mtx, { const auto exec = mtx->get_executor(); const IndexType num_rows = mtx->get_size()[0]; - array degrees{exec, mtx->get_size()[0]}; - exec->run(rcm::make_get_degree_of_nodes(num_rows, mtx->get_const_row_ptrs(), - degrees.get_data())); - exec->run(rcm::make_get_permutation( - num_rows, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - degrees.get_const_data(), permutation, inv_permutation, strategy)); + exec->run(rcm::make_get_permutation(num_rows, mtx->get_const_row_ptrs(), + mtx->get_const_col_idxs(), permutation, + inv_permutation, strategy)); } diff --git a/core/reorder/rcm_kernels.hpp b/core/reorder/rcm_kernels.hpp index c69157676bd..3ee37faba68 100644 --- a/core/reorder/rcm_kernels.hpp +++ b/core/reorder/rcm_kernels.hpp @@ -27,22 +27,15 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL(IndexType) \ - void get_permutation(std::shared_ptr exec, \ - IndexType num_vertices, const IndexType* row_ptrs, \ - const IndexType* col_idxs, const IndexType* degrees, \ - IndexType* permutation, IndexType* inv_permutation, \ +#define GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL(IndexType) \ + void get_permutation(std::shared_ptr exec, \ + IndexType num_vertices, const IndexType* row_ptrs, \ + const IndexType* col_idxs, IndexType* permutation, \ + IndexType* inv_permutation, \ gko::reorder::starting_strategy strategy) -#define GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL(IndexType) \ - void get_degree_of_nodes(std::shared_ptr exec, \ - IndexType num_vertices, \ - const IndexType* row_ptrs, IndexType* degrees) - -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL(IndexType); \ - template \ +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL(IndexType) diff --git a/cuda/reorder/rcm_kernels.cu b/cuda/reorder/rcm_kernels.cu index 13cf05927d3..5a8f542fadb 100644 --- a/cuda/reorder/rcm_kernels.cu +++ b/cuda/reorder/rcm_kernels.cu @@ -29,21 +29,11 @@ namespace cuda { namespace rcm { -template -void get_degree_of_nodes(std::shared_ptr exec, - const IndexType num_vertices, - const IndexType* const row_ptrs, - IndexType* const degrees) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); - - template void get_permutation( std::shared_ptr exec, const IndexType num_vertices, const IndexType* const row_ptrs, const IndexType* const col_idxs, - const IndexType* const degrees, IndexType* const permutation, - IndexType* const inv_permutation, + IndexType* const permutation, IndexType* const inv_permutation, const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); diff --git a/dpcpp/reorder/rcm_kernels.dp.cpp b/dpcpp/reorder/rcm_kernels.dp.cpp index 225f539b47e..4ce42826351 100644 --- a/dpcpp/reorder/rcm_kernels.dp.cpp +++ b/dpcpp/reorder/rcm_kernels.dp.cpp @@ -27,21 +27,11 @@ namespace dpcpp { namespace rcm { -template -void get_degree_of_nodes(std::shared_ptr exec, - const IndexType num_vertices, - const IndexType* const row_ptrs, - IndexType* const degrees) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); - - template void get_permutation( std::shared_ptr exec, const IndexType num_vertices, const IndexType* const row_ptrs, const IndexType* const col_idxs, - const IndexType* const degrees, IndexType* const permutation, - IndexType* const inv_permutation, + IndexType* const permutation, IndexType* const inv_permutation, const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); diff --git a/hip/reorder/rcm_kernels.hip.cpp b/hip/reorder/rcm_kernels.hip.cpp index a7fd5b27877..6f572d11b12 100644 --- a/hip/reorder/rcm_kernels.hip.cpp +++ b/hip/reorder/rcm_kernels.hip.cpp @@ -29,21 +29,11 @@ namespace hip { namespace rcm { -template -void get_degree_of_nodes(std::shared_ptr exec, - const IndexType num_vertices, - const IndexType* const row_ptrs, - IndexType* const degrees) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); - - template void get_permutation( std::shared_ptr exec, const IndexType num_vertices, const IndexType* const row_ptrs, const IndexType* const col_idxs, - const IndexType* const degrees, IndexType* const permutation, - IndexType* const inv_permutation, + IndexType* const permutation, IndexType* const inv_permutation, const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); diff --git a/omp/reorder/rcm_kernels.cpp b/omp/reorder/rcm_kernels.cpp index 36c5ce4bbbb..a2df9e863e7 100644 --- a/omp/reorder/rcm_kernels.cpp +++ b/omp/reorder/rcm_kernels.cpp @@ -54,20 +54,6 @@ namespace rcm { #define GKO_MM_PAUSE() #endif // defined __x86_64__ -template -void get_degree_of_nodes(std::shared_ptr exec, - const IndexType num_vertices, - const IndexType* const row_ptrs, - IndexType* const degrees) -{ -#pragma omp parallel for - for (IndexType i = 0; i < num_vertices; ++i) { - degrees[i] = row_ptrs[i + 1] - row_ptrs[i]; - } -} - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); - // This constant controls how many nodes can be dequeued from the // UbfsLinearQueue at once at most. Increasing it reduces lock contention and @@ -760,11 +746,17 @@ template void get_permutation(std::shared_ptr exec, const IndexType num_vertices, const IndexType* const row_ptrs, - const IndexType* const col_idxs, - const IndexType* const degrees, IndexType* const perm, + const IndexType* const col_idxs, IndexType* const perm, IndexType* const inv_perm, const gko::reorder::starting_strategy strategy) { + // compute node degrees + array degree_array{exec, static_cast(num_vertices)}; + const auto degrees = degree_array.get_data(); +#pragma omp parallel for + for (IndexType i = 0; i < num_vertices; ++i) { + degrees[i] = row_ptrs[i + 1] - row_ptrs[i]; + } // Initialize the perm to all "signal value". std::fill(perm, perm + num_vertices, perm_untouched); diff --git a/reference/reorder/rcm_kernels.cpp b/reference/reorder/rcm_kernels.cpp index 9da1a8dac60..fe6b8718ac4 100644 --- a/reference/reorder/rcm_kernels.cpp +++ b/reference/reorder/rcm_kernels.cpp @@ -37,20 +37,6 @@ namespace reference { namespace rcm { -template -void get_degree_of_nodes(std::shared_ptr exec, - const IndexType num_vertices, - const IndexType* const row_ptrs, - IndexType* const degrees) -{ - for (IndexType i = 0; i < num_vertices; ++i) { - degrees[i] = row_ptrs[i + 1] - row_ptrs[i]; - } -} - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); - - /** * Computes a level structure rooted at `root`, returning a node of minimal * degree in its last level, along with the height of the structure. @@ -198,11 +184,16 @@ void get_permutation(std::shared_ptr exec, const IndexType num_vertices, const IndexType* const row_ptrs, const IndexType* const col_idxs, - const IndexType* const degrees, IndexType* const permutation, IndexType* const inv_permutation, const gko::reorder::starting_strategy strategy) { + // compute node degrees + array degree_array{exec, static_cast(num_vertices)}; + const auto degrees = degree_array.get_data(); + for (IndexType i = 0; i < num_vertices; ++i) { + degrees[i] = row_ptrs[i + 1] - row_ptrs[i]; + } // Storing vertices left to proceess. array linear_queue(exec, num_vertices); auto linear_queue_p = linear_queue.get_data(); From 356e97c2fef8a2b3f655cf86d1d7d40b2510a9f1 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 3 Dec 2023 12:35:15 +0100 Subject: [PATCH 02/29] unify RCM source CUDA/HIP --- common/cuda_hip/reorder/rcm_kernels.hpp.inc | 12 ++++++++++++ cuda/reorder/rcm_kernels.cu | 9 +-------- hip/reorder/rcm_kernels.hip.cpp | 9 +-------- 3 files changed, 14 insertions(+), 16 deletions(-) create mode 100644 common/cuda_hip/reorder/rcm_kernels.hpp.inc diff --git a/common/cuda_hip/reorder/rcm_kernels.hpp.inc b/common/cuda_hip/reorder/rcm_kernels.hpp.inc new file mode 100644 index 00000000000..3ce9cb3b0a9 --- /dev/null +++ b/common/cuda_hip/reorder/rcm_kernels.hpp.inc @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +template +void get_permutation( + std::shared_ptr exec, const IndexType num_vertices, + const IndexType* const row_ptrs, const IndexType* const col_idxs, + IndexType* const permutation, IndexType* const inv_permutation, + const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); \ No newline at end of file diff --git a/cuda/reorder/rcm_kernels.cu b/cuda/reorder/rcm_kernels.cu index 5a8f542fadb..2a1b1fe72d6 100644 --- a/cuda/reorder/rcm_kernels.cu +++ b/cuda/reorder/rcm_kernels.cu @@ -29,14 +29,7 @@ namespace cuda { namespace rcm { -template -void get_permutation( - std::shared_ptr exec, const IndexType num_vertices, - const IndexType* const row_ptrs, const IndexType* const col_idxs, - IndexType* const permutation, IndexType* const inv_permutation, - const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); +#include "common/cuda_hip/reorder/rcm_kernels.hpp.inc" } // namespace rcm diff --git a/hip/reorder/rcm_kernels.hip.cpp b/hip/reorder/rcm_kernels.hip.cpp index 6f572d11b12..b5fd552245e 100644 --- a/hip/reorder/rcm_kernels.hip.cpp +++ b/hip/reorder/rcm_kernels.hip.cpp @@ -29,14 +29,7 @@ namespace hip { namespace rcm { -template -void get_permutation( - std::shared_ptr exec, const IndexType num_vertices, - const IndexType* const row_ptrs, const IndexType* const col_idxs, - IndexType* const permutation, IndexType* const inv_permutation, - const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); +#include "common/cuda_hip/reorder/rcm_kernels.hpp.inc" } // namespace rcm From d7bd22fcecb2d6e7766853dab480a7f1f2040afa Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 3 Dec 2023 13:22:23 +0100 Subject: [PATCH 03/29] use CUDA/HIP RCM reordering --- core/reorder/rcm.cpp | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/core/reorder/rcm.cpp b/core/reorder/rcm.cpp index 0131b49a363..8409f47d404 100644 --- a/core/reorder/rcm.cpp +++ b/core/reorder/rcm.cpp @@ -62,14 +62,13 @@ Rcm::Rcm(const Factory* factory, factory->get_executor()), parameters_{factory->get_parameters()} { - // Always execute the reordering on the cpu. - const auto is_gpu_executor = - this->get_executor() != this->get_executor()->get_master(); - auto cpu_exec = is_gpu_executor ? this->get_executor()->get_master() - : this->get_executor(); + // The reordering is not supported on DPC++, use the host instead + const auto is_dpcpp_executor = bool( + std::dynamic_pointer_cast(this->get_executor())); + auto work_exec = is_dpcpp_executor ? this->get_executor()->get_master() + : this->get_executor(); - auto adjacency_matrix = SparsityMatrix::create(cpu_exec); - array degrees; + auto adjacency_matrix = SparsityMatrix::create(work_exec); // The adjacency matrix has to be square. GKO_ASSERT_IS_SQUARE_MATRIX(args.system_matrix); @@ -77,19 +76,19 @@ Rcm::Rcm(const Factory* factory, // convert if the existing matrix is empty. if (args.system_matrix->get_size()) { auto tmp = - copy_and_convert_to(cpu_exec, args.system_matrix); + copy_and_convert_to(work_exec, args.system_matrix); // This function provided within the Sparsity matrix format removes // the diagonal elements and outputs an adjacency matrix. adjacency_matrix = tmp->to_adjacency_matrix(); } auto const size = adjacency_matrix->get_size()[0]; - permutation_ = PermutationMatrix::create(cpu_exec, size); + permutation_ = PermutationMatrix::create(work_exec, size); // To make it explicit. inv_permutation_ = nullptr; if (parameters_.construct_inverse_permutation) { - inv_permutation_ = PermutationMatrix::create(cpu_exec, size); + inv_permutation_ = PermutationMatrix::create(work_exec, size); } rcm_reorder( @@ -98,7 +97,7 @@ Rcm::Rcm(const Factory* factory, parameters_.strategy); // Copy back results to gpu if necessary. - if (is_gpu_executor) { + if (is_dpcpp_executor) { const auto gpu_exec = this->get_executor(); auto gpu_perm = share(PermutationMatrix::create(gpu_exec, size)); gpu_perm->copy_from(permutation_); @@ -153,7 +152,11 @@ std::unique_ptr Rcm::generate_impl( { GKO_ASSERT_IS_SQUARE_MATRIX(system_matrix); const auto exec = this->get_executor(); - const auto host_exec = exec->get_master(); + // The reordering is not supported on DPC++, use the host instead + const auto is_dpcpp_executor = bool( + std::dynamic_pointer_cast(this->get_executor())); + auto work_exec = is_dpcpp_executor ? this->get_executor()->get_master() + : this->get_executor(); const auto num_rows = system_matrix->get_size()[0]; using sparsity_mtx = matrix::SparsityCsr; std::unique_ptr converted; @@ -166,7 +169,7 @@ std::unique_ptr Rcm::generate_impl( using Identity = matrix::Identity; using Mtx = matrix::Csr; using Scalar = matrix::Dense; - auto conv_csr = Mtx::create(host_exec); + auto conv_csr = Mtx::create(work_exec); as>(op)->convert_to(conv_csr); if (!parameters_.skip_symmetrize) { auto scalar = initialize({one()}, exec); @@ -174,8 +177,8 @@ std::unique_ptr Rcm::generate_impl( // compute A^T + A conv_csr->transpose()->apply(scalar, id, scalar, conv_csr); } - if (exec != host_exec) { - conv_csr = gko::clone(host_exec, std::move(conv_csr)); + if (exec != work_exec) { + conv_csr = gko::clone(work_exec, std::move(conv_csr)); } nnz = conv_csr->get_num_stored_elements(); row_ptrs = conv_csr->get_const_row_ptrs(); @@ -190,13 +193,13 @@ std::unique_ptr Rcm::generate_impl( convert(system_matrix, std::complex{}); } - array permutation(host_exec, num_rows); + array permutation(work_exec, num_rows); // remove diagonal entries auto pattern = sparsity_mtx::create_const( - host_exec, gko::dim<2>{num_rows, num_rows}, - make_const_array_view(host_exec, nnz, col_idxs), - make_const_array_view(host_exec, num_rows + 1, row_ptrs)); + work_exec, gko::dim<2>{num_rows, num_rows}, + make_const_array_view(work_exec, nnz, col_idxs), + make_const_array_view(work_exec, num_rows + 1, row_ptrs)); pattern = pattern->to_adjacency_matrix(); rcm_reorder(pattern.get(), permutation.get_data(), static_cast(nullptr), parameters_.strategy); From 3345649a51021472a0a1e7ccef807647886eea56 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 5 Dec 2023 12:00:30 +0100 Subject: [PATCH 04/29] fix unsigned abs warnings --- core/test/accessor/math.cpp | 5 ++--- core/test/accessor/reduced_row_major_reference.cpp | 1 - core/test/accessor/scaled_reduced_row_major_reference.cpp | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/core/test/accessor/math.cpp b/core/test/accessor/math.cpp index 8cc49c2d405..f15644be93f 100644 --- a/core/test/accessor/math.cpp +++ b/core/test/accessor/math.cpp @@ -21,9 +21,8 @@ class RealMath : public ::testing::Test { using value_type = ValueType; }; -using RealTypes = ::testing::Types; +using RealTypes = ::testing::Types; TYPED_TEST_SUITE(RealMath, RealTypes); diff --git a/core/test/accessor/reduced_row_major_reference.cpp b/core/test/accessor/reduced_row_major_reference.cpp index 36feeca5c56..a6da6277b1d 100644 --- a/core/test/accessor/reduced_row_major_reference.cpp +++ b/core/test/accessor/reduced_row_major_reference.cpp @@ -51,7 +51,6 @@ using ReferenceTypes = ::testing::Types, std::tuple, std::tuple, - std::tuple, std::tuple, std::tuple, std::tuple, std::tuple, diff --git a/core/test/accessor/scaled_reduced_row_major_reference.cpp b/core/test/accessor/scaled_reduced_row_major_reference.cpp index 04b3c33e7be..281ae9a6735 100644 --- a/core/test/accessor/scaled_reduced_row_major_reference.cpp +++ b/core/test/accessor/scaled_reduced_row_major_reference.cpp @@ -53,7 +53,6 @@ using ReferenceTypes = ::testing::Types, std::tuple, std::tuple, - std::tuple, std::tuple, std::tuple, std::tuple, std::tuple>; From 1b151f5519875fdded0e67068059e8ee6bcd992a Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 9 Dec 2023 15:52:36 +0100 Subject: [PATCH 05/29] add RCM GPU kernel --- common/cuda_hip/reorder/rcm_kernels.hpp.inc | 494 +++++++++++++++++++- cuda/reorder/rcm_kernels.cu | 21 +- hip/reorder/rcm_kernels.hip.cpp | 21 +- 3 files changed, 525 insertions(+), 11 deletions(-) diff --git a/common/cuda_hip/reorder/rcm_kernels.hpp.inc b/common/cuda_hip/reorder/rcm_kernels.hpp.inc index 3ce9cb3b0a9..77ac37e7b3c 100644 --- a/common/cuda_hip/reorder/rcm_kernels.hpp.inc +++ b/common/cuda_hip/reorder/rcm_kernels.hpp.inc @@ -3,10 +3,494 @@ // SPDX-License-Identifier: BSD-3-Clause template -void get_permutation( - std::shared_ptr exec, const IndexType num_vertices, +array compute_node_degrees( + std::shared_ptr exec, + const IndexType* const row_ptrs, const IndexType num_rows) +{ + const auto policy = thrust_policy(exec); + array node_degrees{exec, static_cast(num_rows)}; + const auto row_ptr_zip_it = + thrust::make_zip_iterator(thrust::make_tuple(row_ptrs, row_ptrs + 1)); + thrust::transform(policy, row_ptr_zip_it, row_ptr_zip_it + num_rows, + node_degrees.get_data(), [] __device__(auto pair) { + return thrust::get<1>(pair) - thrust::get<0>(pair); + }); + return node_degrees; +} + + +template +struct components_data { + /** Mapping node -> component ID */ + array node_component; + /** Segmented storage of node IDs for each component */ + array nodes; + /** mapping entries in nodes to their component ID */ + array sorted_ids; + /** Pointers into nodes */ + array ptrs; + /** Minimum degree node for each component */ + array min_deg_node; + + components_data(std::shared_ptr exec, + size_type num_rows) + : node_component{exec, num_rows}, + nodes{exec, num_rows}, + sorted_ids{exec, num_rows}, + ptrs{exec}, + min_deg_node{exec} + {} + + void set_num_components(size_type num_components) + { + ptrs.resize_and_reset(num_components + 1); + min_deg_node.resize_and_reset(num_components); + } + + size_type get_num_components() const { return min_deg_node.get_size(); } +}; + + +template +__global__ +__launch_bounds__(default_block_size) void connected_components_attach( + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, IndexType num_rows, + IndexType* __restrict__ components) +{ + const auto row = thread::get_thread_id_flat(); + if (row >= num_rows) { + return; + } + const auto begin = row_ptrs[row]; + const auto end = row_ptrs[row + 1]; + auto parent = static_cast(row); + for (auto nz = begin; nz < end; nz++) { + const auto col = col_idxs[nz]; + parent = min(col, parent); + } + components[row] = parent; +} + + +__global__ void reset_flag(bool* finished) { *finished = true; } + + +template +__global__ +__launch_bounds__(default_block_size) void connected_components_path_compress( + IndexType num_rows, IndexType* components, bool* finished) +{ + const auto row = thread::get_thread_id_flat(); + if (row >= num_rows) { + return; + } + const auto old_value = components[row]; + const auto new_value = components[old_value]; + if (old_value != new_value && *finished) { + *finished = false; + } + components[row] = new_value; +} + + +template +struct adj_not_predicate { + __device__ __forceinline__ bool operator()(IndexType i) + { + return i == 0 || data[i - 1] != data[i]; + } + + const IndexType* data; +}; + + +template +struct node_min_degree_reduction { + __device__ __forceinline__ IndexType operator()(IndexType u, IndexType v) + { + return thrust::make_pair(degree[u], u) < thrust::make_pair(degree[v], v) + ? u + : v; + } + + const IndexType* degree; +}; + + +template +components_data compute_connected_components( + std::shared_ptr exec, const IndexType num_rows, const IndexType* const row_ptrs, const IndexType* const col_idxs, - IndexType* const permutation, IndexType* const inv_permutation, - const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; + const IndexType* const node_degrees) +{ + const auto policy = thrust_policy(exec); + components_data result{exec, static_cast(num_rows)}; + array finished_array{exec, 1}; + const auto node_component = result.node_component.get_data(); + const auto nodes = result.nodes.get_data(); + const auto finished = finished_array.get_data(); + // attach every node to its minimum neighbor + const auto num_blocks = ceildiv(num_rows, default_block_size); + connected_components_attach<<get_stream()>>>( + row_ptrs, col_idxs, num_rows, node_component); + // double pointers until all paths are loops or edges + do { + reset_flag<<<1, 1, 0, exec->get_stream()>>>(finished); + connected_components_path_compress<<get_stream()>>>( + num_rows, node_component, finished); + } while (!exec->copy_val_to_host(finished)); + // group nodes by component ID + result.sorted_ids = result.node_component; + const auto sorted_component_ids = result.sorted_ids.get_data(); + thrust::sequence(policy, nodes, nodes + num_rows, IndexType{}); + thrust::stable_sort_by_key(policy, sorted_component_ids, + sorted_component_ids + num_rows, nodes); + // find beginning of all components + auto it = thrust::make_counting_iterator(size_type{}); + const auto predicate = adj_not_predicate{sorted_component_ids}; + const auto num_components = static_cast( + thrust::count_if(policy, it, it + num_rows, predicate)); + result.set_num_components(num_components); + const auto ptrs = result.ptrs.get_data(); + const auto min_deg_node = result.min_deg_node.get_data(); + thrust::copy_if(policy, it, it + num_rows, ptrs, predicate); + // set the sentinel entry + set_element(result.ptrs, num_components, num_rows); + // find minimum degree node for each component + array component_id_array{exec, num_components}; + const auto component_ids = component_id_array.get_data(); + thrust::reduce_by_key(policy, sorted_component_ids, + sorted_component_ids + num_rows, nodes, component_ids, + min_deg_node, thrust::equal_to{}, + node_min_degree_reduction{node_degrees}); + // map component IDs to consecutive indexing + array compacted_node_component{exec, + static_cast(num_rows)}; + thrust::lower_bound(policy, component_ids, component_ids + num_components, + node_component, node_component + num_rows, + compacted_node_component.get_data()); + result.node_component = std::move(compacted_node_component); + return result; +} + + +template +struct ubfs_levels { + /** Mapping node -> level */ + array node_level; + /** Segmented list of nodes for each level */ + array nodes; + /** Pointers into nodes for each level */ + array ptrs; + + ubfs_levels(std::shared_ptr exec, size_type num_rows) + : node_level{exec, num_rows}, + nodes{exec, num_rows}, + ptrs{exec, num_rows + 1} + {} +}; + + +template +struct atomic_map {}; + + +template <> +struct atomic_map { + using type = int; +}; + + +template <> +struct atomic_map { + using type = unsigned long long; +}; + + +template +__global__ __launch_bounds__(default_block_size) void ubfs_level_kernel( + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ sources, size_type num_sources, + IndexType level, IndexType* __restrict__ node_levels, + IndexType* __restrict__ level_nodes, IndexType* __restrict__ output_ptr) +{ + using atomic_type = typename atomic_map::type; + const auto source = thread::get_thread_id_flat(); + if (source >= num_sources) { + return; + } + const auto row = sources[source]; + const auto begin = row_ptrs[row]; + const auto end = row_ptrs[row + 1]; + atomic_type unsigned_unattached{}; + const auto unattached = invalid_index(); + memcpy(&unsigned_unattached, &unattached, sizeof(IndexType)); + auto parent = static_cast(row); + for (auto nz = begin; nz < end; nz++) { + const auto col = col_idxs[nz]; + if (node_levels[col] == unattached && + atomicCAS(reinterpret_cast(node_levels + col), + unsigned_unattached, + static_cast(level)) == unsigned_unattached) { + const auto output_pos = + atomicAdd(reinterpret_cast(output_ptr), 1); + level_nodes[output_pos] = col; + } + } +} + + +template +IndexType ubfs(std::shared_ptr exec, + const IndexType num_rows, const IndexType* const row_ptrs, + const IndexType* const col_idxs, ubfs_levels& levels) +{ + const auto policy = thrust_policy(exec); + const auto node_levels = levels.node_level.get_data(); + const auto level_nodes = levels.nodes.get_data(); + IndexType level{}; + IndexType level_begin{}; + thrust::fill_n(policy, node_levels, num_rows, invalid_index()); + auto level_end_ptr = levels.ptrs.get_data() + level + 1; + auto level_end = exec->copy_val_to_host(level_end_ptr); + const auto level_order_level_it = + thrust::make_permutation_iterator(node_levels, level_nodes); + thrust::fill_n(policy, level_order_level_it, level_end, 0); + while (level_end > level_begin) { + level++; + level_end_ptr++; + const auto level_size = level_end - level_begin; + const auto num_blocks = ceildiv(level_size, default_block_size); + // copy end of previous level pointer to atomic counter for this level + exec->copy(1, level_end_ptr - 1, level_end_ptr); + ubfs_level_kernel<<get_stream()>>>( + row_ptrs, col_idxs, level_nodes + level_begin, level_size, level, + node_levels, level_nodes, level_end_ptr); + level_begin = + std::exchange(level_end, exec->copy_val_to_host(level_end_ptr)); + } + return level; +} + + +template +struct node_max_level_min_degree_reduction { + __device__ __forceinline__ IndexType operator()(IndexType u, IndexType v) + { + // return node with larger level (smaller degree or ID as tie-breakers) + return thrust::make_tuple(level[v], degree[u], u) < + thrust::make_tuple(level[u], degree[v], v) + ? u + : v; + } + + const IndexType* degree; + const IndexType* level; +}; + + +template +struct node_compare_functor { + __device__ void operator()(IndexType component) + { + const auto new_node = candidate_node[component]; + const auto new_level = level[new_node]; + const auto old_level = best_level[component]; + // if the candidate has a larger level, swap it + if (new_level > old_level) { + *improved = true; + best_node[component] = new_node; + best_level[component] = new_level; + } + } + + const IndexType* candidate_node; + const IndexType* level; + IndexType* best_node; + IndexType* best_level; + bool* improved; +}; + + +template +void find_pseudo_peripheral_nodes(std::shared_ptr exec, + const IndexType num_rows, + const IndexType* const row_ptrs, + const IndexType* const col_idxs, + const IndexType* const node_degrees, + const components_data& components, + ubfs_levels& levels) +{ + const auto policy = thrust_policy(exec); + const auto num_components = components.get_num_components(); + array candidate_node_array{exec, num_components}; + array best_level_array{exec, num_components}; + array improved{exec, 1}; + const auto candidate_nodes = candidate_node_array.get_data(); + const auto best_levels = best_level_array.get_data(); + const auto level_nodes = levels.nodes.get_data(); + const auto node_levels = levels.node_level.get_const_data(); + const auto component_nodes = components.nodes.get_const_data(); + const auto sorted_component_ids = components.sorted_ids.get_const_data(); + const auto reduction = node_max_level_min_degree_reduction{ + node_degrees, node_levels}; + const auto discard_it = thrust::discard_iterator{}; + const auto eq_op = thrust::equal_to{}; + const auto counting_it = thrust::make_counting_iterator(IndexType{}); + const auto compare_fn = node_compare_functor{ + candidate_nodes, node_levels, level_nodes, best_levels, + improved.get_data()}; + // initialize best_levels to the initial nodes at level 0 + thrust::fill_n(policy, best_levels, num_components, IndexType{}); + do { + ubfs(exec, num_rows, row_ptrs, col_idxs, levels); + // write a last-level node of min degree to candidate_nodes for each + // component + thrust::reduce_by_key(policy, sorted_component_ids, + sorted_component_ids + num_rows, component_nodes, + discard_it, candidate_nodes, eq_op, reduction); + set_element(improved, 0, false); + thrust::for_each_n(policy, counting_it, num_components, compare_fn); + } while (get_element(improved, 0)); + // the best nodes stay on the 0th level +} + + +template +__global__ __launch_bounds__(default_block_size) void ubfs_min_neighbor_kernel( + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, const IndexType level_begin, + const IndexType level_end, const IndexType* __restrict__ level_nodes, + const IndexType* __restrict__ inv_permutation, + const IndexType* __restrict__ node_levels, + IndexType* __restrict__ min_neighbors) +{ + const auto target = thread::get_thread_id_flat() + level_begin; + if (target >= level_end) { + return; + } + const auto row = level_nodes[target]; + const auto begin = row_ptrs[row]; + const auto end = row_ptrs[row + 1]; + const auto cur_level = node_levels[row]; + auto min_neighbor = device_numeric_limits::max; + for (auto nz = begin; nz < end; nz++) { + const auto col = col_idxs[nz]; + const auto neighbor_level = node_levels[col]; + if (neighbor_level < cur_level) { + min_neighbor = min(min_neighbor, inv_permutation[col]); + } + } + min_neighbors[target] = min_neighbor; +} + + +template +__global__ __launch_bounds__(default_block_size) void build_permutation_level( + const IndexType level_begin, const IndexType level_end, + const IndexType* const level_nodes, IndexType* const inv_permutation) +{ + const auto target = thread::get_thread_id_flat() + level_begin; + if (target >= level_end) { + return; + } + inv_permutation[level_nodes[target]] = target; +} + + +template +void sort_levels(std::shared_ptr exec, + const IndexType num_rows, const IndexType* const row_ptrs, + const IndexType* const col_idxs, + const IndexType* const degrees, const IndexType num_levels, + const components_data& comps, + ubfs_levels& levels, IndexType* const permutation) +{ + const auto policy = thrust_policy(exec); + array inv_permutation_array{exec, + static_cast(num_rows)}; + array key_array{exec, static_cast(num_rows)}; + levels.ptrs.set_executor(exec->get_master()); + const auto level_ptrs = levels.ptrs.get_data(); + const auto level_nodes = levels.nodes.get_data(); + const auto node_levels = levels.node_level.get_const_data(); + const auto inv_permutation = inv_permutation_array.get_data(); + const auto key = key_array.get_data(); + const auto it = thrust::make_counting_iterator(IndexType{}); + const auto inv_permutation_it = + thrust::make_permutation_iterator(inv_permutation, level_nodes); + thrust::fill_n(policy, inv_permutation, num_rows, + std::numeric_limits::max()); + // fill inverse permutation for first level + const auto num_components = level_ptrs[1]; + thrust::copy_n(policy, it, num_components, inv_permutation_it); + for (IndexType lvl = 1; lvl < num_levels; lvl++) { + const auto level_begin = level_ptrs[lvl]; + const auto level_end = level_ptrs[lvl + 1]; + const auto level_size = level_end - level_begin; + // sort by node ID for determinism + thrust::sort(policy, level_nodes + level_begin, + level_nodes + level_end); + // sort by degree as tie-breaker + thrust::copy_n(policy, + thrust::make_permutation_iterator( + degrees, level_nodes + level_begin), + level_size, key + level_begin); + thrust::stable_sort_by_key(policy, key + level_begin, key + level_end, + level_nodes + level_begin); + // sort by minimum parent in CM order + const auto num_blocks = ceildiv(level_size, default_block_size); + ubfs_min_neighbor_kernel<<get_stream()>>>( + row_ptrs, col_idxs, level_begin, level_end, level_nodes, + inv_permutation, node_levels, key); + thrust::stable_sort_by_key(policy, key + level_begin, key + level_end, + level_nodes + level_begin); + // fill inverse permutation for next level + thrust::copy_n(policy, it + level_begin, level_size, + inv_permutation_it + level_begin); + } + // sort by component + thrust::copy_n(policy, + thrust::make_permutation_iterator( + comps.node_component.get_const_data(), level_nodes), + num_rows, key); + thrust::stable_sort_by_key(policy, key, key + num_rows, level_nodes); + thrust::copy_n(policy, level_nodes, num_rows, permutation); +} + + +template +void get_permutation(std::shared_ptr exec, + const IndexType num_rows, const IndexType* const row_ptrs, + const IndexType* const col_idxs, + IndexType* const permutation, + IndexType* const inv_permutation, + const gko::reorder::starting_strategy strategy) +{ + if (num_rows == 0) { + return; + } + const auto degrees = compute_node_degrees(exec, row_ptrs, num_rows); + auto comps = compute_connected_components( + exec, num_rows, row_ptrs, col_idxs, degrees.get_const_data()); + const auto num_components = comps.get_num_components(); + ubfs_levels levels{exec, static_cast(num_rows)}; + set_element(levels.ptrs, 0, 0); + set_element(levels.ptrs, 1, num_components); + if (strategy == reorder::starting_strategy::pseudo_peripheral) { + find_pseudo_peripheral_nodes(exec, num_rows, row_ptrs, col_idxs, + degrees.get_const_data(), comps, levels); + } + const auto num_levels = ubfs(exec, num_rows, row_ptrs, col_idxs, levels); + sort_levels(exec, num_rows, row_ptrs, col_idxs, degrees.get_const_data(), + num_levels, comps, levels, permutation); + thrust::reverse(thrust_policy(exec), permutation, permutation + num_rows); +} -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); \ No newline at end of file +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); diff --git a/cuda/reorder/rcm_kernels.cu b/cuda/reorder/rcm_kernels.cu index 2a1b1fe72d6..2dcc1ffe4cc 100644 --- a/cuda/reorder/rcm_kernels.cu +++ b/cuda/reorder/rcm_kernels.cu @@ -5,6 +5,18 @@ #include "core/reorder/rcm_kernels.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + #include #include #include @@ -13,9 +25,9 @@ #include -#include "cuda/base/math.hpp" -#include "cuda/base/types.hpp" -#include "cuda/components/prefix_sum.cuh" +#include "core/base/array_access.hpp" +#include "cuda/base/thrust.cuh" +#include "cuda/components/thread_ids.cuh" namespace gko { @@ -29,6 +41,9 @@ namespace cuda { namespace rcm { +constexpr int default_block_size = 512; + + #include "common/cuda_hip/reorder/rcm_kernels.hpp.inc" diff --git a/hip/reorder/rcm_kernels.hip.cpp b/hip/reorder/rcm_kernels.hip.cpp index b5fd552245e..40ecef9cb06 100644 --- a/hip/reorder/rcm_kernels.hip.cpp +++ b/hip/reorder/rcm_kernels.hip.cpp @@ -5,6 +5,18 @@ #include "core/reorder/rcm_kernels.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + #include #include #include @@ -13,9 +25,9 @@ #include -#include "hip/base/math.hip.hpp" -#include "hip/base/types.hip.hpp" -#include "hip/components/prefix_sum.hip.hpp" +#include "core/base/array_access.hpp" +#include "hip/base/thrust.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" namespace gko { @@ -29,6 +41,9 @@ namespace hip { namespace rcm { +constexpr int default_block_size = 512; + + #include "common/cuda_hip/reorder/rcm_kernels.hpp.inc" From 8e4ee0fb033f9883e9b45fe97fd4fd86e5d106fe Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 11 Dec 2023 09:39:34 +0100 Subject: [PATCH 06/29] tests --- omp/test/CMakeLists.txt | 1 - omp/test/reorder/CMakeLists.txt | 1 - test/reorder/CMakeLists.txt | 1 + .../rcm_kernels.cpp => test/reorder/rcm.cpp | 92 +++++++++---------- 4 files changed, 44 insertions(+), 51 deletions(-) delete mode 100644 omp/test/reorder/CMakeLists.txt rename omp/test/reorder/rcm_kernels.cpp => test/reorder/rcm.cpp (80%) diff --git a/omp/test/CMakeLists.txt b/omp/test/CMakeLists.txt index 224d4b10e2b..b16882cfaf6 100644 --- a/omp/test/CMakeLists.txt +++ b/omp/test/CMakeLists.txt @@ -2,4 +2,3 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(base) add_subdirectory(matrix) -add_subdirectory(reorder) diff --git a/omp/test/reorder/CMakeLists.txt b/omp/test/reorder/CMakeLists.txt deleted file mode 100644 index 65aea4a0fdb..00000000000 --- a/omp/test/reorder/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -ginkgo_create_test(rcm_kernels RESOURCE_TYPE cpu) diff --git a/test/reorder/CMakeLists.txt b/test/reorder/CMakeLists.txt index c9f3980e8bf..e841195d784 100644 --- a/test/reorder/CMakeLists.txt +++ b/test/reorder/CMakeLists.txt @@ -3,3 +3,4 @@ ginkgo_create_common_test(mc64 DISABLE_EXECUTORS dpcpp) if (GINKGO_HAVE_METIS) ginkgo_create_common_test(nested_dissection) endif() +ginkgo_create_common_test(rcm DISABLE_EXECUTORS dpcpp) diff --git a/omp/test/reorder/rcm_kernels.cpp b/test/reorder/rcm.cpp similarity index 80% rename from omp/test/reorder/rcm_kernels.cpp rename to test/reorder/rcm.cpp index 1fec10a72a2..a89d9e26cd3 100644 --- a/omp/test/reorder/rcm_kernels.cpp +++ b/test/reorder/rcm.cpp @@ -2,9 +2,6 @@ // // SPDX-License-Identifier: BSD-3-Clause -#include - - #include #include #include @@ -17,17 +14,19 @@ #include #include #include +#include #include "core/test/utils.hpp" #include "core/test/utils/assertions.hpp" #include "matrices/config.hpp" +#include "test/utils/executor.hpp" namespace { -class Rcm : public ::testing::Test { +class Rcm : public CommonTestFixture { protected: using v_type = double; using i_type = int; @@ -38,14 +37,12 @@ class Rcm : public ::testing::Test { using perm_type = gko::matrix::Permutation; Rcm() - : ref(gko::ReferenceExecutor::create()), - omp(gko::OmpExecutor::create()), - o_1138_bus_mtx(gko::read( + : o_1138_bus_mtx(gko::read( std::ifstream(gko::matrices::location_1138_bus_mtx, std::ios::in), ref)), d_1138_bus_mtx(gko::read( std::ifstream(gko::matrices::location_1138_bus_mtx, std::ios::in), - omp)) + exec)) {} static void ubfs_reference( @@ -81,14 +78,12 @@ class Rcm : public ::testing::Test { } } - static bool is_valid_start_node(std::shared_ptr mtx, - const i_type* permutation, i_type start, - std::vector& already_visited, - gko::reorder::starting_strategy strategy) + static void check_valid_start_node(std::shared_ptr mtx, + const i_type* permutation, i_type start, + std::vector& already_visited, + gko::reorder::starting_strategy strategy) { - if (already_visited[start]) { - return false; - } + ASSERT_FALSE(already_visited[start]) << start; const auto n = mtx->get_size()[0]; auto degrees = std::vector(n); @@ -105,9 +100,7 @@ class Rcm : public ::testing::Test { min_degree = degrees[i]; } } - if (min_degree != degrees[start]) { - return false; - } + ASSERT_EQ(min_degree, degrees[start]) << start; break; } @@ -157,19 +150,20 @@ class Rcm : public ::testing::Test { } } if (contender_height <= current_height) { - return true; + return; } } - return false; + GTEST_FAIL() << "there is a contender with larger height"; } } - return true; } - static bool is_rcm_ordered(std::shared_ptr mtx, - const i_type* permutation, - gko::reorder::starting_strategy strategy) + static void check_rcm_ordered(std::shared_ptr mtx, + const perm_type* d_permutation, + gko::reorder::starting_strategy strategy) { + const auto host_permutation = d_permutation->clone(mtx->get_executor()); + const auto permutation = host_permutation->get_const_permutation(); const auto n = mtx->get_size()[0]; const auto row_ptrs = mtx->get_const_row_ptrs(); const auto col_idxs = mtx->get_const_col_idxs(); @@ -180,8 +174,7 @@ class Rcm : public ::testing::Test { } // Following checks for cm ordering, therefore create a reversed perm. - auto perm = std::vector(n); - std::copy_n(permutation, n, perm.begin()); + std::vector perm(permutation, permutation + n); std::reverse(perm.begin(), perm.end()); // Now check for cm ordering. @@ -190,10 +183,8 @@ class Rcm : public ::testing::Test { std::vector already_visited(n); while (base_offset != n) { // Assert valid start node. - if (!is_valid_start_node(mtx, permutation, perm[base_offset], - already_visited, strategy)) { - return false; - } + check_valid_start_node(mtx, permutation, perm[base_offset], + already_visited, strategy); // Assert valid level structure. // Also update base_offset and mark as visited while at it. @@ -218,7 +209,8 @@ class Rcm : public ::testing::Test { ++current_level; continue; } - return false; + GTEST_FAIL() << "Level structure invalid at node " << node + << ", level " << current_level; } } @@ -267,22 +259,24 @@ class Rcm : public ::testing::Test { // Assert the ... is not after the ... in the previous level. if (std::find(perm.begin(), perm.end(), y_first_neighbour) < std::find(perm.begin(), perm.end(), x_first_neighbour)) { - return false; + GTEST_FAIL() + << "First neighbor ordering violated between nodes " + << x << " and " << y << ", first neighbors were " + << x_first_neighbour << " and " << y_first_neighbour; } if (y_first_neighbour == x_first_neighbour) { if (degrees[y] < degrees[x]) { - return false; + GTEST_FAIL() + << "Degree ordering violated between nodes " << x + << " and " << y << ", degrees were " << degrees[x] + << " and " << degrees[y]; } } } } - - return true; } - std::shared_ptr ref; - std::shared_ptr omp; std::shared_ptr o_1138_bus_mtx; std::shared_ptr d_1138_bus_mtx; // Can't std::move parameter when using ASSERT_PREDN, no perfect forwarding. @@ -290,36 +284,36 @@ class Rcm : public ::testing::Test { std::shared_ptr d_reorder_op; }; -TEST_F(Rcm, OmpPermutationIsRcmOrdered) +TEST_F(Rcm, PermutationIsRcmOrdered) { - d_reorder_op = reorder_type::build().on(omp)->generate(d_1138_bus_mtx); + d_reorder_op = reorder_type::build().on(exec)->generate(d_1138_bus_mtx); auto perm = d_reorder_op->get_permutation(); - ASSERT_PRED3(is_rcm_ordered, d_1138_bus_mtx, perm->get_const_permutation(), - d_reorder_op->get_parameters().strategy); + check_rcm_ordered(o_1138_bus_mtx, perm.get(), + d_reorder_op->get_parameters().strategy); } -TEST_F(Rcm, OmpPermutationIsRcmOrderedMinDegree) +TEST_F(Rcm, PermutationIsRcmOrderedMinDegree) { d_reorder_op = reorder_type::build() .with_strategy(gko::reorder::starting_strategy::minimum_degree) - .on(omp) + .on(exec) ->generate(d_1138_bus_mtx); auto perm = d_reorder_op->get_permutation(); - ASSERT_PRED3(is_rcm_ordered, d_1138_bus_mtx, perm->get_const_permutation(), - d_reorder_op->get_parameters().strategy); + check_rcm_ordered(o_1138_bus_mtx, perm.get(), + d_reorder_op->get_parameters().strategy); } -TEST_F(Rcm, OmpPermutationIsRcmOrderedNewInterface) +TEST_F(Rcm, PermutationIsRcmOrderedNewInterface) { - auto perm = new_reorder_type::build().on(omp)->generate(d_1138_bus_mtx); + auto perm = new_reorder_type::build().on(exec)->generate(d_1138_bus_mtx); - ASSERT_PRED3(is_rcm_ordered, d_1138_bus_mtx, perm->get_const_permutation(), - gko::reorder::starting_strategy::pseudo_peripheral); + check_rcm_ordered(o_1138_bus_mtx, perm.get(), + gko::reorder::starting_strategy::pseudo_peripheral); } } // namespace From 1e150cf286b103b26143f7744db6dd620b250794 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 11 Dec 2023 14:58:52 +0100 Subject: [PATCH 07/29] add load_*_local loads for workgroup-coherent global memory atomics --- .../cuda_hip/components/memory.nvidia.hpp.inc | 326 ++++++++++++++++++ dev_tools/scripts/generate_cuda_memory_ptx.py | 2 + hip/components/memory.hip.hpp | 51 +++ 3 files changed, 379 insertions(+) diff --git a/common/cuda_hip/components/memory.nvidia.hpp.inc b/common/cuda_hip/components/memory.nvidia.hpp.inc index 99053c78554..5f3f68f490d 100644 --- a/common/cuda_hip/components/memory.nvidia.hpp.inc +++ b/common/cuda_hip/components/memory.nvidia.hpp.inc @@ -343,6 +343,258 @@ __device__ __forceinline__ void store_release_shared(double* ptr, double result) } +__device__ __forceinline__ int32 load_relaxed_local(const int32* ptr) +{ + int32 result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.s32 %0, [%1];" + : "=r"(result) + : "l"(const_cast(ptr)) + : "memory"); +#else + asm volatile("ld.relaxed.cta.s32 %0, [%1];" + : "=r"(result) + : "l"(const_cast(ptr)) + : "memory"); +#endif + + return result; +} + + +__device__ __forceinline__ void store_relaxed_local(int32* ptr, int32 result) +{ +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.s32 [%0], %1;" ::"l"(ptr), "r"(result) + : "memory"); +#else + asm volatile("st.relaxed.cta.s32 [%0], %1;" ::"l"(ptr), "r"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ int64 load_relaxed_local(const int64* ptr) +{ + int64 result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.s64 %0, [%1];" + : "=l"(result) + : "l"(const_cast(ptr)) + : "memory"); +#else + asm volatile("ld.relaxed.cta.s64 %0, [%1];" + : "=l"(result) + : "l"(const_cast(ptr)) + : "memory"); +#endif + + return result; +} + + +__device__ __forceinline__ void store_relaxed_local(int64* ptr, int64 result) +{ +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.s64 [%0], %1;" ::"l"(ptr), "l"(result) + : "memory"); +#else + asm volatile("st.relaxed.cta.s64 [%0], %1;" ::"l"(ptr), "l"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ float load_relaxed_local(const float* ptr) +{ + float result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.f32 %0, [%1];" + : "=f"(result) + : "l"(const_cast(ptr)) + : "memory"); +#else + asm volatile("ld.relaxed.cta.f32 %0, [%1];" + : "=f"(result) + : "l"(const_cast(ptr)) + : "memory"); +#endif + + return result; +} + + +__device__ __forceinline__ void store_relaxed_local(float* ptr, float result) +{ +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.f32 [%0], %1;" ::"l"(ptr), "f"(result) + : "memory"); +#else + asm volatile("st.relaxed.cta.f32 [%0], %1;" ::"l"(ptr), "f"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ double load_relaxed_local(const double* ptr) +{ + double result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.f64 %0, [%1];" + : "=d"(result) + : "l"(const_cast(ptr)) + : "memory"); +#else + asm volatile("ld.relaxed.cta.f64 %0, [%1];" + : "=d"(result) + : "l"(const_cast(ptr)) + : "memory"); +#endif + + return result; +} + + +__device__ __forceinline__ void store_relaxed_local(double* ptr, double result) +{ +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.f64 [%0], %1;" ::"l"(ptr), "d"(result) + : "memory"); +#else + asm volatile("st.relaxed.cta.f64 [%0], %1;" ::"l"(ptr), "d"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ int32 load_acquire_local(const int32* ptr) +{ + int32 result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.s32 %0, [%1];" + : "=r"(result) + : "l"(const_cast(ptr)) + : "memory"); +#else + asm volatile("ld.acquire.cta.s32 %0, [%1];" + : "=r"(result) + : "l"(const_cast(ptr)) + : "memory"); +#endif + membar_acq_rel_local(); + return result; +} + + +__device__ __forceinline__ void store_release_local(int32* ptr, int32 result) +{ + membar_acq_rel_local(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.s32 [%0], %1;" ::"l"(ptr), "r"(result) + : "memory"); +#else + asm volatile("st.release.cta.s32 [%0], %1;" ::"l"(ptr), "r"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ int64 load_acquire_local(const int64* ptr) +{ + int64 result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.s64 %0, [%1];" + : "=l"(result) + : "l"(const_cast(ptr)) + : "memory"); +#else + asm volatile("ld.acquire.cta.s64 %0, [%1];" + : "=l"(result) + : "l"(const_cast(ptr)) + : "memory"); +#endif + membar_acq_rel_local(); + return result; +} + + +__device__ __forceinline__ void store_release_local(int64* ptr, int64 result) +{ + membar_acq_rel_local(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.s64 [%0], %1;" ::"l"(ptr), "l"(result) + : "memory"); +#else + asm volatile("st.release.cta.s64 [%0], %1;" ::"l"(ptr), "l"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ float load_acquire_local(const float* ptr) +{ + float result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.f32 %0, [%1];" + : "=f"(result) + : "l"(const_cast(ptr)) + : "memory"); +#else + asm volatile("ld.acquire.cta.f32 %0, [%1];" + : "=f"(result) + : "l"(const_cast(ptr)) + : "memory"); +#endif + membar_acq_rel_local(); + return result; +} + + +__device__ __forceinline__ void store_release_local(float* ptr, float result) +{ + membar_acq_rel_local(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.f32 [%0], %1;" ::"l"(ptr), "f"(result) + : "memory"); +#else + asm volatile("st.release.cta.f32 [%0], %1;" ::"l"(ptr), "f"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ double load_acquire_local(const double* ptr) +{ + double result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.f64 %0, [%1];" + : "=d"(result) + : "l"(const_cast(ptr)) + : "memory"); +#else + asm volatile("ld.acquire.cta.f64 %0, [%1];" + : "=d"(result) + : "l"(const_cast(ptr)) + : "memory"); +#endif + membar_acq_rel_local(); + return result; +} + + +__device__ __forceinline__ void store_release_local(double* ptr, double result) +{ + membar_acq_rel_local(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.f64 [%0], %1;" ::"l"(ptr), "d"(result) + : "memory"); +#else + asm volatile("st.release.cta.f64 [%0], %1;" ::"l"(ptr), "d"(result) + : "memory"); +#endif +} + + __device__ __forceinline__ int32 load_relaxed(const int32* ptr) { int32 result; @@ -677,6 +929,80 @@ __device__ __forceinline__ void store_relaxed_shared( } +__device__ __forceinline__ thrust::complex load_relaxed_local( + const thrust::complex* ptr) +{ + float real_result; + float imag_result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.v2.f32 {%0, %1}, [%2];" + : "=f"(real_result), "=f"(imag_result) + : "l"(const_cast*>(ptr)) + : "memory"); +#else + asm volatile("ld.relaxed.cta.v2.f32 {%0, %1}, [%2];" + : "=f"(real_result), "=f"(imag_result) + : "l"(const_cast*>(ptr)) + : "memory"); +#endif + return thrust::complex{real_result, imag_result}; +} + + +__device__ __forceinline__ void store_relaxed_local( + thrust::complex* ptr, thrust::complex result) +{ + auto real_result = result.real(); + auto imag_result = result.imag(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.v2.f32 [%0], {%1, %2};" ::"l"(ptr), + "f"(real_result), "f"(imag_result) + : "memory"); +#else + asm volatile("st.relaxed.cta.v2.f32 [%0], {%1, %2};" ::"l"(ptr), + "f"(real_result), "f"(imag_result) + : "memory"); +#endif +} + + +__device__ __forceinline__ thrust::complex load_relaxed_local( + const thrust::complex* ptr) +{ + double real_result; + double imag_result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.v2.f64 {%0, %1}, [%2];" + : "=d"(real_result), "=d"(imag_result) + : "l"(const_cast*>(ptr)) + : "memory"); +#else + asm volatile("ld.relaxed.cta.v2.f64 {%0, %1}, [%2];" + : "=d"(real_result), "=d"(imag_result) + : "l"(const_cast*>(ptr)) + : "memory"); +#endif + return thrust::complex{real_result, imag_result}; +} + + +__device__ __forceinline__ void store_relaxed_local( + thrust::complex* ptr, thrust::complex result) +{ + auto real_result = result.real(); + auto imag_result = result.imag(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.v2.f64 [%0], {%1, %2};" ::"l"(ptr), + "d"(real_result), "d"(imag_result) + : "memory"); +#else + asm volatile("st.relaxed.cta.v2.f64 [%0], {%1, %2};" ::"l"(ptr), + "d"(real_result), "d"(imag_result) + : "memory"); +#endif +} + + __device__ __forceinline__ thrust::complex load_relaxed( const thrust::complex* ptr) { diff --git a/dev_tools/scripts/generate_cuda_memory_ptx.py b/dev_tools/scripts/generate_cuda_memory_ptx.py index 3f913408829..4aa8a25b7bd 100755 --- a/dev_tools/scripts/generate_cuda_memory_ptx.py +++ b/dev_tools/scripts/generate_cuda_memory_ptx.py @@ -30,6 +30,8 @@ class type_desc: memory_spaces = [ space(ptx_space_suffix=".shared", ptx_scope_suffix=".cta", fn_suffix="_shared", ptr_expr="convert_generic_ptr_to_smem_ptr({ptr})", ptr_constraint="r"), + space(ptx_space_suffix="", ptx_scope_suffix=".cta", fn_suffix="_local", + ptr_expr="{ptr}", ptr_constraint="l"), space(ptx_space_suffix="", ptx_scope_suffix=".gpu", fn_suffix="", ptr_expr="{ptr}", ptr_constraint="l")] memory_orderings = [ ordering(ptx_load_suffix=".relaxed", fn_load_suffix="_relaxed", diff --git a/hip/components/memory.hip.hpp b/hip/components/memory.hip.hpp index 79632f63616..fd4fbb8ce11 100644 --- a/hip/components/memory.hip.hpp +++ b/hip/components/memory.hip.hpp @@ -144,6 +144,13 @@ __device__ __forceinline__ ValueType load_relaxed_shared(const ValueType* ptr) } +template +__device__ __forceinline__ ValueType load_relaxed_local(const ValueType* ptr) +{ + return load_generic<__ATOMIC_RELAXED, HIP_SCOPE_THREADBLOCK>(ptr); +} + + template __device__ __forceinline__ ValueType load_acquire(const ValueType* ptr) { @@ -158,6 +165,13 @@ __device__ __forceinline__ ValueType load_acquire_shared(const ValueType* ptr) } +template +__device__ __forceinline__ ValueType load_acquire_local(const ValueType* ptr) +{ + return load_generic<__ATOMIC_ACQUIRE, HIP_SCOPE_THREADBLOCK>(ptr); +} + + template __device__ __forceinline__ void store_relaxed(ValueType* ptr, ValueType value) { @@ -173,6 +187,14 @@ __device__ __forceinline__ void store_relaxed_shared(ValueType* ptr, } +template +__device__ __forceinline__ void store_relaxed_local(ValueType* ptr, + ValueType value) +{ + store_generic<__ATOMIC_RELAXED, HIP_SCOPE_THREADBLOCK>(ptr, value); +} + + template __device__ __forceinline__ void store_release(ValueType* ptr, ValueType value) { @@ -188,6 +210,14 @@ __device__ __forceinline__ void store_release_shared(ValueType* ptr, } +template +__device__ __forceinline__ void store_release_local(ValueType* ptr, + ValueType value) +{ + store_generic<__ATOMIC_RELEASE, HIP_SCOPE_THREADBLOCK>(ptr, value); +} + + template __device__ __forceinline__ thrust::complex load_relaxed( const thrust::complex* ptr) @@ -210,6 +240,17 @@ __device__ __forceinline__ thrust::complex load_relaxed_shared( } +template +__device__ __forceinline__ thrust::complex load_relaxed_local( + const thrust::complex* ptr) +{ + auto real_ptr = reinterpret_cast(ptr); + auto real = load_relaxed_local(real_ptr); + auto imag = load_relaxed_local(real_ptr + 1); + return {real, imag}; +} + + template __device__ __forceinline__ void store_relaxed(thrust::complex* ptr, thrust::complex value) @@ -230,6 +271,16 @@ __device__ __forceinline__ void store_relaxed_shared( } +template +__device__ __forceinline__ void store_relaxed_local( + thrust::complex* ptr, thrust::complex value) +{ + auto real_ptr = reinterpret_cast(ptr); + store_relaxed_local(real_ptr, value.real()); + store_relaxed_local(real_ptr + 1, value.imag()); +} + + #undef HIP_ATOMIC_LOAD #undef HIP_ATOMIC_STORE #undef HIP_SCOPE_GPU From 100d2200cf85095a210080c93a7324719abad7bc Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 11 Dec 2023 15:48:38 +0100 Subject: [PATCH 08/29] fix connected component search --- common/cuda_hip/reorder/rcm_kernels.hpp.inc | 155 +++++++++++++++----- cuda/reorder/rcm_kernels.cu | 1 + hip/reorder/rcm_kernels.hip.cpp | 1 + 3 files changed, 120 insertions(+), 37 deletions(-) diff --git a/common/cuda_hip/reorder/rcm_kernels.hpp.inc b/common/cuda_hip/reorder/rcm_kernels.hpp.inc index 77ac37e7b3c..4e956d4605a 100644 --- a/common/cuda_hip/reorder/rcm_kernels.hpp.inc +++ b/common/cuda_hip/reorder/rcm_kernels.hpp.inc @@ -51,6 +51,7 @@ struct components_data { }; +// Attach each node to a smaller neighbor template __global__ __launch_bounds__(default_block_size) void connected_components_attach( @@ -58,39 +59,136 @@ __launch_bounds__(default_block_size) void connected_components_attach( const IndexType* __restrict__ col_idxs, IndexType num_rows, IndexType* __restrict__ components) { - const auto row = thread::get_thread_id_flat(); + const auto row = thread::get_thread_id_flat(); if (row >= num_rows) { return; } const auto begin = row_ptrs[row]; const auto end = row_ptrs[row + 1]; - auto parent = static_cast(row); + auto parent = row; for (auto nz = begin; nz < end; nz++) { const auto col = col_idxs[nz]; - parent = min(col, parent); + if (col < parent) { + parent = col; + break; + } } components[row] = parent; } -__global__ void reset_flag(bool* finished) { *finished = true; } +// Returns the representative of a (partial) component with path compression +// For details, see J. Jaiganesh and M. Burtscher. +// "A High-Performance Connected Components Implementation for GPUs." +// Proceedings of the 2018 ACM International Symposium on High-Performance +// Parallel and Distributed Computing. June 2018 +template +__device__ __forceinline__ IndexType disjoint_set_find(IndexType node, + IndexType* parents) +{ + auto parent = parents[node]; + if (node != parent) { + // here we use atomics with threadblock-local coherence + // to avoid the L2 performance penalty at the cost of a few additional + // iterations + // TODO we can probably replace < by != + for (auto grandparent = load_relaxed_local(parents + parent); + grandparent < parent; + grandparent = load_relaxed_local(parents + parent)) { + // pointer doubling + // node --> parent --> grandparent + // turns into + // node -------------> grandparent + // | + // parent ------/ + // This operation is safe, because only the representative of each + // set will be changed in subsequent operations, and this only + // shortens paths along intermediate nodes + store_relaxed_local(parents + node, grandparent); + node = parent; + parent = grandparent; + } + } + return parent; +} + + +template +struct atomic_map {}; + + +template <> +struct atomic_map { + using type = int; +}; + + +template <> +struct atomic_map { + using type = unsigned long long; +}; + + +template +__global__ +__launch_bounds__(default_block_size) void connected_components_combine( + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, IndexType num_rows, + IndexType* __restrict__ parents) +{ + using atomic_type = typename atomic_map::type; + const auto row = thread::get_thread_id_flat(); + if (row >= num_rows) { + return; + } + const auto begin = row_ptrs[row]; + const auto end = row_ptrs[row + 1]; + auto parent = disjoint_set_find(row, parents); + for (auto nz = begin; nz < end; nz++) { + const auto col = col_idxs[nz]; + // handle every edge only in one direction + if (col < row) { + auto col_parent = disjoint_set_find(col, parents); + bool repeat = false; + do { + repeat = false; + auto& min_parent = col_parent < parent ? col_parent : parent; + auto& max_parent = col_parent < parent ? parent : col_parent; + // attempt to attach the (assumed unattached) larger node to the + // smaller node + const auto old_parent = static_cast(atomicCAS( + reinterpret_cast(parents + max_parent), + static_cast(max_parent), + static_cast(min_parent))); + // if unsuccessful, proceed with the parent of the (now known + // attached) node + if (old_parent != max_parent) { + max_parent = old_parent; + repeat = true; + } + } while (repeat); + } + } +} +// Replace each node's parent by its representative template __global__ __launch_bounds__(default_block_size) void connected_components_path_compress( - IndexType num_rows, IndexType* components, bool* finished) + IndexType num_rows, IndexType* parents) { - const auto row = thread::get_thread_id_flat(); + const auto row = thread::get_thread_id_flat(); if (row >= num_rows) { return; } - const auto old_value = components[row]; - const auto new_value = components[old_value]; - if (old_value != new_value && *finished) { - *finished = false; + auto current = row; + // TODO we can probably replace < by != + for (auto parent = load_relaxed_local(parents + current); parent < current; + parent = load_relaxed_local(parents + current)) { + current = parent; } - components[row] = new_value; + parents[row] = current; } @@ -126,22 +224,21 @@ components_data compute_connected_components( { const auto policy = thrust_policy(exec); components_data result{exec, static_cast(num_rows)}; - array finished_array{exec, 1}; const auto node_component = result.node_component.get_data(); const auto nodes = result.nodes.get_data(); - const auto finished = finished_array.get_data(); - // attach every node to its minimum neighbor + // attach every node to a smaller neighbor const auto num_blocks = ceildiv(num_rows, default_block_size); connected_components_attach<<get_stream()>>>( row_ptrs, col_idxs, num_rows, node_component); - // double pointers until all paths are loops or edges - do { - reset_flag<<<1, 1, 0, exec->get_stream()>>>(finished); - connected_components_path_compress<<get_stream()>>>( - num_rows, node_component, finished); - } while (!exec->copy_val_to_host(finished)); + // combine connected components along edges + connected_components_combine<<get_stream()>>>( + row_ptrs, col_idxs, num_rows, node_component); + // compress paths to edges + connected_components_path_compress<<get_stream()>>>(num_rows, + node_component); // group nodes by component ID result.sorted_ids = result.node_component; const auto sorted_component_ids = result.sorted_ids.get_data(); @@ -194,22 +291,6 @@ struct ubfs_levels { }; -template -struct atomic_map {}; - - -template <> -struct atomic_map { - using type = int; -}; - - -template <> -struct atomic_map { - using type = unsigned long long; -}; - - template __global__ __launch_bounds__(default_block_size) void ubfs_level_kernel( const IndexType* __restrict__ row_ptrs, diff --git a/cuda/reorder/rcm_kernels.cu b/cuda/reorder/rcm_kernels.cu index 2dcc1ffe4cc..d699d00dfb6 100644 --- a/cuda/reorder/rcm_kernels.cu +++ b/cuda/reorder/rcm_kernels.cu @@ -27,6 +27,7 @@ #include "core/base/array_access.hpp" #include "cuda/base/thrust.cuh" +#include "cuda/components/memory.cuh" #include "cuda/components/thread_ids.cuh" diff --git a/hip/reorder/rcm_kernels.hip.cpp b/hip/reorder/rcm_kernels.hip.cpp index 40ecef9cb06..0c83c728e79 100644 --- a/hip/reorder/rcm_kernels.hip.cpp +++ b/hip/reorder/rcm_kernels.hip.cpp @@ -27,6 +27,7 @@ #include "core/base/array_access.hpp" #include "hip/base/thrust.hip.hpp" +#include "hip/components/memory.hip.hpp" #include "hip/components/thread_ids.hip.hpp" From d541d19b386816ee5d9ba74458b0a59b22f65da6 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 11 Dec 2023 16:08:50 +0100 Subject: [PATCH 09/29] fix level initialization --- common/cuda_hip/reorder/rcm_kernels.hpp.inc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/common/cuda_hip/reorder/rcm_kernels.hpp.inc b/common/cuda_hip/reorder/rcm_kernels.hpp.inc index 4e956d4605a..e78c1b0fda8 100644 --- a/common/cuda_hip/reorder/rcm_kernels.hpp.inc +++ b/common/cuda_hip/reorder/rcm_kernels.hpp.inc @@ -426,7 +426,9 @@ void find_pseudo_peripheral_nodes(std::shared_ptr exec, const auto compare_fn = node_compare_functor{ candidate_nodes, node_levels, level_nodes, best_levels, improved.get_data()}; - // initialize best_levels to the initial nodes at level 0 + // initialize best_levels and levels to the initial nodes at level 0 + thrust::copy_n(policy, components.min_deg_node.get_const_data(), + num_components, level_nodes); thrust::fill_n(policy, best_levels, num_components, IndexType{}); do { ubfs(exec, num_rows, row_ptrs, col_idxs, levels); From eeb18e947b7bbbf4700a2ddb02b2afd3a437349c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 15 Dec 2023 00:16:43 +0100 Subject: [PATCH 10/29] fix membar for local atomics --- common/cuda_hip/components/memory.nvidia.hpp.inc | 10 ++++++++++ dev_tools/scripts/generate_cuda_memory_ptx.py | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/common/cuda_hip/components/memory.nvidia.hpp.inc b/common/cuda_hip/components/memory.nvidia.hpp.inc index 5f3f68f490d..49c9ae7601c 100644 --- a/common/cuda_hip/components/memory.nvidia.hpp.inc +++ b/common/cuda_hip/components/memory.nvidia.hpp.inc @@ -55,6 +55,16 @@ __device__ __forceinline__ void membar_acq_rel_shared() } +__device__ __forceinline__ void membar_acq_rel_local() +{ +#if __CUDA_ARCH__ < 700 + asm volatile("membar.cta;" ::: "memory"); +#else + asm volatile("fence.acq_rel.cta;" ::: "memory"); +#endif +} + + __device__ __forceinline__ int32 load_relaxed_shared(const int32* ptr) { int32 result; diff --git a/dev_tools/scripts/generate_cuda_memory_ptx.py b/dev_tools/scripts/generate_cuda_memory_ptx.py index 4aa8a25b7bd..9dec14d2394 100755 --- a/dev_tools/scripts/generate_cuda_memory_ptx.py +++ b/dev_tools/scripts/generate_cuda_memory_ptx.py @@ -99,6 +99,16 @@ class type_desc: asm volatile("fence.acq_rel.cta;" ::: "memory"); #endif } + + +__device__ __forceinline__ void membar_acq_rel_local() +{ +#if __CUDA_ARCH__ < 700 + asm volatile("membar.cta;" ::: "memory"); +#else + asm volatile("fence.acq_rel.cta;" ::: "memory"); +#endif +} """) # relaxed From 69b298d52fcce22e095121b14d5e57e0c4cd34b7 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 15 Dec 2023 00:17:08 +0100 Subject: [PATCH 11/29] fix min degree level initialization --- common/cuda_hip/reorder/rcm_kernels.hpp.inc | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/common/cuda_hip/reorder/rcm_kernels.hpp.inc b/common/cuda_hip/reorder/rcm_kernels.hpp.inc index e78c1b0fda8..1efc51bc94e 100644 --- a/common/cuda_hip/reorder/rcm_kernels.hpp.inc +++ b/common/cuda_hip/reorder/rcm_kernels.hpp.inc @@ -282,6 +282,8 @@ struct ubfs_levels { array nodes; /** Pointers into nodes for each level */ array ptrs; + /** How many levels are there? */ + IndexType num_levels; ubfs_levels(std::shared_ptr exec, size_type num_rows) : node_level{exec, num_rows}, @@ -355,7 +357,7 @@ IndexType ubfs(std::shared_ptr exec, level_begin = std::exchange(level_end, exec->copy_val_to_host(level_end_ptr)); } - return level; + levels.num_levels = level; } @@ -427,8 +429,6 @@ void find_pseudo_peripheral_nodes(std::shared_ptr exec, candidate_nodes, node_levels, level_nodes, best_levels, improved.get_data()}; // initialize best_levels and levels to the initial nodes at level 0 - thrust::copy_n(policy, components.min_deg_node.get_const_data(), - num_components, level_nodes); thrust::fill_n(policy, best_levels, num_components, IndexType{}); do { ubfs(exec, num_rows, row_ptrs, col_idxs, levels); @@ -490,7 +490,7 @@ template void sort_levels(std::shared_ptr exec, const IndexType num_rows, const IndexType* const row_ptrs, const IndexType* const col_idxs, - const IndexType* const degrees, const IndexType num_levels, + const IndexType* const degrees, const components_data& comps, ubfs_levels& levels, IndexType* const permutation) { @@ -499,6 +499,7 @@ void sort_levels(std::shared_ptr exec, static_cast(num_rows)}; array key_array{exec, static_cast(num_rows)}; levels.ptrs.set_executor(exec->get_master()); + const auto num_levels = levels.num_levels; const auto level_ptrs = levels.ptrs.get_data(); const auto level_nodes = levels.nodes.get_data(); const auto node_levels = levels.node_level.get_const_data(); @@ -566,13 +567,17 @@ void get_permutation(std::shared_ptr exec, ubfs_levels levels{exec, static_cast(num_rows)}; set_element(levels.ptrs, 0, 0); set_element(levels.ptrs, 1, num_components); + // copy min degree nodes to level 0 + thrust::copy_n(thrust_policy(exec), comps.min_deg_node.get_const_data(), + num_components, levels.nodes.get_data()); if (strategy == reorder::starting_strategy::pseudo_peripheral) { find_pseudo_peripheral_nodes(exec, num_rows, row_ptrs, col_idxs, degrees.get_const_data(), comps, levels); + } else { + ubfs(exec, num_rows, row_ptrs, col_idxs, levels); } - const auto num_levels = ubfs(exec, num_rows, row_ptrs, col_idxs, levels); sort_levels(exec, num_rows, row_ptrs, col_idxs, degrees.get_const_data(), - num_levels, comps, levels, permutation); + comps, levels, permutation); thrust::reverse(thrust_policy(exec), permutation, permutation + num_rows); } From 4479266326b857e9c23dcbbadf8f5b090aceaef4 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 15 Dec 2023 00:35:41 +0100 Subject: [PATCH 12/29] test multiple connected components --- test/reorder/rcm.cpp | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/test/reorder/rcm.cpp b/test/reorder/rcm.cpp index a89d9e26cd3..ce5c52f3a57 100644 --- a/test/reorder/rcm.cpp +++ b/test/reorder/rcm.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -37,7 +38,8 @@ class Rcm : public CommonTestFixture { using perm_type = gko::matrix::Permutation; Rcm() - : o_1138_bus_mtx(gko::read( + : rng{63749}, + o_1138_bus_mtx(gko::read( std::ifstream(gko::matrices::location_1138_bus_mtx, std::ios::in), ref)), d_1138_bus_mtx(gko::read( @@ -277,6 +279,7 @@ class Rcm : public CommonTestFixture { } } + std::default_random_engine rng; std::shared_ptr o_1138_bus_mtx; std::shared_ptr d_1138_bus_mtx; // Can't std::move parameter when using ASSERT_PREDN, no perfect forwarding. @@ -316,4 +319,40 @@ TEST_F(Rcm, PermutationIsRcmOrderedNewInterface) gko::reorder::starting_strategy::pseudo_peripheral); } +TEST_F(Rcm, PermutationIsRcmOrderedMultipleConnectedComponents) +{ + gko::matrix_data data; + d_1138_bus_mtx->write(data); + const auto num_rows = data.size[0]; + const int num_copies = 5; + data.size[0] *= num_copies; + data.size[1] *= num_copies; + for (gko::size_type i = 0; i < num_rows; i++) { + const auto entry = data.nonzeros[i]; + // create copies of the matrix + for (int i = 1; i < num_copies; i++) { + data.nonzeros.emplace_back(entry.row + i * num_rows, + entry.column + i * num_rows, + entry.value); + } + } + std::vector permutation(data.size[0]); + std::iota(permutation.begin(), permutation.end(), 0); + std::shuffle(permutation.begin(), permutation.end(), rng); + for (auto& entry : data.nonzeros) { + entry.row = permutation[entry.row]; + entry.column = permutation[entry.column]; + } + data.sort_row_major(); + d_1138_bus_mtx->read(data); + o_1138_bus_mtx->read(data); + + d_reorder_op = reorder_type::build().on(exec)->generate(d_1138_bus_mtx); + + auto perm = d_reorder_op->get_permutation(); + + check_rcm_ordered(o_1138_bus_mtx, perm.get(), + d_reorder_op->get_parameters().strategy); +} + } // namespace From 02b71cd3bebeee9be96919c22dfa023fad6306d9 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 15 Dec 2023 08:25:17 +0100 Subject: [PATCH 13/29] fix ubfs --- common/cuda_hip/reorder/rcm_kernels.hpp.inc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/cuda_hip/reorder/rcm_kernels.hpp.inc b/common/cuda_hip/reorder/rcm_kernels.hpp.inc index 1efc51bc94e..d2ee7772ac4 100644 --- a/common/cuda_hip/reorder/rcm_kernels.hpp.inc +++ b/common/cuda_hip/reorder/rcm_kernels.hpp.inc @@ -328,9 +328,9 @@ __global__ __launch_bounds__(default_block_size) void ubfs_level_kernel( template -IndexType ubfs(std::shared_ptr exec, - const IndexType num_rows, const IndexType* const row_ptrs, - const IndexType* const col_idxs, ubfs_levels& levels) +void ubfs(std::shared_ptr exec, const IndexType num_rows, + const IndexType* const row_ptrs, const IndexType* const col_idxs, + ubfs_levels& levels) { const auto policy = thrust_policy(exec); const auto node_levels = levels.node_level.get_data(); From b258af733582b265470c31afee97ff9e0c901406 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 15 Dec 2023 09:19:47 +0100 Subject: [PATCH 14/29] fix test --- test/reorder/CMakeLists.txt | 2 +- test/reorder/rcm.cpp | 110 +++++++++++++++++++++--------------- 2 files changed, 67 insertions(+), 45 deletions(-) diff --git a/test/reorder/CMakeLists.txt b/test/reorder/CMakeLists.txt index e841195d784..2bd78c77607 100644 --- a/test/reorder/CMakeLists.txt +++ b/test/reorder/CMakeLists.txt @@ -3,4 +3,4 @@ ginkgo_create_common_test(mc64 DISABLE_EXECUTORS dpcpp) if (GINKGO_HAVE_METIS) ginkgo_create_common_test(nested_dissection) endif() -ginkgo_create_common_test(rcm DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_and_reference_test(rcm DISABLE_EXECUTORS dpcpp) diff --git a/test/reorder/rcm.cpp b/test/reorder/rcm.cpp index ce5c52f3a57..630bdcefce4 100644 --- a/test/reorder/rcm.cpp +++ b/test/reorder/rcm.cpp @@ -178,6 +178,13 @@ class Rcm : public CommonTestFixture { // Following checks for cm ordering, therefore create a reversed perm. std::vector perm(permutation, permutation + n); std::reverse(perm.begin(), perm.end()); + std::vector inv_perm(n, gko::invalid_index()); + for (gko::size_type i = 0; i < n; i++) { + ASSERT_GE(perm[i], 0) << i; + ASSERT_LT(perm[i], n) << i; + ASSERT_EQ(inv_perm[perm[i]], gko::invalid_index()) << i; + inv_perm[perm[i]] = i; + } // Now check for cm ordering. @@ -235,9 +242,8 @@ class Rcm : public CommonTestFixture { x_neighbour_idx < x_row_end; ++x_neighbour_idx) { const auto x_neighbour = col_idxs[x_neighbour_idx]; if (levels[x_neighbour] == level - 1) { - if (std::find(perm.begin(), perm.end(), x_neighbour) < - std::find(perm.begin(), perm.end(), - x_first_neighbour)) { + if (inv_perm[x_neighbour] < + inv_perm[x_first_neighbour]) { x_first_neighbour = x_neighbour; } } @@ -250,35 +256,60 @@ class Rcm : public CommonTestFixture { y_neighbour_idx < y_row_end; ++y_neighbour_idx) { const auto y_neighbour = col_idxs[y_neighbour_idx]; if (levels[y_neighbour] == level - 1) { - if (std::find(perm.begin(), perm.end(), y_neighbour) < - std::find(perm.begin(), perm.end(), - y_first_neighbour)) { + if (inv_perm[y_neighbour] < + inv_perm[y_first_neighbour]) { y_first_neighbour = y_neighbour; } } } // Assert the ... is not after the ... in the previous level. - if (std::find(perm.begin(), perm.end(), y_first_neighbour) < - std::find(perm.begin(), perm.end(), x_first_neighbour)) { - GTEST_FAIL() - << "First neighbor ordering violated between nodes " - << x << " and " << y << ", first neighbors were " - << x_first_neighbour << " and " << y_first_neighbour; - } + ASSERT_GE(inv_perm[y_first_neighbour], + inv_perm[x_first_neighbour]) + << "First neighbor ordering violated between nodes " << x + << " and " << y << ", first neighbors were " + << x_first_neighbour << " and " << y_first_neighbour; if (y_first_neighbour == x_first_neighbour) { - if (degrees[y] < degrees[x]) { - GTEST_FAIL() - << "Degree ordering violated between nodes " << x - << " and " << y << ", degrees were " << degrees[x] - << " and " << degrees[y]; - } + ASSERT_GE(degrees[y], degrees[x]) + << "Degree ordering violated between nodes " << x + << " and " << y << ", degrees were " << degrees[x] + << " and " << degrees[y]; } } } } + void build_multiple_connected_components() + { + gko::matrix_data data; + d_1138_bus_mtx->write(data); + const auto num_rows = data.size[0]; + const auto nnz = data.nonzeros.size(); + const int num_copies = 5; + data.size[0] *= num_copies; + data.size[1] *= num_copies; + for (gko::size_type i = 0; i < nnz; i++) { + const auto entry = data.nonzeros[i]; + // create copies of the matrix + for (int copy = 1; copy < num_copies; copy++) { + data.nonzeros.emplace_back(entry.row + copy * num_rows, + entry.column + copy * num_rows, + entry.value); + } + } + std::vector permutation(data.size[0]); + std::iota(permutation.begin(), permutation.end(), 0); + std::shuffle(permutation.begin(), permutation.end(), rng); + for (auto& entry : data.nonzeros) { + entry.row = permutation[entry.row]; + entry.column = permutation[entry.column]; + } + data.sort_row_major(); + d_1138_bus_mtx->read(data); + o_1138_bus_mtx->read(data); + } + std::default_random_engine rng; std::shared_ptr o_1138_bus_mtx; std::shared_ptr d_1138_bus_mtx; @@ -321,36 +352,27 @@ TEST_F(Rcm, PermutationIsRcmOrderedNewInterface) TEST_F(Rcm, PermutationIsRcmOrderedMultipleConnectedComponents) { - gko::matrix_data data; - d_1138_bus_mtx->write(data); - const auto num_rows = data.size[0]; - const int num_copies = 5; - data.size[0] *= num_copies; - data.size[1] *= num_copies; - for (gko::size_type i = 0; i < num_rows; i++) { - const auto entry = data.nonzeros[i]; - // create copies of the matrix - for (int i = 1; i < num_copies; i++) { - data.nonzeros.emplace_back(entry.row + i * num_rows, - entry.column + i * num_rows, - entry.value); - } - } - std::vector permutation(data.size[0]); - std::iota(permutation.begin(), permutation.end(), 0); - std::shuffle(permutation.begin(), permutation.end(), rng); - for (auto& entry : data.nonzeros) { - entry.row = permutation[entry.row]; - entry.column = permutation[entry.column]; - } - data.sort_row_major(); - d_1138_bus_mtx->read(data); - o_1138_bus_mtx->read(data); + this->build_multiple_connected_components(); d_reorder_op = reorder_type::build().on(exec)->generate(d_1138_bus_mtx); auto perm = d_reorder_op->get_permutation(); + check_rcm_ordered(o_1138_bus_mtx, perm.get(), + d_reorder_op->get_parameters().strategy); +} + + +TEST_F(Rcm, PermutationIsRcmOrderedMinDegreeMultipleConnectedComponents) +{ + this->build_multiple_connected_components(); + + d_reorder_op = + reorder_type::build() + .with_strategy(gko::reorder::starting_strategy::minimum_degree) + .on(exec) + ->generate(d_1138_bus_mtx); + auto perm = d_reorder_op->get_permutation(); check_rcm_ordered(o_1138_bus_mtx, perm.get(), d_reorder_op->get_parameters().strategy); } From 42833cfddaf16270bc98e7f6a92b440c327e5df7 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 15 Dec 2023 14:03:47 +0100 Subject: [PATCH 15/29] work around rocThrust bug https://github.com/ROCm/rocThrust/issues/352 --- common/cuda_hip/reorder/rcm_kernels.hpp.inc | 38 +++++++++++++++++---- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/common/cuda_hip/reorder/rcm_kernels.hpp.inc b/common/cuda_hip/reorder/rcm_kernels.hpp.inc index d2ee7772ac4..f56db6e7fd5 100644 --- a/common/cuda_hip/reorder/rcm_kernels.hpp.inc +++ b/common/cuda_hip/reorder/rcm_kernels.hpp.inc @@ -205,13 +205,25 @@ struct adj_not_predicate { template struct node_min_degree_reduction { - __device__ __forceinline__ IndexType operator()(IndexType u, IndexType v) + __device__ __forceinline__ IndexType operator()(IndexType u, + IndexType v) const { +#ifdef GKO_COMPILING_HIP + // guard against out-of-bounds values, since rocThrust has a bug + // https://github.com/ROCm/rocThrust/issues/352 + if (u < 0 || u >= size) { + u = 0; + } + if (v < 0 || v >= size) { + v = 0; + } +#endif return thrust::make_pair(degree[u], u) < thrust::make_pair(degree[v], v) ? u : v; } + IndexType size; const IndexType* degree; }; @@ -259,10 +271,10 @@ components_data compute_connected_components( // find minimum degree node for each component array component_id_array{exec, num_components}; const auto component_ids = component_id_array.get_data(); - thrust::reduce_by_key(policy, sorted_component_ids, - sorted_component_ids + num_rows, nodes, component_ids, - min_deg_node, thrust::equal_to{}, - node_min_degree_reduction{node_degrees}); + thrust::reduce_by_key( + policy, sorted_component_ids, sorted_component_ids + num_rows, nodes, + component_ids, min_deg_node, thrust::equal_to{}, + node_min_degree_reduction{num_rows, node_degrees}); // map component IDs to consecutive indexing array compacted_node_component{exec, static_cast(num_rows)}; @@ -363,8 +375,19 @@ void ubfs(std::shared_ptr exec, const IndexType num_rows, template struct node_max_level_min_degree_reduction { - __device__ __forceinline__ IndexType operator()(IndexType u, IndexType v) + __device__ __forceinline__ IndexType operator()(IndexType u, + IndexType v) const { +#ifdef GKO_COMPILING_HIP + // guard against out-of-bounds values, since rocThrust has a bug + // https://github.com/ROCm/rocThrust/issues/352 + if (u < 0 || u >= size) { + u = 0; + } + if (v < 0 || v >= size) { + v = 0; + } +#endif // return node with larger level (smaller degree or ID as tie-breakers) return thrust::make_tuple(level[v], degree[u], u) < thrust::make_tuple(level[u], degree[v], v) @@ -372,6 +395,7 @@ struct node_max_level_min_degree_reduction { : v; } + IndexType size; const IndexType* degree; const IndexType* level; }; @@ -421,7 +445,7 @@ void find_pseudo_peripheral_nodes(std::shared_ptr exec, const auto component_nodes = components.nodes.get_const_data(); const auto sorted_component_ids = components.sorted_ids.get_const_data(); const auto reduction = node_max_level_min_degree_reduction{ - node_degrees, node_levels}; + num_rows, node_degrees, node_levels}; const auto discard_it = thrust::discard_iterator{}; const auto eq_op = thrust::equal_to{}; const auto counting_it = thrust::make_counting_iterator(IndexType{}); From 3e9420d67334c9eb3e767a3263faf8f430ff9487 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 18 Dec 2023 16:04:50 +0100 Subject: [PATCH 16/29] fix handling of multiple connected components in OpenMP --- omp/reorder/rcm_kernels.cpp | 9 ++-- test/reorder/rcm.cpp | 91 ++++++++++++++++++------------------- 2 files changed, 49 insertions(+), 51 deletions(-) diff --git a/omp/reorder/rcm_kernels.cpp b/omp/reorder/rcm_kernels.cpp index a2df9e863e7..64f5a789a3f 100644 --- a/omp/reorder/rcm_kernels.cpp +++ b/omp/reorder/rcm_kernels.cpp @@ -358,7 +358,7 @@ std::pair rls_contender_and_height( // Implement this through a tie-max reduction. First reduce local ... const int32 num_threads = omp_get_max_threads(); const auto initial_value = - std::make_pair(std::make_pair(levels[0], degrees[0]), 0); + std::make_pair(std::make_pair(levels[start], degrees[start]), start); vector local_contenders(num_threads, initial_value, exec); #pragma omp parallel num_threads(num_threads) @@ -368,9 +368,10 @@ std::pair rls_contender_and_height( #pragma omp for schedule(static) for (IndexType i = 1; i < num_vertices; ++i) { - if (std::tie(levels[i], degrees[i]) > - std::tie(local_contender.first.first, - local_contender.first.second)) { + if (levels[i] != std::numeric_limits::max() && + std::tie(levels[i], degrees[i]) > + std::tie(local_contender.first.first, + local_contender.first.second)) { local_contender.first = std::make_pair(levels[i], degrees[i]); local_contender.second = i; } diff --git a/test/reorder/rcm.cpp b/test/reorder/rcm.cpp index 630bdcefce4..f8b92561167 100644 --- a/test/reorder/rcm.cpp +++ b/test/reorder/rcm.cpp @@ -85,6 +85,7 @@ class Rcm : public CommonTestFixture { std::vector& already_visited, gko::reorder::starting_strategy strategy) { + SCOPED_TRACE(start); ASSERT_FALSE(already_visited[start]) << start; const auto n = mtx->get_size()[0]; @@ -94,8 +95,7 @@ class Rcm : public CommonTestFixture { mtx->get_const_row_ptrs()[i + 1] - mtx->get_const_row_ptrs()[i]; } - switch (strategy) { - case gko::reorder::starting_strategy::minimum_degree: { + if (strategy == gko::reorder::starting_strategy::minimum_degree) { auto min_degree = std::numeric_limits::max(); for (gko::size_type i = 0; i < n; ++i) { if (!already_visited[i] && degrees[i] < min_degree) { @@ -103,60 +103,57 @@ class Rcm : public CommonTestFixture { } } ASSERT_EQ(min_degree, degrees[start]) << start; - break; + return; } - case gko::reorder::starting_strategy::pseudo_peripheral: { - // Check if any valid contender has a lowereq height than the - // selected start node. + // Check if any valid contender has a lowereq height than the + // selected start node. - std::vector reference_current_levels(n); - std::fill(reference_current_levels.begin(), - reference_current_levels.end(), - std::numeric_limits::max()); - ubfs_reference(mtx, &reference_current_levels[0], start); + std::vector reference_current_levels(n); + std::fill(reference_current_levels.begin(), + reference_current_levels.end(), + std::numeric_limits::max()); + ubfs_reference(mtx, &reference_current_levels[0], start); - std::vector reference_contenders(0); - auto current_height = std::numeric_limits::min(); - for (gko::size_type i = 0; i < n; ++i) { - if (reference_current_levels[i] != - std::numeric_limits::max() && - reference_current_levels[i] >= current_height) { - if (reference_current_levels[i] > current_height) { - reference_contenders.clear(); - } - reference_contenders.push_back(i); - current_height = reference_current_levels[i]; + // First find all contender nodes in the last UBFS level + std::vector reference_contenders; + auto current_height = std::numeric_limits::min(); + for (gko::size_type i = 0; i < n; ++i) { + if (reference_current_levels[i] != + std::numeric_limits::max() && + reference_current_levels[i] >= current_height) { + if (reference_current_levels[i] > current_height) { + reference_contenders.clear(); } + reference_contenders.push_back(i); + current_height = reference_current_levels[i]; } + } - std::vector> reference_contenders_levels( - reference_contenders.size()); - for (gko::size_type i = 0; i < reference_contenders.size(); ++i) { - std::vector reference_contender_levels(n); - std::fill(reference_contender_levels.begin(), - reference_contender_levels.end(), - std::numeric_limits::max()); - ubfs_reference(mtx, &reference_contender_levels[0], - reference_contenders[i]); - reference_contenders_levels[i] = reference_contender_levels; - } + // then compute a level array for each of the contenders + std::vector> reference_contenders_levels( + reference_contenders.size()); + for (gko::size_type i = 0; i < reference_contenders.size(); ++i) { + std::vector reference_contender_levels(n); + std::fill(reference_contender_levels.begin(), + reference_contender_levels.end(), + std::numeric_limits::max()); + ubfs_reference(mtx, &reference_contender_levels[0], + reference_contenders[i]); + reference_contenders_levels[i] = reference_contender_levels; + } - for (gko::size_type i = 0; i < reference_contenders.size(); ++i) { - auto contender_height = std::numeric_limits::min(); - for (gko::size_type j = 0; j < n; ++j) { - if (reference_contenders_levels[i][j] != - std::numeric_limits::max() && - reference_contenders_levels[i][j] > contender_height) { - contender_height = reference_contenders_levels[i][j]; - } - } - if (contender_height <= current_height) { - return; + // and check if any maximum level exceeds that of the start node + for (gko::size_type i = 0; i < reference_contenders.size(); ++i) { + auto contender_height = std::numeric_limits::min(); + for (gko::size_type j = 0; j < n; ++j) { + if (reference_contenders_levels[i][j] != + std::numeric_limits::max() && + reference_contenders_levels[i][j] > contender_height) { + contender_height = reference_contenders_levels[i][j]; } } - GTEST_FAIL() << "there is a contender with larger height"; - } + ASSERT_LE(contender_height, current_height); } } @@ -300,7 +297,7 @@ class Rcm : public CommonTestFixture { } std::vector permutation(data.size[0]); std::iota(permutation.begin(), permutation.end(), 0); - std::shuffle(permutation.begin(), permutation.end(), rng); + // std::shuffle(permutation.begin(), permutation.end(), rng); for (auto& entry : data.nonzeros) { entry.row = permutation[entry.row]; entry.column = permutation[entry.column]; From 0de9baaa9ce3c4e2c8b66d961ff896f15ba9ec9d Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 20 Dec 2023 13:56:57 +0100 Subject: [PATCH 17/29] compute and test inverse permutation --- common/cuda_hip/reorder/rcm_kernels.hpp.inc | 6 ++++++ test/reorder/rcm.cpp | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/common/cuda_hip/reorder/rcm_kernels.hpp.inc b/common/cuda_hip/reorder/rcm_kernels.hpp.inc index f56db6e7fd5..25195b25a90 100644 --- a/common/cuda_hip/reorder/rcm_kernels.hpp.inc +++ b/common/cuda_hip/reorder/rcm_kernels.hpp.inc @@ -603,6 +603,12 @@ void get_permutation(std::shared_ptr exec, sort_levels(exec, num_rows, row_ptrs, col_idxs, degrees.get_const_data(), comps, levels, permutation); thrust::reverse(thrust_policy(exec), permutation, permutation + num_rows); + if (inv_permutation) { + thrust::copy_n( + thrust_policy(exec), thrust::make_counting_iterator(IndexType{}), + num_rows, + thrust::make_permutation_iterator(inv_permutation, permutation)); + } } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); diff --git a/test/reorder/rcm.cpp b/test/reorder/rcm.cpp index f8b92561167..6e381c636cd 100644 --- a/test/reorder/rcm.cpp +++ b/test/reorder/rcm.cpp @@ -317,12 +317,17 @@ class Rcm : public CommonTestFixture { TEST_F(Rcm, PermutationIsRcmOrdered) { - d_reorder_op = reorder_type::build().on(exec)->generate(d_1138_bus_mtx); + d_reorder_op = reorder_type::build() + .with_construct_inverse_permutation(true) + .on(exec) + ->generate(d_1138_bus_mtx); auto perm = d_reorder_op->get_permutation(); check_rcm_ordered(o_1138_bus_mtx, perm.get(), d_reorder_op->get_parameters().strategy); + GKO_ASSERT_MTX_EQ_SPARSITY(perm->compute_inverse(), + d_reorder_op->get_inverse_permutation()); } TEST_F(Rcm, PermutationIsRcmOrderedMinDegree) @@ -337,6 +342,7 @@ TEST_F(Rcm, PermutationIsRcmOrderedMinDegree) check_rcm_ordered(o_1138_bus_mtx, perm.get(), d_reorder_op->get_parameters().strategy); + ASSERT_EQ(d_reorder_op->get_inverse_permutation(), nullptr); } TEST_F(Rcm, PermutationIsRcmOrderedNewInterface) From 8d7c35e8bda9491243c774a275981b2b56a48e41 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 20 Dec 2023 14:26:17 +0100 Subject: [PATCH 18/29] review updates Co-authored-by: Marcel Koch --- common/cuda_hip/reorder/rcm_kernels.hpp.inc | 1 - test/reorder/rcm.cpp | 11 +++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/common/cuda_hip/reorder/rcm_kernels.hpp.inc b/common/cuda_hip/reorder/rcm_kernels.hpp.inc index 25195b25a90..69f5d988e30 100644 --- a/common/cuda_hip/reorder/rcm_kernels.hpp.inc +++ b/common/cuda_hip/reorder/rcm_kernels.hpp.inc @@ -324,7 +324,6 @@ __global__ __launch_bounds__(default_block_size) void ubfs_level_kernel( atomic_type unsigned_unattached{}; const auto unattached = invalid_index(); memcpy(&unsigned_unattached, &unattached, sizeof(IndexType)); - auto parent = static_cast(row); for (auto nz = begin; nz < end; nz++) { const auto col = col_idxs[nz]; if (node_levels[col] == unattached && diff --git a/test/reorder/rcm.cpp b/test/reorder/rcm.cpp index 6e381c636cd..09cec19897d 100644 --- a/test/reorder/rcm.cpp +++ b/test/reorder/rcm.cpp @@ -31,7 +31,6 @@ class Rcm : public CommonTestFixture { protected: using v_type = double; using i_type = int; - using Mtx = gko::matrix::Dense; using CsrMtx = gko::matrix::Csr; using reorder_type = gko::reorder::Rcm; using new_reorder_type = gko::experimental::reorder::Rcm; @@ -81,7 +80,7 @@ class Rcm : public CommonTestFixture { } static void check_valid_start_node(std::shared_ptr mtx, - const i_type* permutation, i_type start, + i_type start, std::vector& already_visited, gko::reorder::starting_strategy strategy) { @@ -125,7 +124,7 @@ class Rcm : public CommonTestFixture { if (reference_current_levels[i] > current_height) { reference_contenders.clear(); } - reference_contenders.push_back(i); + reference_contenders.push_back(static_cast(i)); current_height = reference_current_levels[i]; } } @@ -180,7 +179,7 @@ class Rcm : public CommonTestFixture { ASSERT_GE(perm[i], 0) << i; ASSERT_LT(perm[i], n) << i; ASSERT_EQ(inv_perm[perm[i]], gko::invalid_index()) << i; - inv_perm[perm[i]] = i; + inv_perm[perm[i]] = static_cast(i); } // Now check for cm ordering. @@ -189,8 +188,8 @@ class Rcm : public CommonTestFixture { std::vector already_visited(n); while (base_offset != n) { // Assert valid start node. - check_valid_start_node(mtx, permutation, perm[base_offset], - already_visited, strategy); + check_valid_start_node(mtx, perm[base_offset], already_visited, + strategy); // Assert valid level structure. // Also update base_offset and mark as visited while at it. From c9b2f1086c2d4bf58f1d8dad89f07a7f27133fce Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 20 Dec 2023 14:42:32 +0100 Subject: [PATCH 19/29] remove old reordering test --- cuda/test/CMakeLists.txt | 1 - cuda/test/reorder/CMakeLists.txt | 1 - cuda/test/reorder/rcm_kernels.cpp | 60 ------------------------------- 3 files changed, 62 deletions(-) delete mode 100644 cuda/test/reorder/CMakeLists.txt delete mode 100644 cuda/test/reorder/rcm_kernels.cpp diff --git a/cuda/test/CMakeLists.txt b/cuda/test/CMakeLists.txt index 36a876b1377..f1048af73b9 100644 --- a/cuda/test/CMakeLists.txt +++ b/cuda/test/CMakeLists.txt @@ -2,6 +2,5 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(base) add_subdirectory(components) -add_subdirectory(reorder) add_subdirectory(solver) add_subdirectory(utils) diff --git a/cuda/test/reorder/CMakeLists.txt b/cuda/test/reorder/CMakeLists.txt deleted file mode 100644 index 79deba957b3..00000000000 --- a/cuda/test/reorder/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -ginkgo_create_test(rcm_kernels RESOURCE_TYPE cudagpu) diff --git a/cuda/test/reorder/rcm_kernels.cpp b/cuda/test/reorder/rcm_kernels.cpp deleted file mode 100644 index 9e5ed239857..00000000000 --- a/cuda/test/reorder/rcm_kernels.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors -// -// SPDX-License-Identifier: BSD-3-Clause - -#include - - -#include - - -#include "core/test/utils/assertions.hpp" -#include "cuda/test/utils.hpp" - - -namespace { - - -class Rcm : public CudaTestFixture { -protected: - using v_type = double; - using i_type = int; - using CsrMtx = gko::matrix::Csr; - using reorder_type = gko::reorder::Rcm; - using new_reorder_type = gko::experimental::reorder::Rcm; - using perm_type = gko::matrix::Permutation; - - - Rcm() - : p_mtx(gko::initialize({{1.0, 2.0, 0.0, -1.3, 2.1}, - {2.0, 5.0, 1.5, 0.0, 0.0}, - {0.0, 1.5, 1.5, 1.1, 0.0}, - {-1.3, 0.0, 1.1, 2.0, 0.0}, - {2.1, 0.0, 0.0, 0.0, 1.0}}, - exec)) - {} - - std::shared_ptr p_mtx; -}; - - -TEST_F(Rcm, IsEquivalentToRef) -{ - auto reorder_op = reorder_type::build().on(ref)->generate(p_mtx); - auto dreorder_op = reorder_type::build().on(exec)->generate(p_mtx); - - GKO_ASSERT_ARRAY_EQ(dreorder_op->get_permutation_array(), - reorder_op->get_permutation_array()); -} - - -TEST_F(Rcm, IsEquivalentToRefNewInterface) -{ - auto reorder_op = new_reorder_type::build().on(ref)->generate(p_mtx); - auto dreorder_op = new_reorder_type::build().on(exec)->generate(p_mtx); - - GKO_ASSERT_MTX_EQ_SPARSITY(dreorder_op, reorder_op); -} - - -} // namespace From ed75c866cc3d4be385adb2db074198fd10d6a944 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 9 Jan 2024 16:10:34 +0100 Subject: [PATCH 20/29] reenable dpcpp test --- test/reorder/CMakeLists.txt | 6 ++-- test/reorder/mc64.cpp | 11 +++---- test/reorder/rcm.cpp | 66 ++++++++++++++++++------------------- 3 files changed, 40 insertions(+), 43 deletions(-) diff --git a/test/reorder/CMakeLists.txt b/test/reorder/CMakeLists.txt index 2bd78c77607..4ee674e56d2 100644 --- a/test/reorder/CMakeLists.txt +++ b/test/reorder/CMakeLists.txt @@ -1,6 +1,6 @@ -ginkgo_create_common_test(amd DISABLE_EXECUTORS dpcpp) -ginkgo_create_common_test(mc64 DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_test(amd) +ginkgo_create_common_test(mc64) if (GINKGO_HAVE_METIS) ginkgo_create_common_test(nested_dissection) endif() -ginkgo_create_common_and_reference_test(rcm DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_and_reference_test(rcm) diff --git a/test/reorder/mc64.cpp b/test/reorder/mc64.cpp index 0dc0052956a..d4e4b176da7 100644 --- a/test/reorder/mc64.cpp +++ b/test/reorder/mc64.cpp @@ -18,12 +18,11 @@ namespace { class Mc64 : public CommonTestFixture { protected: - using v_type = double; - using i_type = int; - using CsrMtx = gko::matrix::Csr; - using reorder_type = gko::experimental::reorder::Mc64; - using result_type = gko::Composition; - using perm_type = gko::matrix::ScaledPermutation; + using CsrMtx = gko::matrix::Csr; + using reorder_type = + gko::experimental::reorder::Mc64; + using result_type = gko::Composition; + using perm_type = gko::matrix::ScaledPermutation; Mc64() : mtx(gko::initialize({{1.0, 2.0, 0.0, -1.3, 2.1}, diff --git a/test/reorder/rcm.cpp b/test/reorder/rcm.cpp index 09cec19897d..f2a4ae45164 100644 --- a/test/reorder/rcm.cpp +++ b/test/reorder/rcm.cpp @@ -29,12 +29,10 @@ namespace { class Rcm : public CommonTestFixture { protected: - using v_type = double; - using i_type = int; - using CsrMtx = gko::matrix::Csr; - using reorder_type = gko::reorder::Rcm; - using new_reorder_type = gko::experimental::reorder::Rcm; - using perm_type = gko::matrix::Permutation; + using CsrMtx = gko::matrix::Csr; + using reorder_type = gko::reorder::Rcm; + using new_reorder_type = gko::experimental::reorder::Rcm; + using perm_type = gko::matrix::Permutation; Rcm() : rng{63749}, @@ -48,14 +46,14 @@ class Rcm : public CommonTestFixture { static void ubfs_reference( std::shared_ptr mtx, - i_type* const + index_type* const levels, // Must be inf/max in all nodes connected to source - const i_type start) + const index_type start) { const auto row_ptrs = mtx->get_const_row_ptrs(); const auto col_idxs = mtx->get_const_col_idxs(); - std::deque q(0); + std::deque q(0); q.push_back(start); levels[start] = 0; @@ -80,7 +78,7 @@ class Rcm : public CommonTestFixture { } static void check_valid_start_node(std::shared_ptr mtx, - i_type start, + index_type start, std::vector& already_visited, gko::reorder::starting_strategy strategy) { @@ -88,14 +86,14 @@ class Rcm : public CommonTestFixture { ASSERT_FALSE(already_visited[start]) << start; const auto n = mtx->get_size()[0]; - auto degrees = std::vector(n); + auto degrees = std::vector(n); for (gko::size_type i = 0; i < n; ++i) { degrees[i] = mtx->get_const_row_ptrs()[i + 1] - mtx->get_const_row_ptrs()[i]; } if (strategy == gko::reorder::starting_strategy::minimum_degree) { - auto min_degree = std::numeric_limits::max(); + auto min_degree = std::numeric_limits::max(); for (gko::size_type i = 0; i < n; ++i) { if (!already_visited[i] && degrees[i] < min_degree) { min_degree = degrees[i]; @@ -108,35 +106,35 @@ class Rcm : public CommonTestFixture { // Check if any valid contender has a lowereq height than the // selected start node. - std::vector reference_current_levels(n); + std::vector reference_current_levels(n); std::fill(reference_current_levels.begin(), reference_current_levels.end(), - std::numeric_limits::max()); + std::numeric_limits::max()); ubfs_reference(mtx, &reference_current_levels[0], start); // First find all contender nodes in the last UBFS level - std::vector reference_contenders; - auto current_height = std::numeric_limits::min(); + std::vector reference_contenders; + auto current_height = std::numeric_limits::min(); for (gko::size_type i = 0; i < n; ++i) { if (reference_current_levels[i] != - std::numeric_limits::max() && + std::numeric_limits::max() && reference_current_levels[i] >= current_height) { if (reference_current_levels[i] > current_height) { reference_contenders.clear(); } - reference_contenders.push_back(static_cast(i)); + reference_contenders.push_back(static_cast(i)); current_height = reference_current_levels[i]; } } // then compute a level array for each of the contenders - std::vector> reference_contenders_levels( + std::vector> reference_contenders_levels( reference_contenders.size()); for (gko::size_type i = 0; i < reference_contenders.size(); ++i) { - std::vector reference_contender_levels(n); + std::vector reference_contender_levels(n); std::fill(reference_contender_levels.begin(), reference_contender_levels.end(), - std::numeric_limits::max()); + std::numeric_limits::max()); ubfs_reference(mtx, &reference_contender_levels[0], reference_contenders[i]); reference_contenders_levels[i] = reference_contender_levels; @@ -144,10 +142,10 @@ class Rcm : public CommonTestFixture { // and check if any maximum level exceeds that of the start node for (gko::size_type i = 0; i < reference_contenders.size(); ++i) { - auto contender_height = std::numeric_limits::min(); + auto contender_height = std::numeric_limits::min(); for (gko::size_type j = 0; j < n; ++j) { if (reference_contenders_levels[i][j] != - std::numeric_limits::max() && + std::numeric_limits::max() && reference_contenders_levels[i][j] > contender_height) { contender_height = reference_contenders_levels[i][j]; } @@ -165,21 +163,21 @@ class Rcm : public CommonTestFixture { const auto n = mtx->get_size()[0]; const auto row_ptrs = mtx->get_const_row_ptrs(); const auto col_idxs = mtx->get_const_col_idxs(); - auto degrees = std::vector(n); + auto degrees = std::vector(n); for (gko::size_type i = 0; i < n; ++i) { degrees[i] = mtx->get_const_row_ptrs()[i + 1] - mtx->get_const_row_ptrs()[i]; } // Following checks for cm ordering, therefore create a reversed perm. - std::vector perm(permutation, permutation + n); + std::vector perm(permutation, permutation + n); std::reverse(perm.begin(), perm.end()); - std::vector inv_perm(n, gko::invalid_index()); + std::vector inv_perm(n, gko::invalid_index()); for (gko::size_type i = 0; i < n; i++) { ASSERT_GE(perm[i], 0) << i; ASSERT_LT(perm[i], n) << i; - ASSERT_EQ(inv_perm[perm[i]], gko::invalid_index()) << i; - inv_perm[perm[i]] = static_cast(i); + ASSERT_EQ(inv_perm[perm[i]], gko::invalid_index()) << i; + inv_perm[perm[i]] = static_cast(i); } // Now check for cm ordering. @@ -193,16 +191,16 @@ class Rcm : public CommonTestFixture { // Assert valid level structure. // Also update base_offset and mark as visited while at it. - std::vector levels(n); + std::vector levels(n); std::fill(levels.begin(), levels.end(), - std::numeric_limits::max()); + std::numeric_limits::max()); ubfs_reference(mtx, &levels[0], perm[base_offset]); - i_type current_level = 0; + index_type current_level = 0; const auto previous_base_offset = base_offset; for (gko::size_type i = 0; i < n; ++i) { const auto node = perm[i]; - if (levels[node] != std::numeric_limits::max() && + if (levels[node] != std::numeric_limits::max() && !already_visited[node]) { already_visited[node] = true; ++base_offset; @@ -278,7 +276,7 @@ class Rcm : public CommonTestFixture { void build_multiple_connected_components() { - gko::matrix_data data; + gko::matrix_data data; d_1138_bus_mtx->write(data); const auto num_rows = data.size[0]; const auto nnz = data.nonzeros.size(); @@ -294,7 +292,7 @@ class Rcm : public CommonTestFixture { entry.value); } } - std::vector permutation(data.size[0]); + std::vector permutation(data.size[0]); std::iota(permutation.begin(), permutation.end(), 0); // std::shuffle(permutation.begin(), permutation.end(), rng); for (auto& entry : data.nonzeros) { From faf720e86eaadf5aa196d15830d7e1936dff44a9 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 9 Jan 2024 16:16:55 +0100 Subject: [PATCH 21/29] formatting --- test/reorder/rcm.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/reorder/rcm.cpp b/test/reorder/rcm.cpp index f2a4ae45164..61f8a95e6fc 100644 --- a/test/reorder/rcm.cpp +++ b/test/reorder/rcm.cpp @@ -24,9 +24,6 @@ #include "test/utils/executor.hpp" -namespace { - - class Rcm : public CommonTestFixture { protected: using CsrMtx = gko::matrix::Csr; @@ -312,6 +309,7 @@ class Rcm : public CommonTestFixture { std::shared_ptr d_reorder_op; }; + TEST_F(Rcm, PermutationIsRcmOrdered) { d_reorder_op = reorder_type::build() @@ -327,6 +325,7 @@ TEST_F(Rcm, PermutationIsRcmOrdered) d_reorder_op->get_inverse_permutation()); } + TEST_F(Rcm, PermutationIsRcmOrderedMinDegree) { d_reorder_op = @@ -342,6 +341,7 @@ TEST_F(Rcm, PermutationIsRcmOrderedMinDegree) ASSERT_EQ(d_reorder_op->get_inverse_permutation(), nullptr); } + TEST_F(Rcm, PermutationIsRcmOrderedNewInterface) { auto perm = new_reorder_type::build().on(exec)->generate(d_1138_bus_mtx); @@ -350,6 +350,7 @@ TEST_F(Rcm, PermutationIsRcmOrderedNewInterface) gko::reorder::starting_strategy::pseudo_peripheral); } + TEST_F(Rcm, PermutationIsRcmOrderedMultipleConnectedComponents) { this->build_multiple_connected_components(); @@ -376,5 +377,3 @@ TEST_F(Rcm, PermutationIsRcmOrderedMinDegreeMultipleConnectedComponents) check_rcm_ordered(o_1138_bus_mtx, perm.get(), d_reorder_op->get_parameters().strategy); } - -} // namespace From 9086dabb2c3441293482416e3e34812b862eac98 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 9 Jan 2024 16:41:09 +0100 Subject: [PATCH 22/29] clearer connected component detection --- test/reorder/rcm.cpp | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/test/reorder/rcm.cpp b/test/reorder/rcm.cpp index 61f8a95e6fc..3eb4a5d924d 100644 --- a/test/reorder/rcm.cpp +++ b/test/reorder/rcm.cpp @@ -18,6 +18,7 @@ #include +#include "core/components/disjoint_sets.hpp" #include "core/test/utils.hpp" #include "core/test/utils/assertions.hpp" #include "matrices/config.hpp" @@ -74,13 +75,29 @@ class Rcm : public CommonTestFixture { } } + static gko::disjoint_sets connected_components_reference( + std::shared_ptr mtx) + { + auto num_rows = static_cast(mtx->get_size()[0]); + const auto row_ptrs = mtx->get_const_row_ptrs(); + const auto cols = mtx->get_const_col_idxs(); + gko::disjoint_sets sets{mtx->get_executor(), num_rows}; + for (index_type row = 0; row < num_rows; row++) { + for (auto nz = row_ptrs[row]; nz < row_ptrs[row + 1]; nz++) { + const auto col = cols[nz]; + sets.join(row, col); + } + } + return sets; + } + static void check_valid_start_node(std::shared_ptr mtx, index_type start, - std::vector& already_visited, + const gko::disjoint_sets& cc, gko::reorder::starting_strategy strategy) { SCOPED_TRACE(start); - ASSERT_FALSE(already_visited[start]) << start; + const auto start_rep = cc.const_find(start); const auto n = mtx->get_size()[0]; auto degrees = std::vector(n); @@ -92,7 +109,7 @@ class Rcm : public CommonTestFixture { if (strategy == gko::reorder::starting_strategy::minimum_degree) { auto min_degree = std::numeric_limits::max(); for (gko::size_type i = 0; i < n; ++i) { - if (!already_visited[i] && degrees[i] < min_degree) { + if (cc.const_find(i) == start_rep && degrees[i] < min_degree) { min_degree = degrees[i]; } } @@ -113,8 +130,7 @@ class Rcm : public CommonTestFixture { std::vector reference_contenders; auto current_height = std::numeric_limits::min(); for (gko::size_type i = 0; i < n; ++i) { - if (reference_current_levels[i] != - std::numeric_limits::max() && + if (cc.const_find(i) == start_rep && reference_current_levels[i] >= current_height) { if (reference_current_levels[i] > current_height) { reference_contenders.clear(); @@ -140,9 +156,9 @@ class Rcm : public CommonTestFixture { // and check if any maximum level exceeds that of the start node for (gko::size_type i = 0; i < reference_contenders.size(); ++i) { auto contender_height = std::numeric_limits::min(); + index_type contender_degree = 0; for (gko::size_type j = 0; j < n; ++j) { - if (reference_contenders_levels[i][j] != - std::numeric_limits::max() && + if (cc.const_find(j) == start_rep && reference_contenders_levels[i][j] > contender_height) { contender_height = reference_contenders_levels[i][j]; } @@ -165,6 +181,7 @@ class Rcm : public CommonTestFixture { degrees[i] = mtx->get_const_row_ptrs()[i + 1] - mtx->get_const_row_ptrs()[i]; } + const auto cc = connected_components_reference(mtx); // Following checks for cm ordering, therefore create a reversed perm. std::vector perm(permutation, permutation + n); @@ -183,8 +200,7 @@ class Rcm : public CommonTestFixture { std::vector already_visited(n); while (base_offset != n) { // Assert valid start node. - check_valid_start_node(mtx, perm[base_offset], already_visited, - strategy); + check_valid_start_node(mtx, perm[base_offset], cc, strategy); // Assert valid level structure. // Also update base_offset and mark as visited while at it. From 211aecf9b5677228c9ee1243bfe7dc70dc958f31 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 9 Jan 2024 16:41:48 +0100 Subject: [PATCH 23/29] fix minimum degree choice in OMP starting node --- omp/reorder/rcm_kernels.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/omp/reorder/rcm_kernels.cpp b/omp/reorder/rcm_kernels.cpp index 64f5a789a3f..9fa65d117c0 100644 --- a/omp/reorder/rcm_kernels.cpp +++ b/omp/reorder/rcm_kernels.cpp @@ -368,10 +368,10 @@ std::pair rls_contender_and_height( #pragma omp for schedule(static) for (IndexType i = 1; i < num_vertices; ++i) { + // choose maximum level and minimum degree if (levels[i] != std::numeric_limits::max() && - std::tie(levels[i], degrees[i]) > - std::tie(local_contender.first.first, - local_contender.first.second)) { + std::tie(levels[i], local_contender.first.second) > + std::tie(local_contender.first.first, degrees[i])) { local_contender.first = std::make_pair(levels[i], degrees[i]); local_contender.second = i; } From 78a1f5c20355c6ac8a767c143fb2cefa49d0c0b3 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 9 Jan 2024 16:49:01 +0100 Subject: [PATCH 24/29] rename kernel get_permutation -> compute_permutation --- common/cuda_hip/reorder/rcm_kernels.hpp.inc | 15 ++++++++------- core/device_hooks/common_kernels.inc.cpp | 2 +- core/reorder/rcm.cpp | 8 ++++---- core/reorder/rcm_kernels.hpp | 14 +++++++------- dpcpp/reorder/rcm_kernels.dp.cpp | 4 ++-- omp/reorder/rcm_kernels.cpp | 14 +++++++------- reference/reorder/rcm_kernels.cpp | 16 ++++++++-------- 7 files changed, 37 insertions(+), 36 deletions(-) diff --git a/common/cuda_hip/reorder/rcm_kernels.hpp.inc b/common/cuda_hip/reorder/rcm_kernels.hpp.inc index 69f5d988e30..a20e1c6e99d 100644 --- a/common/cuda_hip/reorder/rcm_kernels.hpp.inc +++ b/common/cuda_hip/reorder/rcm_kernels.hpp.inc @@ -573,12 +573,13 @@ void sort_levels(std::shared_ptr exec, template -void get_permutation(std::shared_ptr exec, - const IndexType num_rows, const IndexType* const row_ptrs, - const IndexType* const col_idxs, - IndexType* const permutation, - IndexType* const inv_permutation, - const gko::reorder::starting_strategy strategy) +void compute_permutation(std::shared_ptr exec, + const IndexType num_rows, + const IndexType* const row_ptrs, + const IndexType* const col_idxs, + IndexType* const permutation, + IndexType* const inv_permutation, + const gko::reorder::starting_strategy strategy) { if (num_rows == 0) { return; @@ -610,4 +611,4 @@ void get_permutation(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_COMPUTE_PERMUTATION_KERNEL); diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 9fa01126ce4..963bde5bb90 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -897,7 +897,7 @@ GKO_STUB_VALUE_AND_INDEX_TYPE( namespace rcm { -GKO_STUB_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); +GKO_STUB_INDEX_TYPE(GKO_DECLARE_RCM_COMPUTE_PERMUTATION_KERNEL); } // namespace rcm diff --git a/core/reorder/rcm.cpp b/core/reorder/rcm.cpp index 8409f47d404..5be8409ba79 100644 --- a/core/reorder/rcm.cpp +++ b/core/reorder/rcm.cpp @@ -29,7 +29,7 @@ namespace rcm { namespace { -GKO_REGISTER_OPERATION(get_permutation, rcm::get_permutation); +GKO_REGISTER_OPERATION(compute_permutation, rcm::compute_permutation); } // anonymous namespace @@ -43,9 +43,9 @@ void rcm_reorder(const matrix::SparsityCsr* mtx, { const auto exec = mtx->get_executor(); const IndexType num_rows = mtx->get_size()[0]; - exec->run(rcm::make_get_permutation(num_rows, mtx->get_const_row_ptrs(), - mtx->get_const_col_idxs(), permutation, - inv_permutation, strategy)); + exec->run(rcm::make_compute_permutation( + num_rows, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), + permutation, inv_permutation, strategy)); } diff --git a/core/reorder/rcm_kernels.hpp b/core/reorder/rcm_kernels.hpp index 3ee37faba68..77e1ce68ff0 100644 --- a/core/reorder/rcm_kernels.hpp +++ b/core/reorder/rcm_kernels.hpp @@ -27,16 +27,16 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL(IndexType) \ - void get_permutation(std::shared_ptr exec, \ - IndexType num_vertices, const IndexType* row_ptrs, \ - const IndexType* col_idxs, IndexType* permutation, \ - IndexType* inv_permutation, \ - gko::reorder::starting_strategy strategy) +#define GKO_DECLARE_RCM_COMPUTE_PERMUTATION_KERNEL(IndexType) \ + void compute_permutation( \ + std::shared_ptr exec, IndexType num_vertices, \ + const IndexType* row_ptrs, const IndexType* col_idxs, \ + IndexType* permutation, IndexType* inv_permutation, \ + gko::reorder::starting_strategy strategy) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ - GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL(IndexType) + GKO_DECLARE_RCM_COMPUTE_PERMUTATION_KERNEL(IndexType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(rcm, GKO_DECLARE_ALL_AS_TEMPLATES); diff --git a/dpcpp/reorder/rcm_kernels.dp.cpp b/dpcpp/reorder/rcm_kernels.dp.cpp index 4ce42826351..95a8fa38b80 100644 --- a/dpcpp/reorder/rcm_kernels.dp.cpp +++ b/dpcpp/reorder/rcm_kernels.dp.cpp @@ -28,13 +28,13 @@ namespace rcm { template -void get_permutation( +void compute_permutation( std::shared_ptr exec, const IndexType num_vertices, const IndexType* const row_ptrs, const IndexType* const col_idxs, IndexType* const permutation, IndexType* const inv_permutation, const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_COMPUTE_PERMUTATION_KERNEL); } // namespace rcm diff --git a/omp/reorder/rcm_kernels.cpp b/omp/reorder/rcm_kernels.cpp index 9fa65d117c0..3abdfc96a59 100644 --- a/omp/reorder/rcm_kernels.cpp +++ b/omp/reorder/rcm_kernels.cpp @@ -744,12 +744,12 @@ IndexType handle_isolated_nodes(std::shared_ptr exec, * Computes a rcm permutation, employing the parallel unordered rcm algorithm. */ template -void get_permutation(std::shared_ptr exec, - const IndexType num_vertices, - const IndexType* const row_ptrs, - const IndexType* const col_idxs, IndexType* const perm, - IndexType* const inv_perm, - const gko::reorder::starting_strategy strategy) +void compute_permutation(std::shared_ptr exec, + const IndexType num_vertices, + const IndexType* const row_ptrs, + const IndexType* const col_idxs, IndexType* const perm, + IndexType* const inv_perm, + const gko::reorder::starting_strategy strategy) { // compute node degrees array degree_array{exec, static_cast(num_vertices)}; @@ -831,7 +831,7 @@ void get_permutation(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_COMPUTE_PERMUTATION_KERNEL); } // namespace rcm diff --git a/reference/reorder/rcm_kernels.cpp b/reference/reorder/rcm_kernels.cpp index fe6b8718ac4..077b5a009bc 100644 --- a/reference/reorder/rcm_kernels.cpp +++ b/reference/reorder/rcm_kernels.cpp @@ -180,13 +180,13 @@ IndexType find_starting_node(std::shared_ptr exec, * Computes a RCM reordering using a naive sequential algorithm. */ template -void get_permutation(std::shared_ptr exec, - const IndexType num_vertices, - const IndexType* const row_ptrs, - const IndexType* const col_idxs, - IndexType* const permutation, - IndexType* const inv_permutation, - const gko::reorder::starting_strategy strategy) +void compute_permutation(std::shared_ptr exec, + const IndexType num_vertices, + const IndexType* const row_ptrs, + const IndexType* const col_idxs, + IndexType* const permutation, + IndexType* const inv_permutation, + const gko::reorder::starting_strategy strategy) { // compute node degrees array degree_array{exec, static_cast(num_vertices)}; @@ -255,7 +255,7 @@ void get_permutation(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_COMPUTE_PERMUTATION_KERNEL); } // namespace rcm From 147c28020fc3dcab38caeca94e0994906240efa3 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 10 Jan 2024 10:40:34 +0100 Subject: [PATCH 25/29] review updates Co-authored-by: Yuhsiang M. Tsai --- common/cuda_hip/reorder/rcm_kernels.hpp.inc | 1 + test/reorder/rcm.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/common/cuda_hip/reorder/rcm_kernels.hpp.inc b/common/cuda_hip/reorder/rcm_kernels.hpp.inc index a20e1c6e99d..05fe3bce07e 100644 --- a/common/cuda_hip/reorder/rcm_kernels.hpp.inc +++ b/common/cuda_hip/reorder/rcm_kernels.hpp.inc @@ -286,6 +286,7 @@ components_data compute_connected_components( } +/** level structure for unordered breadth first search. */ template struct ubfs_levels { /** Mapping node -> level */ diff --git a/test/reorder/rcm.cpp b/test/reorder/rcm.cpp index 3eb4a5d924d..68ba5c13d4a 100644 --- a/test/reorder/rcm.cpp +++ b/test/reorder/rcm.cpp @@ -307,7 +307,7 @@ class Rcm : public CommonTestFixture { } std::vector permutation(data.size[0]); std::iota(permutation.begin(), permutation.end(), 0); - // std::shuffle(permutation.begin(), permutation.end(), rng); + std::shuffle(permutation.begin(), permutation.end(), rng); for (auto& entry : data.nonzeros) { entry.row = permutation[entry.row]; entry.column = permutation[entry.column]; From 8a9a7dfef64e4fd1e2f6a14e5ba4f6156cdf8415 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 17 Jan 2024 17:02:16 +0100 Subject: [PATCH 26/29] use stable sort in RCM --- reference/reorder/rcm_kernels.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/reorder/rcm_kernels.cpp b/reference/reorder/rcm_kernels.cpp index 077b5a009bc..3c6c9567d36 100644 --- a/reference/reorder/rcm_kernels.cpp +++ b/reference/reorder/rcm_kernels.cpp @@ -240,7 +240,7 @@ void compute_permutation(std::shared_ptr exec, } // Sort all just-added neighbors by degree. - std::sort( + std::stable_sort( linear_queue_p + prev_head_offset, linear_queue_p + head_offset, [&](IndexType i, IndexType j) { return degrees[i] < degrees[j]; }); From 3f627416bbfe2ee60858d14ed9ec413331e54750 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 17 Jan 2024 17:02:45 +0100 Subject: [PATCH 27/29] add isolated vertices to test --- test/reorder/rcm.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/reorder/rcm.cpp b/test/reorder/rcm.cpp index 68ba5c13d4a..9d0749da73f 100644 --- a/test/reorder/rcm.cpp +++ b/test/reorder/rcm.cpp @@ -294,8 +294,9 @@ class Rcm : public CommonTestFixture { const auto num_rows = data.size[0]; const auto nnz = data.nonzeros.size(); const int num_copies = 5; - data.size[0] *= num_copies; - data.size[1] *= num_copies; + data.size[0] = + num_rows * num_copies + 10; // add a handful of isolated vertices + data.size[1] = data.size[0]; for (gko::size_type i = 0; i < nnz; i++) { const auto entry = data.nonzeros[i]; // create copies of the matrix From 7e0f19a3e4fb1bcc9b83f10ba5c5a1cece2279db Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 18 Jan 2024 14:37:36 +0100 Subject: [PATCH 28/29] add failing test case --- matrices/CMakeLists.txt | 1 + matrices/config.hpp.in | 2 + matrices/test/1138_bus_shuffled.mtx | 4056 +++++++++++++++++++++++++++ test/reorder/rcm.cpp | 14 + 4 files changed, 4073 insertions(+) create mode 100644 matrices/test/1138_bus_shuffled.mtx diff --git a/matrices/CMakeLists.txt b/matrices/CMakeLists.txt index 391bb346ae0..d689022c065 100644 --- a/matrices/CMakeLists.txt +++ b/matrices/CMakeLists.txt @@ -26,6 +26,7 @@ configure_file("test/isai_u_excess_rhs.mtx" "${Ginkgo_BINARY_DIR}/matrices/test/ configure_file("test/isai_u_inv.mtx" "${Ginkgo_BINARY_DIR}/matrices/test/isai_u_inv.mtx") configure_file("test/isai_u_inv_partial.mtx" "${Ginkgo_BINARY_DIR}/matrices/test/isai_u_inv_partial.mtx") configure_file("test/1138_bus.mtx" "${Ginkgo_BINARY_DIR}/matrices/test/1138_bus.mtx") +configure_file("test/1138_bus_shuffled.mtx" "${Ginkgo_BINARY_DIR}/matrices/test/1138_bus_shuffled.mtx") configure_file("test/isai_a.mtx" "${Ginkgo_BINARY_DIR}/matrices/test/isai_a.mtx") configure_file("test/isai_a_excess.mtx" "${Ginkgo_BINARY_DIR}/matrices/test/isai_a_excess.mtx") configure_file("test/isai_a_excess_rhs.mtx" "${Ginkgo_BINARY_DIR}/matrices/test/isai_a_excess_rhs.mtx") diff --git a/matrices/config.hpp.in b/matrices/config.hpp.in index 4420d253154..46d734c9b6c 100644 --- a/matrices/config.hpp.in +++ b/matrices/config.hpp.in @@ -39,6 +39,8 @@ const char* location_ani4_amd_chol_mtx = const char* location_isai_mtxs = "@Ginkgo_BINARY_DIR@/matrices/test/"; const char* location_1138_bus_mtx = "@Ginkgo_BINARY_DIR@/matrices/test/1138_bus.mtx"; +const char* location_1138_bus_shuffled_mtx = + "@Ginkgo_BINARY_DIR@/matrices/test/1138_bus_shuffled.mtx"; const char* location_1138_bus_mc64_result = "@Ginkgo_BINARY_DIR@/matrices/test/1138_bus_mc64_result.mtx"; const char* location_nontrivial_mc64_example = diff --git a/matrices/test/1138_bus_shuffled.mtx b/matrices/test/1138_bus_shuffled.mtx new file mode 100644 index 00000000000..db1de2c2dfe --- /dev/null +++ b/matrices/test/1138_bus_shuffled.mtx @@ -0,0 +1,4056 @@ +%%MatrixMarket matrix coordinate real general +1138 1138 4054 +1 1 1 +1 260 1 +2 2 1 +2 100 1 +2 999 1 +3 3 1 +3 506 1 +4 4 1 +4 779 1 +4 853 1 +5 5 1 +5 15 1 +5 61 1 +5 315 1 +5 489 1 +5 1061 1 +6 6 1 +6 206 1 +6 269 1 +6 603 1 +6 1011 1 +7 7 1 +7 139 1 +7 359 1 +7 575 1 +7 1010 1 +8 8 1 +8 53 1 +8 298 1 +9 9 1 +9 662 1 +9 724 1 +9 1027 1 +10 10 1 +10 356 1 +10 419 1 +11 11 1 +11 689 1 +11 1021 1 +11 1106 1 +12 12 1 +12 50 1 +12 1006 1 +13 13 1 +13 166 1 +13 207 1 +13 562 1 +14 14 1 +14 99 1 +14 229 1 +14 390 1 +14 581 1 +15 5 1 +15 15 1 +15 656 1 +16 16 1 +16 206 1 +16 1072 1 +17 17 1 +17 342 1 +17 741 1 +17 790 1 +17 913 1 +18 18 1 +18 764 1 +19 19 1 +19 247 1 +19 530 1 +20 20 1 +20 762 1 +20 821 1 +21 21 1 +21 203 1 +21 360 1 +21 432 1 +21 512 1 +21 740 1 +21 996 1 +22 22 1 +22 370 1 +22 480 1 +22 744 1 +23 23 1 +23 170 1 +23 479 1 +23 515 1 +23 542 1 +23 792 1 +23 941 1 +24 24 1 +24 653 1 +24 1088 1 +25 25 1 +25 789 1 +25 1019 1 +26 26 1 +26 140 1 +26 363 1 +26 391 1 +27 27 1 +27 493 1 +27 684 1 +28 28 1 +28 279 1 +28 342 1 +28 464 1 +29 29 1 +29 455 1 +30 30 1 +30 297 1 +30 391 1 +30 849 1 +30 975 1 +31 31 1 +31 170 1 +31 648 1 +31 987 1 +32 32 1 +32 727 1 +32 1131 1 +33 33 1 +33 237 1 +34 34 1 +34 646 1 +35 35 1 +35 748 1 +36 36 1 +36 549 1 +36 672 1 +36 776 1 +37 37 1 +37 202 1 +37 738 1 +37 786 1 +37 888 1 +38 38 1 +38 113 1 +38 180 1 +39 39 1 +39 277 1 +39 281 1 +39 582 1 +40 40 1 +40 400 1 +40 583 1 +41 41 1 +41 563 1 +42 42 1 +42 1081 1 +43 43 1 +43 703 1 +43 921 1 +44 44 1 +44 139 1 +44 595 1 +44 680 1 +44 938 1 +45 45 1 +45 293 1 +46 46 1 +46 225 1 +46 389 1 +46 772 1 +46 816 1 +47 47 1 +47 218 1 +47 861 1 +48 48 1 +48 642 1 +48 856 1 +49 49 1 +49 58 1 +50 12 1 +50 50 1 +50 439 1 +50 858 1 +51 51 1 +51 184 1 +51 209 1 +51 1109 1 +52 52 1 +52 171 1 +52 530 1 +52 822 1 +53 8 1 +53 53 1 +53 719 1 +54 54 1 +54 550 1 +55 55 1 +55 145 1 +55 158 1 +55 201 1 +55 247 1 +55 248 1 +55 322 1 +55 645 1 +55 927 1 +55 1054 1 +55 1075 1 +55 1133 1 +56 56 1 +56 107 1 +56 156 1 +56 254 1 +56 527 1 +56 538 1 +56 793 1 +57 57 1 +57 263 1 +57 842 1 +58 49 1 +58 58 1 +58 81 1 +58 892 1 +58 1015 1 +59 59 1 +59 73 1 +59 753 1 +60 60 1 +60 149 1 +60 774 1 +60 955 1 +61 5 1 +61 61 1 +61 656 1 +61 679 1 +61 859 1 +61 1049 1 +62 62 1 +62 701 1 +63 63 1 +63 494 1 +64 64 1 +64 327 1 +64 868 1 +65 65 1 +65 183 1 +65 326 1 +65 372 1 +65 583 1 +65 664 1 +65 775 1 +65 965 1 +66 66 1 +66 663 1 +66 982 1 +67 67 1 +67 512 1 +67 1116 1 +67 1118 1 +68 68 1 +68 136 1 +68 845 1 +69 69 1 +69 139 1 +70 70 1 +70 278 1 +70 738 1 +71 71 1 +71 510 1 +71 543 1 +71 793 1 +71 889 1 +72 72 1 +72 456 1 +72 803 1 +73 59 1 +73 73 1 +73 632 1 +73 860 1 +74 74 1 +74 356 1 +75 75 1 +75 623 1 +75 952 1 +75 981 1 +76 76 1 +76 353 1 +77 77 1 +77 193 1 +78 78 1 +78 597 1 +78 1017 1 +79 79 1 +79 632 1 +79 781 1 +80 80 1 +80 415 1 +80 825 1 +81 58 1 +81 81 1 +81 291 1 +82 82 1 +82 821 1 +83 83 1 +83 704 1 +84 84 1 +84 93 1 +84 110 1 +85 85 1 +85 457 1 +86 86 1 +86 585 1 +86 830 1 +86 998 1 +87 87 1 +87 436 1 +87 847 1 +87 922 1 +88 88 1 +88 188 1 +88 212 1 +88 319 1 +88 402 1 +89 89 1 +89 749 1 +90 90 1 +90 525 1 +91 91 1 +91 262 1 +91 674 1 +92 92 1 +92 140 1 +93 84 1 +93 93 1 +93 255 1 +93 320 1 +93 905 1 +94 94 1 +94 617 1 +94 633 1 +94 819 1 +95 95 1 +95 293 1 +95 1060 1 +96 96 1 +96 132 1 +96 258 1 +96 365 1 +96 1092 1 +97 97 1 +97 308 1 +97 393 1 +97 1094 1 +98 98 1 +98 245 1 +98 413 1 +98 720 1 +98 747 1 +98 879 1 +98 927 1 +98 1080 1 +99 14 1 +99 99 1 +99 165 1 +99 996 1 +100 2 1 +100 100 1 +100 280 1 +101 101 1 +101 153 1 +101 722 1 +102 102 1 +102 279 1 +102 689 1 +102 876 1 +103 103 1 +103 947 1 +104 104 1 +104 1121 1 +105 105 1 +105 617 1 +106 106 1 +106 153 1 +106 644 1 +106 1052 1 +107 56 1 +107 107 1 +107 251 1 +107 254 1 +107 496 1 +107 538 1 +107 657 1 +108 108 1 +108 140 1 +108 884 1 +108 1042 1 +109 109 1 +109 552 1 +109 800 1 +110 84 1 +110 110 1 +110 180 1 +110 693 1 +110 922 1 +111 111 1 +111 780 1 +112 112 1 +112 637 1 +112 707 1 +112 949 1 +113 38 1 +113 113 1 +113 121 1 +113 126 1 +113 953 1 +113 967 1 +114 114 1 +114 753 1 +114 781 1 +115 115 1 +115 1059 1 +116 116 1 +116 763 1 +116 826 1 +116 1029 1 +117 117 1 +117 578 1 +117 697 1 +118 118 1 +118 836 1 +118 1044 1 +119 119 1 +119 880 1 +119 1088 1 +120 120 1 +120 832 1 +120 989 1 +120 996 1 +121 113 1 +121 121 1 +122 122 1 +122 415 1 +122 558 1 +123 123 1 +123 905 1 +124 124 1 +124 421 1 +124 537 1 +124 639 1 +124 683 1 +124 864 1 +125 125 1 +125 181 1 +125 249 1 +125 445 1 +125 653 1 +125 1007 1 +126 113 1 +126 126 1 +126 325 1 +126 905 1 +127 127 1 +127 890 1 +127 898 1 +128 128 1 +128 313 1 +128 540 1 +128 989 1 +128 1014 1 +128 1025 1 +128 1070 1 +128 1122 1 +129 129 1 +129 645 1 +129 872 1 +130 130 1 +130 483 1 +131 131 1 +131 324 1 +131 874 1 +132 96 1 +132 132 1 +132 258 1 +132 280 1 +132 285 1 +132 681 1 +132 857 1 +133 133 1 +133 341 1 +133 451 1 +133 891 1 +134 134 1 +134 564 1 +134 640 1 +135 135 1 +135 627 1 +136 68 1 +136 136 1 +136 169 1 +137 137 1 +137 798 1 +137 1124 1 +138 138 1 +138 653 1 +138 1088 1 +139 7 1 +139 44 1 +139 69 1 +139 139 1 +140 26 1 +140 92 1 +140 108 1 +140 140 1 +140 261 1 +140 510 1 +140 561 1 +140 721 1 +140 797 1 +140 884 1 +140 934 1 +140 971 1 +140 1084 1 +141 141 1 +141 783 1 +142 142 1 +142 355 1 +142 543 1 +143 143 1 +143 918 1 +144 144 1 +144 555 1 +144 894 1 +144 948 1 +145 55 1 +145 145 1 +145 815 1 +146 146 1 +146 530 1 +146 927 1 +147 147 1 +147 409 1 +147 915 1 +148 148 1 +148 469 1 +148 791 1 +149 60 1 +149 149 1 +149 162 1 +149 293 1 +149 525 1 +149 673 1 +149 778 1 +149 909 1 +149 1060 1 +149 1089 1 +150 150 1 +150 1075 1 +150 1133 1 +151 151 1 +151 970 1 +152 152 1 +152 189 1 +152 252 1 +152 284 1 +152 408 1 +152 475 1 +152 917 1 +152 1028 1 +153 101 1 +153 106 1 +153 153 1 +154 154 1 +154 636 1 +154 865 1 +154 965 1 +154 1055 1 +155 155 1 +155 415 1 +156 56 1 +156 156 1 +156 254 1 +157 157 1 +157 689 1 +157 850 1 +158 55 1 +158 158 1 +158 1133 1 +159 159 1 +159 263 1 +159 415 1 +159 1073 1 +160 160 1 +160 1115 1 +161 161 1 +161 234 1 +161 525 1 +162 149 1 +162 162 1 +162 293 1 +163 163 1 +163 220 1 +163 367 1 +163 852 1 +164 164 1 +164 996 1 +165 99 1 +165 165 1 +166 13 1 +166 166 1 +166 274 1 +166 432 1 +167 167 1 +167 321 1 +168 168 1 +168 232 1 +168 279 1 +168 303 1 +168 658 1 +168 742 1 +168 954 1 +168 959 1 +168 1119 1 +169 136 1 +169 169 1 +169 411 1 +169 655 1 +169 770 1 +170 23 1 +170 31 1 +170 170 1 +170 968 1 +171 52 1 +171 171 1 +171 208 1 +172 172 1 +172 380 1 +172 858 1 +173 173 1 +173 652 1 +173 728 1 +173 764 1 +173 939 1 +174 174 1 +174 603 1 +175 175 1 +175 1109 1 +176 176 1 +176 382 1 +176 429 1 +177 177 1 +177 426 1 +178 178 1 +178 825 1 +179 179 1 +179 1124 1 +180 38 1 +180 110 1 +180 180 1 +180 587 1 +180 910 1 +181 125 1 +181 181 1 +181 1007 1 +182 182 1 +182 202 1 +182 354 1 +182 570 1 +182 594 1 +182 795 1 +182 852 1 +183 65 1 +183 183 1 +183 664 1 +184 51 1 +184 184 1 +184 1050 1 +185 185 1 +185 688 1 +186 186 1 +186 839 1 +186 1011 1 +187 187 1 +187 271 1 +187 961 1 +188 88 1 +188 188 1 +188 319 1 +188 402 1 +189 152 1 +189 189 1 +190 190 1 +190 315 1 +191 191 1 +191 435 1 +191 876 1 +191 1119 1 +192 192 1 +192 422 1 +192 716 1 +193 77 1 +193 193 1 +193 241 1 +193 563 1 +193 696 1 +193 942 1 +194 194 1 +194 970 1 +195 195 1 +195 279 1 +195 689 1 +195 876 1 +196 196 1 +196 526 1 +197 197 1 +197 200 1 +197 568 1 +197 876 1 +198 198 1 +198 530 1 +198 1054 1 +199 199 1 +199 256 1 +199 630 1 +200 197 1 +200 200 1 +200 876 1 +201 55 1 +201 201 1 +201 670 1 +202 37 1 +202 182 1 +202 202 1 +202 329 1 +203 21 1 +203 203 1 +204 204 1 +204 413 1 +204 460 1 +205 205 1 +205 876 1 +206 6 1 +206 16 1 +206 206 1 +207 13 1 +207 207 1 +208 171 1 +208 208 1 +208 1033 1 +209 51 1 +209 209 1 +210 210 1 +210 232 1 +210 959 1 +211 211 1 +211 578 1 +211 643 1 +212 88 1 +212 212 1 +212 402 1 +213 213 1 +213 515 1 +213 956 1 +214 214 1 +214 313 1 +214 819 1 +214 1067 1 +215 215 1 +215 1013 1 +216 216 1 +216 799 1 +217 217 1 +217 1070 1 +218 47 1 +218 218 1 +218 1133 1 +219 219 1 +219 955 1 +219 991 1 +220 163 1 +220 220 1 +221 221 1 +221 384 1 +221 512 1 +221 685 1 +222 222 1 +222 965 1 +223 223 1 +223 309 1 +224 224 1 +224 679 1 +225 46 1 +225 225 1 +225 226 1 +225 453 1 +225 1112 1 +226 225 1 +226 226 1 +226 243 1 +227 227 1 +227 801 1 +227 1120 1 +228 228 1 +228 357 1 +228 907 1 +228 1076 1 +229 14 1 +229 229 1 +230 230 1 +230 422 1 +230 494 1 +230 688 1 +230 848 1 +231 231 1 +231 525 1 +231 600 1 +232 168 1 +232 210 1 +232 232 1 +232 346 1 +233 233 1 +233 467 1 +234 161 1 +234 234 1 +234 270 1 +234 319 1 +234 327 1 +234 392 1 +234 423 1 +234 837 1 +235 235 1 +235 705 1 +235 897 1 +235 932 1 +236 236 1 +236 1046 1 +237 33 1 +237 237 1 +237 848 1 +237 1008 1 +238 238 1 +238 413 1 +239 239 1 +239 246 1 +240 240 1 +240 354 1 +240 979 1 +241 193 1 +241 241 1 +241 849 1 +242 242 1 +242 893 1 +243 226 1 +243 243 1 +243 486 1 +243 926 1 +243 1112 1 +244 244 1 +244 684 1 +244 844 1 +244 869 1 +244 988 1 +245 98 1 +245 245 1 +245 926 1 +246 239 1 +246 246 1 +246 262 1 +246 498 1 +246 623 1 +246 1074 1 +247 19 1 +247 55 1 +247 247 1 +248 55 1 +248 248 1 +248 1062 1 +249 125 1 +249 249 1 +249 1007 1 +250 250 1 +250 592 1 +250 729 1 +250 957 1 +251 107 1 +251 251 1 +251 331 1 +251 423 1 +251 538 1 +251 1065 1 +252 152 1 +252 252 1 +252 272 1 +252 349 1 +253 253 1 +253 697 1 +254 56 1 +254 107 1 +254 156 1 +254 254 1 +254 551 1 +254 884 1 +254 958 1 +255 93 1 +255 255 1 +256 199 1 +256 256 1 +256 619 1 +257 257 1 +257 279 1 +257 597 1 +258 96 1 +258 132 1 +258 258 1 +259 259 1 +259 474 1 +260 1 1 +260 260 1 +260 706 1 +261 140 1 +261 261 1 +262 91 1 +262 246 1 +262 262 1 +262 728 1 +262 765 1 +262 865 1 +262 1086 1 +263 57 1 +263 159 1 +263 263 1 +263 574 1 +263 801 1 +263 874 1 +263 1073 1 +263 1120 1 +264 264 1 +264 447 1 +265 265 1 +265 288 1 +265 562 1 +265 675 1 +266 266 1 +266 1020 1 +267 267 1 +267 421 1 +267 851 1 +267 910 1 +267 939 1 +267 1132 1 +268 268 1 +268 546 1 +268 1001 1 +268 1027 1 +268 1083 1 +269 6 1 +269 269 1 +269 931 1 +270 234 1 +270 270 1 +271 187 1 +271 271 1 +271 775 1 +271 965 1 +272 252 1 +272 272 1 +273 273 1 +273 315 1 +273 677 1 +273 937 1 +274 166 1 +274 274 1 +275 275 1 +275 1106 1 +276 276 1 +276 670 1 +276 757 1 +277 39 1 +277 277 1 +277 699 1 +277 1120 1 +278 70 1 +278 278 1 +278 529 1 +279 28 1 +279 102 1 +279 168 1 +279 195 1 +279 257 1 +279 279 1 +279 528 1 +279 597 1 +279 610 1 +279 689 1 +279 695 1 +279 1041 1 +279 1105 1 +280 100 1 +280 132 1 +280 280 1 +280 285 1 +280 334 1 +280 678 1 +280 924 1 +280 999 1 +281 39 1 +281 281 1 +281 1059 1 +282 282 1 +282 745 1 +283 283 1 +283 378 1 +283 857 1 +284 152 1 +284 284 1 +285 132 1 +285 280 1 +285 285 1 +286 286 1 +286 441 1 +286 628 1 +286 936 1 +286 961 1 +287 287 1 +287 317 1 +288 265 1 +288 288 1 +288 294 1 +288 486 1 +288 562 1 +288 577 1 +288 945 1 +288 1135 1 +289 289 1 +289 906 1 +290 290 1 +290 455 1 +291 81 1 +291 291 1 +291 590 1 +291 863 1 +292 292 1 +292 1116 1 +293 45 1 +293 95 1 +293 149 1 +293 162 1 +293 293 1 +293 319 1 +293 511 1 +293 525 1 +293 933 1 +293 950 1 +293 984 1 +293 1060 1 +294 288 1 +294 294 1 +294 690 1 +295 295 1 +295 363 1 +295 391 1 +295 776 1 +295 802 1 +295 1084 1 +296 296 1 +296 483 1 +296 509 1 +296 536 1 +297 30 1 +297 297 1 +297 939 1 +297 981 1 +298 8 1 +298 298 1 +298 446 1 +298 621 1 +299 299 1 +299 419 1 +300 300 1 +300 421 1 +301 301 1 +301 601 1 +302 302 1 +302 414 1 +302 1075 1 +303 168 1 +303 303 1 +303 1030 1 +304 304 1 +304 728 1 +305 305 1 +305 1125 1 +306 306 1 +306 421 1 +306 480 1 +307 307 1 +307 904 1 +307 1122 1 +308 97 1 +308 308 1 +308 534 1 +308 623 1 +309 223 1 +309 309 1 +309 329 1 +309 427 1 +309 503 1 +309 738 1 +309 814 1 +310 310 1 +310 336 1 +310 466 1 +311 311 1 +311 420 1 +311 739 1 +312 312 1 +312 533 1 +312 552 1 +313 128 1 +313 214 1 +313 313 1 +313 540 1 +313 565 1 +314 314 1 +314 417 1 +314 512 1 +315 5 1 +315 190 1 +315 273 1 +315 315 1 +315 349 1 +315 502 1 +315 627 1 +315 697 1 +316 316 1 +316 356 1 +316 434 1 +316 569 1 +317 287 1 +317 317 1 +317 355 1 +317 785 1 +317 1034 1 +317 1053 1 +318 318 1 +318 412 1 +318 482 1 +318 491 1 +318 514 1 +318 777 1 +318 1134 1 +319 88 1 +319 188 1 +319 234 1 +319 293 1 +319 319 1 +319 402 1 +319 606 1 +320 93 1 +320 320 1 +320 599 1 +320 1125 1 +321 167 1 +321 321 1 +321 586 1 +322 55 1 +322 322 1 +322 927 1 +323 323 1 +323 578 1 +324 131 1 +324 324 1 +324 1136 1 +325 126 1 +325 325 1 +325 369 1 +325 466 1 +326 65 1 +326 326 1 +327 64 1 +327 234 1 +327 327 1 +327 468 1 +327 692 1 +327 746 1 +327 884 1 +327 978 1 +328 328 1 +328 349 1 +329 202 1 +329 309 1 +329 329 1 +329 427 1 +329 517 1 +329 738 1 +329 895 1 +330 330 1 +330 769 1 +330 1093 1 +331 251 1 +331 331 1 +332 332 1 +332 1029 1 +333 333 1 +333 558 1 +333 584 1 +333 710 1 +333 727 1 +333 882 1 +333 901 1 +333 1135 1 +334 280 1 +334 334 1 +335 335 1 +335 806 1 +336 310 1 +336 336 1 +336 382 1 +336 466 1 +336 703 1 +336 841 1 +336 921 1 +337 337 1 +337 558 1 +338 338 1 +338 351 1 +338 365 1 +338 366 1 +338 426 1 +338 738 1 +338 938 1 +338 1070 1 +339 339 1 +339 387 1 +339 823 1 +339 929 1 +339 1109 1 +339 1121 1 +340 340 1 +340 341 1 +341 133 1 +341 340 1 +341 341 1 +341 640 1 +341 821 1 +341 1093 1 +341 1121 1 +342 17 1 +342 28 1 +342 342 1 +343 343 1 +343 666 1 +343 973 1 +344 344 1 +344 355 1 +344 552 1 +344 766 1 +345 345 1 +345 879 1 +345 927 1 +346 232 1 +346 346 1 +346 959 1 +347 347 1 +347 630 1 +348 348 1 +348 601 1 +348 1116 1 +349 252 1 +349 315 1 +349 328 1 +349 349 1 +349 408 1 +349 627 1 +349 679 1 +350 350 1 +350 530 1 +350 1138 1 +351 338 1 +351 351 1 +352 352 1 +352 530 1 +352 861 1 +353 76 1 +353 353 1 +353 511 1 +353 725 1 +353 809 1 +353 1032 1 +353 1091 1 +353 1097 1 +354 182 1 +354 240 1 +354 354 1 +355 142 1 +355 317 1 +355 344 1 +355 355 1 +355 544 1 +355 557 1 +356 10 1 +356 74 1 +356 316 1 +356 356 1 +356 434 1 +356 516 1 +356 562 1 +356 711 1 +356 805 1 +357 228 1 +357 357 1 +357 505 1 +357 589 1 +357 836 1 +358 358 1 +358 694 1 +359 7 1 +359 359 1 +359 443 1 +359 452 1 +359 575 1 +359 758 1 +360 21 1 +360 360 1 +360 484 1 +360 622 1 +361 361 1 +361 619 1 +362 362 1 +362 653 1 +362 1088 1 +363 26 1 +363 295 1 +363 363 1 +363 526 1 +363 776 1 +363 975 1 +364 364 1 +364 874 1 +365 96 1 +365 338 1 +365 365 1 +365 415 1 +365 812 1 +365 1092 1 +366 338 1 +366 366 1 +367 163 1 +367 367 1 +367 852 1 +368 368 1 +368 486 1 +368 635 1 +368 639 1 +368 702 1 +368 764 1 +368 939 1 +369 325 1 +369 369 1 +369 466 1 +370 22 1 +370 370 1 +370 687 1 +370 1099 1 +371 371 1 +371 752 1 +372 65 1 +372 372 1 +373 373 1 +373 639 1 +373 964 1 +374 374 1 +374 555 1 +374 1135 1 +375 375 1 +375 780 1 +376 376 1 +376 428 1 +376 500 1 +376 559 1 +376 734 1 +376 848 1 +377 377 1 +377 501 1 +377 1007 1 +378 283 1 +378 378 1 +379 379 1 +379 848 1 +379 1082 1 +379 1110 1 +380 172 1 +380 380 1 +381 381 1 +381 750 1 +382 176 1 +382 336 1 +382 382 1 +383 383 1 +383 521 1 +384 221 1 +384 384 1 +385 385 1 +385 1060 1 +386 386 1 +386 560 1 +386 1096 1 +387 339 1 +387 387 1 +387 548 1 +387 1050 1 +387 1109 1 +388 388 1 +388 907 1 +389 46 1 +389 389 1 +390 14 1 +390 390 1 +390 927 1 +390 980 1 +391 26 1 +391 30 1 +391 295 1 +391 391 1 +392 234 1 +392 392 1 +392 837 1 +393 97 1 +393 393 1 +393 981 1 +394 394 1 +394 416 1 +394 504 1 +394 568 1 +394 827 1 +395 395 1 +395 710 1 +396 396 1 +396 636 1 +396 1037 1 +397 397 1 +397 567 1 +398 398 1 +398 1074 1 +399 399 1 +399 700 1 +399 876 1 +400 40 1 +400 400 1 +401 401 1 +401 538 1 +402 88 1 +402 188 1 +402 212 1 +402 319 1 +402 402 1 +403 403 1 +403 418 1 +404 404 1 +404 530 1 +404 927 1 +405 405 1 +405 720 1 +405 916 1 +406 406 1 +406 471 1 +407 407 1 +407 760 1 +407 1120 1 +408 152 1 +408 349 1 +408 408 1 +408 519 1 +409 147 1 +409 409 1 +409 479 1 +409 648 1 +409 1081 1 +410 410 1 +410 931 1 +411 169 1 +411 411 1 +411 698 1 +411 818 1 +412 318 1 +412 412 1 +412 491 1 +412 543 1 +412 777 1 +412 982 1 +412 1134 1 +413 98 1 +413 204 1 +413 238 1 +413 413 1 +414 302 1 +414 414 1 +414 871 1 +415 80 1 +415 122 1 +415 155 1 +415 159 1 +415 365 1 +415 415 1 +415 558 1 +415 733 1 +416 394 1 +416 416 1 +416 1113 1 +417 314 1 +417 417 1 +417 1131 1 +418 403 1 +418 418 1 +418 583 1 +418 974 1 +419 10 1 +419 299 1 +419 419 1 +419 631 1 +420 311 1 +420 420 1 +421 124 1 +421 267 1 +421 300 1 +421 306 1 +421 421 1 +421 635 1 +421 639 1 +422 192 1 +422 230 1 +422 422 1 +422 494 1 +422 1085 1 +423 234 1 +423 251 1 +423 423 1 +424 424 1 +424 544 1 +424 606 1 +425 425 1 +425 701 1 +425 711 1 +426 177 1 +426 338 1 +426 426 1 +426 836 1 +426 1070 1 +427 309 1 +427 329 1 +427 427 1 +427 595 1 +427 899 1 +428 376 1 +428 428 1 +428 946 1 +428 962 1 +429 176 1 +429 429 1 +429 466 1 +430 430 1 +430 872 1 +430 1062 1 +431 431 1 +431 556 1 +431 598 1 +431 1066 1 +432 21 1 +432 166 1 +432 432 1 +432 562 1 +432 740 1 +432 927 1 +432 1012 1 +433 433 1 +433 771 1 +433 814 1 +434 316 1 +434 356 1 +434 434 1 +434 604 1 +435 191 1 +435 435 1 +436 87 1 +436 436 1 +437 437 1 +437 644 1 +438 438 1 +438 882 1 +438 926 1 +439 50 1 +439 439 1 +439 924 1 +439 931 1 +439 1031 1 +440 440 1 +440 515 1 +441 286 1 +441 441 1 +442 442 1 +442 545 1 +443 359 1 +443 443 1 +443 895 1 +444 444 1 +444 455 1 +445 125 1 +445 445 1 +445 653 1 +446 298 1 +446 446 1 +446 1086 1 +447 264 1 +447 447 1 +447 573 1 +447 846 1 +448 448 1 +448 567 1 +448 886 1 +448 918 1 +449 449 1 +449 490 1 +449 728 1 +450 450 1 +450 903 1 +451 133 1 +451 451 1 +451 769 1 +452 359 1 +452 452 1 +452 895 1 +453 225 1 +453 453 1 +453 882 1 +454 454 1 +454 736 1 +454 759 1 +455 29 1 +455 290 1 +455 444 1 +455 455 1 +455 576 1 +455 608 1 +455 730 1 +455 754 1 +455 787 1 +455 991 1 +456 72 1 +456 456 1 +456 638 1 +457 85 1 +457 457 1 +457 520 1 +457 703 1 +458 458 1 +458 697 1 +459 459 1 +459 1018 1 +460 204 1 +460 460 1 +461 461 1 +461 1132 1 +462 462 1 +462 668 1 +462 882 1 +463 463 1 +463 489 1 +463 1066 1 +464 28 1 +464 464 1 +465 465 1 +465 711 1 +465 739 1 +465 749 1 +465 1064 1 +465 1110 1 +466 310 1 +466 325 1 +466 336 1 +466 369 1 +466 429 1 +466 466 1 +467 233 1 +467 467 1 +467 963 1 +468 327 1 +468 468 1 +469 148 1 +469 469 1 +469 840 1 +470 470 1 +470 1025 1 +471 406 1 +471 471 1 +471 592 1 +472 472 1 +472 669 1 +472 1019 1 +473 473 1 +473 505 1 +473 671 1 +473 892 1 +474 259 1 +474 474 1 +474 567 1 +474 947 1 +475 152 1 +475 475 1 +475 648 1 +476 476 1 +476 642 1 +476 781 1 +476 965 1 +477 477 1 +477 528 1 +477 597 1 +478 478 1 +478 639 1 +479 23 1 +479 409 1 +479 479 1 +479 956 1 +480 22 1 +480 306 1 +480 480 1 +480 567 1 +480 947 1 +480 996 1 +481 481 1 +481 706 1 +481 718 1 +482 318 1 +482 482 1 +482 491 1 +482 514 1 +482 982 1 +483 130 1 +483 296 1 +483 483 1 +483 580 1 +483 671 1 +483 799 1 +483 943 1 +484 360 1 +484 484 1 +485 485 1 +485 714 1 +485 883 1 +485 921 1 +486 243 1 +486 288 1 +486 368 1 +486 486 1 +486 535 1 +486 1058 1 +487 487 1 +487 751 1 +488 488 1 +488 568 1 +488 700 1 +488 876 1 +489 5 1 +489 463 1 +489 489 1 +489 1066 1 +490 449 1 +490 490 1 +491 318 1 +491 412 1 +491 482 1 +491 491 1 +491 777 1 +491 982 1 +491 1134 1 +492 492 1 +492 1063 1 +493 27 1 +493 493 1 +493 743 1 +494 63 1 +494 230 1 +494 422 1 +494 494 1 +495 495 1 +495 925 1 +496 107 1 +496 496 1 +497 497 1 +497 708 1 +497 779 1 +498 246 1 +498 498 1 +499 499 1 +499 978 1 +500 376 1 +500 500 1 +501 377 1 +501 501 1 +502 315 1 +502 502 1 +503 309 1 +503 503 1 +504 394 1 +504 504 1 +505 357 1 +505 473 1 +505 505 1 +505 1031 1 +506 3 1 +506 506 1 +506 717 1 +506 834 1 +507 507 1 +507 704 1 +507 828 1 +508 508 1 +508 674 1 +508 728 1 +509 296 1 +509 509 1 +509 943 1 +510 71 1 +510 140 1 +510 510 1 +510 543 1 +510 557 1 +510 663 1 +510 736 1 +511 293 1 +511 353 1 +511 511 1 +512 21 1 +512 67 1 +512 221 1 +512 314 1 +512 512 1 +512 685 1 +512 752 1 +512 829 1 +513 513 1 +513 834 1 +514 318 1 +514 482 1 +514 514 1 +514 780 1 +515 23 1 +515 213 1 +515 440 1 +515 515 1 +515 1026 1 +516 356 1 +516 516 1 +516 711 1 +517 329 1 +517 517 1 +517 667 1 +517 796 1 +518 518 1 +518 1007 1 +519 408 1 +519 519 1 +520 457 1 +520 520 1 +520 602 1 +520 918 1 +521 383 1 +521 521 1 +521 864 1 +522 522 1 +522 776 1 +522 975 1 +523 523 1 +523 690 1 +524 524 1 +524 917 1 +525 90 1 +525 149 1 +525 161 1 +525 231 1 +525 293 1 +525 525 1 +526 196 1 +526 363 1 +526 526 1 +526 663 1 +526 749 1 +527 56 1 +527 527 1 +527 793 1 +528 279 1 +528 477 1 +528 528 1 +529 278 1 +529 529 1 +529 804 1 +530 19 1 +530 52 1 +530 146 1 +530 198 1 +530 350 1 +530 352 1 +530 404 1 +530 530 1 +530 927 1 +531 531 1 +531 865 1 +532 532 1 +532 981 1 +533 312 1 +533 533 1 +533 552 1 +534 308 1 +534 534 1 +534 906 1 +534 1094 1 +535 486 1 +535 535 1 +535 926 1 +536 296 1 +536 536 1 +536 580 1 +536 585 1 +536 745 1 +536 794 1 +536 830 1 +536 835 1 +536 943 1 +536 1018 1 +537 124 1 +537 537 1 +537 790 1 +538 56 1 +538 107 1 +538 251 1 +538 401 1 +538 538 1 +538 543 1 +538 889 1 +539 539 1 +539 689 1 +539 1121 1 +540 128 1 +540 313 1 +540 540 1 +540 586 1 +540 1115 1 +541 541 1 +541 1047 1 +541 1048 1 +542 23 1 +542 542 1 +542 669 1 +543 71 1 +543 142 1 +543 412 1 +543 510 1 +543 538 1 +543 543 1 +543 889 1 +544 355 1 +544 424 1 +544 544 1 +545 442 1 +545 545 1 +545 907 1 +545 938 1 +545 1130 1 +546 268 1 +546 546 1 +546 856 1 +547 547 1 +547 714 1 +548 387 1 +548 548 1 +548 823 1 +549 36 1 +549 549 1 +549 696 1 +549 776 1 +550 54 1 +550 550 1 +550 734 1 +550 946 1 +551 254 1 +551 551 1 +552 109 1 +552 312 1 +552 344 1 +552 533 1 +552 552 1 +552 557 1 +552 800 1 +553 553 1 +553 860 1 +553 1083 1 +554 554 1 +554 613 1 +554 838 1 +554 866 1 +555 144 1 +555 374 1 +555 555 1 +556 431 1 +556 556 1 +557 355 1 +557 510 1 +557 552 1 +557 557 1 +558 122 1 +558 333 1 +558 337 1 +558 415 1 +558 558 1 +558 584 1 +558 646 1 +558 668 1 +559 376 1 +559 559 1 +560 386 1 +560 560 1 +560 709 1 +560 888 1 +560 1096 1 +561 140 1 +561 561 1 +562 13 1 +562 265 1 +562 288 1 +562 356 1 +562 432 1 +562 562 1 +562 894 1 +562 945 1 +562 1135 1 +563 41 1 +563 193 1 +563 563 1 +563 749 1 +564 134 1 +564 564 1 +565 313 1 +565 565 1 +565 1133 1 +566 566 1 +566 913 1 +567 397 1 +567 448 1 +567 474 1 +567 480 1 +567 567 1 +567 625 1 +567 703 1 +567 770 1 +567 851 1 +568 197 1 +568 394 1 +568 488 1 +568 568 1 +568 817 1 +568 876 1 +568 914 1 +568 1113 1 +569 316 1 +569 569 1 +569 706 1 +569 1100 1 +570 182 1 +570 570 1 +570 888 1 +571 571 1 +571 669 1 +572 572 1 +572 1044 1 +573 447 1 +573 573 1 +573 694 1 +574 263 1 +574 574 1 +575 7 1 +575 359 1 +575 575 1 +576 455 1 +576 576 1 +577 288 1 +577 577 1 +577 773 1 +577 976 1 +578 117 1 +578 211 1 +578 323 1 +578 578 1 +579 579 1 +579 924 1 +580 483 1 +580 536 1 +580 580 1 +581 14 1 +581 581 1 +582 39 1 +582 582 1 +582 1046 1 +583 40 1 +583 65 1 +583 418 1 +583 583 1 +583 628 1 +583 775 1 +583 846 1 +583 865 1 +583 903 1 +583 1043 1 +584 333 1 +584 558 1 +584 584 1 +585 86 1 +585 536 1 +585 585 1 +585 1068 1 +586 321 1 +586 540 1 +586 586 1 +586 897 1 +586 932 1 +587 180 1 +587 587 1 +587 953 1 +588 588 1 +588 917 1 +589 357 1 +589 589 1 +589 924 1 +589 1020 1 +590 291 1 +590 590 1 +590 1045 1 +591 591 1 +591 683 1 +591 940 1 +592 250 1 +592 471 1 +592 592 1 +592 1098 1 +593 593 1 +593 637 1 +594 182 1 +594 594 1 +594 895 1 +595 44 1 +595 427 1 +595 595 1 +595 1123 1 +596 596 1 +596 634 1 +597 78 1 +597 257 1 +597 279 1 +597 477 1 +597 597 1 +597 695 1 +598 431 1 +598 598 1 +598 679 1 +599 320 1 +599 599 1 +600 231 1 +600 600 1 +601 301 1 +601 348 1 +601 601 1 +601 622 1 +602 520 1 +602 602 1 +603 6 1 +603 174 1 +603 603 1 +604 434 1 +604 604 1 +605 605 1 +605 736 1 +606 319 1 +606 424 1 +606 606 1 +607 607 1 +607 885 1 +607 983 1 +607 1023 1 +607 1133 1 +608 455 1 +608 608 1 +609 609 1 +609 835 1 +610 279 1 +610 610 1 +610 689 1 +611 611 1 +611 634 1 +611 930 1 +612 612 1 +612 864 1 +613 554 1 +613 613 1 +613 964 1 +614 614 1 +614 1002 1 +615 615 1 +615 935 1 +615 1121 1 +616 616 1 +616 1124 1 +617 94 1 +617 105 1 +617 617 1 +617 1044 1 +617 1130 1 +618 618 1 +618 794 1 +619 256 1 +619 361 1 +619 619 1 +619 739 1 +619 897 1 +619 932 1 +619 1000 1 +619 1103 1 +620 620 1 +620 865 1 +620 1086 1 +620 1104 1 +621 298 1 +621 621 1 +622 360 1 +622 601 1 +622 622 1 +622 686 1 +623 75 1 +623 246 1 +623 308 1 +623 623 1 +623 952 1 +623 1074 1 +623 1094 1 +624 624 1 +624 708 1 +624 732 1 +624 766 1 +625 567 1 +625 625 1 +625 1002 1 +626 626 1 +626 789 1 +627 135 1 +627 315 1 +627 349 1 +627 627 1 +628 286 1 +628 583 1 +628 628 1 +629 629 1 +629 876 1 +629 904 1 +630 199 1 +630 347 1 +630 630 1 +630 1095 1 +630 1108 1 +631 419 1 +631 631 1 +632 73 1 +632 79 1 +632 632 1 +633 94 1 +633 633 1 +633 790 1 +633 1025 1 +634 596 1 +634 611 1 +634 634 1 +634 930 1 +634 1051 1 +635 368 1 +635 421 1 +635 635 1 +635 901 1 +635 927 1 +636 154 1 +636 396 1 +636 636 1 +637 112 1 +637 593 1 +637 637 1 +637 855 1 +638 456 1 +638 638 1 +638 893 1 +639 124 1 +639 368 1 +639 373 1 +639 421 1 +639 478 1 +639 639 1 +639 703 1 +639 910 1 +640 134 1 +640 341 1 +640 640 1 +640 689 1 +641 641 1 +641 706 1 +642 48 1 +642 476 1 +642 642 1 +643 211 1 +643 643 1 +643 648 1 +643 904 1 +644 106 1 +644 437 1 +644 644 1 +644 1090 1 +644 1120 1 +645 55 1 +645 129 1 +645 645 1 +646 34 1 +646 558 1 +646 646 1 +646 963 1 +646 1136 1 +647 647 1 +647 665 1 +647 904 1 +647 1051 1 +648 31 1 +648 409 1 +648 475 1 +648 643 1 +648 648 1 +649 649 1 +649 807 1 +649 905 1 +650 650 1 +650 930 1 +650 1079 1 +651 651 1 +651 712 1 +651 866 1 +652 173 1 +652 652 1 +653 24 1 +653 125 1 +653 138 1 +653 362 1 +653 445 1 +653 653 1 +653 987 1 +654 654 1 +654 1012 1 +655 169 1 +655 655 1 +656 15 1 +656 61 1 +656 656 1 +657 107 1 +657 657 1 +658 168 1 +658 658 1 +658 954 1 +658 1030 1 +659 659 1 +659 908 1 +660 660 1 +660 805 1 +661 661 1 +661 848 1 +661 1082 1 +662 9 1 +662 662 1 +663 66 1 +663 510 1 +663 526 1 +663 663 1 +663 759 1 +664 65 1 +664 183 1 +664 664 1 +664 1083 1 +665 647 1 +665 665 1 +666 343 1 +666 666 1 +666 747 1 +667 517 1 +667 667 1 +668 462 1 +668 558 1 +668 668 1 +669 472 1 +669 542 1 +669 571 1 +669 669 1 +669 789 1 +669 969 1 +670 201 1 +670 276 1 +670 670 1 +671 473 1 +671 483 1 +671 671 1 +671 1124 1 +672 36 1 +672 672 1 +672 942 1 +673 149 1 +673 673 1 +674 91 1 +674 508 1 +674 674 1 +675 265 1 +675 675 1 +676 676 1 +676 1099 1 +677 273 1 +677 677 1 +678 280 1 +678 678 1 +679 61 1 +679 224 1 +679 349 1 +679 598 1 +679 679 1 +680 44 1 +680 680 1 +680 1029 1 +681 132 1 +681 681 1 +681 887 1 +682 682 1 +682 881 1 +682 916 1 +683 124 1 +683 591 1 +683 683 1 +683 712 1 +684 27 1 +684 244 1 +684 684 1 +684 689 1 +684 1107 1 +685 221 1 +685 512 1 +685 685 1 +686 622 1 +686 686 1 +687 370 1 +687 687 1 +687 996 1 +688 185 1 +688 230 1 +688 688 1 +688 1087 1 +689 11 1 +689 102 1 +689 157 1 +689 195 1 +689 279 1 +689 539 1 +689 610 1 +689 640 1 +689 684 1 +689 689 1 +689 769 1 +689 844 1 +689 867 1 +689 896 1 +689 1004 1 +689 1021 1 +689 1041 1 +689 1126 1 +690 294 1 +690 523 1 +690 690 1 +691 691 1 +691 713 1 +692 327 1 +692 692 1 +692 811 1 +692 1060 1 +693 110 1 +693 693 1 +693 993 1 +694 358 1 +694 573 1 +694 694 1 +694 728 1 +694 846 1 +695 279 1 +695 597 1 +695 695 1 +696 193 1 +696 549 1 +696 696 1 +697 117 1 +697 253 1 +697 315 1 +697 458 1 +697 697 1 +698 411 1 +698 698 1 +698 714 1 +698 875 1 +698 1063 1 +699 277 1 +699 699 1 +700 399 1 +700 488 1 +700 700 1 +701 62 1 +701 425 1 +701 701 1 +701 734 1 +702 368 1 +702 702 1 +702 764 1 +702 873 1 +702 901 1 +702 1129 1 +702 1130 1 +703 43 1 +703 336 1 +703 457 1 +703 567 1 +703 639 1 +703 703 1 +703 841 1 +704 83 1 +704 507 1 +704 704 1 +704 828 1 +704 934 1 +704 1111 1 +705 235 1 +705 705 1 +706 260 1 +706 481 1 +706 569 1 +706 641 1 +706 706 1 +707 112 1 +707 707 1 +707 708 1 +708 497 1 +708 624 1 +708 707 1 +708 708 1 +709 560 1 +709 709 1 +709 888 1 +710 333 1 +710 395 1 +710 710 1 +710 911 1 +710 1135 1 +711 356 1 +711 425 1 +711 465 1 +711 516 1 +711 711 1 +711 916 1 +712 651 1 +712 683 1 +712 712 1 +712 818 1 +712 866 1 +713 691 1 +713 713 1 +713 717 1 +713 834 1 +714 485 1 +714 547 1 +714 698 1 +714 714 1 +714 1063 1 +715 715 1 +715 999 1 +716 192 1 +716 716 1 +716 1085 1 +716 1137 1 +717 506 1 +717 713 1 +717 717 1 +717 997 1 +718 481 1 +718 718 1 +719 53 1 +719 719 1 +720 98 1 +720 405 1 +720 720 1 +721 140 1 +721 721 1 +722 101 1 +722 722 1 +723 723 1 +723 882 1 +724 9 1 +724 724 1 +725 353 1 +725 725 1 +726 726 1 +726 850 1 +727 32 1 +727 333 1 +727 727 1 +728 173 1 +728 262 1 +728 304 1 +728 449 1 +728 508 1 +728 694 1 +728 728 1 +729 250 1 +729 729 1 +730 455 1 +730 730 1 +730 1060 1 +731 731 1 +731 1000 1 +732 624 1 +732 732 1 +733 415 1 +733 733 1 +734 376 1 +734 550 1 +734 701 1 +734 734 1 +735 735 1 +735 864 1 +736 454 1 +736 510 1 +736 605 1 +736 736 1 +737 737 1 +737 1121 1 +738 37 1 +738 70 1 +738 309 1 +738 329 1 +738 338 1 +738 738 1 +738 1122 1 +739 311 1 +739 465 1 +739 619 1 +739 739 1 +740 21 1 +740 432 1 +740 740 1 +740 1009 1 +741 17 1 +741 741 1 +741 790 1 +741 900 1 +742 168 1 +742 742 1 +742 876 1 +742 1071 1 +743 493 1 +743 743 1 +743 1048 1 +744 22 1 +744 744 1 +744 854 1 +745 282 1 +745 536 1 +745 745 1 +746 327 1 +746 746 1 +747 98 1 +747 666 1 +747 747 1 +748 35 1 +748 748 1 +748 957 1 +749 89 1 +749 465 1 +749 526 1 +749 563 1 +749 749 1 +749 849 1 +749 1110 1 +750 381 1 +750 750 1 +750 816 1 +751 487 1 +751 751 1 +751 857 1 +751 907 1 +751 938 1 +752 371 1 +752 512 1 +752 752 1 +752 756 1 +752 1135 1 +753 59 1 +753 114 1 +753 753 1 +754 455 1 +754 754 1 +755 755 1 +755 771 1 +756 752 1 +756 756 1 +756 810 1 +757 276 1 +757 757 1 +757 1075 1 +758 359 1 +758 758 1 +758 1010 1 +759 454 1 +759 663 1 +759 759 1 +760 407 1 +760 760 1 +761 761 1 +761 1029 1 +761 1049 1 +762 20 1 +762 762 1 +762 1004 1 +763 116 1 +763 763 1 +763 768 1 +763 796 1 +763 831 1 +764 18 1 +764 173 1 +764 368 1 +764 702 1 +764 764 1 +764 939 1 +765 262 1 +765 765 1 +766 344 1 +766 624 1 +766 766 1 +767 767 1 +767 983 1 +767 1023 1 +768 763 1 +768 768 1 +768 796 1 +769 330 1 +769 451 1 +769 689 1 +769 769 1 +770 169 1 +770 567 1 +770 770 1 +770 928 1 +771 433 1 +771 755 1 +771 771 1 +771 804 1 +771 814 1 +772 46 1 +772 772 1 +773 577 1 +773 773 1 +773 1022 1 +773 1135 1 +774 60 1 +774 774 1 +774 1097 1 +775 65 1 +775 271 1 +775 583 1 +775 775 1 +775 965 1 +775 966 1 +776 36 1 +776 295 1 +776 363 1 +776 522 1 +776 549 1 +776 776 1 +776 975 1 +777 318 1 +777 412 1 +777 491 1 +777 777 1 +777 920 1 +777 1134 1 +778 149 1 +778 778 1 +779 4 1 +779 497 1 +779 779 1 +780 111 1 +780 375 1 +780 514 1 +780 780 1 +780 992 1 +780 1038 1 +781 79 1 +781 114 1 +781 476 1 +781 781 1 +781 1037 1 +782 782 1 +782 1081 1 +783 141 1 +783 783 1 +783 1007 1 +784 784 1 +784 993 1 +785 317 1 +785 785 1 +786 37 1 +786 786 1 +786 1096 1 +787 455 1 +787 787 1 +788 788 1 +788 953 1 +789 25 1 +789 626 1 +789 669 1 +789 789 1 +789 941 1 +790 17 1 +790 537 1 +790 633 1 +790 741 1 +790 790 1 +790 819 1 +790 855 1 +791 148 1 +791 791 1 +791 806 1 +791 944 1 +791 1124 1 +792 23 1 +792 792 1 +792 941 1 +793 56 1 +793 71 1 +793 527 1 +793 793 1 +794 536 1 +794 618 1 +794 794 1 +795 182 1 +795 795 1 +796 517 1 +796 763 1 +796 768 1 +796 796 1 +796 831 1 +797 140 1 +797 797 1 +798 137 1 +798 798 1 +798 1003 1 +799 216 1 +799 483 1 +799 799 1 +799 957 1 +800 109 1 +800 552 1 +800 800 1 +801 227 1 +801 263 1 +801 801 1 +802 295 1 +802 802 1 +802 1084 1 +803 72 1 +803 803 1 +804 529 1 +804 771 1 +804 804 1 +805 356 1 +805 660 1 +805 805 1 +805 945 1 +806 335 1 +806 791 1 +806 806 1 +807 649 1 +807 807 1 +808 808 1 +808 920 1 +809 353 1 +809 809 1 +810 756 1 +810 810 1 +810 960 1 +810 1135 1 +811 692 1 +811 811 1 +812 365 1 +812 812 1 +812 1092 1 +813 813 1 +813 1079 1 +814 309 1 +814 433 1 +814 771 1 +814 814 1 +814 979 1 +815 145 1 +815 815 1 +815 1102 1 +816 46 1 +816 750 1 +816 816 1 +817 568 1 +817 817 1 +818 411 1 +818 712 1 +818 818 1 +819 94 1 +819 214 1 +819 790 1 +819 819 1 +820 820 1 +820 1110 1 +821 20 1 +821 82 1 +821 341 1 +821 821 1 +822 52 1 +822 822 1 +823 339 1 +823 548 1 +823 823 1 +824 824 1 +824 993 1 +825 80 1 +825 178 1 +825 825 1 +825 1070 1 +825 1122 1 +826 116 1 +826 826 1 +826 1029 1 +827 394 1 +827 827 1 +827 1113 1 +827 1127 1 +828 507 1 +828 704 1 +828 828 1 +829 512 1 +829 829 1 +830 86 1 +830 536 1 +830 830 1 +830 1069 1 +831 763 1 +831 796 1 +831 831 1 +832 120 1 +832 832 1 +833 833 1 +833 899 1 +833 951 1 +833 986 1 +834 506 1 +834 513 1 +834 713 1 +834 834 1 +835 536 1 +835 609 1 +835 835 1 +836 118 1 +836 357 1 +836 426 1 +836 836 1 +836 1013 1 +837 234 1 +837 392 1 +837 837 1 +838 554 1 +838 838 1 +839 186 1 +839 839 1 +840 469 1 +840 840 1 +841 336 1 +841 703 1 +841 841 1 +842 57 1 +842 842 1 +842 1039 1 +843 843 1 +843 920 1 +844 244 1 +844 689 1 +844 844 1 +845 68 1 +845 845 1 +845 928 1 +846 447 1 +846 583 1 +846 694 1 +846 846 1 +846 873 1 +846 1016 1 +846 1043 1 +847 87 1 +847 847 1 +848 230 1 +848 237 1 +848 376 1 +848 379 1 +848 661 1 +848 848 1 +848 902 1 +848 977 1 +848 1064 1 +848 1128 1 +849 30 1 +849 241 1 +849 749 1 +849 849 1 +850 157 1 +850 726 1 +850 850 1 +850 869 1 +851 267 1 +851 567 1 +851 851 1 +851 1002 1 +852 163 1 +852 182 1 +852 367 1 +852 852 1 +852 888 1 +853 4 1 +853 853 1 +854 744 1 +854 854 1 +854 1056 1 +855 637 1 +855 790 1 +855 855 1 +856 48 1 +856 546 1 +856 856 1 +857 132 1 +857 283 1 +857 751 1 +857 857 1 +857 887 1 +858 50 1 +858 172 1 +858 858 1 +859 61 1 +859 859 1 +860 73 1 +860 553 1 +860 860 1 +861 47 1 +861 352 1 +861 861 1 +862 862 1 +862 876 1 +862 1041 1 +863 291 1 +863 863 1 +864 124 1 +864 521 1 +864 612 1 +864 735 1 +864 864 1 +865 154 1 +865 262 1 +865 531 1 +865 583 1 +865 620 1 +865 865 1 +865 965 1 +866 554 1 +866 651 1 +866 712 1 +866 866 1 +866 1014 1 +867 689 1 +867 867 1 +867 1121 1 +868 64 1 +868 868 1 +869 244 1 +869 850 1 +869 869 1 +870 870 1 +870 952 1 +871 414 1 +871 871 1 +871 1133 1 +872 129 1 +872 430 1 +872 872 1 +873 702 1 +873 846 1 +873 873 1 +873 1043 1 +874 131 1 +874 263 1 +874 364 1 +874 874 1 +875 698 1 +875 875 1 +876 102 1 +876 191 1 +876 195 1 +876 197 1 +876 200 1 +876 205 1 +876 399 1 +876 488 1 +876 568 1 +876 629 1 +876 742 1 +876 862 1 +876 876 1 +876 1106 1 +876 1126 1 +877 877 1 +877 1124 1 +878 878 1 +878 964 1 +878 967 1 +878 1036 1 +879 98 1 +879 345 1 +879 879 1 +880 119 1 +880 880 1 +880 1035 1 +881 682 1 +881 881 1 +882 333 1 +882 438 1 +882 453 1 +882 462 1 +882 723 1 +882 882 1 +882 1058 1 +883 485 1 +883 883 1 +884 108 1 +884 140 1 +884 254 1 +884 327 1 +884 884 1 +885 607 1 +885 885 1 +886 448 1 +886 886 1 +887 681 1 +887 857 1 +887 887 1 +888 37 1 +888 560 1 +888 570 1 +888 709 1 +888 852 1 +888 888 1 +889 71 1 +889 538 1 +889 543 1 +889 889 1 +890 127 1 +890 890 1 +890 969 1 +891 133 1 +891 891 1 +892 58 1 +892 473 1 +892 892 1 +892 997 1 +893 242 1 +893 638 1 +893 893 1 +893 1112 1 +894 144 1 +894 562 1 +894 894 1 +895 329 1 +895 443 1 +895 452 1 +895 594 1 +895 895 1 +895 979 1 +896 689 1 +896 896 1 +896 1019 1 +897 235 1 +897 586 1 +897 619 1 +897 897 1 +898 127 1 +898 898 1 +899 427 1 +899 833 1 +899 899 1 +900 741 1 +900 900 1 +901 333 1 +901 635 1 +901 702 1 +901 901 1 +902 848 1 +902 902 1 +902 908 1 +903 450 1 +903 583 1 +903 903 1 +903 1016 1 +903 1043 1 +904 307 1 +904 629 1 +904 643 1 +904 647 1 +904 904 1 +904 913 1 +905 93 1 +905 123 1 +905 126 1 +905 649 1 +905 905 1 +905 1125 1 +906 289 1 +906 534 1 +906 906 1 +907 228 1 +907 388 1 +907 545 1 +907 751 1 +907 907 1 +908 659 1 +908 902 1 +908 908 1 +908 1128 1 +909 149 1 +909 909 1 +910 180 1 +910 267 1 +910 639 1 +910 910 1 +910 939 1 +910 1057 1 +910 1078 1 +910 1085 1 +911 710 1 +911 911 1 +912 912 1 +912 1099 1 +913 17 1 +913 566 1 +913 904 1 +913 913 1 +914 568 1 +914 914 1 +915 147 1 +915 915 1 +916 405 1 +916 682 1 +916 711 1 +916 916 1 +917 152 1 +917 524 1 +917 588 1 +917 917 1 +918 143 1 +918 448 1 +918 520 1 +918 918 1 +919 919 1 +919 1051 1 +920 777 1 +920 808 1 +920 843 1 +920 920 1 +920 1024 1 +920 1134 1 +921 43 1 +921 336 1 +921 485 1 +921 921 1 +922 87 1 +922 110 1 +922 922 1 +923 923 1 +923 1122 1 +924 280 1 +924 439 1 +924 579 1 +924 589 1 +924 924 1 +924 1077 1 +925 495 1 +925 925 1 +925 1068 1 +926 243 1 +926 245 1 +926 438 1 +926 535 1 +926 926 1 +927 55 1 +927 98 1 +927 146 1 +927 322 1 +927 345 1 +927 390 1 +927 404 1 +927 432 1 +927 530 1 +927 635 1 +927 927 1 +927 973 1 +927 996 1 +927 1012 1 +928 770 1 +928 845 1 +928 928 1 +929 339 1 +929 929 1 +929 1109 1 +930 611 1 +930 634 1 +930 650 1 +930 930 1 +930 1051 1 +930 1088 1 +931 269 1 +931 410 1 +931 439 1 +931 931 1 +932 235 1 +932 586 1 +932 619 1 +932 932 1 +933 293 1 +933 933 1 +933 1091 1 +934 140 1 +934 704 1 +934 934 1 +935 615 1 +935 935 1 +935 1101 1 +935 1121 1 +936 286 1 +936 936 1 +937 273 1 +937 937 1 +938 44 1 +938 338 1 +938 545 1 +938 751 1 +938 938 1 +939 173 1 +939 267 1 +939 297 1 +939 368 1 +939 764 1 +939 910 1 +939 939 1 +939 1078 1 +939 1083 1 +940 591 1 +940 940 1 +941 23 1 +941 789 1 +941 792 1 +941 941 1 +942 193 1 +942 672 1 +942 942 1 +943 483 1 +943 509 1 +943 536 1 +943 943 1 +944 791 1 +944 944 1 +944 985 1 +945 288 1 +945 562 1 +945 805 1 +945 945 1 +946 428 1 +946 550 1 +946 946 1 +947 103 1 +947 474 1 +947 480 1 +947 947 1 +948 144 1 +948 948 1 +949 112 1 +949 949 1 +949 967 1 +949 1005 1 +950 293 1 +950 950 1 +950 984 1 +951 833 1 +951 951 1 +952 75 1 +952 623 1 +952 870 1 +952 952 1 +953 113 1 +953 587 1 +953 788 1 +953 953 1 +954 168 1 +954 658 1 +954 954 1 +955 60 1 +955 219 1 +955 955 1 +955 995 1 +956 213 1 +956 479 1 +956 956 1 +956 1081 1 +957 250 1 +957 748 1 +957 799 1 +957 957 1 +958 254 1 +958 958 1 +959 168 1 +959 210 1 +959 346 1 +959 959 1 +960 810 1 +960 960 1 +961 187 1 +961 286 1 +961 961 1 +962 428 1 +962 962 1 +963 467 1 +963 646 1 +963 963 1 +964 373 1 +964 613 1 +964 878 1 +964 964 1 +964 967 1 +964 1014 1 +965 65 1 +965 154 1 +965 222 1 +965 271 1 +965 476 1 +965 775 1 +965 865 1 +965 965 1 +966 775 1 +966 966 1 +967 113 1 +967 878 1 +967 949 1 +967 964 1 +967 967 1 +967 993 1 +968 170 1 +968 968 1 +969 669 1 +969 890 1 +969 969 1 +970 151 1 +970 194 1 +970 970 1 +970 1035 1 +971 140 1 +971 971 1 +972 972 1 +972 1013 1 +972 1120 1 +973 343 1 +973 927 1 +973 973 1 +973 1117 1 +974 418 1 +974 974 1 +974 1040 1 +975 30 1 +975 363 1 +975 522 1 +975 776 1 +975 975 1 +976 577 1 +976 976 1 +977 848 1 +977 977 1 +978 327 1 +978 499 1 +978 978 1 +979 240 1 +979 814 1 +979 895 1 +979 979 1 +980 390 1 +980 980 1 +981 75 1 +981 297 1 +981 393 1 +981 532 1 +981 981 1 +982 66 1 +982 412 1 +982 482 1 +982 491 1 +982 982 1 +983 607 1 +983 767 1 +983 983 1 +983 1062 1 +984 293 1 +984 950 1 +984 984 1 +985 944 1 +985 985 1 +986 833 1 +986 986 1 +987 31 1 +987 653 1 +987 987 1 +987 1051 1 +988 244 1 +988 988 1 +988 1048 1 +989 120 1 +989 128 1 +989 989 1 +989 994 1 +989 996 1 +990 990 1 +990 1060 1 +991 219 1 +991 455 1 +991 991 1 +992 780 1 +992 992 1 +993 693 1 +993 784 1 +993 824 1 +993 967 1 +993 993 1 +994 989 1 +994 994 1 +995 955 1 +995 995 1 +996 21 1 +996 99 1 +996 120 1 +996 164 1 +996 480 1 +996 687 1 +996 927 1 +996 989 1 +996 996 1 +997 717 1 +997 892 1 +997 997 1 +997 1031 1 +998 86 1 +998 998 1 +999 2 1 +999 280 1 +999 715 1 +999 999 1 +999 1020 1 +1000 619 1 +1000 731 1 +1000 1000 1 +1000 1132 1 +1001 268 1 +1001 1001 1 +1002 614 1 +1002 625 1 +1002 851 1 +1002 1002 1 +1003 798 1 +1003 1003 1 +1004 689 1 +1004 762 1 +1004 1004 1 +1005 949 1 +1005 1005 1 +1006 12 1 +1006 1006 1 +1007 125 1 +1007 181 1 +1007 249 1 +1007 377 1 +1007 518 1 +1007 783 1 +1007 1007 1 +1008 237 1 +1008 1008 1 +1009 740 1 +1009 1009 1 +1010 7 1 +1010 758 1 +1010 1010 1 +1011 6 1 +1011 186 1 +1011 1011 1 +1012 432 1 +1012 654 1 +1012 927 1 +1012 1012 1 +1013 215 1 +1013 836 1 +1013 972 1 +1013 1013 1 +1014 128 1 +1014 866 1 +1014 964 1 +1014 1014 1 +1014 1115 1 +1015 58 1 +1015 1015 1 +1016 846 1 +1016 903 1 +1016 1016 1 +1016 1043 1 +1017 78 1 +1017 1017 1 +1018 459 1 +1018 536 1 +1018 1018 1 +1019 25 1 +1019 472 1 +1019 896 1 +1019 1019 1 +1019 1048 1 +1020 266 1 +1020 589 1 +1020 999 1 +1020 1020 1 +1021 11 1 +1021 689 1 +1021 1021 1 +1022 773 1 +1022 1022 1 +1023 607 1 +1023 767 1 +1023 1023 1 +1024 920 1 +1024 1024 1 +1025 128 1 +1025 470 1 +1025 633 1 +1025 1025 1 +1026 515 1 +1026 1026 1 +1027 9 1 +1027 268 1 +1027 1027 1 +1028 152 1 +1028 1028 1 +1029 116 1 +1029 332 1 +1029 680 1 +1029 761 1 +1029 826 1 +1029 1029 1 +1030 303 1 +1030 658 1 +1030 1030 1 +1031 439 1 +1031 505 1 +1031 997 1 +1031 1031 1 +1032 353 1 +1032 1032 1 +1033 208 1 +1033 1033 1 +1034 317 1 +1034 1034 1 +1035 880 1 +1035 970 1 +1035 1035 1 +1035 1088 1 +1036 878 1 +1036 1036 1 +1037 396 1 +1037 781 1 +1037 1037 1 +1038 780 1 +1038 1038 1 +1039 842 1 +1039 1039 1 +1040 974 1 +1040 1040 1 +1041 279 1 +1041 689 1 +1041 862 1 +1041 1041 1 +1042 108 1 +1042 1042 1 +1043 583 1 +1043 846 1 +1043 873 1 +1043 903 1 +1043 1016 1 +1043 1043 1 +1044 118 1 +1044 572 1 +1044 617 1 +1044 1044 1 +1045 590 1 +1045 1045 1 +1046 236 1 +1046 582 1 +1046 1046 1 +1047 541 1 +1047 1047 1 +1047 1048 1 +1048 541 1 +1048 743 1 +1048 988 1 +1048 1019 1 +1048 1047 1 +1048 1048 1 +1048 1107 1 +1049 61 1 +1049 761 1 +1049 1049 1 +1050 184 1 +1050 387 1 +1050 1050 1 +1051 634 1 +1051 647 1 +1051 919 1 +1051 930 1 +1051 987 1 +1051 1051 1 +1052 106 1 +1052 1052 1 +1053 317 1 +1053 1053 1 +1054 55 1 +1054 198 1 +1054 1054 1 +1055 154 1 +1055 1055 1 +1056 854 1 +1056 1056 1 +1057 910 1 +1057 1057 1 +1058 486 1 +1058 882 1 +1058 1058 1 +1059 115 1 +1059 281 1 +1059 1059 1 +1060 95 1 +1060 149 1 +1060 293 1 +1060 385 1 +1060 692 1 +1060 730 1 +1060 990 1 +1060 1060 1 +1061 5 1 +1061 1061 1 +1062 248 1 +1062 430 1 +1062 983 1 +1062 1062 1 +1062 1075 1 +1063 492 1 +1063 698 1 +1063 714 1 +1063 1063 1 +1064 465 1 +1064 848 1 +1064 1064 1 +1065 251 1 +1065 1065 1 +1066 431 1 +1066 463 1 +1066 489 1 +1066 1066 1 +1067 214 1 +1067 1067 1 +1068 585 1 +1068 925 1 +1068 1068 1 +1069 830 1 +1069 1069 1 +1070 128 1 +1070 217 1 +1070 338 1 +1070 426 1 +1070 825 1 +1070 1070 1 +1070 1122 1 +1071 742 1 +1071 1071 1 +1072 16 1 +1072 1072 1 +1073 159 1 +1073 263 1 +1073 1073 1 +1074 246 1 +1074 398 1 +1074 623 1 +1074 1074 1 +1075 55 1 +1075 150 1 +1075 302 1 +1075 757 1 +1075 1062 1 +1075 1075 1 +1076 228 1 +1076 1076 1 +1077 924 1 +1077 1077 1 +1078 910 1 +1078 939 1 +1078 1078 1 +1079 650 1 +1079 813 1 +1079 1079 1 +1080 98 1 +1080 1080 1 +1081 42 1 +1081 409 1 +1081 782 1 +1081 956 1 +1081 1081 1 +1082 379 1 +1082 661 1 +1082 1082 1 +1083 268 1 +1083 553 1 +1083 664 1 +1083 939 1 +1083 1083 1 +1084 140 1 +1084 295 1 +1084 802 1 +1084 1084 1 +1085 422 1 +1085 716 1 +1085 910 1 +1085 1085 1 +1086 262 1 +1086 446 1 +1086 620 1 +1086 1086 1 +1087 688 1 +1087 1087 1 +1088 24 1 +1088 119 1 +1088 138 1 +1088 362 1 +1088 930 1 +1088 1035 1 +1088 1088 1 +1089 149 1 +1089 1089 1 +1090 644 1 +1090 1090 1 +1091 353 1 +1091 933 1 +1091 1091 1 +1092 96 1 +1092 365 1 +1092 812 1 +1092 1092 1 +1093 330 1 +1093 341 1 +1093 1093 1 +1094 97 1 +1094 534 1 +1094 623 1 +1094 1094 1 +1095 630 1 +1095 1095 1 +1096 386 1 +1096 560 1 +1096 786 1 +1096 1096 1 +1096 1114 1 +1097 353 1 +1097 774 1 +1097 1097 1 +1098 592 1 +1098 1098 1 +1099 370 1 +1099 676 1 +1099 912 1 +1099 1099 1 +1100 569 1 +1100 1100 1 +1101 935 1 +1101 1101 1 +1102 815 1 +1102 1102 1 +1102 1117 1 +1103 619 1 +1103 1103 1 +1103 1108 1 +1104 620 1 +1104 1104 1 +1105 279 1 +1105 1105 1 +1105 1126 1 +1106 11 1 +1106 275 1 +1106 876 1 +1106 1106 1 +1107 684 1 +1107 1048 1 +1107 1107 1 +1108 630 1 +1108 1103 1 +1108 1108 1 +1109 51 1 +1109 175 1 +1109 339 1 +1109 387 1 +1109 929 1 +1109 1109 1 +1110 379 1 +1110 465 1 +1110 749 1 +1110 820 1 +1110 1110 1 +1111 704 1 +1111 1111 1 +1112 225 1 +1112 243 1 +1112 893 1 +1112 1112 1 +1113 416 1 +1113 568 1 +1113 827 1 +1113 1113 1 +1113 1121 1 +1114 1096 1 +1114 1114 1 +1115 160 1 +1115 540 1 +1115 1014 1 +1115 1115 1 +1116 67 1 +1116 292 1 +1116 348 1 +1116 1116 1 +1117 973 1 +1117 1102 1 +1117 1117 1 +1118 67 1 +1118 1118 1 +1119 168 1 +1119 191 1 +1119 1119 1 +1120 227 1 +1120 263 1 +1120 277 1 +1120 407 1 +1120 644 1 +1120 972 1 +1120 1120 1 +1121 104 1 +1121 339 1 +1121 341 1 +1121 539 1 +1121 615 1 +1121 737 1 +1121 867 1 +1121 935 1 +1121 1113 1 +1121 1121 1 +1122 128 1 +1122 307 1 +1122 738 1 +1122 825 1 +1122 923 1 +1122 1070 1 +1122 1122 1 +1123 595 1 +1123 1123 1 +1124 137 1 +1124 179 1 +1124 616 1 +1124 671 1 +1124 791 1 +1124 877 1 +1124 1124 1 +1125 305 1 +1125 320 1 +1125 905 1 +1125 1125 1 +1126 689 1 +1126 876 1 +1126 1105 1 +1126 1126 1 +1127 827 1 +1127 1127 1 +1128 848 1 +1128 908 1 +1128 1128 1 +1129 702 1 +1129 1129 1 +1130 545 1 +1130 617 1 +1130 702 1 +1130 1130 1 +1131 32 1 +1131 417 1 +1131 1131 1 +1132 267 1 +1132 461 1 +1132 1000 1 +1132 1132 1 +1133 55 1 +1133 150 1 +1133 158 1 +1133 218 1 +1133 565 1 +1133 607 1 +1133 871 1 +1133 1133 1 +1133 1138 1 +1134 318 1 +1134 412 1 +1134 491 1 +1134 777 1 +1134 920 1 +1134 1134 1 +1135 288 1 +1135 333 1 +1135 374 1 +1135 562 1 +1135 710 1 +1135 752 1 +1135 773 1 +1135 810 1 +1135 1135 1 +1136 324 1 +1136 646 1 +1136 1136 1 +1137 716 1 +1137 1137 1 +1138 350 1 +1138 1133 1 +1138 1138 1 diff --git a/test/reorder/rcm.cpp b/test/reorder/rcm.cpp index 9d0749da73f..ce3c60b17dc 100644 --- a/test/reorder/rcm.cpp +++ b/test/reorder/rcm.cpp @@ -380,6 +380,20 @@ TEST_F(Rcm, PermutationIsRcmOrderedMultipleConnectedComponents) } +TEST_F(Rcm, PermutationIsRcmOrderedShuffledFromFile) +{ + o_1138_bus_mtx = gko::read( + std::ifstream{gko::matrices::location_1138_bus_shuffled_mtx}, ref); + d_1138_bus_mtx = gko::clone(exec, o_1138_bus_mtx); + + d_reorder_op = reorder_type::build().on(exec)->generate(d_1138_bus_mtx); + + auto perm = d_reorder_op->get_permutation(); + check_rcm_ordered(o_1138_bus_mtx, perm.get(), + d_reorder_op->get_parameters().strategy); +} + + TEST_F(Rcm, PermutationIsRcmOrderedMinDegreeMultipleConnectedComponents) { this->build_multiple_connected_components(); From 9f659027f8e80b9b54129eecf3a7a70b62559926 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 18 Jan 2024 14:46:25 +0100 Subject: [PATCH 29/29] fix test We should check that one possible choice for the next contender has a lower or equal height, not all of them. --- test/reorder/rcm.cpp | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/test/reorder/rcm.cpp b/test/reorder/rcm.cpp index ce3c60b17dc..923a5c1f10f 100644 --- a/test/reorder/rcm.cpp +++ b/test/reorder/rcm.cpp @@ -139,6 +139,19 @@ class Rcm : public CommonTestFixture { current_height = reference_current_levels[i]; } } + // remove all contenders of non-minimal degree + auto contender_min_degree = *std::min_element( + reference_contenders.begin(), reference_contenders.end(), + [&](index_type u, index_type v) { + return degrees[u] < degrees[v]; + }); + reference_contenders.erase( + std::remove_if(reference_contenders.begin(), + reference_contenders.end(), + [&](index_type u) { + return degrees[u] > contender_min_degree; + }), + reference_contenders.end()); // then compute a level array for each of the contenders std::vector> reference_contenders_levels( @@ -153,18 +166,22 @@ class Rcm : public CommonTestFixture { reference_contenders_levels[i] = reference_contender_levels; } - // and check if any maximum level exceeds that of the start node + // and check if there is at least one minimum-degree contender with + // lower or equal height + std::vector lower_contenders; for (gko::size_type i = 0; i < reference_contenders.size(); ++i) { auto contender_height = std::numeric_limits::min(); - index_type contender_degree = 0; for (gko::size_type j = 0; j < n; ++j) { if (cc.const_find(j) == start_rep && reference_contenders_levels[i][j] > contender_height) { contender_height = reference_contenders_levels[i][j]; } } - ASSERT_LE(contender_height, current_height); + if (contender_height <= current_height) { + lower_contenders.push_back(reference_contenders[i]); + } } + ASSERT_FALSE(lower_contenders.empty()); } static void check_rcm_ordered(std::shared_ptr mtx,