From b7eaa17ccc437bd4a602c0b221d9a680fdeb508b Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Wed, 15 Jan 2025 13:40:26 +0100 Subject: [PATCH] fix in resolving global indexing of octahedral meshes (with Willem) --- ...olation_mpi_global_interpolation_matrix.cc | 235 ++++++++++++------ 1 file changed, 155 insertions(+), 80 deletions(-) diff --git a/src/tests/interpolation/test_interpolation_mpi_global_interpolation_matrix.cc b/src/tests/interpolation/test_interpolation_mpi_global_interpolation_matrix.cc index 34f283f75..2f3ae3cb7 100644 --- a/src/tests/interpolation/test_interpolation_mpi_global_interpolation_matrix.cc +++ b/src/tests/interpolation/test_interpolation_mpi_global_interpolation_matrix.cc @@ -76,19 +76,23 @@ static Config scheme() { return scheme; } + std::string input_gridname(const std::string& default_grid) { return eckit::Resource("--input-grid", default_grid); } + std::string output_gridname(const std::string& default_grid) { return eckit::Resource("--output-grid", default_grid); } + CASE("which scheme?") { Log::info() << scheme().getString("type") << std::endl; } -std::vector& serial_grid_matrix( const Interpolation& interpolation, std::vector& triplets ) { + +void serial_grid_matrix( const Interpolation& interpolation, std::vector& triplets ) { auto src_fs = interpolation.source(); auto tgt_fs = interpolation.target(); @@ -100,31 +104,26 @@ std::vector& serial_grid_matrix( const Interpolation& in if( src_fs.type() == "PointCloud" ) { // continue and extract the triplets directly from the matrix + return; } auto src_ridx = array::make_indexview(src_fs.remote_index()); - auto src_remote_index = [&](idx_t idx) -> idx_t { if( src_fs.type() == "StructuredColumns" ) { - return src_ridx(idx) + 1; - // TODO : fix bug in StructuredColumns !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + return src_ridx(idx) + 1; // this is a bug in StructuredColumns } return src_ridx(idx); }; const idx_t src_size = src_fs.size(); idx_t src_nghost{0}; - for( idx_t i = 0; i < src_size; ++i) { + for (idx_t i = 0; i < src_size; ++i) { if( src_remote_index(i) != i ) { ++src_nghost; } } - auto src_owned = src_size - src_nghost; - - size_t tgt_owned = tgt_fs.size(); - auto src_global_index = array::make_view(src_fs.global_index()); - auto tgt_global_index = array::make_view(tgt_fs.global_index()); auto tgt_ghost = array::make_view(tgt_fs.ghost()); + gidx_t tgt_owned = tgt_fs.size(); if( tgt_fs.type() != "PointCloud" ) { const idx_t tgt_size = tgt_fs.size(); idx_t tgt_nghost{0}; @@ -133,15 +132,8 @@ std::vector& serial_grid_matrix( const Interpolation& in ++tgt_nghost; } } - tgt_owned = tgt_size - tgt_nghost; + tgt_owned -= tgt_nghost; } - - triplets.reserve(original_matrix.nonZeros()); - const auto outer = original_matrix.outer(); - const auto index = original_matrix.inner(); - const auto weight = original_matrix.data(); - const idx_t rows = static_cast(original_matrix.rows()); - auto tgt_is_owned = [&](idx_t idx) { if( tgt_fs.type() == "PointCloud" ) { return true; @@ -149,29 +141,61 @@ std::vector& serial_grid_matrix( const Interpolation& in return not tgt_ghost(idx); }; - for(idx_t row = 0; row < rows; ++row) { - if( tgt_is_owned(row) ) { - idx_t r = tgt_fs.type() == "PointCloud" ? row : tgt_global_index(row)-1; + auto src_owned = src_size - src_nghost; + + std::cout << "src_owned, tgt_owned: " << src_owned << ", " << tgt_owned << std::endl; + + auto src_global_index = array::make_view(src_fs.global_index()); + auto tgt_global_index = array::make_view(tgt_fs.global_index()); + auto src_part = array::make_view(src_fs.partition()); + + std::vector send_global_idx(src_size); + for (int i = 0; i < src_size; ++i) { + send_global_idx[i] = src_global_index[i]; + } + std::vector> recv_global_idx( mpi::comm().size() ); + eckit::mpi::Buffer recv_global_idx_buf( mpi::comm().size() ); + mpi::comm().allGatherv(send_global_idx.begin(), send_global_idx.end(), recv_global_idx_buf); + + triplets.reserve(original_matrix.nonZeros()); + const auto outer = original_matrix.outer(); + const auto index = original_matrix.inner(); + const auto weight = original_matrix.data(); + const idx_t rows = static_cast(original_matrix.rows()); + + for(idx_t r = 0; r < rows; ++r) { + if( tgt_is_owned(r) ) { + idx_t row = (tgt_fs.type() == "PointCloud" ? r : tgt_global_index(r) - 1); for (idx_t c = outer[row]; c < outer[row + 1]; ++c) { idx_t col = index[c]; - ATLAS_ASSERT(col < src_size); - if( src_remote_index(col) >= src_owned ) { - ATLAS_DEBUG_VAR( col ); - ATLAS_DEBUG_VAR( src_global_index(src_remote_index(col)) ); - } - col = src_remote_index(col); - ATLAS_ASSERT( col < src_owned ); - triplets.emplace_back(tgt_global_index(r), src_global_index(col), weight[c]); - if (mpi::comm().rank() == 1) { - std::cout << tgt_global_index(r) << ", " << src_global_index(col) << ", " << weight[c] << std::endl; + auto src_ridx = src_remote_index(col); + auto src_gidx = recv_global_idx_buf.buffer[recv_global_idx_buf.displs[src_part[col]] + src_ridx]; + //ATLAS_ASSERT(col < src_size); + //if( src_ridx >= src_owned ) { + //ATLAS_DEBUG_VAR( col ); + // ATLAS_DEBUG_VAR( src_global_index(ridx_col) ); // old + // ATLAS_DEBUG_VAR( src_gidx ); // new + //} + //ATLAS_ASSERT( col < src_owned ); + col = src_gidx - 1; + auto value = weight[c]; + triplets.emplace_back(row, col, value); +// triplets.emplace_back(tgt_global_index(r), src_global_index(col), weight[c]); + if (mpi::comm().rank() == 0) { + // std::cout << tgt_global_index(r) << ", " << src_global_index(col) << ", " << weight[c] << std::endl; } } } } // sort triplets by rows - atlas::util::sort_and_accumulate_triplets(triplets); - return triplets; + //atlas::util::sort_and_accumulate_triplets(triplets); + + if (mpi::comm().rank() == 0 ) { + for(gidx_t i = 0; i < triplets.size(); ++i) { + std::cout << triplets[i] << std::endl; + } + } } @@ -189,86 +213,137 @@ CASE("test_interpolation_structured using grid API") { std::unique_ptr matrix; std::vector triplets; - std::cout << "[" << mpi::comm().rank() << "] input, output grid size: " << output_grid.size() << ", " << input_grid.size() << std::endl; mpi::comm().barrier(); - serial_grid_matrix( Interpolation{ scheme(), input_grid, output_grid }, triplets ); + //auto interpolator = Interpolation{ scheme(), input_grid, output_grid }; + ATLAS_DEBUG(); + auto inmesh = Mesh(input_grid); + ATLAS_DEBUG(); + auto outmesh = Mesh(output_grid); + ATLAS_DEBUG(); + auto fs_in = NodeColumns(inmesh); + ATLAS_DEBUG(); + auto fs_out = NodeColumns(outmesh); + ATLAS_DEBUG(); + auto interpolator = Interpolation{ scheme(), fs_in, fs_out }; + ATLAS_DEBUG(); + serial_grid_matrix( interpolator, triplets ); + ATLAS_DEBUG(); // gather all triplets on rank 0 std::vector triplet_sizes(mpi::comm().size()); mpi::comm().gather(triplets.size(), triplet_sizes, 0); - if (mpi::comm().rank() == 0) { - std::cout << "ntriplets: " << triplet_sizes[0] << " " << triplet_sizes[1] << std::endl; - } - if (mpi::comm().rank() == 1) { - std::cout << "triplet size: " << triplets.size() << std::endl; - } + std::cout << "[" << mpi::comm().rank() << "] triplet size: " << triplets.size() << std::endl; + size_t ntriplets = 0; - for (int i = 1; i < mpi::comm().size(); ++i) { + for (int i = 0; i < triplet_sizes.size(); ++i) { ntriplets += triplet_sizes[i]; } - std::vector other_triplets(ntriplets); + std::vector cols(triplets.size()); + std::vector rows(triplets.size()); + std::vector vals(triplets.size()); + for (int i = 0; i < triplets.size(); ++i) { + cols[i] = triplets[i].col(); + rows[i] = triplets[i].row(); + vals[i] = triplets[i].value(); + } + + std::vector all_cols; + std::vector all_rows; + std::vector all_vals; if (mpi::comm().rank() == 0) { size_t pos = 0; + all_cols.resize(ntriplets); + all_rows.resize(ntriplets); + all_vals.resize(ntriplets); for (int i = 1; i < mpi::comm().size(); ++i) { - mpi::comm().receive(&other_triplets[pos], triplet_sizes[i], i, 0); - pos += triplet_sizes[i]; + pos += triplet_sizes[i-1]; + mpi::comm().receive(all_cols.data() + pos, triplet_sizes[i], i, 0); + mpi::comm().receive(all_rows.data() + pos, triplet_sizes[i], i, 0); + mpi::comm().receive(all_vals.data() + pos, triplet_sizes[i], i, 0); + } + for (int i = 0; i < triplets.size(); ++i) { + all_cols[i] = cols[i]; + all_rows[i] = rows[i]; + all_vals[i] = vals[i]; } } else { - mpi::comm().send(&triplets[0], triplets.size(), 0); + mpi::comm().send(cols.data(), cols.size(), 0, 0); + mpi::comm().send(rows.data(), rows.size(), 0, 0); + mpi::comm().send(vals.data(), vals.size(), 0, 0); } + cols.clear(); + rows.clear(); + vals.clear(); + + //reassemble triplets + std::vector all_triplets; + if (mpi::comm().rank() == 0) { + all_triplets.resize(ntriplets); + for (int i = 0; i < ntriplets; ++i) { + all_triplets[i] = Triplet(all_rows[i], all_cols[i], all_vals[i]); + } - //matrix->print(std::cout); - std::cout << std::endl; + atlas::util::sort_and_accumulate_triplets(all_triplets); - return; + std::cout << "ntriplets: " << ntriplets << std::endl; + for (int i = 0; i < ntriplets; ++i) { + std::cout << all_triplets[i] << std::endl; + } - //eckit::linalg::Size new_rows(tgt_owned); - //eckit::linalg::Size new_cols(src_owned); - //return std::unique_ptr( new Matrix{new_rows,new_cols,triplets} ); + eckit::linalg::Size new_rows(input_grid.size()); + eckit::linalg::Size new_cols(output_grid.size()); + std::cout << "new_rows: " << new_rows << std::endl; + std::cout << "new_cols: " << new_cols << std::endl; - // print the matrix - matrix->dump(std::cout << "[" << mpi::comm().rank() << "]"); + auto matrix = std::unique_ptr( new Matrix{new_rows, new_cols, all_triplets} ); + + // print the matrix + matrix->dump(std::cout << "[" << mpi::comm().rank() << "]"); - for (int i = 0; i < matrix->rows(); ++i) { - for (int j = 0; j < matrix->cols(); ++j) { - // std::cout << matrix->operator()(i,j) << " "; + for (int i = 0; i < matrix->rows(); ++i) { + for (int j = 0; j < matrix->cols(); ++j) { + // std::cout << matrix->operator()(i,j) << " "; + } + // std::cout << std::endl; } - // std::cout << std::endl; - } - // Allocate and initialise own memory here to show possibilities - std::vector src_data(input_grid.size()); - std::vector tgt_data(output_grid.size()); + // Allocate and initialise own memory here to show possibilities + std::vector src_data(input_grid.size()); + std::vector tgt_data(output_grid.size()); - ATLAS_TRACE_SCOPE("initialize source") { - idx_t n{0}; - for (auto p : input_grid.lonlat()) { - //src_data[n++] = util::function::vortex_rollup(p.lon(), p.lat(), 1.); - src_data[n++] = 1.; + ATLAS_TRACE_SCOPE("initialize source") { + idx_t n{0}; + for (auto p : input_grid.lonlat()) { + //src_data[n++] = util::function::vortex_rollup(p.lon(), p.lat(), 1.); + src_data[n++] = 1.; + } } - } - ATLAS_TRACE_SCOPE("interpolate") { - auto src = eckit::linalg::Vector(src_data.data(),src_data.size()); - auto tgt = eckit::linalg::Vector(tgt_data.data(),tgt_data.size()); - eckit::linalg::LinearAlgebraSparse::backend().spmv(*matrix,src,tgt); - } + ATLAS_TRACE_SCOPE("interpolate") { + auto src = eckit::linalg::Vector(src_data.data(), src_data.size()); + auto tgt = eckit::linalg::Vector(tgt_data.data(), tgt_data.size()); + eckit::linalg::LinearAlgebraSparse::backend().spmv(*matrix,src,tgt); + } + ATLAS_TRACE_SCOPE("output") { + mpi::Scope mpi_scope("self"); + auto tgt_field = Field("tgt",tgt_data.data(),array::make_shape(tgt_data.size())); // wrap + output::Gmsh gmsh(scheme().getString("name") + "-output-section" + std::to_string(_subsection) + ".msh", + Config("coordinates", "lonlat") | Config("ghost", "true")); + gmsh.write(MeshGenerator("structured",util::Config("three_dimensional",true)).generate(output_grid)); + gmsh.write(tgt_field, StructuredColumns(output_grid)); + } - ATLAS_TRACE_SCOPE("output") { - auto tgt_field = Field("tgt",tgt_data.data(),array::make_shape(tgt_data.size())); // wrap - output::Gmsh gmsh(scheme().getString("name") + "-output-section" + std::to_string(_subsection) + ".msh", - Config("coordinates", "lonlat") | Config("ghost", "true")); - gmsh.write(MeshGenerator("structured",util::Config("three_dimensional",true)).generate(output_grid)); - gmsh.write(tgt_field, StructuredColumns(output_grid)); } + + return; }; - test(Grid{input_gridname("F2")}, Grid{output_gridname("H2")}); + test(Grid{input_gridname("O32")}, Grid{output_gridname("O32")}); }