From 365b1838190c5ceab095b924e08f743f753c75d0 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Thu, 20 Feb 2025 11:30:39 -0700 Subject: [PATCH] EXODUS: Better handling of names in parallel io mode --- .../libraries/exodus/include/exodusII_int.h | 8 ++++-- .../seacas/libraries/exodus/src/ex_conv.c | 28 +++++++++++++++++-- .../seacas/libraries/exodus/src/ex_create.c | 5 ++-- .../libraries/exodus/src/ex_create_par.c | 6 ++-- .../seacas/libraries/exodus/src/ex_open.c | 4 +-- .../seacas/libraries/exodus/src/ex_open_par.c | 6 ++-- .../seacas/libraries/exodus/src/ex_utils.c | 28 +++++++++++++++++-- 7 files changed, 70 insertions(+), 15 deletions(-) diff --git a/packages/seacas/libraries/exodus/include/exodusII_int.h b/packages/seacas/libraries/exodus/include/exodusII_int.h index e1fbb34325..b7160a613a 100644 --- a/packages/seacas/libraries/exodus/include/exodusII_int.h +++ b/packages/seacas/libraries/exodus/include/exodusII_int.h @@ -1,6 +1,6 @@ /* - * Copyright(C) 1999-2020, 2022, 2023, 2024 National Technology & Engineering Solutions + * Copyright(C) 1999-2020, 2022, 2023, 2024, 2025 National Technology & Engineering Solutions * of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with * NTESS, the U.S. Government retains certain rights in this software. * @@ -706,6 +706,7 @@ struct exi_file_item gzip, 4..32 and even for szip; -131072..22 for zstd, NetCDF-4 only */ unsigned int assembly_count; unsigned int blob_count; + unsigned int mpi_rank; /**< Only valid if `is_parallel` is true */ unsigned int persist_define_mode : 10; /**< Stay in define mode until exi_persist_leavedef is called. Set by exi_persist_redef... 
*/ @@ -786,7 +787,7 @@ EXODUS_EXPORT char *exi_name_red_var_of_object(ex_entity_type /*obj_type*/, int EXODUS_EXPORT char *exi_name_of_map(ex_entity_type /*map_type*/, int /*map_index*/); EXODUS_EXPORT int exi_conv_init(int exoid, int *comp_wordsize, int *io_wordsize, int file_wordsize, - int int64_status, bool is_parallel, bool is_hdf5, bool is_pnetcdf, + int int64_status, int mpi_rank, bool is_parallel, bool is_hdf5, bool is_pnetcdf, bool is_write); EXODUS_EXPORT void exi_conv_exit(int exoid); @@ -795,6 +796,7 @@ EXODUS_EXPORT nc_type nc_flt_code(int exoid); EXODUS_EXPORT int exi_comp_ws(int exoid); EXODUS_EXPORT int exi_get_cpu_ws(void); EXODUS_EXPORT int exi_is_parallel(int exoid); +EXODUS_EXPORT int exi_parallel_rank(int exoid); EXODUS_EXPORT struct exi_list_item **exi_get_counter_list(ex_entity_type obj_type); EXODUS_EXPORT int exi_get_file_item(int /*exoid*/, struct exi_list_item **/*list_ptr*/); @@ -883,7 +885,7 @@ EXODUS_EXPORT int exi_persist_leavedef(int exoid, /* NemesisI file I EXODUS_EXPORT int exi_check_version(int run_version); EXODUS_EXPORT int exi_handle_mode(unsigned int my_mode, int is_parallel, int run_version); -EXODUS_EXPORT int exi_populate_header(int exoid, const char *path, int my_mode, int is_parallel, +EXODUS_EXPORT int exi_populate_header(int exoid, const char *path, int my_mode, int my_rank, int is_parallel, int *comp_ws, int *io_ws); EXODUS_EXPORT int exi_get_block_param(int exoid, ex_entity_id id, int ndim, diff --git a/packages/seacas/libraries/exodus/src/ex_conv.c b/packages/seacas/libraries/exodus/src/ex_conv.c index 4b96ee7e2e..a3d9edbdf6 100644 --- a/packages/seacas/libraries/exodus/src/ex_conv.c +++ b/packages/seacas/libraries/exodus/src/ex_conv.c @@ -1,5 +1,5 @@ /* - * Copyright(C) 1999-2021, 2024 National Technology & Engineering Solutions + * Copyright(C) 1999-2021, 2024, 2025 National Technology & Engineering Solutions * of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with * NTESS, the U.S. 
Government retains certain rights in this software. * @@ -108,7 +108,7 @@ int exi_check_valid_file_id(int exoid, const char *func) } int exi_conv_init(int exoid, int *comp_wordsize, int *io_wordsize, int file_wordsize, - int int64_status, bool is_parallel, bool is_hdf5, bool is_pnetcdf, bool is_write) + int int64_status, int mpi_rank, bool is_parallel, bool is_hdf5, bool is_pnetcdf, bool is_write) { char errmsg[MAX_ERR_LENGTH]; struct exi_file_item *new_file = NULL; @@ -575,6 +575,30 @@ int exi_is_parallel(int exoid) EX_FUNC_LEAVE(file->is_parallel); } +/*! + * \ingroup Utilities + * exi_parallel_rank() returns the mpi rank for the current file *IF* the + * file was opened in parallel; otherwise (file-per-rank or serial), it + * will return 0. Unknown file ids are reported and return EX_FATAL. + * + * NOTE that in this case parallel assumes the output of a single file, + * not a parallel run using file-per-processor. + * \param exoid integer which uniquely identifies the file of interest. + */ +int exi_parallel_rank(int exoid) +{ + EX_FUNC_ENTER(); + struct exi_file_item *file = exi_find_file_item(exoid); + + if (!file) { + char errmsg[MAX_ERR_LENGTH]; + snprintf(errmsg, MAX_ERR_LENGTH, "ERROR: unknown file id %d", exoid); + ex_err(__func__, errmsg, EX_BADFILEID); + EX_FUNC_LEAVE(EX_FATAL); + } + EX_FUNC_LEAVE(file->is_parallel ? file->mpi_rank : 0); +} + /*! * \ingroup Utilities * \note diff --git a/packages/seacas/libraries/exodus/src/ex_create.c b/packages/seacas/libraries/exodus/src/ex_create.c index bca2d8b5b7..791e26e572 100644 --- a/packages/seacas/libraries/exodus/src/ex_create.c +++ b/packages/seacas/libraries/exodus/src/ex_create.c @@ -1,5 +1,5 @@ /* - * Copyright(C) 1999-2021, 2023 National Technology & Engineering Solutions + * Copyright(C) 1999-2021, 2023, 2025 National Technology & Engineering Solutions * of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with * NTESS, the U.S. Government retains certain rights in this software.
* @@ -191,7 +191,8 @@ int ex_create_int(const char *path, int cmode, int *comp_ws, int *io_ws, int run EX_FUNC_LEAVE(EX_FATAL); } - status = exi_populate_header(exoid, canon_path, my_mode, is_parallel, comp_ws, io_ws); + int rank = 0; + status = exi_populate_header(exoid, canon_path, my_mode, rank, is_parallel, comp_ws, io_ws); if (status != EX_NOERR) { free(canon_path); EX_FUNC_LEAVE(status); diff --git a/packages/seacas/libraries/exodus/src/ex_create_par.c b/packages/seacas/libraries/exodus/src/ex_create_par.c index 597e989220..ab36e7c571 100644 --- a/packages/seacas/libraries/exodus/src/ex_create_par.c +++ b/packages/seacas/libraries/exodus/src/ex_create_par.c @@ -1,5 +1,5 @@ /* - * Copyright(C) 1999-2021, 2023, 2024 National Technology & Engineering Solutions + * Copyright(C) 1999-2021, 2023, 2024, 2025 National Technology & Engineering Solutions * of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with * NTESS, the U.S. Government retains certain rights in this software. * @@ -227,7 +227,9 @@ int ex_create_par_int(const char *path, int cmode, int *comp_ws, int *io_ws, MPI EX_FUNC_LEAVE(EX_FATAL); } - status = exi_populate_header(exoid, canon_path, my_mode, is_parallel, comp_ws, io_ws); + int rank = 0; + MPI_Comm_rank(comm, &rank); + status = exi_populate_header(exoid, canon_path, my_mode, rank, is_parallel, comp_ws, io_ws); if (status != EX_NOERR) { free(canon_path); EX_FUNC_LEAVE(status); diff --git a/packages/seacas/libraries/exodus/src/ex_open.c b/packages/seacas/libraries/exodus/src/ex_open.c index 91901eb62b..80757d8427 100644 --- a/packages/seacas/libraries/exodus/src/ex_open.c +++ b/packages/seacas/libraries/exodus/src/ex_open.c @@ -1,5 +1,5 @@ /* - * Copyright(C) 1999-2024 National Technology & Engineering Solutions + * Copyright(C) 1999-2025 National Technology & Engineering Solutions * of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with * NTESS, the U.S. Government retains certain rights in this software. 
* @@ -417,7 +417,7 @@ int ex_open_int(const char *path, int mode, int *comp_ws, int *io_ws, float *ver } /* initialize floating point and integer size conversion. */ - if (exi_conv_init(exoid, comp_ws, io_ws, file_wordsize, int64_status, false, false, false, + if (exi_conv_init(exoid, comp_ws, io_ws, file_wordsize, int64_status, 0, false, false, false, mode & EX_WRITE) != EX_NOERR) { snprintf(errmsg, MAX_ERR_LENGTH, "ERROR: failed to initialize conversion routines in file id %d named %s", exoid, diff --git a/packages/seacas/libraries/exodus/src/ex_open_par.c b/packages/seacas/libraries/exodus/src/ex_open_par.c index 8b91169b5c..065aaba706 100644 --- a/packages/seacas/libraries/exodus/src/ex_open_par.c +++ b/packages/seacas/libraries/exodus/src/ex_open_par.c @@ -1,5 +1,5 @@ /* - * Copyright(C) 1999-2024 National Technology & Engineering Solutions + * Copyright(C) 1999-2025 National Technology & Engineering Solutions * of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with * NTESS, the U.S. Government retains certain rights in this software. * @@ -480,7 +480,9 @@ int ex_open_par_int(const char *path, int mode, int *comp_ws, int *io_ws, float } /* initialize floating point and integer size conversion. 
*/ - if (exi_conv_init(exoid, comp_ws, io_ws, file_wordsize, int64_status, 1, is_hdf5, is_pnetcdf, + int rank = 0; + MPI_Comm_rank(comm, &rank); + if (exi_conv_init(exoid, comp_ws, io_ws, file_wordsize, int64_status, rank, 1, is_hdf5, is_pnetcdf, mode & EX_WRITE) != EX_NOERR) { snprintf(errmsg, MAX_ERR_LENGTH, "ERROR: failed to initialize conversion routines in file id %d", exoid); diff --git a/packages/seacas/libraries/exodus/src/ex_utils.c b/packages/seacas/libraries/exodus/src/ex_utils.c index 6d0153223f..153ae6a87c 100644 --- a/packages/seacas/libraries/exodus/src/ex_utils.c +++ b/packages/seacas/libraries/exodus/src/ex_utils.c @@ -331,6 +331,23 @@ int exi_put_names(int exoid, int varid, size_t num_names, char *const *names, if (exi_check_valid_file_id(exoid, __func__) == EX_FATAL) { EX_FUNC_LEAVE(EX_FATAL); } + +#if defined(PARALLEL_AWARE_EXODUS) + /* For parallel-io (all ranks writing to single file), we only + want/need to output the names on a single rank since all ranks + have the same set of names. This avoids issues with multiple + ranks writing same data to the same data space which can/has + caused issues. It should be ok, but this seems more robust. + + Calling code passes valid data on rank 0; all other ranks have `names == NULL` + */ + int rootid = exoid & EX_FILE_ID_MASK; + if (exi_is_parallel(rootid)) { + nc_var_par_access(rootid, varid, NC_INDEPENDENT); + } + if (exi_parallel_rank(rootid) == 0) { +#endif + /* inquire previously defined dimensions */ size_t name_length = ex_inquire_int(exoid, EX_INQ_DB_MAX_ALLOWED_NAME_LENGTH) + 1; @@ -386,6 +403,13 @@ int exi_put_names(int exoid, int varid, size_t num_names, char *const *names, } free(int_names); + /* PnetCDF applies setting to entire file, so put back to collective... 
*/ +#if defined(PARALLEL_AWARE_EXODUS) + } + if (exi_is_parallel(rootid)) { + nc_var_par_access(rootid, varid, NC_COLLECTIVE); + } +#endif EX_FUNC_LEAVE(EX_NOERR); } @@ -2290,7 +2314,7 @@ int exi_handle_mode(unsigned int my_mode, int is_parallel, int run_version) \internal \undoc */ -int exi_populate_header(int exoid, const char *path, int my_mode, int is_parallel, int *comp_ws, +int exi_populate_header(int exoid, const char *path, int my_mode, int my_rank, int is_parallel, int *comp_ws, int *io_ws) { int status; @@ -2349,7 +2373,7 @@ int exi_populate_header(int exoid, const char *path, int my_mode, int is_paralle is_hdf5 = true; } - if (exi_conv_init(exoid, comp_ws, io_ws, 0, int64_status, is_parallel, is_hdf5, is_pnetcdf, + if (exi_conv_init(exoid, comp_ws, io_ws, 0, int64_status, my_rank, is_parallel, is_hdf5, is_pnetcdf, my_mode & EX_WRITE) != EX_NOERR) { snprintf(errmsg, MAX_ERR_LENGTH, "ERROR: failed to init conversion routines in file id %d", exoid);