Skip to content

Commit

Permalink
Merge pull request #3530 from guj/h5_subfile_support
Browse files Browse the repository at this point in the history
H5 subfile support
  • Loading branch information
anagainaru authored Mar 21, 2023
2 parents 4ee931e + 09c6a10 commit 4a59197
Show file tree
Hide file tree
Showing 9 changed files with 400 additions and 38 deletions.
4 changes: 2 additions & 2 deletions docs/user_guide/source/ecosystem/h5vol/vol.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Disclaimer

The Virtual Object Layer (VOL) is a feature introduced in the HDF5 1.12 release (https://hdf5.wiki/index.php/New_Features_in_HDF5_Release_1.12).

So please do make sure your HDF5 version supports VOL.
So please do make sure your HDF5 version supports the latest VOL.

Once the ADIOS VOL is compiled, there are two ways to apply it:

Expand Down Expand Up @@ -70,4 +70,4 @@ Internal





12 changes: 12 additions & 0 deletions docs/user_guide/source/engines/hdf5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,15 @@ We can pass options to HDF5 API from ADIOS xml configuration. Currently we sup
<parameter key="H5ChunkVar" value="VarName1 VarName2"/>
We suggest reading the HDF5 documentation before applying these options.

Since the subfile feature was introduced in HDF5 version 1.14, the ADIOS2 HDF5 engine uses subfiles as the default h5 format, as this generally improves I/O (for example, see https://escholarship.org/uc/item/6fs7s3jb).

To use the subfile feature, the client needs to support MPI_Init_thread with MPI_THREAD_MULTIPLE.

Useful parameters from the HDF5 library to tune subfiles are:
.. code-block:: xml
H5FD_SUBFILING_IOC_PER_NODE (num of subfiles per node)
Set H5FD_SUBFILING_IOC_PER_NODE to 0 before using the ADIOS2 HDF5 engine if a regular h5 file is preferred.
H5FD_SUBFILING_STRIPE_SIZE
H5FD_IOC_THREAD_POOL_SIZE
7 changes: 7 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,10 @@ endif()
if(ADIOS2_HAVE_CUDA OR ADIOS2_HAVE_Kokkos_CUDA)
add_subdirectory(cuda)
endif()


# The h5subfile example is only added when ADIOS2 has both MPI and HDF5
# support and the HDF5 version found is 1.14 or newer.
if(ADIOS2_HAVE_MPI AND ADIOS2_HAVE_HDF5)
if(HDF5_VERSION VERSION_GREATER_EQUAL 1.14)
add_subdirectory(h5subfile)
endif()
endif()
10 changes: 10 additions & 0 deletions examples/h5subfile/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#------------------------------------------------------------------------------#
# Distributed under the OSI-approved Apache License, Version 2.0. See
# accompanying file Copyright.txt for details.
#------------------------------------------------------------------------------#

# Build the H5EngineSubfileTest executable from h5_subfile.cpp and link it
# against the MPI-enabled ADIOS2 C++11 bindings; skipped without MPI support.
if(ADIOS2_HAVE_MPI)
add_executable(H5EngineSubfileTest h5_subfile.cpp)
target_link_libraries(H5EngineSubfileTest adios2::cxx11_mpi MPI::MPI_C)
endif()

300 changes: 300 additions & 0 deletions examples/h5subfile/h5_subfile.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,300 @@
/*
* Distributed under the OSI-approved Apache License, Version 2.0. See
* accompanying file Copyright.txt for details.
*
* h5_subfile.cpp: Simple self-descriptive example of how to write a
* variable to a parallel HDF5 File using MPI processes.
*
* Created on: March 6, 2023
* Author: Junmin
*/

#include <adios2.h>
#include <cstdio>  //sscanf
#include <cstdlib> //std::getenv EXIT_SUCCESS EXIT_FAILURE
#include <ios> //std::ios_base::failure
#include <iostream> //std::cout
#include <mpi.h>
#include <stdexcept> //std::invalid_argument std::exception
#include <unistd.h>
#include <vector>

/**
 * Writes the variables "h5Floats" and "h5Ints" for five steps through the
 * given IO, defines the adios2_schema attributes and closes the engine.
 *
 * Every rank owns an Nx x Ny block (Nx = 1024, Ny = 1024 * scale) located at
 * row offset rank * Nx of a (size * Nx) x Ny global array. `scale` is 1
 * unless the TEST_SCALE environment variable parses to an integer > 1.
 *
 * When the number of ranks is even, every rank calls Put in each step;
 * otherwise only rank 0 does, while all ranks still call BeginStep/EndStep.
 *
 * @param hdf5IO       IO used to define variables/attributes and to open the
 *                     writing engine
 * @param rank         rank of the calling process
 * @param size         total number of processes
 * @param testFileName name handed to IO::Open in write mode
 */
void writeMe(adios2::IO &hdf5IO, int rank, int size, const char *testFileName)
{
    // Optional size multiplier taken from the environment
    int scale = 1;
    const char *scaleEnv = std::getenv("TEST_SCALE");
    if (scaleEnv != nullptr)
    {
        int requested = -1;
        sscanf(scaleEnv, "%d", &requested);
        if (requested > 1)
        {
            scale = requested;
        }
    }

    const std::size_t Nx = 1024;
    const std::size_t Ny = 1024 * scale;

    // Per-rank payload, filled with rank dependent constants
    std::vector<float> myFloats(Nx * Ny, 0.1 * rank);
    std::vector<int> myInts(Nx * Ny, 1 + rank);

    // set this if not all ranks are writing
    hdf5IO.SetParameter("IdleH5Writer", "true");

    adios2::Variable<float> h5Floats = hdf5IO.DefineVariable<float>(
        "h5Floats", {size * Nx, Ny}, {rank * Nx, 0}, {Nx, Ny},
        adios2::ConstantDims);

    adios2::Variable<int> h5Ints = hdf5IO.DefineVariable<int>(
        "h5Ints", {size * Nx, Ny}, {rank * Nx, 0}, {Nx, Ny},
        adios2::ConstantDims);

    /** Engine derived class, spawned to start IO operations */
    adios2::Engine hdf5Writer = hdf5IO.Open(testFileName, adios2::Mode::Write);

    // With an even number of ranks everybody writes. Otherwise only rank 0
    // has data, and the other ranks just take part in the collective
    // BeginStep()/EndStep() calls.
    const bool allRanksWrite = (size % 2 == 0);
    const int nsteps = 5;

    for (int step = 0; step < nsteps; ++step)
    {
        hdf5Writer.BeginStep();
        if (allRanksWrite || rank == 0)
        {
            hdf5Writer.Put<float>(h5Floats, myFloats.data());
            hdf5Writer.Put(h5Ints, myInts.data());
        }
        hdf5Writer.EndStep();
    }

    // Schema attributes are defined after the last step, before closing
    std::vector<int64_t> meshDims = {10, 20, 30, 40};

    hdf5IO.DefineAttribute<std::string>("adios2_schema/version_major",
                                        std::to_string(ADIOS2_VERSION_MAJOR));
    hdf5IO.DefineAttribute<std::string>("adios2_schema/version_minor",
                                        std::to_string(ADIOS2_VERSION_MINOR));
    hdf5IO.DefineAttribute<std::string>("/adios2_schema/mesh/type", "explicit");
    hdf5IO.DefineAttribute<std::int64_t>("adios2_schema/mesh/dimension0",
                                         meshDims[0]);
    hdf5IO.DefineAttribute<std::int64_t>("adios2_schema/mesh/dimension1",
                                         meshDims[1]);
    hdf5IO.DefineAttribute<std::int64_t>("adios2_schema/mesh/dimension2",
                                         meshDims[2]);
    hdf5IO.DefineAttribute<std::int64_t>("adios2_schema/mesh/dimension3",
                                         meshDims[3]);
    hdf5IO.DefineAttribute<std::int64_t>("adios2_schema/mesh/dimension-num",
                                         meshDims.size());

    hdf5Writer.Close();
}

/**
 * Reads the variable `name` with element type T and prints a short preview of
 * its values to std::cout.
 *
 * Nothing is read or printed when the IO has no variable of that name and
 * type. The read buffer is sized from the product of the variable's shape.
 * Arrays with fewer than 20 elements are printed completely; larger ones
 * show the first five and the last five values separated by " ...... ".
 *
 * @tparam T       element type of the variable
 * @param h5IO     IO object used to look the variable up
 * @param h5Reader engine the data is read from (synchronous Get)
 * @param name     name of the variable to read
 */
template <class T>
void ReadVarData(adios2::IO h5IO, adios2::Engine &h5Reader,
                 const std::string &name)
{
    adios2::Variable<T> var = h5IO.InquireVariable<T>(name);
    if (!var)
    {
        return;
    }

    // Query the shape once instead of on every loop iteration.
    const auto shape = var.Shape();
    size_t totalSize = 1;
    for (const auto extent : shape)
    {
        totalSize *= extent;
    }

    // The destination buffer must be allocated before calling Get
    std::vector<T> myValues(totalSize);
    h5Reader.Get<T>(var, myValues.data(), adios2::Mode::Sync);

    std::cout << "\tPeek Values: ";

    const size_t peekCount = 5;
    if (totalSize < 20)
    { // print all
        for (const auto number : myValues)
        {
            std::cout << number << " ";
        }
    }
    else
    {
        // totalSize >= 20 here, so head and tail windows never overlap and
        // totalSize - peekCount cannot wrap around.
        for (size_t i = 0; i < peekCount; ++i)
        {
            std::cout << myValues[i] << " ";
        }
        std::cout << " ...... ";
        // Show as many trailing values as leading ones (the previous
        // condition `counter > totalSize - 5` dropped one of them).
        for (size_t i = totalSize - peekCount; i < totalSize; ++i)
        {
            std::cout << myValues[i] << " ";
        }
    }
    std::cout << "\n";
}

/**
 * Opens `fileName` for reading, lists every available variable and attribute
 * together with their parameters, and previews the data of variables whose
 * parameter value is "double", "float", "unsigned int" or "int".
 *
 * The variable/attribute counts are printed by rank 0 only; the listings are
 * printed by every rank that calls this function.
 *
 * @param h5IO     IO object used to open the engine and query its contents
 * @param rank     rank of the calling process
 * @param size     total number of processes (not used)
 * @param fileName name handed to IO::Open in read mode
 */
void readMe(adios2::IO &h5IO, int rank, int size, const char *fileName)
{
    /** Engine derived class, spawned to start IO operations */
    adios2::Engine h5Reader = h5IO.Open(fileName, adios2::Mode::Read);

    // ---- variables ----
    const std::map<std::string, adios2::Params> variables =
        h5IO.AvailableVariables();

    if (0 == rank)
    {
        std::cout << " Num Vars: " << variables.size() << std::endl;
    }

    for (auto varIt = variables.begin(); varIt != variables.end(); ++varIt)
    {
        const std::string &varName = varIt->first;
        std::cout << "Name: " << varName << std::endl;

        for (auto parIt = varIt->second.begin(); parIt != varIt->second.end();
             ++parIt)
        {
            const std::string &value = parIt->second;
            std::cout << "\t" << parIt->first << ": " << value << "\n";

            // A parameter value naming a supported type triggers a read of
            // the variable with that element type.
            if (value == "double")
            {
                ReadVarData<double>(h5IO, h5Reader, varName);
            }
            else if (value == "float")
            {
                ReadVarData<float>(h5IO, h5Reader, varName);
            }
            else if (value == "unsigned int")
            {
                ReadVarData<unsigned int>(h5IO, h5Reader, varName);
            }
            else if (value == "int")
            {
                ReadVarData<int>(h5IO, h5Reader, varName);
            }
        }
    }

    // ---- attributes ----
    const std::map<std::string, adios2::Params> attributes =
        h5IO.AvailableAttributes();

    if (0 == rank)
    {
        std::cout << "Num Attrs:" << attributes.size() << std::endl;
    }

    for (auto attrIt = attributes.begin(); attrIt != attributes.end();
         ++attrIt)
    {
        std::cout << "AttrName: " << attrIt->first << std::endl;

        // Only the attribute parameters are listed; attribute values are not
        // read back here. Add per-type handling if needed.
        for (auto parIt = attrIt->second.begin();
             parIt != attrIt->second.end(); ++parIt)
        {
            std::cout << "\t" << parIt->first << ": " << parIt->second << "\n";
        }
    }

    h5Reader.Close();
}

/**
 * Entry point of the HDF5 subfile example.
 *
 * Usage: <program> [fileName [anything]]
 *  - fileName defaults to "test.h5". A one-character name selects the
 *    "NullCore" engine and writes to "null.bp" instead.
 *  - any second argument additionally reads fileName back and prints its
 *    content.
 *
 * @return EXIT_SUCCESS when writing (and the optional read back) finished
 *         without an exception, EXIT_FAILURE otherwise.
 */
int main(int argc, char *argv[])
{
    int provided;

    // The HDF5 subfile feature used by this example needs
    // MPI_THREAD_MULTIPLE, so request it explicitly.
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);

    if (provided < MPI_THREAD_MULTIPLE)
    {
        std::cout << "MPI_THREAD_MULTIPLE is not supported, not able to use "
                     "the subfile feature in HDF5. Aborting. \n"
                  << std::endl;
        MPI_Abort(MPI_COMM_WORLD, -1);
    }

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // Failures are reported through the exit status instead of being
    // swallowed after printing the message.
    int exitCode = EXIT_SUCCESS;

    try
    {
        /** ADIOS class factory of IO class objects */
        adios2::ADIOS adios(MPI_COMM_WORLD);
        adios2::IO writerIO = adios.DeclareIO("HDFFileIOWriter");

        std::string testName = "test.h5";
        if (argc > 1)
        {
            testName = argv[1];
        }

        if (1 == testName.size())
        {
            // A single-character name switches to the NullCore engine
            writerIO.SetEngine("NullCore");
            writeMe(writerIO, rank, size, "null.bp");
        }
        else
        {
            writeMe(writerIO, rank, size, testName.c_str());
        }

        // read back if required
        if (argc > 2)
        {
            MPI_Barrier(MPI_COMM_WORLD);
            adios2::IO readerIO = adios.DeclareIO("HDFFileIOReader");
            readMe(readerIO, rank, size, testName.c_str());
        }
    }
    catch (const std::invalid_argument &e)
    {
        std::cout << "Invalid argument exception, STOPPING PROGRAM from rank "
                  << rank << "\n";
        std::cout << e.what() << "\n";
        exitCode = EXIT_FAILURE;
    }
    catch (const std::ios_base::failure &e)
    {
        std::cout
            << "IO System base failure exception, STOPPING PROGRAM from rank "
            << rank << "\n";
        std::cout << e.what() << "\n";
        exitCode = EXIT_FAILURE;
    }
    catch (const std::exception &e)
    {
        std::cout << "Exception, STOPPING PROGRAM from rank " << rank << "\n";
        std::cout << e.what() << "\n";
        exitCode = EXIT_FAILURE;
    }

    MPI_Finalize();

    return exitCode;
}
4 changes: 3 additions & 1 deletion source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@ add_subdirectory(utils)

# HDF5 VOL requires 1.14+
if(ADIOS2_HAVE_HDF5)
if(HDF5_VERSION VERSION_LESS 1.13)
if(HDF5_VERSION VERSION_LESS 1.14)
set(ADIOS2_HAVE_HDF5_VOL OFF CACHE INTERNAL "")
message(STATUS "[ADIOS2 WARNING] To enable ADIOS VOL for HDF5, please use the version 1.14+ ")
else()
set(ADIOS2_HAVE_HDF5_VOL ON CACHE INTERNAL "")
endif()
else()
set(ADIOS2_HAVE_HDF5_VOL OFF CACHE INTERNAL "")
endif()

if(ADIOS2_HAVE_HDF5_VOL)
add_subdirectory(h5vol)
endif()
15 changes: 15 additions & 0 deletions source/adios2/toolkit/interop/hdf5/HDF5Common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,21 @@ void HDF5Common::Init(const std::string &name, helper::Comm const &comm,
std::string ts0;
StaticGetAdiosStepString(ts0, 0);

#ifdef H5_HAVE_SUBFILING_VFD
bool useMPI = false;
const char *temp = getenv("H5FD_SUBFILING_IOC_PER_NODE");

if (NULL != temp)
{
int itemp = -1;
sscanf(temp, "%d", &itemp);
if (0 == itemp)
useMPI = true;
}

if (!useMPI)
H5Pset_fapl_subfiling(m_PropertyListId, NULL);
#endif
if (toWrite)
{
/*
Expand Down
Loading

0 comments on commit 4a59197

Please sign in to comment.