Skip to content

Commit

Permalink
Merge pull request ESMCI#1620 from NCAR/ejh_ncint_perf_3
Browse files: browse the repository at this point in the history
More performance program improvements
  • Loading branch information
edwardhartnett authored Dec 5, 2019
2 parents 6c03fdb + e5708e1 commit a8f7076
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 54 deletions.
6 changes: 5 additions & 1 deletion src/clib/pioc_support.c
Original file line number Diff line number Diff line change
Expand Up @@ -2433,7 +2433,11 @@ inq_file_metadata(file_desc_t *file, int ncid, int iotype, int *nvars,
}

/**
* Find the appropriate IOTYPE from mode flags to nc_open().
* Find the appropriate IOTYPE from mode flags to nc_open(). The
* following flags have meaning:
* - NC_NETCDF4 - use netCDF-4/HDF5 format.
* - NC_MPIIO - when used with NC_NETCDF4, use parallel I/O.
* - NC_PNETCDF - use classic format with pnetcdf parallel I/O.
*
* @param mode the mode flag from nc_open().
* @param iotype pointer that gets the IOTYPE.
Expand Down
123 changes: 70 additions & 53 deletions tests/ncint/tst_ncint_perf.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#define NDIM2 2
#define NDIM3 3
#define NUM_TIMESTEPS 1
#define NUM_MODES 4

extern NC_Dispatch NCINT_dispatcher;

Expand All @@ -46,7 +47,7 @@ main(int argc, char **argv)
if (!my_rank)
printf("\n*** Testing netCDF integration PIO performance.\n");
if (!my_rank)
printf("*** testing simple async use of netCDF integration layer...");
printf("*** testing simple async use of netCDF integration layer...\n");
{
int ncid, ioid;
int dimid[NDIM3], varid;
Expand Down Expand Up @@ -86,66 +87,82 @@ main(int argc, char **argv)
num_procs2, NULL, NULL, NULL, PIO_REARR_BOX, &iosysid))
PERR;

if (my_rank > num_io_procs)
if (my_rank >= num_io_procs)
{
struct timeval starttime, endtime;
long long startt, endt;
long long delta;
float num_megabytes = DIM_LEN_X * DIM_LEN_Y * sizeof(int) / (float)1000000 * NUM_TIMESTEPS;
float delta_in_sec;
float mb_per_sec;
int t;

/* Create a file with a 3D record var. */
if (nc_create(FILE_NAME, NC_PIO|NC_NETCDF4, &ncid)) PERR;
if (nc_def_dim(ncid, DIM_NAME_UNLIMITED, dimlen[0], &dimid[0])) PERR;
if (nc_def_dim(ncid, DIM_NAME_X, dimlen[1], &dimid[1])) PERR;
if (nc_def_dim(ncid, DIM_NAME_Y, dimlen[2], &dimid[2])) PERR;
if (nc_def_var(ncid, VAR_NAME, NC_INT, NDIM3, dimid, &varid)) PERR;
if (nc_enddef(ncid)) PERR;

/* Calculate a decomposition for distributed arrays. */
elements_per_pe = DIM_LEN_X * DIM_LEN_Y / (ntasks - num_io_procs);
/* printf("my_rank %d elements_per_pe %ld\n", my_rank, elements_per_pe); */

if (!(compdof = malloc(elements_per_pe * sizeof(size_t))))
PERR;
for (i = 0; i < elements_per_pe; i++)
{
compdof[i] = (my_rank - num_io_procs) * elements_per_pe + i;
/* printf("my_rank %d compdof[%d]=%ld\n", my_rank, i, compdof[i]); */
}

/* Create the PIO decomposition for this test. */
if (nc_def_decomp(iosysid, PIO_INT, NDIM2, &dimlen[1], elements_per_pe,
compdof, &ioid, 1, NULL, NULL)) PERR;
free(compdof);

/* Create some data on this processor. */
if (!(my_data = malloc(elements_per_pe * sizeof(int)))) PERR;
for (i = 0; i < elements_per_pe; i++)
my_data[i] = my_rank * 10 + i;

/* Start the clock. */
gettimeofday(&starttime, NULL);

/* Write some data with distributed arrays. */
for (t = 0; t < NUM_TIMESTEPS; t++)
if (nc_put_vard_int(ncid, varid, ioid, t, my_data)) PERR;
if (nc_close(ncid)) PERR;

/* Stop the clock. */
gettimeofday(&endtime, NULL);

/* Compute the time delta */
startt = (1000000 * starttime.tv_sec) + starttime.tv_usec;
endt = (1000000 * endtime.tv_sec) + endtime.tv_usec;
delta = (endt - startt)/NUM_TIMESTEPS;
delta_in_sec = (float)delta / 1000000;
mb_per_sec = num_megabytes / delta_in_sec;
int cmode[NUM_MODES] = {NC_PIO, NC_PIO|NC_NETCDF4,
NC_PIO|NC_NETCDF4|NC_MPIIO,
NC_PIO|NC_PNETCDF};
char mode_name[NUM_MODES][NC_MAX_NAME + 1] = {"classic sequential ",
"netCDF-4 sequential ",
"netCDF-4 parallel I/O",
"pnetcdf "};
int t, m;

/* Print header. */
if (my_rank == num_io_procs)
printf("\n%d\t%d\t%d\t%d\t%d\t%8.3f\t%8.1f\t%8.3f\n", ntasks, num_io_procs,
1, 0, 1, delta_in_sec, num_megabytes, mb_per_sec);
printf("access,\t\t\tntasks,\tnio,\trearr,\ttime(s),\tdata size (MB),\t"
"performance(MB/s)\n");

for (m = 0; m < NUM_MODES; m++)
{
/* Create a file with a 3D record var. */
if (nc_create(FILE_NAME, cmode[m], &ncid)) PERR;
if (nc_def_dim(ncid, DIM_NAME_UNLIMITED, dimlen[0], &dimid[0])) PERR;
if (nc_def_dim(ncid, DIM_NAME_X, dimlen[1], &dimid[1])) PERR;
if (nc_def_dim(ncid, DIM_NAME_Y, dimlen[2], &dimid[2])) PERR;
if (nc_def_var(ncid, VAR_NAME, NC_INT, NDIM3, dimid, &varid)) PERR;
if (nc_enddef(ncid)) PERR;

/* Calculate a decomposition for distributed arrays. */
elements_per_pe = DIM_LEN_X * DIM_LEN_Y / (ntasks - num_io_procs);
/* printf("my_rank %d elements_per_pe %ld\n", my_rank, elements_per_pe); */

if (!(compdof = malloc(elements_per_pe * sizeof(size_t))))
PERR;
for (i = 0; i < elements_per_pe; i++)
{
compdof[i] = (my_rank - num_io_procs) * elements_per_pe + i;
/* printf("my_rank %d compdof[%d]=%ld\n", my_rank, i, compdof[i]); */
}

/* Create the PIO decomposition for this test. */
if (nc_def_decomp(iosysid, PIO_INT, NDIM2, &dimlen[1], elements_per_pe,
compdof, &ioid, 1, NULL, NULL)) PERR;
free(compdof);

/* Create some data on this processor. */
if (!(my_data = malloc(elements_per_pe * sizeof(int)))) PERR;
for (i = 0; i < elements_per_pe; i++)
my_data[i] = my_rank * 10 + i;

/* Start the clock. */
gettimeofday(&starttime, NULL);

/* Write some data with distributed arrays. */
for (t = 0; t < NUM_TIMESTEPS; t++)
if (nc_put_vard_int(ncid, varid, ioid, t, my_data)) PERR;
if (nc_close(ncid)) PERR;

/* Stop the clock. */
gettimeofday(&endtime, NULL);

/* Compute the time delta */
startt = (1000000 * starttime.tv_sec) + starttime.tv_usec;
endt = (1000000 * endtime.tv_sec) + endtime.tv_usec;
delta = (endt - startt)/NUM_TIMESTEPS;
delta_in_sec = (float)delta / 1000000;
mb_per_sec = num_megabytes / delta_in_sec;
if (my_rank == num_io_procs)
printf("%s,\t%d,\t%d,\t%d,\t%8.3f,\t%8.1f,\t%8.3f\n", mode_name[m],
ntasks, num_io_procs, 1, delta_in_sec, num_megabytes,
mb_per_sec);
} /* next mode flag */

free(my_data);
if (nc_free_decomp(ioid)) PERR;
Expand Down

0 comments on commit a8f7076

Please sign in to comment.