diff --git a/src/clib/pioc_support.c b/src/clib/pioc_support.c index d57459de104..849652e6a6a 100644 --- a/src/clib/pioc_support.c +++ b/src/clib/pioc_support.c @@ -2433,7 +2433,11 @@ inq_file_metadata(file_desc_t *file, int ncid, int iotype, int *nvars, } /** - * Find the appropriate IOTYPE from mode flags to nc_open(). + * Find the appropriate IOTYPE from mode flags to nc_open(). The + * following flags have meaning: + * - NC_NETCDF4 - use netCDF-4/HDF5 format + * - NC_MPIIO - when used with NC_NETCDF4, use parallel I/O. + * - NC_PNETCDF - use classic format with pnetcdf parallel I/O. * * @param mode the mode flag from nc_open(). * @param iotype pointer that gets the IOTYPE. diff --git a/tests/ncint/tst_ncint_perf.c b/tests/ncint/tst_ncint_perf.c index 5c1aa3d3edb..3666b528fa4 100644 --- a/tests/ncint/tst_ncint_perf.c +++ b/tests/ncint/tst_ncint_perf.c @@ -24,6 +24,7 @@ #define NDIM2 2 #define NDIM3 3 #define NUM_TIMESTEPS 1 +#define NUM_MODES 4 extern NC_Dispatch NCINT_dispatcher; @@ -46,7 +47,7 @@ main(int argc, char **argv) if (!my_rank) printf("\n*** Testing netCDF integration PIO performance.\n"); if (!my_rank) - printf("*** testing simple async use of netCDF integration layer..."); + printf("*** testing simple async use of netCDF integration layer...\n"); { int ncid, ioid; int dimid[NDIM3], varid; @@ -86,7 +87,7 @@ main(int argc, char **argv) num_procs2, NULL, NULL, NULL, PIO_REARR_BOX, &iosysid)) PERR; - if (my_rank > num_io_procs) + if (my_rank >= num_io_procs) { struct timeval starttime, endtime; long long startt, endt; @@ -94,58 +95,74 @@ main(int argc, char **argv) float num_megabytes = DIM_LEN_X * DIM_LEN_Y * sizeof(int) / (float)1000000 * NUM_TIMESTEPS; float delta_in_sec; float mb_per_sec; - int t; - - /* Create a file with a 3D record var. */ - if (nc_create(FILE_NAME, NC_PIO|NC_NETCDF4, &ncid)) PERR; - if (nc_def_dim(ncid, DIM_NAME_UNLIMITED, dimlen[0], &dimid[0])) PERR; - if (nc_def_dim(ncid, DIM_NAME_X, dimlen[1], &dimid[1])) PERR; - if (nc_def_dim(ncid, DIM_NAME_Y, dimlen[2], &dimid[2])) PERR; - if (nc_def_var(ncid, VAR_NAME, NC_INT, NDIM3, dimid, &varid)) PERR; - if (nc_enddef(ncid)) PERR; - - /* Calculate a decomposition for distributed arrays. */ - elements_per_pe = DIM_LEN_X * DIM_LEN_Y / (ntasks - num_io_procs); - /* printf("my_rank %d elements_per_pe %ld\n", my_rank, elements_per_pe); */ - - if (!(compdof = malloc(elements_per_pe * sizeof(size_t)))) - PERR; - for (i = 0; i < elements_per_pe; i++) - { - compdof[i] = (my_rank - num_io_procs) * elements_per_pe + i; - /* printf("my_rank %d compdof[%d]=%ld\n", my_rank, i, compdof[i]); */ - } - - /* Create the PIO decomposition for this test. */ - if (nc_def_decomp(iosysid, PIO_INT, NDIM2, &dimlen[1], elements_per_pe, - compdof, &ioid, 1, NULL, NULL)) PERR; - free(compdof); - - /* Create some data on this processor. */ - if (!(my_data = malloc(elements_per_pe * sizeof(int)))) PERR; - for (i = 0; i < elements_per_pe; i++) - my_data[i] = my_rank * 10 + i; - - /* Start the clock. */ - gettimeofday(&starttime, NULL); - - /* Write some data with distributed arrays. */ - for (t = 0; t < NUM_TIMESTEPS; t++) - if (nc_put_vard_int(ncid, varid, ioid, t, my_data)) PERR; - if (nc_close(ncid)) PERR; - - /* Stop the clock. */ - gettimeofday(&endtime, NULL); - - /* Compute the time delta */ - startt = (1000000 * starttime.tv_sec) + starttime.tv_usec; - endt = (1000000 * endtime.tv_sec) + endtime.tv_usec; - delta = (endt - startt)/NUM_TIMESTEPS; - delta_in_sec = (float)delta / 1000000; - mb_per_sec = num_megabytes / delta_in_sec; + int cmode[NUM_MODES] = {NC_PIO, NC_PIO|NC_NETCDF4, + NC_PIO|NC_NETCDF4|NC_MPIIO, + NC_PIO|NC_PNETCDF}; + char mode_name[NUM_MODES][NC_MAX_NAME + 1] = {"classic sequential ", + "netCDF-4 sequential ", + "netCDF-4 parallel I/O", + "pnetcdf "}; + int t, m; + + /* Print header. */ if (my_rank == num_io_procs) - printf("\n%d\t%d\t%d\t%d\t%d\t%8.3f\t%8.1f\t%8.3f\n", ntasks, num_io_procs, - 1, 0, 1, delta_in_sec, num_megabytes, mb_per_sec); + printf("access,\t\t\tntasks,\tnio,\trearr,\ttime(s),\tdata size (MB),\t" + "performance(MB/s)\n"); + + for (m = 0; m < NUM_MODES; m++) + { + /* Create a file with a 3D record var. */ + if (nc_create(FILE_NAME, cmode[m], &ncid)) PERR; + if (nc_def_dim(ncid, DIM_NAME_UNLIMITED, dimlen[0], &dimid[0])) PERR; + if (nc_def_dim(ncid, DIM_NAME_X, dimlen[1], &dimid[1])) PERR; + if (nc_def_dim(ncid, DIM_NAME_Y, dimlen[2], &dimid[2])) PERR; + if (nc_def_var(ncid, VAR_NAME, NC_INT, NDIM3, dimid, &varid)) PERR; + if (nc_enddef(ncid)) PERR; + + /* Calculate a decomposition for distributed arrays. */ + elements_per_pe = DIM_LEN_X * DIM_LEN_Y / (ntasks - num_io_procs); + /* printf("my_rank %d elements_per_pe %ld\n", my_rank, elements_per_pe); */ + + if (!(compdof = malloc(elements_per_pe * sizeof(size_t)))) + PERR; + for (i = 0; i < elements_per_pe; i++) + { + compdof[i] = (my_rank - num_io_procs) * elements_per_pe + i; + /* printf("my_rank %d compdof[%d]=%ld\n", my_rank, i, compdof[i]); */ + } + + /* Create the PIO decomposition for this test. */ + if (nc_def_decomp(iosysid, PIO_INT, NDIM2, &dimlen[1], elements_per_pe, + compdof, &ioid, 1, NULL, NULL)) PERR; + free(compdof); + + /* Create some data on this processor. */ + if (!(my_data = malloc(elements_per_pe * sizeof(int)))) PERR; + for (i = 0; i < elements_per_pe; i++) + my_data[i] = my_rank * 10 + i; + + /* Start the clock. */ + gettimeofday(&starttime, NULL); + + /* Write some data with distributed arrays. */ + for (t = 0; t < NUM_TIMESTEPS; t++) + if (nc_put_vard_int(ncid, varid, ioid, t, my_data)) PERR; + if (nc_close(ncid)) PERR; + + /* Stop the clock. */ + gettimeofday(&endtime, NULL); + + /* Compute the time delta */ + startt = (1000000 * starttime.tv_sec) + starttime.tv_usec; + endt = (1000000 * endtime.tv_sec) + endtime.tv_usec; + delta = (endt - startt)/NUM_TIMESTEPS; + delta_in_sec = (float)delta / 1000000; + mb_per_sec = num_megabytes / delta_in_sec; + if (my_rank == num_io_procs) + printf("%s,\t%d,\t%d,\t%d,\t%8.3f,\t%8.1f,\t%8.3f\n", mode_name[m], + ntasks, num_io_procs, 1, delta_in_sec, num_megabytes, + mb_per_sec); + } /* next mode flag */ free(my_data); if (nc_free_decomp(ioid)) PERR;