From ad3aa02207c5191a1e89fddbaa7c3d539212598d Mon Sep 17 00:00:00 2001 From: Sean Patrick Santos Date: Wed, 17 Jun 2015 11:08:36 -0600 Subject: [PATCH 1/5] Add `shr_RandNum_mod`. `shr_RandNum_mod` is the Fortran interface for a collection of random number generator codes, collected and optimized by Chris Kerr. This commit simply adds the files and a benchmark/test, with some modifications to the code and directory structure to simplify the build and for portability. The main deviations from the original are: 1) The input to create a random number generator is now a single 2D seed array, rather than a set of optional arguments for different RNG methods. 2) The Makefile is modified to work with the changed build directory structure and on different compilers. As currently implemented, the new code will be linked in to the executable, but there are no users of the code until RRTMG is modified to leverage `shr_RandNum_mod`. Testing: Test suite: System tests on Yellowstone/Intel, benchmark test run on Intel, PGI, GNU, NAG Baseline: None used. Namelist changes: None. Status: Presumably bit-for-bit (linking in an unused library shouldn't change answers). Code reviews: None (except dialog between Chris and Sean Santos). --- share/shr_RandNum/include/dSFMT-common.h | 115 ++++ share/shr_RandNum/include/dSFMT-params.h | 87 +++ share/shr_RandNum/include/dSFMT-params19937.h | 40 ++ share/shr_RandNum/include/dSFMT.h | 634 ++++++++++++++++++ share/shr_RandNum/src/dsfmt_f03/dSFMT.c | 633 +++++++++++++++++ .../src/dsfmt_f03/dSFMT_interface.F90 | 562 ++++++++++++++++ share/shr_RandNum/src/dsfmt_f03/dSFMT_utils.c | 18 + share/shr_RandNum/src/kissvec/kissvec.c | 36 + share/shr_RandNum/src/kissvec/kissvec_mod.F90 | 41 ++ .../src/mt19937/mersennetwister_mod.F90 | 293 ++++++++ share/shr_RandNum/src/shr_RandNum_mod.F90 | 247 +++++++ share/shr_RandNum/test/bench/Makefile | 87 +++ .../test/bench/test_shr_RandNum.F90 | 128 ++++ 13 files changed, 2921 insertions(+) create mode 100644 share/shr_RandNum/include/dSFMT-common.h create mode 100644 share/shr_RandNum/include/dSFMT-params.h create mode 100644 share/shr_RandNum/include/dSFMT-params19937.h create mode 100644 share/shr_RandNum/include/dSFMT.h create mode 100644 share/shr_RandNum/src/dsfmt_f03/dSFMT.c create mode 100644 share/shr_RandNum/src/dsfmt_f03/dSFMT_interface.F90 create mode 100644 share/shr_RandNum/src/dsfmt_f03/dSFMT_utils.c create mode 100644 share/shr_RandNum/src/kissvec/kissvec.c create mode 100644 share/shr_RandNum/src/kissvec/kissvec_mod.F90 create mode 100644 share/shr_RandNum/src/mt19937/mersennetwister_mod.F90 create mode 100644 share/shr_RandNum/src/shr_RandNum_mod.F90 create mode 100644 share/shr_RandNum/test/bench/Makefile create mode 100644 share/shr_RandNum/test/bench/test_shr_RandNum.F90 diff --git a/share/shr_RandNum/include/dSFMT-common.h b/share/shr_RandNum/include/dSFMT-common.h new file mode 100644 index 000000000000..a851f4c76064 --- /dev/null +++ b/share/shr_RandNum/include/dSFMT-common.h @@ -0,0 +1,115 @@ +#pragma once +/** + * @file dSFMT-common.h + * + * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom + * number generator with jump function. This file includes common functions + * used in random number generation and jump. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima + * University and The University of Tokyo. + * All rights reserved. + * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + */ +#ifndef DSFMT_COMMON_H +#define DSFMT_COMMON_H + +#include "dSFMT.h" + +#if defined(HAVE_SSE2) +# include +union X128I_T { + uint64_t u[2]; + __m128i i128; +}; +union X128D_T { + double d[2]; + __m128d d128; +}; +/** mask data for sse2 */ +static const union X128I_T sse2_param_mask = {{DSFMT_MSK1, DSFMT_MSK2}}; +#endif + +#if defined(HAVE_ALTIVEC) +inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, + w128_t *lung) { + const vector unsigned char sl1 = ALTI_SL1; + const vector unsigned char sl1_perm = ALTI_SL1_PERM; + const vector unsigned int sl1_msk = ALTI_SL1_MSK; + const vector unsigned char sr1 = ALTI_SR; + const vector unsigned char sr1_perm = ALTI_SR_PERM; + const vector unsigned int sr1_msk = ALTI_SR_MSK; + const vector unsigned char perm = ALTI_PERM; + const vector unsigned int msk1 = ALTI_MSK; + vector unsigned int w, x, y, z; + + z = a->s; + w = lung->s; + x = vec_perm(w, (vector unsigned int)perm, perm); + y = vec_perm(z, sl1_perm, sl1_perm); + y = vec_sll(y, sl1); + y = vec_and(y, sl1_msk); + w = vec_xor(x, b->s); + w = vec_xor(w, y); + x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm); + x = vec_srl(x, sr1); + x = vec_and(x, sr1_msk); + y = vec_and(w, msk1); + z = vec_xor(z, y); + r->s = vec_xor(z, x); + lung->s = w; +} +#elif defined(HAVE_SSE2) +/** + * This function represents the recursion formula. + * @param r output 128-bit + * @param a a 128-bit part of the internal state array + * @param b a 128-bit part of the internal state array + * @param d a 128-bit part of the internal state array (I/O) + */ +inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) { + __m128i v, w, x, y, z; + + x = a->si; + z = _mm_slli_epi64(x, DSFMT_SL1); + y = _mm_shuffle_epi32(u->si, SSE2_SHUFF); + z = _mm_xor_si128(z, b->si); + y = _mm_xor_si128(y, z); + + v = _mm_srli_epi64(y, DSFMT_SR); + w = _mm_and_si128(y, sse2_param_mask.i128); + v = _mm_xor_si128(v, x); + v = _mm_xor_si128(v, w); + r->si = v; + u->si = y; +} +#else +/** + * This function represents the recursion formula. + * @param r output 128-bit + * @param a a 128-bit part of the internal state array + * @param b a 128-bit part of the internal state array + * @param lung a 128-bit part of the internal state array (I/O) + */ +inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, + w128_t *lung) { + uint64_t t0, t1, L0, L1; + + t0 = a->u[0]; + t1 = a->u[1]; + L0 = lung->u[0]; + L1 = lung->u[1]; + lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0]; + lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1]; + r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0; + r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1; +} +#endif +#endif diff --git a/share/shr_RandNum/include/dSFMT-params.h b/share/shr_RandNum/include/dSFMT-params.h new file mode 100644 index 000000000000..825148e689ab --- /dev/null +++ b/share/shr_RandNum/include/dSFMT-params.h @@ -0,0 +1,87 @@ +#ifndef DSFMT_PARAMS_H +#define DSFMT_PARAMS_H + +#include "dSFMT.h" + +/*---------------------- + the parameters of DSFMT + following definitions are in dSFMT-paramsXXXX.h file. + ----------------------*/ +/** the pick up position of the array. +#define DSFMT_POS1 122 +*/ + +/** the parameter of shift left as four 32-bit registers. +#define DSFMT_SL1 18 + */ + +/** the parameter of shift right as four 32-bit registers. +#define DSFMT_SR1 12 +*/ + +/** A bitmask, used in the recursion. These parameters are introduced + * to break symmetry of SIMD. +#define DSFMT_MSK1 (uint64_t)0xdfffffefULL +#define DSFMT_MSK2 (uint64_t)0xddfecb7fULL +*/ + +/** These definitions are part of a 128-bit period certification vector. +#define DSFMT_PCV1 UINT64_C(0x00000001) +#define DSFMT_PCV2 UINT64_C(0x00000000) +*/ + +#define DSFMT_LOW_MASK UINT64_C(0x000FFFFFFFFFFFFF) +#define DSFMT_HIGH_CONST UINT64_C(0x3FF0000000000000) +#define DSFMT_SR 12 + +/* for sse2 */ +#if defined(HAVE_SSE2) + #define SSE2_SHUFF 0x1b +#elif defined(HAVE_ALTIVEC) + #if defined(__APPLE__) /* For OSX */ + #define ALTI_SR (vector unsigned char)(4) + #define ALTI_SR_PERM \ + (vector unsigned char)(15,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14) + #define ALTI_SR_MSK \ + (vector unsigned int)(0x000fffffU,0xffffffffU,0x000fffffU,0xffffffffU) + #define ALTI_PERM \ + (vector unsigned char)(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3) + #else + #define ALTI_SR {4} + #define ALTI_SR_PERM {15,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14} + #define ALTI_SR_MSK {0x000fffffU,0xffffffffU,0x000fffffU,0xffffffffU} + #define ALTI_PERM {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3} + #endif +#endif + +#if DSFMT_MEXP == 521 + #include "dSFMT-params521.h" +#elif DSFMT_MEXP == 1279 + #include "dSFMT-params1279.h" +#elif DSFMT_MEXP == 2203 + #include "dSFMT-params2203.h" +#elif DSFMT_MEXP == 4253 + #include "dSFMT-params4253.h" +#elif DSFMT_MEXP == 11213 + #include "dSFMT-params11213.h" +#elif DSFMT_MEXP == 19937 + #include "dSFMT-params19937.h" +#elif DSFMT_MEXP == 44497 + #include "dSFMT-params44497.h" +#elif DSFMT_MEXP == 86243 + #include "dSFMT-params86243.h" +#elif DSFMT_MEXP == 132049 + #include "dSFMT-params132049.h" +#elif DSFMT_MEXP == 216091 + #include "dSFMT-params216091.h" +#else +#ifdef __GNUC__ + #error "DSFMT_MEXP is not valid." + #undef DSFMT_MEXP +#else + #undef DSFMT_MEXP +#endif + +#endif + +#endif /* DSFMT_PARAMS_H */ diff --git a/share/shr_RandNum/include/dSFMT-params19937.h b/share/shr_RandNum/include/dSFMT-params19937.h new file mode 100644 index 000000000000..7f018466db5b --- /dev/null +++ b/share/shr_RandNum/include/dSFMT-params19937.h @@ -0,0 +1,40 @@ +#ifndef DSFMT_PARAMS19937_H +#define DSFMT_PARAMS19937_H + +/* #define DSFMT_N 191 */ +/* #define DSFMT_MAXDEGREE 19992 */ +#define DSFMT_POS1 117 +#define DSFMT_SL1 19 +#define DSFMT_MSK1 UINT64_C(0x000ffafffffffb3f) +#define DSFMT_MSK2 UINT64_C(0x000ffdfffc90fffd) +#define DSFMT_MSK32_1 0x000ffaffU +#define DSFMT_MSK32_2 0xfffffb3fU +#define DSFMT_MSK32_3 0x000ffdffU +#define DSFMT_MSK32_4 0xfc90fffdU +#define DSFMT_FIX1 UINT64_C(0x90014964b32f4329) +#define DSFMT_FIX2 UINT64_C(0x3b8d12ac548a7c7a) +#define DSFMT_PCV1 UINT64_C(0x3d84e1ac0dc82880) +#define DSFMT_PCV2 UINT64_C(0x0000000000000001) +#define DSFMT_IDSTR "dSFMT2-19937:117-19:ffafffffffb3f-ffdfffc90fffd" + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(3, 3, 3, 3) + #define ALTI_SL1_PERM \ + (vector unsigned char)(2,3,4,5,6,7,30,30,10,11,12,13,14,15,0,1) + #define ALTI_SL1_MSK \ + (vector unsigned int)(0xffffffffU,0xfff80000U,0xffffffffU,0xfff80000U) + #define ALTI_MSK (vector unsigned int)(DSFMT_MSK32_1, \ + DSFMT_MSK32_2, DSFMT_MSK32_3, DSFMT_MSK32_4) +#else /* For OTHER OSs(Linux?) */ + #define ALTI_SL1 {3, 3, 3, 3} + #define ALTI_SL1_PERM \ + {2,3,4,5,6,7,30,30,10,11,12,13,14,15,0,1} + #define ALTI_SL1_MSK \ + {0xffffffffU,0xfff80000U,0xffffffffU,0xfff80000U} + #define ALTI_MSK \ + {DSFMT_MSK32_1, DSFMT_MSK32_2, DSFMT_MSK32_3, DSFMT_MSK32_4} +#endif + +#endif /* DSFMT_PARAMS19937_H */ diff --git a/share/shr_RandNum/include/dSFMT.h b/share/shr_RandNum/include/dSFMT.h new file mode 100644 index 000000000000..c5debf971da7 --- /dev/null +++ b/share/shr_RandNum/include/dSFMT.h @@ -0,0 +1,634 @@ +#pragma once +/** + * @file dSFMT.h + * + * @brief double precision SIMD oriented Fast Mersenne Twister(dSFMT) + * pseudorandom number generator based on IEEE 754 format. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (Hiroshima University) + * + * Copyright (C) 2007, 2008 Mutsuo Saito, Makoto Matsumoto and + * Hiroshima University. All rights reserved. + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, + * Hiroshima University and The University of Tokyo. + * All rights reserved. + * + * The new BSD License is applied to this software. + * see LICENSE.txt + * + * @note We assume that your system has inttypes.h. If your system + * doesn't have inttypes.h, you have to typedef uint32_t and uint64_t, + * and you have to define PRIu64 and PRIx64 in this file as follows: + * @verbatim + typedef unsigned int uint32_t + typedef unsigned long long uint64_t + #define PRIu64 "llu" + #define PRIx64 "llx" +@endverbatim + * uint32_t must be exactly 32-bit unsigned integer type (no more, no + * less), and uint64_t must be exactly 64-bit unsigned integer type. + * PRIu64 and PRIx64 are used for printf function to print 64-bit + * unsigned int and 64-bit unsigned int in hexadecimal format. + */ + +#ifndef DSFMT_H +#define DSFMT_H +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include + +#if !defined(DSFMT_MEXP) +#ifdef __GNUC__ + #warning "DSFMT_MEXP is not defined. I assume DSFMT_MEXP is 19937." +#endif + #define DSFMT_MEXP 19937 +#endif +/*----------------- + BASIC DEFINITIONS + -----------------*/ +/* Mersenne Exponent. The period of the sequence + * is a multiple of 2^DSFMT_MEXP-1. + * #define DSFMT_MEXP 19937 */ +/** DSFMT generator has an internal state array of 128-bit integers, + * and N is its size. */ +#define DSFMT_N ((DSFMT_MEXP - 128) / 104 + 1) +/** N32 is the size of internal state array when regarded as an array + * of 32-bit integers.*/ +#define DSFMT_N32 (DSFMT_N * 4) +/** N64 is the size of internal state array when regarded as an array + * of 64-bit integers.*/ +#define DSFMT_N64 (DSFMT_N * 2) + +#if !defined(DSFMT_BIG_ENDIAN) +# if defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) +# if __BYTE_ORDER == __BIG_ENDIAN +# define DSFMT_BIG_ENDIAN 1 +# endif +# elif defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) +# if _BYTE_ORDER == _BIG_ENDIAN +# define DSFMT_BIG_ENDIAN 1 +# endif +# elif defined(__BYTE_ORDER__) && defined(__BIG_ENDIAN__) +# if __BYTE_ORDER__ == __BIG_ENDIAN__ +# define DSFMT_BIG_ENDIAN 1 +# endif +# elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) +# if BYTE_ORDER == BIG_ENDIAN +# define DSFMT_BIG_ENDIAN 1 +# endif +# elif defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN) \ + || defined(__BIG_ENDIAN__) || defined(BIG_ENDIAN) +# define DSFMT_BIG_ENDIAN 1 +# endif +#endif + +#if defined(DSFMT_BIG_ENDIAN) && defined(__amd64) +# undef DSFMT_BIG_ENDIAN +#endif + +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +# include +#elif defined(_MSC_VER) || defined(__BORLANDC__) +# if !defined(DSFMT_UINT32_DEFINED) && !defined(SFMT_UINT32_DEFINED) +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; +# define UINT64_C(v) (v ## ui64) +# define DSFMT_UINT32_DEFINED +# if !defined(inline) +# define inline __inline +# endif +# endif +#else +# include +# if !defined(inline) +# if defined(__GNUC__) +# define inline __inline__ +# else +# define inline +# endif +# endif +#endif + +#ifndef PRIu64 +# if defined(_MSC_VER) || defined(__BORLANDC__) +# define PRIu64 "I64u" +# define PRIx64 "I64x" +# else +# define PRIu64 "llu" +# define PRIx64 "llx" +# endif +#endif + +#ifndef UINT64_C +# define UINT64_C(v) (v ## ULL) +#endif + +/*------------------------------------------ + 128-bit SIMD like data type for standard C + ------------------------------------------*/ +#if defined(HAVE_ALTIVEC) +# if !defined(__APPLE__) +# include +# endif +/** 128-bit data structure */ +union W128_T { + vector unsigned int s; + uint64_t u[2]; + uint32_t u32[4]; + double d[2]; +}; + +#elif defined(HAVE_SSE2) +# include + +/** 128-bit data structure */ +union W128_T { + __m128i si; + __m128d sd; + uint64_t u[2]; + uint32_t u32[4]; + double d[2]; +}; +#else /* standard C */ +/** 128-bit data structure */ +union W128_T { + uint64_t u[2]; + uint32_t u32[4]; + double d[2]; +}; +#endif + +/** 128-bit data type */ +typedef union W128_T w128_t; + +/** the 128-bit internal state array */ +struct DSFMT_T { + w128_t status[DSFMT_N + 1]; + int idx; +}; +typedef struct DSFMT_T dsfmt_t; + +/** dsfmt internal state vector */ +extern dsfmt_t dsfmt_global_data; +/** dsfmt mexp for check */ +extern const int dsfmt_global_mexp; + +void dsfmt_gen_rand_all(dsfmt_t *dsfmt); +void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp); +void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length, int mexp); +const char *dsfmt_get_idstring(void); +int dsfmt_get_min_array_size(void); + +#if defined(__GNUC__) +# define DSFMT_PRE_INLINE inline static +# define DSFMT_PST_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) && _MSC_VER >= 1200 +# define DSFMT_PRE_INLINE __forceinline static +# define DSFMT_PST_INLINE +#else +# define DSFMT_PRE_INLINE inline static +# define DSFMT_PST_INLINE +#endif +DSFMT_PRE_INLINE uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_genrand_close1_open2(dsfmt_t *dsfmt) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_genrand_close_open(dsfmt_t *dsfmt) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_genrand_open_close(dsfmt_t *dsfmt) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_genrand_open_open(dsfmt_t *dsfmt) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE uint32_t dsfmt_gv_genrand_uint32(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_close1_open2(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_close_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_open_close(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_open_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_open_close(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_close_open(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_open_open(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_close1_open2(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_init_gen_rand(uint32_t seed) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_init_by_array(uint32_t init_key[], + int key_length) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length) DSFMT_PST_INLINE; + +/** + * This function generates and returns unsigned 32-bit integer. + * This is slower than SFMT, only for convenience usage. + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) { + uint32_t r; + uint64_t *psfmt64 = &dsfmt->status[0].u[0]; + + if (dsfmt->idx >= DSFMT_N64) { + dsfmt_gen_rand_all(dsfmt); + dsfmt->idx = 0; + } + r = psfmt64[dsfmt->idx++] & 0xffffffffU; + return r; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [1, 2). This is + * the primitive and faster than generating numbers in other ranges. + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_close1_open2(dsfmt_t *dsfmt) { + double r; + double *psfmt64 = &dsfmt->status[0].d[0]; + + if (dsfmt->idx >= DSFMT_N64) { + dsfmt_gen_rand_all(dsfmt); + dsfmt->idx = 0; + } + r = psfmt64[dsfmt->idx++]; + return r; +} + +/** + * This function generates and returns unsigned 32-bit integer. + * This is slower than SFMT, only for convenience usage. + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static uint32_t dsfmt_gv_genrand_uint32(void) { + return dsfmt_genrand_uint32(&dsfmt_global_data); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [1, 2). + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_close1_open2(void) { + return dsfmt_genrand_close1_open2(&dsfmt_global_data); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [0, 1). + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_close_open(dsfmt_t *dsfmt) { + return dsfmt_genrand_close1_open2(dsfmt) - 1.0; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [0, 1). + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_close_open(void) { + return dsfmt_gv_genrand_close1_open2() - 1.0; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1]. + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_open_close(dsfmt_t *dsfmt) { + return 2.0 - dsfmt_genrand_close1_open2(dsfmt); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1]. + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_open_close(void) { + return 2.0 - dsfmt_gv_genrand_close1_open2(); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1). + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_open_open(dsfmt_t *dsfmt) { + double *dsfmt64 = &dsfmt->status[0].d[0]; + union { + double d; + uint64_t u; + } r; + + if (dsfmt->idx >= DSFMT_N64) { + dsfmt_gen_rand_all(dsfmt); + dsfmt->idx = 0; + } + r.d = dsfmt64[dsfmt->idx++]; + r.u |= 1; + return r.d - 1.0; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1). + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_open_open(void) { + return dsfmt_genrand_open_open(&dsfmt_global_data); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [1, 2) to the + * specified array[] by one call. This function is the same as + * dsfmt_fill_array_close1_open2() except that this function uses + * \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_close1_open2(double array[], int size) { + dsfmt_fill_array_close1_open2(&dsfmt_global_data, array, size); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1] to the + * specified array[] by one call. This function is the same as + * dsfmt_gv_fill_array_close1_open2() except the distribution range. + * This function uses \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() and \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_open_close(double array[], int size) { + dsfmt_fill_array_open_close(&dsfmt_global_data, array, size); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [0, 1) to the + * specified array[] by one call. This function is the same as + * dsfmt_gv_fill_array_close1_open2() except the distribution range. + * This function uses \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_close_open(double array[], int size) { + dsfmt_fill_array_close_open(&dsfmt_global_data, array, size); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1) to the + * specified array[] by one call. This function is the same as + * dsfmt_gv_fill_array_close1_open2() except the distribution range. + * This function uses \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_open_open(double array[], int size) { + dsfmt_fill_array_open_open(&dsfmt_global_data, array, size); +} + +/** + * This function initializes the internal state array with a 32-bit + * integer seed. + * @param dsfmt dsfmt state vector. + * @param seed a 32-bit integer used as the seed. + */ +inline static void dsfmt_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed) { + dsfmt_chk_init_gen_rand(dsfmt, seed, DSFMT_MEXP); +} + +/** + * This function initializes the internal state array with a 32-bit + * integer seed. This function uses \b global variables. + * @param seed a 32-bit integer used as the seed. + * see also \sa dsfmt_init_gen_rand() + */ +inline static void dsfmt_gv_init_gen_rand(uint32_t seed) { + dsfmt_init_gen_rand(&dsfmt_global_data, seed); +} + +/** + * This function initializes the internal state array, + * with an array of 32-bit integers used as the seeds. + * @param dsfmt dsfmt state vector + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + */ +inline static void dsfmt_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length) { + dsfmt_chk_init_by_array(dsfmt, init_key, key_length, DSFMT_MEXP); +} + +/** + * This function initializes the internal state array, + * with an array of 32-bit integers used as the seeds. + * This function uses \b global variables. + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + * see also \sa dsfmt_init_by_array() + */ +inline static void dsfmt_gv_init_by_array(uint32_t init_key[], int key_length) { + dsfmt_init_by_array(&dsfmt_global_data, init_key, key_length); +} + +#if !defined(DSFMT_DO_NOT_USE_OLD_NAMES) +DSFMT_PRE_INLINE const char *get_idstring(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE int get_min_array_size(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void init_gen_rand(uint32_t seed) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void init_by_array(uint32_t init_key[], int key_length) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_close1_open2(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_close_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_open_close(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_open_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_open_close(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_close_open(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_open_open(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_close1_open2(double array[], int size) + DSFMT_PST_INLINE; + +/** + * This function is just the same as dsfmt_get_idstring(). + * @return id string. + * see also \sa dsfmt_get_idstring() + */ +inline static const char *get_idstring(void) { + return dsfmt_get_idstring(); +} + +/** + * This function is just the same as dsfmt_get_min_array_size(). + * @return minimum size of array used for fill_array functions. + * see also \sa dsfmt_get_min_array_size() + */ +inline static int get_min_array_size(void) { + return dsfmt_get_min_array_size(); +} + +/** + * This function is just the same as dsfmt_gv_init_gen_rand(). + * @param seed a 32-bit integer used as the seed. + * see also \sa dsfmt_gv_init_gen_rand(), \sa dsfmt_init_gen_rand(). + */ +inline static void init_gen_rand(uint32_t seed) { + dsfmt_gv_init_gen_rand(seed); +} + +/** + * This function is just the same as dsfmt_gv_init_by_array(). + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + * see also \sa dsfmt_gv_init_by_array(), \sa dsfmt_init_by_array(). + */ +inline static void init_by_array(uint32_t init_key[], int key_length) { + dsfmt_gv_init_by_array(init_key, key_length); +} + +/** + * This function is just the same as dsfmt_gv_genrand_close1_open2(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_close1_open2() \sa + * dsfmt_gv_genrand_close1_open2() + */ +inline static double genrand_close1_open2(void) { + return dsfmt_gv_genrand_close1_open2(); +} + +/** + * This function is just the same as dsfmt_gv_genrand_close_open(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_close_open() \sa + * dsfmt_gv_genrand_close_open() + */ +inline static double genrand_close_open(void) { + return dsfmt_gv_genrand_close_open(); +} + +/** + * This function is just the same as dsfmt_gv_genrand_open_close(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_open_close() \sa + * dsfmt_gv_genrand_open_close() + */ +inline static double genrand_open_close(void) { + return dsfmt_gv_genrand_open_close(); +} + +/** + * This function is just the same as dsfmt_gv_genrand_open_open(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_open_open() \sa + * dsfmt_gv_genrand_open_open() + */ +inline static double genrand_open_open(void) { + return dsfmt_gv_genrand_open_open(); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_open_close(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_gv_fill_array_open_close(), \sa + * dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_open_close(double array[], int size) { + dsfmt_gv_fill_array_open_close(array, size); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_close_open(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_gv_fill_array_close_open(), \sa + * dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_close_open(double array[], int size) { + dsfmt_gv_fill_array_close_open(array, size); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_open_open(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_gv_fill_array_open_open(), \sa + * dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_open_open(double array[], int size) { + dsfmt_gv_fill_array_open_open(array, size); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_close1_open2(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_close1_open2(double array[], int size) { + dsfmt_gv_fill_array_close1_open2(array, size); +} +#endif /* DSFMT_DO_NOT_USE_OLD_NAMES */ + +#if defined(__cplusplus) +} +#endif + +#endif /* DSFMT_H */ diff --git a/share/shr_RandNum/src/dsfmt_f03/dSFMT.c b/share/shr_RandNum/src/dsfmt_f03/dSFMT.c new file mode 100644 index 000000000000..e86449ac32af --- /dev/null +++ b/share/shr_RandNum/src/dsfmt_f03/dSFMT.c @@ -0,0 +1,633 @@ +/** + * @file dSFMT.c + * @brief double precision SIMD-oriented Fast Mersenne Twister (dSFMT) + * based on IEEE 754 format. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (Hiroshima University) + * + * Copyright (C) 2007,2008 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * The new BSD License is applied to this software, see LICENSE.txt + */ +#include +#include +#include +#include "dSFMT-params.h" +#include "dSFMT-common.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** dsfmt internal state vector */ +dsfmt_t dsfmt_global_data; +/** dsfmt mexp for check */ +static const int dsfmt_mexp = DSFMT_MEXP; + +/*---------------- + STATIC FUNCTIONS + ----------------*/ +inline static uint32_t ini_func1(uint32_t x); +inline static uint32_t ini_func2(uint32_t x); +inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, + int size); +inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, + int size); +inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, + int size); +inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, + int size); +inline static int idxof(int i); +static void initial_mask(dsfmt_t *dsfmt); +static void period_certification(dsfmt_t *dsfmt); + +#if defined(HAVE_SSE2) +/** 1 in 64bit for sse2 */ +static const union X128I_T sse2_int_one = {{1, 1}}; +/** 2.0 double for sse2 */ +static const union X128D_T sse2_double_two = {{2.0, 2.0}}; +/** -1.0 double for sse2 */ +static const union X128D_T sse2_double_m_one = {{-1.0, -1.0}}; +#endif + +/** + * This function simulate a 32-bit array index overlapped to 64-bit + * array of LITTLE ENDIAN in BIG ENDIAN machine. + */ +#if defined(DSFMT_BIG_ENDIAN) +inline static int idxof(int i) { + return i ^ 1; +} +#else +inline static int idxof(int i) { + return i; +} +#endif + +#if defined(HAVE_SSE2) +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range [0, 1). + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_c0o1(w128_t *w) { + w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128); +} + +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range (0, 1]. + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_o0c1(w128_t *w) { + w->sd = _mm_sub_pd(sse2_double_two.d128, w->sd); +} + +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range (0, 1). + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_o0o1(w128_t *w) { + w->si = _mm_or_si128(w->si, sse2_int_one.i128); + w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128); +} +#else /* standard C and altivec */ +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range [0, 1). + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_c0o1(w128_t *w) { + w->d[0] -= 1.0; + w->d[1] -= 1.0; +} + +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range (0, 1]. + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_o0c1(w128_t *w) { + w->d[0] = 2.0 - w->d[0]; + w->d[1] = 2.0 - w->d[1]; +} + +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range (0, 1). + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_o0o1(w128_t *w) { + w->u[0] |= 1; + w->u[1] |= 1; + w->d[0] -= 1.0; + w->d[1] -= 1.0; +} +#endif + +/** + * This function fills the user-specified array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, + int size) { + int i, j; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], + &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1], &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + } + for (; i < size - DSFMT_N; i++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + } + for (j = 0; j < 2 * DSFMT_N - size; j++) { + dsfmt->status[j] = array[j + size - DSFMT_N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + dsfmt->status[j] = array[i]; + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function fills the user-specified array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, + int size) { + int i, j; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], + &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1], &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + } + for (; i < size - DSFMT_N; i++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + convert_c0o1(&array[i - DSFMT_N]); + } + for (j = 0; j < 2 * DSFMT_N - size; j++) { + dsfmt->status[j] = array[j + size - DSFMT_N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + dsfmt->status[j] = array[i]; + convert_c0o1(&array[i - DSFMT_N]); + } + for (i = size - DSFMT_N; i < size; i++) { + convert_c0o1(&array[i]); + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function fills the user-specified array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, + int size) { + int i, j; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], + &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1], &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + } + for (; i < size - DSFMT_N; i++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + convert_o0o1(&array[i - DSFMT_N]); + } + for (j = 0; j < 2 * DSFMT_N - size; j++) { + dsfmt->status[j] = array[j + size - DSFMT_N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + dsfmt->status[j] = array[i]; + convert_o0o1(&array[i - DSFMT_N]); + } + for (i = size - DSFMT_N; i < size; i++) { + convert_o0o1(&array[i]); + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function fills the user-specified array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, + int size) { + int i, j; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], + &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1], &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + } + for (; i < size - DSFMT_N; i++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + convert_o0c1(&array[i - DSFMT_N]); + } + for (j = 0; j < 2 * DSFMT_N - size; j++) { + dsfmt->status[j] = array[j + size - DSFMT_N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + dsfmt->status[j] = array[i]; + convert_o0c1(&array[i - DSFMT_N]); + } + for (i = size - DSFMT_N; i < size; i++) { + convert_o0c1(&array[i]); + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function represents a function used in the initialization + * by init_by_array + * @param x 32-bit integer + * @return 32-bit integer + */ +static uint32_t ini_func1(uint32_t x) { + return (x ^ (x >> 27)) * (uint32_t)1664525UL; +} + +/** + * This function represents a function used in the initialization + * by init_by_array + * @param x 32-bit integer + * @return 32-bit integer + */ +static uint32_t ini_func2(uint32_t x) { + return (x ^ (x >> 27)) * (uint32_t)1566083941UL; +} + +/** + * This function initializes the internal state array to fit the IEEE + * 754 format. + * @param dsfmt dsfmt state vector. + */ +static void initial_mask(dsfmt_t *dsfmt) { + int i; + uint64_t *psfmt; + + psfmt = &dsfmt->status[0].u[0]; + for (i = 0; i < DSFMT_N * 2; i++) { + psfmt[i] = (psfmt[i] & DSFMT_LOW_MASK) | DSFMT_HIGH_CONST; + } +} + +/** + * This function certificate the period of 2^{SFMT_MEXP}-1. + * @param dsfmt dsfmt state vector. + */ +static void period_certification(dsfmt_t *dsfmt) { + uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2}; + uint64_t tmp[2]; + uint64_t inner; + int i; +#if (DSFMT_PCV2 & 1) != 1 + int j; + uint64_t work; +#endif + + tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1); + tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2); + + inner = tmp[0] & pcv[0]; + inner ^= tmp[1] & pcv[1]; + for (i = 32; i > 0; i >>= 1) { + inner ^= inner >> i; + } + inner &= 1; + /* check OK */ + if (inner == 1) { + return; + } + /* check NG, and modification */ +#if (DSFMT_PCV2 & 1) == 1 + dsfmt->status[DSFMT_N].u[1] ^= 1; +#else + for (i = 1; i >= 0; i--) { + work = 1; + for (j = 0; j < 64; j++) { + if ((work & pcv[i]) != 0) { + dsfmt->status[DSFMT_N].u[i] ^= work; + return; + } + work = work << 1; + } + } +#endif + return; +} + +/*---------------- + PUBLIC FUNCTIONS + ----------------*/ +/** + * This function returns the identification string. The string shows + * the Mersenne exponent, and all parameters of this generator. + * @return id string. + */ +const char *dsfmt_get_idstring(void) { + return DSFMT_IDSTR; +} + +/** + * This function returns the minimum size of array used for \b + * fill_array functions. + * @return minimum size of array used for fill_array functions. + */ +int dsfmt_get_min_array_size(void) { + return DSFMT_N64; +} + +/** + * This function fills the internal state array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + */ +void dsfmt_gen_rand_all(dsfmt_t *dsfmt) { + int i; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&dsfmt->status[0], &dsfmt->status[0], + &dsfmt->status[DSFMT_POS1], &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&dsfmt->status[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1], &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&dsfmt->status[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1 - DSFMT_N], &lung); + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [1, 2) to the + * specified array[] by one call. The number of pseudorandom numbers + * is specified by the argument \b size, which must be at least (SFMT_MEXP + * / 128) * 2 and a multiple of two. The function + * get_min_array_size() returns this minimum size. The generation by + * this function is much faster than the following fill_array_xxx functions. + * + * For initialization, init_gen_rand() or init_by_array() must be called + * before the first call of this function. This function can not be + * used after calling genrand_xxx functions, without initialization. + * + * @param dsfmt dsfmt state vector. + * @param array an array where pseudorandom numbers are filled + * by this function. The pointer to the array must be "aligned" + * (namely, must be a multiple of 16) in the SIMD version, since it + * refers to the address of a 128-bit integer. In the standard C + * version, the pointer is arbitrary. + * + * @param size the number of 64-bit pseudorandom integers to be + * generated. size must be a multiple of 2, and greater than or equal + * to (SFMT_MEXP / 128) * 2. + * + * @note \b memalign or \b posix_memalign is available to get aligned + * memory. Mac OSX doesn't have these functions, but \b malloc of OSX + * returns the pointer to the aligned memory block. + */ +void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size) { + assert(size % 2 == 0); + assert(size >= DSFMT_N64); + gen_rand_array_c1o2(dsfmt, (w128_t *)array, size / 2); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1] to the + * specified array[] by one call. This function is the same as + * fill_array_close1_open2() except the distribution range. + * + * @param dsfmt dsfmt state vector. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa fill_array_close1_open2() + */ +void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size) { + assert(size % 2 == 0); + assert(size >= DSFMT_N64); + gen_rand_array_o0c1(dsfmt, (w128_t *)array, size / 2); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [0, 1) to the + * specified array[] by one call. This function is the same as + * fill_array_close1_open2() except the distribution range. + * + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param dsfmt dsfmt state vector. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa fill_array_close1_open2() + */ +void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size) { + assert(size % 2 == 0); + assert(size >= DSFMT_N64); + gen_rand_array_c0o1(dsfmt, (w128_t *)array, size / 2); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1) to the + * specified array[] by one call. This function is the same as + * fill_array_close1_open2() except the distribution range. + * + * @param dsfmt dsfmt state vector. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa fill_array_close1_open2() + */ +void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size) { + assert(size % 2 == 0); + assert(size >= DSFMT_N64); + gen_rand_array_o0o1(dsfmt, (w128_t *)array, size / 2); +} + +#if defined(__INTEL_COMPILER) +# pragma warning(disable:981) +#endif +/** + * This function initializes the internal state array with a 32-bit + * integer seed. + * @param dsfmt dsfmt state vector. + * @param seed a 32-bit integer used as the seed. + * @param mexp caller's mersenne expornent + */ +void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp) { + int i; + uint32_t *psfmt; + + /* make sure caller program is compiled with the same MEXP */ + if (mexp != dsfmt_mexp) { + fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n"); + exit(1); + } + psfmt = &dsfmt->status[0].u32[0]; + psfmt[idxof(0)] = seed; + for (i = 1; i < (DSFMT_N + 1) * 4; i++) { + psfmt[idxof(i)] = 1812433253UL + * (psfmt[idxof(i - 1)] ^ (psfmt[idxof(i - 1)] >> 30)) + i; + } + initial_mask(dsfmt); + period_certification(dsfmt); + dsfmt->idx = DSFMT_N64; +} + +/** + * This function initializes the internal state array, + * with an array of 32-bit integers used as the seeds + * @param dsfmt dsfmt state vector. + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + * @param mexp caller's mersenne expornent + */ +void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length, int mexp) { + int i, j, count; + uint32_t r; + uint32_t *psfmt32; + int lag; + int mid; + int size = (DSFMT_N + 1) * 4; /* pulmonary */ + + /* make sure caller program is compiled with the same MEXP */ + if (mexp != dsfmt_mexp) { + fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n"); + exit(1); + } + if (size >= 623) { + lag = 11; + } else if (size >= 68) { + lag = 7; + } else if (size >= 39) { + lag = 5; + } else { + lag = 3; + } + mid = (size - lag) / 2; + + psfmt32 = &dsfmt->status[0].u32[0]; + memset(dsfmt->status, 0x8b, sizeof(dsfmt->status)); + if (key_length + 1 > size) { + count = key_length + 1; + } else { + count = size; + } + r = ini_func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid % size)] + ^ psfmt32[idxof((size - 1) % size)]); + psfmt32[idxof(mid % size)] += r; + r += key_length; + psfmt32[idxof((mid + lag) % size)] += r; + psfmt32[idxof(0)] = r; + count--; + for (i = 1, j = 0; (j < count) && (j < key_length); j++) { + r = ini_func1(psfmt32[idxof(i)] + ^ psfmt32[idxof((i + mid) % size)] + ^ psfmt32[idxof((i + size - 1) % size)]); + psfmt32[idxof((i + mid) % size)] += r; + r += init_key[j] + i; + psfmt32[idxof((i + mid + lag) % size)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % size; + } + for (; j < count; j++) { + r = ini_func1(psfmt32[idxof(i)] + ^ psfmt32[idxof((i + mid) % size)] + ^ psfmt32[idxof((i + size - 1) % size)]); + psfmt32[idxof((i + mid) % size)] += r; + r += i; + psfmt32[idxof((i + mid + lag) % size)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % size; + } + for (j = 0; j < size; j++) { + r = ini_func2(psfmt32[idxof(i)] + + psfmt32[idxof((i + mid) % size)] + + psfmt32[idxof((i + size - 1) % size)]); + psfmt32[idxof((i + mid) % size)] ^= r; + r -= i; + psfmt32[idxof((i + mid + lag) % size)] ^= r; + psfmt32[idxof(i)] = r; + i = (i + 1) % size; + } + initial_mask(dsfmt); + period_certification(dsfmt); + dsfmt->idx = DSFMT_N64; +} +#if defined(__INTEL_COMPILER) +# pragma warning(default:981) +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/share/shr_RandNum/src/dsfmt_f03/dSFMT_interface.F90 b/share/shr_RandNum/src/dsfmt_f03/dSFMT_interface.F90 new file mode 100644 index 000000000000..e312647e71fb --- /dev/null +++ b/share/shr_RandNum/src/dsfmt_f03/dSFMT_interface.F90 @@ -0,0 +1,562 @@ +module dSFMT_interface + +! copyright James Spencer 2012. +! New BSD License, see License.txt for details. + +! This contains a wrapper around a subset of of the functionality in dSFMT RNG +! (double precision SIMD-oriented Fast Mersenne Twister random number +! generator). + +! Functions which return a single (pseudo-)random number are defined as inline +! in the original dSFMT source. Accessing these functions is quite slow +! (compared to C) in Fortran as we can't inline them (or indeed call them +! without modifying the C source). A more efficient way is to fill up an array +! of random numbers and then access the array as a stream, refilling the array +! as needed. By thus amortizing the cost of calling non-inlined functions +! across many random numbers, the cost becomes very competitive with +! a C implementation. Functions are provided which return a single random +! number and refill the storage array when required, thus giving a fast +! interface which closely mirrors that of the C implementation. + +! Usage (for each psuedo-random stream): +! i) call dSFMT_init. +! ii) access desired number of random numbers either by calling +! a get_rand_xxx function the required number of times or by filling up +! user-defined arrays with a fill_array_xxx subroutine. +! iii) call dSFMT_end to free all memory. + +! Note that an array must have at least a minimum number of elements (as given +! in dsfmt_get_min_array_size). + +! IMPORTANT: see the warning below about calling multiple get_rand_xxx +! functions. + +! Match definition of DSFMT_MEXP in dSFMT.h. +#if !defined(DSFMT_MEXP) +#define DSFMT_MEXP 19937 +#endif + +use, intrinsic :: iso_c_binding +implicit none + +private +public :: dSFMT_t, dSFMT_init, dSFMT_end, dSFMT_reset, & + fill_array_close_open, & + fill_array_open_close, & + fill_array_open_open, & + fill_array_gaussian, & + get_rand_close_open, & + get_rand_open_close, & + get_rand_open_open, & + get_rand_gaussian, & + get_rand_arr_close_open, & + get_rand_arr_open_close, & + get_rand_arr_open_open, & + get_rand_arr_gaussian, & + dsfmt_get_min_array_size, & + unset, close_open, open_close, open_open + +! Expose functions from C as needed. +! See dSFMT documentation for details. +interface + pure function dsfmt_get_min_array_size() result(min_size) bind(c, name='dsfmt_get_min_array_size') + import :: c_int32_t + integer(c_int32_t) :: min_size + end function dsfmt_get_min_array_size + + pure function malloc_dsfmt_t() result(dsfmt_state) bind(c, name='malloc_dsfmt_t') + import :: c_ptr + type(c_ptr) :: dsfmt_state + end function malloc_dsfmt_t + pure subroutine free_dsfmt_t(dsfmt_state) bind(c, name='free_dsfmt_t') + import :: c_ptr + type(c_ptr), value, intent(in) :: dsfmt_state + end subroutine free_dsfmt_t + + pure subroutine dsfmt_chk_init_gen_rand(dsfmt_state, seed, mexp) bind(c, name='dsfmt_chk_init_gen_rand') + import :: c_ptr, c_int32_t + type(c_ptr), value, intent(in) :: dsfmt_state + integer(c_int32_t), value, intent(in) :: seed, mexp + end subroutine dsfmt_chk_init_gen_rand + + pure subroutine dsfmt_fill_array_close_open(dSFMT_state, array, array_size) bind(c, name='dsfmt_fill_array_close_open') + import :: c_ptr, c_double, c_int32_t + type(c_ptr), value, intent(in) :: dSFMT_state + real(c_double), intent(out) :: array(*) + integer(c_int32_t), value, intent(in) :: array_size + end subroutine dsfmt_fill_array_close_open + pure subroutine dsfmt_fill_array_open_close(dSFMT_state, array, array_size) bind(c, name='dsfmt_fill_array_open_close') + import :: c_ptr, c_double, c_int32_t + type(c_ptr), value, intent(in) :: dSFMT_state + real(c_double), intent(out) :: array(*) + integer(c_int32_t), value, intent(in) :: array_size + end subroutine dsfmt_fill_array_open_close + pure subroutine dsfmt_fill_array_open_open(dSFMT_state, array, array_size) bind(c, name='dsfmt_fill_array_open_open') + import :: c_ptr, c_double, c_int32_t + type(c_ptr), value, intent(in) :: dSFMT_state + real(c_double), intent(out) :: array(*) + integer(c_int32_t), value, intent(in) :: array_size + end subroutine dsfmt_fill_array_open_open +end interface + +! distribution modes +integer, parameter :: unset = 2**0 +integer, parameter :: close_open = 2**1 +integer, parameter :: open_close = 2**2 +integer, parameter :: open_open = 2**3 +integer, parameter :: gaussian = 2**4 + +type dSFMT_t + private + ! Type of distribution in the random store + integer, public :: distribution + ! Pointer to the dsfmt_t internal state (as defined in dSFMT.h). + type(c_ptr) :: dSFMT_state = c_null_ptr + ! Testing indicates that 50000 is a very good size for the array storing the + ! random numbers. Testing was done standalone, so undoubtedly influenced by + ! cache size and this might be different for real-world applications. + integer(c_int) :: random_store_size + ! Seed passed to dSFMT. + integer :: seed + ! Store of random numbers. + real(c_double), allocatable :: random_store(:) + ! Index of the next random number to be returned from random_store. + integer :: next_element +end type dSFMT_t + +integer, parameter, private :: dp = selected_real_kind(15,307) + +contains + +!--- Initialisation, termination and state interaction --- + + subroutine dSFMT_init(seed, rng_store_size, rng) + + ! Initialise the dSFMT RNG and fill rng%random_store with + ! a block of random numbers in interval [0,1). + ! + ! In: + ! seed: seed for the RNG. + ! rng_store_size: number of random numbers to store at once. + ! dsfmt_get_min_array_size() is used if + ! rng_store_size < dsfmt_get_min_array_size(). + ! Out: + ! rng: dSFMT_t with internal variables initialised and associated + ! with an initialised psuedo-random number stream. + ! If rng has already been initialised, then no additional memory + ! allocations are performed unless rng_store_size has changed. + + integer, intent(in) :: seed, rng_store_size + type(dSFMT_t), intent(inout) :: rng + + if (dsfmt_get_min_array_size() > rng_store_size) then + rng%random_store_size = dsfmt_get_min_array_size() + else + rng%random_store_size = rng_store_size + end if + + if (allocated(rng%random_store)) then + if (size(rng%random_store) /= rng%random_store_size) then + deallocate(rng%random_store) + allocate(rng%random_store(rng%random_store_size)) + end if + else + allocate(rng%random_store(rng%random_store_size)) + end if + + ! Set current element to be larger than the store, so it is + ! filled in the first call to the get_* functions. + rng%next_element = rng%random_store_size + 1 + ! But the store has not yet been filled. + rng%distribution = unset + + rng%seed = int(seed, c_int32_t) + + ! Create dSFMT state + ! Why is c_associated not pure? I have not been able to find any good + ! reason for this to be the case... + if (.not.c_associated(rng%dSFMT_state)) rng%dSFMT_state = malloc_dsfmt_t() + + ! Initialise dSFMT PRNG. + call dsfmt_chk_init_gen_rand(rng%dSFMT_state, rng%seed, DSFMT_MEXP) + + end subroutine dSFMT_init + + pure subroutine dSFMT_end(rng) + + ! Deallocate and reset a dSFMT_t variable safely. + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is closed on output and memory + ! associated with it is deallocated. + + type(dSFMT_t), intent(inout) :: rng + + ! Destroy dSFMT state + call free_dsfmt_t(rng%dSFMT_state) + + ! Reset store. + deallocate(rng%random_store) + rng%random_store_size = -1 + rng%next_element = -1 + + end subroutine dSFMT_end + + pure subroutine dSFMT_reset(rng) + + ! Reset the dSFMT state such that the next get_rand_xxx call requires + ! the store of random numbers to be refilled. + + ! In/Out: + ! rng: dSFMT_t. The internal store of random numbers is cleared. + + type(dSFMT_t), intent(inout) :: rng + + rng%next_element = rng%random_store_size + 1 + rng%random_store = huge(1.0_dp) + rng%distribution = unset + + end subroutine dSFMT_reset + +!--- Get arrays of random numbers --- + + pure subroutine fill_array_close_open(rng, array) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. + ! Out: + ! array: array filled with random numbers in the interval [0,1). + + ! NOTE: array must be at least as large as the size returned by dsfmt_get_min_array_size. + + type(dSFMT_t), intent(inout) :: rng + real(c_double), intent(out) :: array(:) + + call dsfmt_fill_array_close_open(rng%dSFMT_state, array, size(array)) + + end subroutine fill_array_close_open + + pure subroutine fill_array_open_close(rng, array) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. + ! Out: + ! array: array filled with random numbers in the interval (0,1]. + + ! NOTE: array must be at least as large as the size returned by dsfmt_get_min_array_size. + + type(dSFMT_t), intent(inout) :: rng + real(c_double), intent(out) :: array(:) + + call dsfmt_fill_array_open_close(rng%dSFMT_state, array, size(array)) + + end subroutine fill_array_open_close + + pure subroutine fill_array_open_open(rng, array) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. + ! Out: + ! array: array filled with random numbers in the interval (0,1). + + ! NOTE: array must be at least as large as the size returned by dsfmt_get_min_array_size. + + type(dSFMT_t), intent(inout) :: rng + real(c_double), intent(out) :: array(:) + + call dsfmt_fill_array_open_open(rng%dSFMT_state, array, size(array)) + + end subroutine fill_array_open_open + + pure subroutine fill_array_gaussian(rng, array) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. + ! Out: + ! array: array filled with random numbers with a standard normal + ! (Gaussian) distribution (i.e. mean=0, variance=1). + + ! NOTE: array must be at least as large as the size returned by dsfmt_get_min_array_size. + + type(dSFMT_t), intent(inout) :: rng + real(c_double), intent(out) :: array(:) + + real(dp), parameter :: pi = acos(-1.0_dp) + integer :: i + real(c_double) :: s, t + + call dsfmt_fill_array_open_close(rng%dSFMT_state, array, size(array)) + + ! The Box-Muller transform converts a pair of random numbers, u, v, + ! which are uniformly distributed in (0,1] into a pair of random + ! numbers, y, z, which have a standard normal distribution using + ! y = R cos(\phi) = \sqrt(-2*ln(u)) cos(2\pi v) + ! z = R sin(\phi) = \sqrt(-2*ln(u)) sin(2\pi v) + ! See http://en.wikipedia.org/wiki/Box_Muller_transform for more + ! details. + ! This might be slower than the Marsaglia polar method but it doesn't + ! involve any rejections, which is convenient for us. + do i = 1, ubound(array, dim=1), 2 + s = sqrt(-2*log(array(i))) + t = 2*pi*array(i+1) + array(i) = s*cos(t) + array(i+1) = s*sin(t) + end do + + end subroutine fill_array_gaussian + +!--- Get a random number --- + +! WARNING: these calls use a store internal to the dSFMT_t variable. Calls to +! different functions with the same state must be separated either by a call to +! dSFMT_reset or a call to dSFMT_end followed by reinitialisation of the dSFMT_t +! state using dSFMT_init. + + function get_rand_close_open(rng) result(r) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + + ! Returns: + ! random number in interval [0,1). + + real(dp) :: r + type(dSFMT_t), intent(inout) :: rng + + if (rng%next_element == rng%random_store_size+1) then + ! Run out of random numbers: get more. + call dsfmt_fill_array_close_open(rng%dSFMT_state, rng%random_store, rng%random_store_size) + rng%next_element = 1 + rng%distribution = close_open + end if + + r = rng%random_store(rng%next_element) + rng%next_element = rng%next_element + 1 + + end function get_rand_close_open + + function get_rand_open_close(rng) result(r) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + + ! Returns: + ! random number in interval (0,1]. + + real(dp) :: r + type(dSFMT_t), intent(inout) :: rng + + if (rng%next_element == rng%random_store_size+1) then + ! Run out of random numbers: get more. + call dsfmt_fill_array_open_close(rng%dSFMT_state, rng%random_store, rng%random_store_size) + rng%next_element = 1 + rng%distribution = open_close + end if + + r = rng%random_store(rng%next_element) + rng%next_element = rng%next_element + 1 + + end function get_rand_open_close + + function get_rand_open_open(rng) result(r) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + + ! Returns: + ! random number in interval (0,1). + + real(dp) :: r + type(dSFMT_t), intent(inout) :: rng + + if (rng%next_element == rng%random_store_size+1) then + ! Run out of random numbers: get more. + call dsfmt_fill_array_open_open(rng%dSFMT_state, rng%random_store, rng%random_store_size) + rng%next_element = 1 + rng%distribution = open_open + end if + + r = rng%random_store(rng%next_element) + rng%next_element = rng%next_element + 1 + + end function get_rand_open_open + + function get_rand_gaussian(rng) result(r) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + + ! Returns: + ! random number with a standard normal (Gaussian) distribution (ie + ! mean=0 and variance=1). + + real(dp) :: r + type(dSFMT_t), intent(inout) :: rng + + if (rng%next_element == rng%random_store_size+1) then + ! Run out of random numbers: get more. + call fill_array_gaussian(rng, rng%random_store) + rng%next_element = 1 + rng%distribution = gaussian + end if + + r = rng%random_store(rng%next_element) + rng%next_element = rng%next_element + 1 + + end function get_rand_gaussian + +!--- Get array of random numbers from a store --- + +! If a small number of random numbers is repeatedly required, then filling up +! an array at a time comes with a large function call overhead. The following +! routines use an internal store, similar to the get_rand_xxx functions, but +! fill a small array in a single call. This can be faster than repeated +! get_rand_xxx calls as we need to only test how many elements are left in the +! store once rather than N times. + +! WARNING: these calls use a store internal to the dSFMT_t variable. Calls to +! different functions with the same state must be separated either by a call to +! dSFMT_reset or a call to dSFMT_end followed by reinitialisation of the dSFMT_t +! state using dSFMT_init. + + pure subroutine get_rand_arr_close_open(rng, arr, n) + + ! Fill an array with random numbers in interval [0,1). + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + ! Out: + ! arr: array of random numbers. Must contain at least n elements. + ! In: + ! n: number of random numbers to return in arr. + + type(dSFMT_t), intent(inout) :: rng + real(dp), intent(out) :: arr(:) + integer, intent(in) :: n + + integer :: navail, nleft + + if (rng%next_element + n <= rng%random_store_size) then + arr(1:n) = rng%random_store(rng%next_element:rng%next_element+n-1) + rng%next_element = rng%next_element + n + else + navail = rng%random_store_size - rng%next_element + 1 + arr(1:navail) = rng%random_store(rng%next_element:rng%random_store_size) + call dsfmt_fill_array_close_open(rng%dSFMT_state, rng%random_store, rng%random_store_size) + rng%distribution = close_open + nleft = n - navail + arr(navail+1:n) = rng%random_store(1:nleft) + rng%next_element = nleft + 1 + end if + + end subroutine get_rand_arr_close_open + + pure subroutine get_rand_arr_open_close(rng, arr, n) + + ! Fill an array with random numbers in interval (0,1]. + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + ! Out: + ! arr: array of random numbers. Must contain at least n elements. + ! In: + ! n: number of random numbers to return in arr. + + type(dSFMT_t), intent(inout) :: rng + real(dp), intent(out) :: arr(:) + integer, intent(in) :: n + + integer :: navail, nleft + + if (rng%next_element + n <= rng%random_store_size) then + arr(1:n) = rng%random_store(rng%next_element:rng%next_element+n-1) + rng%next_element = rng%next_element + n + else + navail = rng%random_store_size - rng%next_element + 1 + arr(1:navail) = rng%random_store(rng%next_element:rng%random_store_size) + call dsfmt_fill_array_open_close(rng%dSFMT_state, rng%random_store, rng%random_store_size) + rng%distribution = open_close + nleft = n - navail + arr(navail+1:n) = rng%random_store(1:nleft) + rng%next_element = nleft + 1 + end if + + end subroutine get_rand_arr_open_close + + pure subroutine get_rand_arr_open_open(rng, arr, n) + + ! Fill an array with random numbers in interval (0,1). + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + ! Out: + ! arr: array of random numbers. Must contain at least n elements. + ! In: + ! n: number of random numbers to return in arr. + + type(dSFMT_t), intent(inout) :: rng + real(dp), intent(out) :: arr(:) + integer, intent(in) :: n + + integer :: navail, nleft + + if (rng%next_element + n <= rng%random_store_size) then + arr(1:n) = rng%random_store(rng%next_element:rng%next_element+n-1) + rng%next_element = rng%next_element + n + else + navail = rng%random_store_size - rng%next_element + 1 + arr(1:navail) = rng%random_store(rng%next_element:rng%random_store_size) + call dsfmt_fill_array_open_open(rng%dSFMT_state, rng%random_store, rng%random_store_size) + rng%distribution = open_open + nleft = n - navail + arr(navail+1:n) = rng%random_store(1:nleft) + rng%next_element = nleft + 1 + end if + + end subroutine get_rand_arr_open_open + + pure subroutine get_rand_arr_gaussian(rng, arr, n) + + ! Fill an array with random numbers in interval (0,1). + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + ! Out: + ! arr: array of random numbers. Must contain at least n elements. + ! In: + ! n: number of random numbers to return in arr. + + type(dSFMT_t), intent(inout) :: rng + real(dp), intent(out) :: arr(:) + integer, intent(in) :: n + + integer :: navail, nleft + + if (rng%next_element + n <= rng%random_store_size) then + arr(1:n) = rng%random_store(rng%next_element:rng%next_element+n-1) + rng%next_element = rng%next_element + n + else + navail = rng%random_store_size - rng%next_element + 1 + arr(1:navail) = rng%random_store(rng%next_element:rng%random_store_size) + call fill_array_gaussian(rng, rng%random_store) + rng%distribution = gaussian + nleft = n - navail + arr(navail+1:n) = rng%random_store(1:nleft) + rng%next_element = nleft + 1 + end if + + end subroutine get_rand_arr_gaussian + +end module dSFMT_interface diff --git a/share/shr_RandNum/src/dsfmt_f03/dSFMT_utils.c b/share/shr_RandNum/src/dsfmt_f03/dSFMT_utils.c new file mode 100644 index 000000000000..77314de2f5ac --- /dev/null +++ b/share/shr_RandNum/src/dsfmt_f03/dSFMT_utils.c @@ -0,0 +1,18 @@ +#include "dSFMT.h" +#include + +/* copyright James Spencer 2012. + * New BSD License, see License.txt for details. + */ + +/* Utility (memory-access) functions to enable use of dSFMT from Fortran. */ + +void* malloc_dsfmt_t(void) { + /* Allocate sufficient memory for a dSFMT state (ie a variable of type dsfmt_t). */ + return malloc(sizeof(dsfmt_t)); +} + +void free_dsfmt_t(dsfmt_t* ptr) { + /* Free memory associated with a dSFMT state (ie a variable of type dsfmt_t). */ + free(ptr); +} diff --git a/share/shr_RandNum/src/kissvec/kissvec.c b/share/shr_RandNum/src/kissvec/kissvec.c new file mode 100644 index 000000000000..41f314a84be8 --- /dev/null +++ b/share/shr_RandNum/src/kissvec/kissvec.c @@ -0,0 +1,36 @@ +// KISS random generator implemented in C +// Basic algorithm from George Marsaglia circa 1998 +// Public domain Fortran implementation from http://www.fortran.com/ +// downloaded by pjr on 03/16/04 +// converted to vector form, functions inlined by pjr,mvr on 05/10/2004 +// Translated back into C in 2015 + +#include +#include + +#define shiftl_xor(x, n) (x ^= (x << n)) +#define shiftr_xor(x, n) (x ^= (x >> n)) + +// The KISS (Keep It Simple Stupid) random number generator. Combines: +// (1) The congruential generator x(n)=69069*x(n-1)+1327217885, period 2^32. +// (2) A 3-shift shift-register generator, period 2^32-1, +// (3) Two 16-bit multiply-with-carry generators, period 597273182964842497>2^59 +// Overall period>2^123; +// +// Note use of the C99 restrict keyword to enable optimization. +void kiss_rng(uint32_t seed1[restrict], uint32_t seed2[restrict], + uint32_t seed3[restrict], uint32_t seed4[restrict], + double ran_arr[restrict], size_t length) { + size_t i; + + for (i = 0; i < length; ++i) { + seed1[i] = 69069U * seed1[i] + 1327217885U; + shiftl_xor(seed2[i], 13); + shiftr_xor(seed2[i], 17); + shiftl_xor(seed2[i], 5); + seed3[i] = 18000U * (seed3[i] & 65535U) + (seed3[i] >> 16); + seed4[i] = 30903U * (seed4[i] & 65535U) + (seed4[i] >> 16); + ran_arr[i] = (seed1[i] + seed2[i] + (seed3[i] << 16) + seed4[i]) * 2.328306E-10; + } + +} diff --git a/share/shr_RandNum/src/kissvec/kissvec_mod.F90 b/share/shr_RandNum/src/kissvec/kissvec_mod.F90 new file mode 100644 index 000000000000..318bad97bf93 --- /dev/null +++ b/share/shr_RandNum/src/kissvec/kissvec_mod.F90 @@ -0,0 +1,41 @@ +module kissvec_mod +! Fortran binding for the KISS vectorizable random number generator. + +implicit none + +integer, parameter :: r8 = selected_real_kind(12) + +private +public :: kissvec + +contains + + subroutine kissvec( seed1, seed2, seed3, seed4, ran_arr, length) + + ! We can assume that an r8 is a double and an i4 is an int32_t, but we can't + ! make any guarantees about the relative sizes of a Fortran default integer + ! and C's size_t. + use iso_c_binding, only: c_size_t + + integer, intent(in) :: length + real(r8), dimension(length), intent(inout) :: ran_arr + integer, dimension(length), intent(inout) :: seed1, seed2, seed3, seed4 + + ! C implementation + interface + subroutine kiss_rng(seed1, seed2, seed3, seed4, ran_arr, length) bind(c) + ! Note that the definition of kiss_rng uses unsigned int, but the + ! Fortran standard largely ignores signed/unsigned because there + ! are no unsigned integers in Fortran. + use iso_c_binding, only: c_int32_t, c_double, c_size_t + integer(c_int32_t), intent(inout) :: seed1(*), seed2(*), seed3(*), seed4(*) + real(c_double), intent(inout) :: ran_arr(*) + integer(c_size_t), value :: length + end subroutine kiss_rng + end interface + + call kiss_rng(seed1, seed2, seed3, seed4, ran_arr, int(length, c_size_t)) + + end subroutine kissvec + +end module kissvec_mod diff --git a/share/shr_RandNum/src/mt19937/mersennetwister_mod.F90 b/share/shr_RandNum/src/mt19937/mersennetwister_mod.F90 new file mode 100644 index 000000000000..f3e9b2e5241c --- /dev/null +++ b/share/shr_RandNum/src/mt19937/mersennetwister_mod.F90 @@ -0,0 +1,293 @@ +! Fortran-95 implementation of the Mersenne Twister 19937, following +! the C implementation described below (code mt19937ar-cok.c, dated 2002/2/10), +! adapted cosmetically by making the names more general. +! Users must declare one or more variables of type randomNumberSequence in the calling +! procedure which are then initialized using a required seed. If the +! variable is not initialized the random numbers will all be 0. +! For example: +! program testRandoms +! use RandomNumbers +! type(randomNumberSequence) :: randomNumbers +! integer :: i +! +! randomNumbers = new_RandomNumberSequence(seed = 100) +! do i = 1, 10 +! print ('(f12.10, 2x)'), getRandomReal(randomNumbers) +! end do +! end program testRandoms +! +! Fortran-95 implementation by +! Robert Pincus +! NOAA-CIRES Climate Diagnostics Center +! Boulder, CO 80305 +! email: Robert.Pincus@colorado.edu +! +! This documentation in the original C program reads: +! ------------------------------------------------------------- +! A C-program for MT19937, with initialization improved 2002/2/10. +! Coded by Takuji Nishimura and Makoto Matsumoto. +! This is a faster version by taking Shawn Cokus's optimization, +! Matthe Bellew's simplification, Isaku Wada's real version. +! +! Before using, initialize the state by using init_genrand(seed) +! or init_by_array(init_key, key_length). +! +! Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! +! 3. The names of its contributors may not be used to endorse or promote +! products derived from this software without specific prior written +! permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +! A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +! +! +! Any feedback is very welcome. +! http://www.math.keio.ac.jp/matumoto/emt.html +! email: matumoto@math.keio.ac.jp +! ------------------------------------------------------------- + +module MersenneTwister_mod +! ------------------------------------------------------------- + implicit none + private + + ! Algorithm parameters + ! ------- + ! Period parameters + integer, parameter :: i8 = selected_int_kind(13) + integer, parameter :: blockSize = 624, & + M = 397, & + MATRIX_A = -1727483681, & ! constant vector a (0x9908b0dfUL) + UMASK = -2147483648_i8,& ! most significant w-r bits (0x80000000UL) + LMASK = 2147483647 ! least significant r bits (0x7fffffffUL) + ! Tempering parameters + integer, parameter :: TMASKB= -1658038656, & ! (0x9d2c5680UL) + TMASKC= -272236544 ! (0xefc60000UL) + ! ------- + + ! The type containing the state variable + type randomNumberSequence + integer :: currentElement ! = blockSize + integer, dimension(0:blockSize -1) :: state ! = 0 + end type randomNumberSequence + + interface new_RandomNumberSequence + module procedure initialize_scalar, initialize_vector + end interface new_RandomNumberSequence + + public :: randomNumberSequence + public :: new_RandomNumberSequence, finalize_RandomNumberSequence, & + getRandomInt, getRandomPositiveInt, getRandomReal +! ------------------------------------------------------------- +contains + ! ------------------------------------------------------------- + ! Private functions + ! --------------------------- + function mixbits(u, v) + integer, intent( in) :: u, v + integer :: mixbits + + mixbits = ior(iand(u, UMASK), iand(v, LMASK)) + end function mixbits + ! --------------------------- + function twist(u, v) + integer, intent( in) :: u, v + integer :: twist + + ! Local variable + integer, parameter, dimension(0:1) :: t_matrix = (/ 0, MATRIX_A /) + + twist = ieor(ishft(mixbits(u, v), -1), t_matrix(iand(v, 1))) + twist = ieor(ishft(mixbits(u, v), -1), t_matrix(iand(v, 1))) + end function twist + ! --------------------------- + subroutine nextState(twister) + type(randomNumberSequence), intent(inout) :: twister + + ! Local variables + integer :: k + + do k = 0, blockSize - M - 1 + twister%state(k) = ieor(twister%state(k + M), & + twist(twister%state(k), twister%state(k + 1))) + end do + do k = blockSize - M, blockSize - 2 + twister%state(k) = ieor(twister%state(k + M - blockSize), & + twist(twister%state(k), twister%state(k + 1))) + end do + twister%state(blockSize - 1) = ieor(twister%state(M - 1), & + twist(twister%state(blockSize - 1), twister%state(0))) + twister%currentElement = 0 + + end subroutine nextState + ! --------------------------- + elemental function temper(y) + integer, intent(in) :: y + integer :: temper + + integer :: x + + ! Tempering + x = ieor(y, ishft(y, -11)) + x = ieor(x, iand(ishft(x, 7), TMASKB)) + x = ieor(x, iand(ishft(x, 15), TMASKC)) + temper = ieor(x, ishft(x, -18)) + end function temper + ! ------------------------------------------------------------- + ! Public (but hidden) functions + ! -------------------- + function initialize_scalar(seed) result(twister) + integer, intent(in ) :: seed + type(randomNumberSequence) :: twister + + integer :: i + ! See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. In the previous versions, + ! MSBs of the seed affect only MSBs of the array state[]. + ! 2002/01/09 modified by Makoto Matsumoto + + twister%state(0) = iand(seed, -1) + do i = 1, blockSize - 1 ! ubound(twister%state) + twister%state(i) = 1812433253 * ieor(twister%state(i-1), & + ishft(twister%state(i-1), -30)) + i + twister%state(i) = iand(twister%state(i), -1) ! for >32 bit machines + end do + twister%currentElement = blockSize + end function initialize_scalar + ! ------------------------------------------------------------- + function initialize_vector(seed) result(twister) + integer, dimension(0:), intent(in) :: seed + type(randomNumberSequence) :: twister + + integer :: i, j, k, nFirstLoop, nWraps + + nWraps = 0 + twister = initialize_scalar(19650218) + + nFirstLoop = max(blockSize, size(seed)) + do k = 1, nFirstLoop + i = mod(k + nWraps, blockSize) + j = mod(k - 1, size(seed)) + if(i == 0) then + twister%state(i) = twister%state(blockSize - 1) + twister%state(1) = ieor(twister%state(1), & + ieor(twister%state(1-1), & + ishft(twister%state(1-1), -30)) * 1664525) + & + seed(j) + j ! Non-linear + twister%state(i) = iand(twister%state(i), -1) ! for >32 bit machines + nWraps = nWraps + 1 + else + twister%state(i) = ieor(twister%state(i), & + ieor(twister%state(i-1), & + ishft(twister%state(i-1), -30)) * 1664525) + & + seed(j) + j ! Non-linear + twister%state(i) = iand(twister%state(i), -1) ! for >32 bit machines + end if + end do + + ! + ! Walk through the state array, beginning where we left off in the block above + ! + do i = mod(nFirstLoop, blockSize) + nWraps + 1, blockSize - 1 + twister%state(i) = ieor(twister%state(i), & + ieor(twister%state(i-1), & + ishft(twister%state(i-1), -30)) * 1566083941) - i ! Non-linear + twister%state(i) = iand(twister%state(i), -1) ! for >32 bit machines + end do + + twister%state(0) = twister%state(blockSize - 1) + + do i = 1, mod(nFirstLoop, blockSize) + nWraps + twister%state(i) = ieor(twister%state(i), & + ieor(twister%state(i-1), & + ishft(twister%state(i-1), -30)) * 1566083941) - i ! Non-linear + twister%state(i) = iand(twister%state(i), -1) ! for >32 bit machines + end do + + twister%state(0) = UMASK + twister%currentElement = blockSize + + end function initialize_vector + ! ------------------------------------------------------------- + ! Public functions + ! -------------------- + function getRandomInt(twister) + type(randomNumberSequence), intent(inout) :: twister + integer :: getRandomInt + ! Generate a random integer on the interval [0,0xffffffff] + ! Equivalent to genrand_int32 in the C code. + ! Fortran doesn't have a type that's unsigned like C does, + ! so this is integers in the range -2**31 - 2**31 + ! All functions for getting random numbers call this one, + ! then manipulate the result + + if(twister%currentElement >= blockSize) call nextState(twister) + + getRandomInt = temper(twister%state(twister%currentElement)) + twister%currentElement = twister%currentElement + 1 + + end function getRandomInt + ! -------------------- + function getRandomPositiveInt(twister) + type(randomNumberSequence), intent(inout) :: twister + integer :: getRandomPositiveInt + ! Generate a random integer on the interval [0,0x7fffffff] + ! or [0,2**31] + ! Equivalent to genrand_int31 in the C code. + + ! Local integers + integer :: localInt + + localInt = getRandomInt(twister) + getRandomPositiveInt = ishft(localInt, -1) + + end function getRandomPositiveInt + ! -------------------- + function getRandomReal(twister) + + type(randomNumberSequence), intent(inout) :: twister + double precision :: getRandomReal + ! Generate a random number on [0,1] + ! Equivalent to genrand_real1 in the C code + ! The result is stored as double precision but has 32 bit resolution + + integer :: localInt + + localInt = getRandomInt(twister) + if(localInt < 0) then + getRandomReal = dble(localInt + 2.0d0**32)/(2.0d0**32 - 1.0d0) + else + getRandomReal = dble(localInt )/(2.0d0**32 - 1.0d0) + end if + end function getRandomReal + ! -------------------- + subroutine finalize_RandomNumberSequence(twister) + type(randomNumberSequence), intent(inout) :: twister + + twister%currentElement = blockSize + twister%state(:) = 0 + end subroutine finalize_RandomNumberSequence + ! -------------------- +end module MersenneTwister_mod + diff --git a/share/shr_RandNum/src/shr_RandNum_mod.F90 b/share/shr_RandNum/src/shr_RandNum_mod.F90 new file mode 100644 index 000000000000..0c161850cce0 --- /dev/null +++ b/share/shr_RandNum/src/shr_RandNum_mod.F90 @@ -0,0 +1,247 @@ +#ifdef INTEL_MKL +include 'mkl_vsl.f90' +#endif + +module shr_RandNum_mod + +! this module contains the driver for the available versions of random number generator + +use mersennetwister_mod, only : new_RandomNumberSequence, getRandomReal, & + randomNumberSequence, finalize_RandomNumberSequence +use dSFMT_interface, only : dSFMT_init, dSFMT_end, get_rand_arr_close_open, dSFMT_t +use kissvec_mod, only : kissvec + +#ifdef INTEL_MKL +use mkl_vsl_type +use mkl_vsl +#endif + implicit none + +integer, parameter :: r8 = selected_real_kind(12) +integer, parameter :: i8 = selected_int_kind(12) + +integer :: errcode + +real(r8) :: a=0.0_r8, b=1.0_r8 +integer :: brng, method + +type shr_rand_t + character(len=32) :: type + integer :: nstream + integer :: nrandom + integer, allocatable :: seed(:,:) +#ifdef INTEL_MKL + type (VSL_STREAM_STATE), allocatable :: vsl_stream(:) +#endif + type (dSFMT_t), allocatable :: rng(:) + type (randomNumberSequence), allocatable :: randomseq(:) +end type + +public shr_genRandNum, shr_RandNum_init, shr_RandNum_term, shr_rand_t + +contains + +subroutine shr_genRandNum(randStream, array, nrandom_reset) + + type (shr_rand_t), intent(inout) :: randStream + real(r8), dimension(:,:), intent(inout) :: array + integer, optional, intent(in) :: nrandom_reset + + integer :: i, n, nstream, nrandom + integer, dimension(1) :: seed + + nstream = randStream%nstream + nrandom = randStream%nrandom + + select case (randStream%type) + +#ifdef INTEL_MKL +! intel math kernel library SIMD fast merseene twister 19937 + case ("SFMT_MKL") + do n=1,nstream + errcode = vdrnguniform( method, randStream%vsl_stream(n), nrandom, array(n,:), a, b) + enddo +#endif + +! keep it simple stupid + case("KISSVEC") + if (present(nrandom_reset)) then + nrandom = nrandom_reset + endif + do i=1,nrandom + call kissvec( randStream%seed(:,1), randStream%seed(:,2), & + randStream%seed(:,3), randStream%seed(:,4), array(:,i), nstream ) + enddo + +! fortran-95 implementation of merseene twister 19937 + case("MT19937") + do i=1,nrandom + do n=1,nstream + array(n,i) = getRandomReal( randStream%randomseq(n) ) + enddo + enddo + +! fortran-90 intrinsic pseudorandom number generator + case("F90_INTRINSIC") + do n=1,nstream + call random_seed( put=randStream%seed(n,:) ) + call random_number( array(n,:) ) + enddo + +! SIMD-oriented fast mersenne twister 19937 + case("DSFMT_F03") + do n=1,nstream + call get_rand_arr_close_open( randStream%rng(n), array(n,:), nrandom ) + enddo + +end select + +end subroutine shr_genRandNum + +integer function shr_RandNum_seed_size(type) + character(len=*), intent(in) :: type + + select case (to_upper(type)) +#ifdef INTEL_MKL + case ("SFMT_MKL") + shr_RandNum_seed_size = 1 +#endif + case("KISSVEC") + shr_RandNum_seed_size = 4 + case("MT19937", "DSFMT_F03") + shr_RandNum_seed_size = 1 + case("F90_INTRINSIC") + call random_seed(size=shr_RandNum_seed_size) + end select +end function shr_RandNum_seed_size + +subroutine shr_RandNum_init( randStream, nstream, nrandom, type, seed ) + + type (shr_rand_t), intent(inout) :: randStream + integer, intent(in) :: nstream ! number of streams of random numbers + integer, intent(in) :: nrandom ! number of random numbers in streams + character(len=*), intent(in) :: type + integer, intent(in) :: seed(:,:) + + integer :: i, n + + randStream%nstream = nstream + randStream%nrandom = nrandom + randStream%type = to_upper(type) + + ! It might be useful to assert that the "seed" argument is of shape + ! [nstream, shr_Randnum_seed_size(type)] + + select case (randStream%type) + +#ifdef INTEL_MKL +! intel math kernel library SIMD fast merseene twister 19937 + case ("SFMT_MKL") + method = VSL_RNG_METHOD_UNIFORM_STD + brng = VSL_BRNG_SFMT19937 + + if( .NOT. allocated(randStream%vsl_stream) ) allocate(randStream%vsl_stream(nstream)) + do n=1,nstream + errcode = vslnewstream( randStream%vsl_stream(n), brng, seed=seed(n,1) ) + enddo +#endif + +! keep it simple stupid + case("KISSVEC") + + if (allocated(randStream%seed)) deallocate(randStream%seed) + allocate(randStream%seed(nstream,4)) + + randStream%seed = seed + +! fortran-95 implementation of mersenne twister 19937 + case("MT19937") + if( .NOT. allocated(randStream%randomseq) ) allocate(randStream%randomseq(nstream)) + + do n=1,nstream + randStream%randomseq(n) = new_RandomNumberSequence( seed=seed(n,1) ) + enddo + +! fortran-90 intrinsic pseudorandom number generator + case("F90_INTRINSIC") + if (allocated(randStream%seed)) deallocate(randStream%seed) + allocate(randStream%seed(nstream,size(seed, 2))) + + randStream%seed = seed + +! SIMD-oriented fast merseene twister 19937 + case("DSFMT_F03") + if( .NOT. allocated(randStream%rng) ) allocate(randStream%rng(nstream)) + do n=1,nstream + call dSFMT_init(seed(n,1), nrandom, randStream%rng(n) ) + enddo + + end select + +end subroutine shr_RandNum_init + +subroutine shr_RandNum_term( randStream ) + +type (shr_rand_t), intent(inout) :: randStream + +integer :: n, nstream,ierr + + nstream = randStream%nstream + + select case (randStream%type) + +#ifdef INTEL_MKL +! intel math kernel library SIMD fast merseene twister 19937 + case ("SFMT_MKL") + if( allocated (randStream%vsl_stream) ) then + do n=1,nstream + ierr = vslDeleteStream(randStream%vsl_stream(n)) + enddo + deallocate (randStream%vsl_stream) + endif +#endif + +! keep it simple stupid + case("KISSVEC") + if ( allocated (randStream%seed) ) deallocate (randStream%seed) + +! fortran-95 implementation of merseene twister 19937 + case("MT19937") + do n=1,nstream + call finalize_RandomNumberSequence(randStream%randomseq(n)) + enddo + if ( allocated(randStream%randomseq) ) deallocate (randStream%randomseq) + +! fortran-90 intrinsic pseudorandom number generator + case("F90_INTRINSIC") + if ( allocated(randStream%seed) ) deallocate(randStream%seed) + +! SIMD-oriented fast merseene twister 19937 + case("DSFMT_F03") + do n=1,nstream + call dSFMT_end(randStream%rng(n)) + enddo + if ( allocated (randStream%rng) ) deallocate (randStream%rng) + + end select + +end subroutine shr_RandNum_term + +function to_upper(strIn) result(strOut) + + character(len=*), intent(in) :: strIn + character(len=len(strIn)) :: strOut + integer :: i, j + + do i = 1,len(strIn) + j = iachar(strIn(i:i)) + if (j>= iachar("a") .and. j<=iachar("z") ) then + strOut(i:i) = achar(iachar(strIn(i:i))-32) + else + strOut(i:i) = strIn(i:i) + end if + end do + +end function to_upper + +end module shr_RandNum_mod diff --git a/share/shr_RandNum/test/bench/Makefile b/share/shr_RandNum/test/bench/Makefile new file mode 100644 index 000000000000..50db6aa88809 --- /dev/null +++ b/share/shr_RandNum/test/bench/Makefile @@ -0,0 +1,87 @@ +# Makefile created by mkmf $Id: mkmf,v 18.0.18.4 2012/12/04 15:24:15 Seth.Underwood Exp $ + +ifeq ($(COMPILER),intel) + FC := ifort + LD := ifort + LDFLAGS += -mkl + CC := icc + +# Sandy Bridge/Ivy Bridge + + FFLAGS := -O3 -xHost -fp-model fast -mkl -no-prec-div -no-prec-sqrt -override-limits + + CFLAGS := -O3 -xHost -fp-model fast -std=c99 + + CPPDEFS = -DINTEL_MKL -DHAVE_SSE2 + + +# Knights Corner + +# FFLAGS := -mmic -O3 -qopt-report=5 -fp-model fast -no-prec-div -no-prec-sqrt -I./ + +# Haswell + +# FFLAGS := -O3 -xCORE-AVX2 -no-prec-div -no-prec-sqrt -I./ -DCPRINTEL + +endif + +ifeq ($(COMPILER),pgi) + FC := pgfortran + LD := pgfortran + CC := pgcc + + FFLAGS := -fastsse + CFLAGS := -fastsse + CPPDEFS = +endif + +ifeq ($(COMPILER),gnu) + FC := gfortran + LD := gfortran + CC := gcc + + FFLAGS := -Ofast -march=native + CFLAGS := -Ofast -march=native -std=gnu99 + CPPDEFS = -DHAVE_SSE2 +endif + +ifeq ($(COMPILER),nag) + FC := nagfor + LD := nagfor + CC := gcc + + FFLAGS := -O4 + CFLAGS := -Ofast -march=native -std=gnu99 + CPPDEFS = -DHAVE_SSE2 +endif + +FFLAGS := $(FFLAGS) -I../../include -I./ +CFLAGS := $(CFLAGS) -I../../include + +CPPDEFS := $(CPPDEFS) -DDSFMT_MEXP=19937 + +.DEFAULT: + -echo $@ does not exist. +all: ./shr_RandNum.exe +dSFMT.o: ../../src/dsfmt_f03/dSFMT.c + $(CC) $(CPPDEFS) $(CPPFLAGS) $(CFLAGS) $(OTHERFLAGS) -c ../../src/dsfmt_f03/dSFMT.c +dSFMT_interface.o: ../../src/dsfmt_f03/dSFMT_interface.F90 + $(FC) $(CPPDEFS) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/dsfmt_f03/dSFMT_interface.F90 +dSFMT_utils.o: ../../src/dsfmt_f03/dSFMT_utils.c ../../include/dSFMT.h + $(CC) $(CPPDEFS) $(CPPFLAGS) $(CFLAGS) $(OTHERFLAGS) -c ../../src/dsfmt_f03/dSFMT_utils.c +kissvec_mod.o: ../../src/kissvec/kissvec_mod.F90 + $(FC) $(CPPDEFS) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/kissvec/kissvec_mod.F90 +kissvec.o: ../../src/kissvec/kissvec.c + $(CC) $(CPPDEFS) $(CPPFLAGS) $(CFLAGS) $(OTHERFLAGS) -c ../../src/kissvec/kissvec.c +test_shr_RandNum.o: ./test_shr_RandNum.F90 shr_RandNum_mod.o + $(FC) $(CPPDEFS) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ./test_shr_RandNum.F90 +mersennetwister_mod.o: ../../src/mt19937/mersennetwister_mod.F90 + $(FC) $(CPPDEFS) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/mt19937/mersennetwister_mod.F90 +shr_RandNum_mod.o: ../../src/shr_RandNum_mod.F90 kissvec_mod.o mersennetwister_mod.o dSFMT_interface.o + $(FC) $(CPPDEFS) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/shr_RandNum_mod.F90 +SRC = ./test_shr_RandNum.F90 ../kissvec/kissvec_mod.F90 ../mt19937/mersennetwister_mod.F90 ../dsfmt_f03/dSFMT.c ../dsfmt_f03/dSFMT_interface.F90 ../dsfmt_f03/dSFMT_utils.c ../shr_RandNum_mod.F90 ../../include/dSFMT.h +OBJ = test_shr_RandNum.o kissvec_mod.o mersennetwister_mod.o dSFMT.o dSFMT_interface.o dSFMT_utils.o shr_RandNum_mod.o kissvec.o +clean: + -rm -f .shr_RandNum.exe.cppdefs $(OBJ) *.mod ./shr_RandNum.exe +shr_RandNum.exe: $(OBJ) + $(LD) $(OBJ) -o shr_RandNum.exe $(LDFLAGS) diff --git a/share/shr_RandNum/test/bench/test_shr_RandNum.F90 b/share/shr_RandNum/test/bench/test_shr_RandNum.F90 new file mode 100644 index 000000000000..314298115843 --- /dev/null +++ b/share/shr_RandNum/test/bench/test_shr_RandNum.F90 @@ -0,0 +1,128 @@ +program test + +! this program calls the available versions of the random generators + +use shr_RandNum_mod, only : shr_genRandNum, shr_RandNum_init, shr_RandNum_term, shr_rand_t + +INTEGER,parameter :: r8 = selected_real_kind(12) + +type (shr_rand_t) :: randStream + +integer, parameter :: nstream = 16 ! number of streams of random numbers +integer, parameter :: length = 1000 ! length of stream of random numbers +integer :: ntrials = 50000 + +integer, dimension(nstream,1) :: seed = 7776578 +integer, dimension(nstream,4) :: kiss_seed +integer, dimension(:,:), allocatable :: intrinsic_seed + +real(r8), dimension(nstream,length) :: array + +integer :: i, n, m, intrinsic_size +integer :: c1, c2, cr, cm +real(r8) :: dt, dt1,dt2 + +#ifdef INTEL_MKL +! intel math kernel library merseene twister + + call system_clock(c1, cr, cm) + do m = 1,ntrials + call shr_RandNum_init( randStream, nstream, length, 'SFMT_MKL', seed=seed ) + enddo + call system_clock(c2, cr, cm); dt1 = dble(c2-c1)/dble(cr) + + call system_clock(c1, cr, cm) + do m = 1,ntrials + call shr_genRandNum( randStream, array ) + enddo + call shr_RandNum_term(randStream) + call system_clock(c2, cr, cm); dt2 = dble(c2-c1)/dble(cr) + dt = dt1+dt2 + print *, 'Init time (SFMT_MKL): ',dt1 + print *, 'Gen time (SFMT_MKL): ',dt2 + print *, 'Total time (SFMT_MKL): ',dt + print *, 'MegaRNumbers (SFMT_MKL): ', 1.0e-6*dble(nstream*length*ntrials)/dt + print *, 'Summation of Random Numbers: ', SUM(array) + print *, '--------'; print *, '' +#endif + +! keep it simple stupid random number + + call system_clock(c1, cr, cm) + do m = 1,ntrials + do n = 1,nstream + do i = 1, 4 + kiss_seed(n,i) = seed(n,1)*i+n + end do + end do + + call shr_RandNum_init( randStream, nstream, length, 'KISSVEC', & + seed=kiss_seed ) + + call shr_genRandNum ( randStream, array ) + enddo + call shr_RandNum_term(randStream) + call system_clock(c2, cr, cm); dt = dble(c2-c1)/dble(cr) + + print *, 'Total time (KISSVEC): ',dt + print *, 'MegaRNumbers (KISSVEC): ', 1.0e-6*dble(nstream*length*ntrials)/dt + print *, 'Summation of Random Numbers: ', SUM(array) + print *, '--------'; print *, '' + +! fortran-95 implementation of merseene twister + + call system_clock(c1, cr, cm) + do m = 1,ntrials + call shr_RandNum_init( randStream, nstream, length, 'MT19937', seed=seed ) + + call shr_genRandNum ( randStream, array ) + enddo + call shr_RandNum_term(randStream) + call system_clock(c2, cr, cm); dt = dble(c2-c1)/dble(cr) + + print *, 'Total time (MT19937): ',dt + print *, 'MegaRNumbers (MT19937): ', 1.0e-6*dble(nstream*length*ntrials)/dt + print *, 'Summation of Random Numbers: ', SUM(array) + print *, '--------'; print *, '' + +! fortran-90 intrinsic pseudorandom number generator + + call system_clock(c1, cr, cm) + call random_seed(size=intrinsic_size) + allocate(intrinsic_seed(nstream,intrinsic_size)) + do m = 1,ntrials + do n = 1, nstream + do i = 1, intrinsic_size + intrinsic_seed(n,i) = seed(n,1)*i+n + end do + end do + call shr_RandNum_init( randStream, nstream, length, 'F90_INTRINSIC', & + seed=intrinsic_seed ) + + call shr_genRandNum ( randStream, array ) + enddo + call shr_RandNum_term(randStream) + call system_clock(c2, cr, cm); dt = dble(c2-c1)/dble(cr) + + print *, 'Total time (F90_INTRINSIC): ',dt + print *, 'MegaRNumbers (F90_INTRINSIC): ', 1.0e-6*dble(nstream*length*ntrials)/dt + print *, 'Summation of Random Numbers: ', SUM(array) + print *, '--------'; print *, '' + +! SIMD-orientated merseene twister + + call system_clock(c1, cr, cm) + do m = 1,ntrials + call shr_RandNum_init( randStream, nstream, length, 'DSFMT_F03', seed=seed ) + + call shr_genRandNum ( randStream, array ) + enddo + call shr_RandNum_term(randStream) + call system_clock(c2, cr, cm); dt = dble(c2-c1)/dble(cr) + + print *, 'Total time (DSFMT_F03): ',dt + print *, 'MegaRNumbers (DSFMT_F03): ', 1.0e-6*dble(nstream*length*ntrials)/dt + print *, 'Summation of Random Numbers: ', SUM(array) + print *, '--------'; print *, '' + +end program test From 9e32bcf1e85b9b5f02c34c87406bacc1cda66f12 Mon Sep 17 00:00:00 2001 From: Sean Patrick Santos Date: Sat, 7 Nov 2015 23:50:03 -0700 Subject: [PATCH 2/5] Make `shr_RandNum_mod` more object-oriented. Rather than using a case statement comparing strings to figure out which random number generator is being used, we now have a class for each generation method. This greatly reduces the input necessary to construct a generator object. Testing: Test suite: Intel benchmark for shr_RandNum code only. Baseline: N/A Namelist changes: No Status: bit for bit --- share/shr_RandNum/src/shr_RandNum_mod.F90 | 430 ++++++++++-------- .../test/bench/test_shr_RandNum.F90 | 62 ++- 2 files changed, 281 insertions(+), 211 deletions(-) diff --git a/share/shr_RandNum/src/shr_RandNum_mod.F90 b/share/shr_RandNum/src/shr_RandNum_mod.F90 index 0c161850cce0..0fff8e885ded 100644 --- a/share/shr_RandNum/src/shr_RandNum_mod.F90 +++ b/share/shr_RandNum/src/shr_RandNum_mod.F90 @@ -15,233 +15,291 @@ module shr_RandNum_mod use mkl_vsl_type use mkl_vsl #endif - implicit none +implicit none -integer, parameter :: r8 = selected_real_kind(12) -integer, parameter :: i8 = selected_int_kind(12) +private + +public :: ShrRandGen -integer :: errcode +public :: ShrIntrinsicRandGen +public :: ShrKissRandGen +public :: ShrF95MtRandGen +public :: ShrDsfmtRandGen +public :: ShrMklMtRandGen -real(r8) :: a=0.0_r8, b=1.0_r8 -integer :: brng, method +integer, parameter :: r8 = selected_real_kind(12) +integer, parameter :: i8 = selected_int_kind(12) -type shr_rand_t - character(len=32) :: type - integer :: nstream - integer :: nrandom - integer, allocatable :: seed(:,:) +! Abstract base class for random number generators +type, abstract :: ShrRandGen + contains + procedure(gen_random), deferred :: random + procedure(gen_finalize), deferred :: finalize +end type ShrRandGen + +abstract interface + subroutine gen_random(self, array) + import + class(ShrRandGen), intent(inout) :: self + real(r8), dimension(:,:), intent(out) :: array + end subroutine gen_random + + subroutine gen_finalize(self) + import + class(ShrRandGen), intent(inout) :: self + end subroutine gen_finalize +end interface + +! Fortran 90 "random_number" intrinsic +type, extends(ShrRandGen) :: ShrIntrinsicRandGen + integer, allocatable, private :: seed(:,:) + contains + procedure :: random => intrinsic_random + procedure :: finalize => intrinsic_finalize +end type ShrIntrinsicRandGen + +interface ShrIntrinsicRandGen + module procedure ShrIntrinsicRandGen_constructor +end interface ShrIntrinsicRandGen + +! KISS (Keep It Simple Stupid) pseudorandom number generator +type, extends(ShrRandGen) :: ShrKissRandGen + integer, allocatable, private :: seed(:,:) + contains + procedure :: random => kiss_random + procedure :: finalize => kiss_finalize +end type ShrKissRandGen + +interface ShrKissRandGen + module procedure ShrKissRandGen_constructor +end interface ShrKissRandGen + +! Fortran 95 implementation of Mersene Twister 19937 +type, extends(ShrRandGen) :: ShrF95MtRandGen + type(randomNumberSequence), allocatable, private :: wrapped(:) + contains + procedure :: random => f95_mt_random + procedure :: finalize => f95_mt_finalize +end type ShrF95MtRandGen + +interface ShrF95MtRandGen + module procedure ShrF95MtRandGen_constructor +end interface ShrF95MtRandGen + +! Double precision SIMD Fast Mersene Twister 19937 +type, extends(ShrRandGen) :: ShrDsfmtRandGen + type(dSFMT_t), allocatable, private :: wrapped(:) + contains + procedure :: random => dsfmt_random + procedure :: finalize => dsfmt_finalize +end type ShrDsfmtRandGen + +interface ShrDsfmtRandGen + module procedure ShrDsfmtRandGen_constructor +end interface ShrDsfmtRandGen + +! Intel Math Kernel Library Mersenne Twister #ifdef INTEL_MKL - type (VSL_STREAM_STATE), allocatable :: vsl_stream(:) +type, extends(ShrRandGen) :: ShrMklMtRandGen + type (VSL_STREAM_STATE), allocatable :: wrapped(:) + contains + procedure :: random => mkl_mt_random + procedure :: finalize => mkl_mt_finalize +end type ShrMklMtRandGen #endif - type (dSFMT_t), allocatable :: rng(:) - type (randomNumberSequence), allocatable :: randomseq(:) -end type -public shr_genRandNum, shr_RandNum_init, shr_RandNum_term, shr_rand_t +interface ShrMklMtRandGen + module procedure ShrMklMtRandGen_constructor +end interface ShrMklMtRandGen contains -subroutine shr_genRandNum(randStream, array, nrandom_reset) +function ShrIntrinsicRandGen_constructor(seed) result(rand_gen) + integer, intent(in) :: seed(:,:) + type(ShrIntrinsicRandGen) :: rand_gen - type (shr_rand_t), intent(inout) :: randStream - real(r8), dimension(:,:), intent(inout) :: array - integer, optional, intent(in) :: nrandom_reset + allocate(rand_gen%seed(size(seed, 1),size(seed, 2))) + rand_gen%seed = seed - integer :: i, n, nstream, nrandom - integer, dimension(1) :: seed +end function ShrIntrinsicRandGen_constructor - nstream = randStream%nstream - nrandom = randStream%nrandom +subroutine intrinsic_random(self, array) + class(ShrIntrinsicRandGen), intent(inout) :: self + real(r8), dimension(:,:), intent(out) :: array - select case (randStream%type) + integer :: i -#ifdef INTEL_MKL -! intel math kernel library SIMD fast merseene twister 19937 - case ("SFMT_MKL") - do n=1,nstream - errcode = vdrnguniform( method, randStream%vsl_stream(n), nrandom, array(n,:), a, b) - enddo -#endif + do i = 1, size(self%seed, 1) + call random_seed(put=self%seed(i,:)) + call random_number(array(i,:)) + call random_seed(get=self%seed(i,:)) + end do -! keep it simple stupid - case("KISSVEC") - if (present(nrandom_reset)) then - nrandom = nrandom_reset - endif - do i=1,nrandom - call kissvec( randStream%seed(:,1), randStream%seed(:,2), & - randStream%seed(:,3), randStream%seed(:,4), array(:,i), nstream ) - enddo - -! fortran-95 implementation of merseene twister 19937 - case("MT19937") - do i=1,nrandom - do n=1,nstream - array(n,i) = getRandomReal( randStream%randomseq(n) ) - enddo - enddo - -! fortran-90 intrinsic pseudorandom number generator - case("F90_INTRINSIC") - do n=1,nstream - call random_seed( put=randStream%seed(n,:) ) - call random_number( array(n,:) ) - enddo - -! SIMD-oriented fast mersenne twister 19937 - case("DSFMT_F03") - do n=1,nstream - call get_rand_arr_close_open( randStream%rng(n), array(n,:), nrandom ) - enddo - -end select - -end subroutine shr_genRandNum - -integer function shr_RandNum_seed_size(type) - character(len=*), intent(in) :: type - - select case (to_upper(type)) -#ifdef INTEL_MKL - case ("SFMT_MKL") - shr_RandNum_seed_size = 1 -#endif - case("KISSVEC") - shr_RandNum_seed_size = 4 - case("MT19937", "DSFMT_F03") - shr_RandNum_seed_size = 1 - case("F90_INTRINSIC") - call random_seed(size=shr_RandNum_seed_size) - end select -end function shr_RandNum_seed_size +end subroutine intrinsic_random -subroutine shr_RandNum_init( randStream, nstream, nrandom, type, seed ) +subroutine intrinsic_finalize(self) + class(ShrIntrinsicRandGen), intent(inout) :: self - type (shr_rand_t), intent(inout) :: randStream - integer, intent(in) :: nstream ! number of streams of random numbers - integer, intent(in) :: nrandom ! number of random numbers in streams - character(len=*), intent(in) :: type - integer, intent(in) :: seed(:,:) + if ( allocated(self%seed) ) deallocate(self%seed) - integer :: i, n +end subroutine intrinsic_finalize - randStream%nstream = nstream - randStream%nrandom = nrandom - randStream%type = to_upper(type) +function ShrKissRandGen_constructor(seed) result(rand_gen) + integer, intent(in) :: seed(:,:) + type(ShrKissRandGen) :: rand_gen - ! It might be useful to assert that the "seed" argument is of shape - ! [nstream, shr_Randnum_seed_size(type)] + allocate(rand_gen%seed(size(seed, 1),4)) + rand_gen%seed = seed - select case (randStream%type) +end function ShrKissRandGen_constructor -#ifdef INTEL_MKL -! intel math kernel library SIMD fast merseene twister 19937 - case ("SFMT_MKL") - method = VSL_RNG_METHOD_UNIFORM_STD - brng = VSL_BRNG_SFMT19937 - - if( .NOT. allocated(randStream%vsl_stream) ) allocate(randStream%vsl_stream(nstream)) - do n=1,nstream - errcode = vslnewstream( randStream%vsl_stream(n), brng, seed=seed(n,1) ) - enddo -#endif +subroutine kiss_random(self, array) + class(ShrKissRandGen), intent(inout) :: self + real(r8), dimension(:,:), intent(out) :: array -! keep it simple stupid - case("KISSVEC") + integer :: i - if (allocated(randStream%seed)) deallocate(randStream%seed) - allocate(randStream%seed(nstream,4)) + do i = 1, size(array, 2) + call kissvec(self%seed(:,1), self%seed(:,2), self%seed(:,3), & + self%seed(:,4), array(:,i), size(self%seed, 1)) + end do - randStream%seed = seed +end subroutine kiss_random -! fortran-95 implementation of mersenne twister 19937 - case("MT19937") - if( .NOT. allocated(randStream%randomseq) ) allocate(randStream%randomseq(nstream)) +subroutine kiss_finalize(self) + class(ShrKissRandGen), intent(inout) :: self - do n=1,nstream - randStream%randomseq(n) = new_RandomNumberSequence( seed=seed(n,1) ) - enddo + if ( allocated(self%seed) ) deallocate(self%seed) -! fortran-90 intrinsic pseudorandom number generator - case("F90_INTRINSIC") - if (allocated(randStream%seed)) deallocate(randStream%seed) - allocate(randStream%seed(nstream,size(seed, 2))) +end subroutine kiss_finalize - randStream%seed = seed +function ShrF95MtRandGen_constructor(seed) result(rand_gen) + integer, intent(in) :: seed(:) + type(ShrF95MtRandGen) :: rand_gen -! SIMD-oriented fast merseene twister 19937 - case("DSFMT_F03") - if( .NOT. allocated(randStream%rng) ) allocate(randStream%rng(nstream)) - do n=1,nstream - call dSFMT_init(seed(n,1), nrandom, randStream%rng(n) ) - enddo + integer :: i - end select + allocate(rand_gen%wrapped(size(seed))) + do i = 1, size(seed) + rand_gen%wrapped(i) = new_RandomNumberSequence(seed=seed(i)) + end do -end subroutine shr_RandNum_init +end function ShrF95MtRandGen_constructor -subroutine shr_RandNum_term( randStream ) +subroutine f95_mt_random(self, array) + class(ShrF95MtRandGen), intent(inout) :: self + real(r8), dimension(:,:), intent(out) :: array -type (shr_rand_t), intent(inout) :: randStream + integer :: i, j -integer :: n, nstream,ierr + do i = 1, size(array, 2) + do j = 1, size(self%wrapped) + array(j,i) = getRandomReal(self%wrapped(j)) + end do + end do - nstream = randStream%nstream +end subroutine f95_mt_random - select case (randStream%type) +subroutine f95_mt_finalize(self) + class(ShrF95MtRandGen), intent(inout) :: self -#ifdef INTEL_MKL -! intel math kernel library SIMD fast merseene twister 19937 - case ("SFMT_MKL") - if( allocated (randStream%vsl_stream) ) then - do n=1,nstream - ierr = vslDeleteStream(randStream%vsl_stream(n)) - enddo - deallocate (randStream%vsl_stream) - endif -#endif + integer :: i + + if (allocated(self%wrapped)) then + do i = 1, size(self%wrapped) + call finalize_RandomNumberSequence(self%wrapped(i)) + end do + deallocate(self%wrapped) + end if + +end subroutine f95_mt_finalize + +function ShrDsfmtRandGen_constructor(seed, cache_size) result(rand_gen) + integer, intent(in) :: seed(:) + integer, intent(in) :: cache_size + type(ShrDsfmtRandGen) :: rand_gen + + integer :: i + + allocate(rand_gen%wrapped(size(seed))) + do i = 1, size(seed) + call dSFMT_init(seed(i), cache_size, rand_gen%wrapped(i)) + end do + +end function ShrDsfmtRandGen_constructor + +subroutine dsfmt_random(self, array) + class(ShrDsfmtRandGen), intent(inout) :: self + real(r8), dimension(:,:), intent(out) :: array + + integer :: i + + do i = 1, size(self%wrapped) + call get_rand_arr_close_open(self%wrapped(i), array(i,:), size(array, 2)) + end do + +end subroutine dsfmt_random + +subroutine dsfmt_finalize(self) + class(ShrDsfmtRandGen), intent(inout) :: self + + integer :: i + + if (allocated(self%wrapped)) then + do i = 1, size(self%wrapped) + call dSFMT_end(self%wrapped(i)) + end do + deallocate(self%wrapped) + end if + +end subroutine dsfmt_finalize + +function ShrMklMtRandGen_constructor(seed) result(rand_gen) + integer, intent(in) :: seed(:) + type(ShrMklMtRandGen) :: rand_gen + + integer :: err + + integer :: i + + allocate(rand_gen%wrapped(size(seed))) + do i = 1, size(seed) + err = vslnewstream(rand_gen%wrapped(i), VSL_BRNG_SFMT19937, seed=seed(i)) + end do + +end function ShrMklMtRandGen_constructor + +subroutine mkl_mt_random(self, array) + class(ShrMklMtRandGen), intent(inout) :: self + real(r8), dimension(:,:), intent(out) :: array + + real(r8), parameter :: range_start = 0.0_r8, range_end = 1.0_r8 + + integer :: err + + integer :: i + + do i = 1, size(self%wrapped) + err = vdrnguniform(VSL_RNG_METHOD_UNIFORM_STD, self%wrapped(i), & + size(array, 2), array(i,:), range_start, range_end) + end do + +end subroutine mkl_mt_random + +subroutine mkl_mt_finalize(self) + class(ShrMklMtRandGen), intent(inout) :: self + + integer :: err + + integer :: i -! keep it simple stupid - case("KISSVEC") - if ( allocated (randStream%seed) ) deallocate (randStream%seed) - -! fortran-95 implementation of merseene twister 19937 - case("MT19937") - do n=1,nstream - call finalize_RandomNumberSequence(randStream%randomseq(n)) - enddo - if ( allocated(randStream%randomseq) ) deallocate (randStream%randomseq) - -! fortran-90 intrinsic pseudorandom number generator - case("F90_INTRINSIC") - if ( allocated(randStream%seed) ) deallocate(randStream%seed) - -! SIMD-oriented fast merseene twister 19937 - case("DSFMT_F03") - do n=1,nstream - call dSFMT_end(randStream%rng(n)) - enddo - if ( allocated (randStream%rng) ) deallocate (randStream%rng) - - end select - -end subroutine shr_RandNum_term - -function to_upper(strIn) result(strOut) - - character(len=*), intent(in) :: strIn - character(len=len(strIn)) :: strOut - integer :: i, j - - do i = 1,len(strIn) - j = iachar(strIn(i:i)) - if (j>= iachar("a") .and. j<=iachar("z") ) then - strOut(i:i) = achar(iachar(strIn(i:i))-32) - else - strOut(i:i) = strIn(i:i) - end if + if (allocated(self%wrapped)) then + do i = 1, size(self%wrapped) + err = vslDeleteStream(self%wrapped(i)) end do + deallocate(self%wrapped) + end if -end function to_upper +end subroutine mkl_mt_finalize end module shr_RandNum_mod diff --git a/share/shr_RandNum/test/bench/test_shr_RandNum.F90 b/share/shr_RandNum/test/bench/test_shr_RandNum.F90 index 314298115843..dd60da6b53a4 100644 --- a/share/shr_RandNum/test/bench/test_shr_RandNum.F90 +++ b/share/shr_RandNum/test/bench/test_shr_RandNum.F90 @@ -2,17 +2,18 @@ program test ! this program calls the available versions of the random generators -use shr_RandNum_mod, only : shr_genRandNum, shr_RandNum_init, shr_RandNum_term, shr_rand_t +use shr_RandNum_mod, only: ShrRandGen, ShrIntrinsicRandGen, ShrKissRandGen, & + ShrF95MtRandGen, ShrDsfmtRandGen, ShrMklMtRandGen -INTEGER,parameter :: r8 = selected_real_kind(12) +INTEGER, parameter :: r8 = selected_real_kind(12) -type (shr_rand_t) :: randStream +class(ShrRandGen), allocatable :: randStream integer, parameter :: nstream = 16 ! number of streams of random numbers integer, parameter :: length = 1000 ! length of stream of random numbers integer :: ntrials = 50000 -integer, dimension(nstream,1) :: seed = 7776578 +integer, dimension(nstream) :: seed = 7776578 integer, dimension(nstream,4) :: kiss_seed integer, dimension(:,:), allocatable :: intrinsic_seed @@ -23,19 +24,24 @@ program test real(r8) :: dt, dt1,dt2 #ifdef INTEL_MKL -! intel math kernel library merseene twister +! intel math kernel library mersenne twister + + allocate(randStream, source=ShrMklMtRandGen(seed)) call system_clock(c1, cr, cm) do m = 1,ntrials - call shr_RandNum_init( randStream, nstream, length, 'SFMT_MKL', seed=seed ) + call randStream%finalize() + deallocate(randStream) + allocate(randStream, source=ShrMklMtRandGen(seed)) enddo call system_clock(c2, cr, cm); dt1 = dble(c2-c1)/dble(cr) call system_clock(c1, cr, cm) do m = 1,ntrials - call shr_genRandNum( randStream, array ) + call randStream%random(array) enddo - call shr_RandNum_term(randStream) + call randStream%finalize() + deallocate(randStream) call system_clock(c2, cr, cm); dt2 = dble(c2-c1)/dble(cr) dt = dt1+dt2 print *, 'Init time (SFMT_MKL): ',dt1 @@ -52,16 +58,17 @@ program test do m = 1,ntrials do n = 1,nstream do i = 1, 4 - kiss_seed(n,i) = seed(n,1)*i+n + kiss_seed(n,i) = seed(n)*i+n end do end do - call shr_RandNum_init( randStream, nstream, length, 'KISSVEC', & - seed=kiss_seed ) + allocate(randStream, source=ShrKissRandGen(kiss_seed)) + + call randStream%random(array) - call shr_genRandNum ( randStream, array ) + call randStream%finalize() + deallocate(randStream) enddo - call shr_RandNum_term(randStream) call system_clock(c2, cr, cm); dt = dble(c2-c1)/dble(cr) print *, 'Total time (KISSVEC): ',dt @@ -73,11 +80,13 @@ program test call system_clock(c1, cr, cm) do m = 1,ntrials - call shr_RandNum_init( randStream, nstream, length, 'MT19937', seed=seed ) + allocate(randStream, source=ShrF95MtRandGen(seed)) + + call randStream%random(array) - call shr_genRandNum ( randStream, array ) + call randStream%finalize() + deallocate(randStream) enddo - call shr_RandNum_term(randStream) call system_clock(c2, cr, cm); dt = dble(c2-c1)/dble(cr) print *, 'Total time (MT19937): ',dt @@ -93,15 +102,16 @@ program test do m = 1,ntrials do n = 1, nstream do i = 1, intrinsic_size - intrinsic_seed(n,i) = seed(n,1)*i+n + intrinsic_seed(n,i) = seed(n)*i+n end do end do - call shr_RandNum_init( randStream, nstream, length, 'F90_INTRINSIC', & - seed=intrinsic_seed ) + allocate(randStream, source=ShrIntrinsicRandGen(intrinsic_seed)) - call shr_genRandNum ( randStream, array ) + call randStream%random(array) + + call randStream%finalize() + deallocate(randStream) enddo - call shr_RandNum_term(randStream) call system_clock(c2, cr, cm); dt = dble(c2-c1)/dble(cr) print *, 'Total time (F90_INTRINSIC): ',dt @@ -109,15 +119,17 @@ program test print *, 'Summation of Random Numbers: ', SUM(array) print *, '--------'; print *, '' -! SIMD-orientated merseene twister +! SIMD-orientated mersenne twister call system_clock(c1, cr, cm) do m = 1,ntrials - call shr_RandNum_init( randStream, nstream, length, 'DSFMT_F03', seed=seed ) + allocate(randStream, source=ShrDsfmtRandGen(seed, length)) + + call randStream%random(array) - call shr_genRandNum ( randStream, array ) + call randStream%finalize() + deallocate(randStream) enddo - call shr_RandNum_term(randStream) call system_clock(c2, cr, cm); dt = dble(c2-c1)/dble(cr) print *, 'Total time (DSFMT_F03): ',dt From c2f0f35cbcd8cb6915677290cb74d1479bf800ca Mon Sep 17 00:00:00 2001 From: Sean Patrick Santos Date: Sun, 8 Nov 2015 01:38:37 -0700 Subject: [PATCH 3/5] Performance tweaks to `shr_RandNum_mod` Some changes were made which seem to modestly improve performance of the object-oriented interface for `shr_RandNum_mod`. In addition, the benchmark itself was changed to avoid including the cost of vtable lookup (which won't typically affect applications) and to increase the detail of output Testing: Test suite: Intel, GNU, and PGI benchmark Baseline: N/A Namelist changes: None Status: bit for bit --- share/shr_RandNum/src/shr_RandNum_mod.F90 | 50 +++--- .../test/bench/test_shr_RandNum.F90 | 155 +++++++++++------- 2 files changed, 124 insertions(+), 81 deletions(-) diff --git a/share/shr_RandNum/src/shr_RandNum_mod.F90 b/share/shr_RandNum/src/shr_RandNum_mod.F90 index 0fff8e885ded..65a71cdc85eb 100644 --- a/share/shr_RandNum/src/shr_RandNum_mod.F90 +++ b/share/shr_RandNum/src/shr_RandNum_mod.F90 @@ -25,7 +25,9 @@ module shr_RandNum_mod public :: ShrKissRandGen public :: ShrF95MtRandGen public :: ShrDsfmtRandGen +#ifdef INTEL_MKL public :: ShrMklMtRandGen +#endif integer, parameter :: r8 = selected_real_kind(12) integer, parameter :: i8 = selected_int_kind(12) @@ -54,8 +56,8 @@ end subroutine gen_finalize type, extends(ShrRandGen) :: ShrIntrinsicRandGen integer, allocatable, private :: seed(:,:) contains - procedure :: random => intrinsic_random - procedure :: finalize => intrinsic_finalize + procedure, non_overridable :: random => intrinsic_random + procedure, non_overridable :: finalize => intrinsic_finalize end type ShrIntrinsicRandGen interface ShrIntrinsicRandGen @@ -66,8 +68,8 @@ end subroutine gen_finalize type, extends(ShrRandGen) :: ShrKissRandGen integer, allocatable, private :: seed(:,:) contains - procedure :: random => kiss_random - procedure :: finalize => kiss_finalize + procedure, non_overridable :: random => kiss_random + procedure, non_overridable :: finalize => kiss_finalize end type ShrKissRandGen interface ShrKissRandGen @@ -78,8 +80,8 @@ end subroutine gen_finalize type, extends(ShrRandGen) :: ShrF95MtRandGen type(randomNumberSequence), allocatable, private :: wrapped(:) contains - procedure :: random => f95_mt_random - procedure :: finalize => f95_mt_finalize + procedure, non_overridable :: random => f95_mt_random + procedure, non_overridable :: finalize => f95_mt_finalize end type ShrF95MtRandGen interface ShrF95MtRandGen @@ -90,8 +92,8 @@ end subroutine gen_finalize type, extends(ShrRandGen) :: ShrDsfmtRandGen type(dSFMT_t), allocatable, private :: wrapped(:) contains - procedure :: random => dsfmt_random - procedure :: finalize => dsfmt_finalize + procedure, non_overridable :: random => dsfmt_random + procedure, non_overridable :: finalize => dsfmt_finalize end type ShrDsfmtRandGen interface ShrDsfmtRandGen @@ -103,14 +105,14 @@ end subroutine gen_finalize type, extends(ShrRandGen) :: ShrMklMtRandGen type (VSL_STREAM_STATE), allocatable :: wrapped(:) contains - procedure :: random => mkl_mt_random - procedure :: finalize => mkl_mt_finalize + procedure, non_overridable :: random => mkl_mt_random + procedure, non_overridable :: finalize => mkl_mt_finalize end type ShrMklMtRandGen -#endif interface ShrMklMtRandGen module procedure ShrMklMtRandGen_constructor end interface ShrMklMtRandGen +#endif contains @@ -157,11 +159,13 @@ subroutine kiss_random(self, array) class(ShrKissRandGen), intent(inout) :: self real(r8), dimension(:,:), intent(out) :: array - integer :: i + integer :: nstream, i + + nstream = size(self%seed, 1) do i = 1, size(array, 2) call kissvec(self%seed(:,1), self%seed(:,2), self%seed(:,3), & - self%seed(:,4), array(:,i), size(self%seed, 1)) + self%seed(:,4), array(:,i), nstream) end do end subroutine kiss_random @@ -190,10 +194,12 @@ subroutine f95_mt_random(self, array) class(ShrF95MtRandGen), intent(inout) :: self real(r8), dimension(:,:), intent(out) :: array - integer :: i, j + integer :: nstream, i, j + + nstream = size(self%wrapped) do i = 1, size(array, 2) - do j = 1, size(self%wrapped) + do j = 1, nstream array(j,i) = getRandomReal(self%wrapped(j)) end do end do @@ -232,10 +238,12 @@ subroutine dsfmt_random(self, array) class(ShrDsfmtRandGen), intent(inout) :: self real(r8), dimension(:,:), intent(out) :: array - integer :: i + integer :: nrandom, i + + nrandom = size(array, 2) do i = 1, size(self%wrapped) - call get_rand_arr_close_open(self%wrapped(i), array(i,:), size(array, 2)) + call get_rand_arr_close_open(self%wrapped(i), array(i,:), nrandom) end do end subroutine dsfmt_random @@ -254,6 +262,7 @@ subroutine dsfmt_finalize(self) end subroutine dsfmt_finalize +#ifdef INTEL_MKL function ShrMklMtRandGen_constructor(seed) result(rand_gen) integer, intent(in) :: seed(:) type(ShrMklMtRandGen) :: rand_gen @@ -277,11 +286,13 @@ subroutine mkl_mt_random(self, array) integer :: err - integer :: i + integer :: nrandom, i + + nrandom = size(array, 2) do i = 1, size(self%wrapped) err = vdrnguniform(VSL_RNG_METHOD_UNIFORM_STD, self%wrapped(i), & - size(array, 2), array(i,:), range_start, range_end) + nrandom, array(i,:), range_start, range_end) end do end subroutine mkl_mt_random @@ -301,5 +312,6 @@ subroutine mkl_mt_finalize(self) end if end subroutine mkl_mt_finalize +#endif end module shr_RandNum_mod diff --git a/share/shr_RandNum/test/bench/test_shr_RandNum.F90 b/share/shr_RandNum/test/bench/test_shr_RandNum.F90 index dd60da6b53a4..621ba9274391 100644 --- a/share/shr_RandNum/test/bench/test_shr_RandNum.F90 +++ b/share/shr_RandNum/test/bench/test_shr_RandNum.F90 @@ -2,12 +2,21 @@ program test ! this program calls the available versions of the random generators -use shr_RandNum_mod, only: ShrRandGen, ShrIntrinsicRandGen, ShrKissRandGen, & - ShrF95MtRandGen, ShrDsfmtRandGen, ShrMklMtRandGen +use shr_RandNum_mod, only: ShrIntrinsicRandGen, ShrKissRandGen, & + ShrF95MtRandGen, ShrDsfmtRandGen +#ifdef INTEL_MKL +use shr_RandNum_mod, only: ShrMklMtRandGen +#endif INTEGER, parameter :: r8 = selected_real_kind(12) -class(ShrRandGen), allocatable :: randStream +#ifdef INTEL_MKL +type(ShrMklMtRandGen) :: mkl_gen +#endif +type(ShrKissRandGen) :: kiss_gen +type(ShrF95MtRandGen) :: f95_mt_gen +type(ShrIntrinsicRandGen) :: intrinsic_gen +type(ShrDsfmtRandGen) :: dsfmt_gen integer, parameter :: nstream = 16 ! number of streams of random numbers integer, parameter :: length = 1000 ! length of stream of random numbers @@ -26,53 +35,56 @@ program test #ifdef INTEL_MKL ! intel math kernel library mersenne twister - allocate(randStream, source=ShrMklMtRandGen(seed)) - call system_clock(c1, cr, cm) do m = 1,ntrials - call randStream%finalize() - deallocate(randStream) - allocate(randStream, source=ShrMklMtRandGen(seed)) + mkl_gen = ShrMklMtRandGen(seed) + call mkl_gen%finalize() enddo call system_clock(c2, cr, cm); dt1 = dble(c2-c1)/dble(cr) + mkl_gen = ShrMklMtRandGen(seed) call system_clock(c1, cr, cm) do m = 1,ntrials - call randStream%random(array) + call mkl_gen%random(array) enddo - call randStream%finalize() - deallocate(randStream) + call mkl_gen%finalize() call system_clock(c2, cr, cm); dt2 = dble(c2-c1)/dble(cr) dt = dt1+dt2 - print *, 'Init time (SFMT_MKL): ',dt1 - print *, 'Gen time (SFMT_MKL): ',dt2 - print *, 'Total time (SFMT_MKL): ',dt - print *, 'MegaRNumbers (SFMT_MKL): ', 1.0e-6*dble(nstream*length*ntrials)/dt + print *, 'Init/term time (SFMT_MKL): ',dt1 + print *, 'Gen time (SFMT_MKL): ',dt2 + print *, 'Total time (SFMT_MKL): ',dt + print *, 'MegaRNumbers (SFMT_MKL): ', 1.0e-6*dble(nstream*length*ntrials)/dt print *, 'Summation of Random Numbers: ', SUM(array) print *, '--------'; print *, '' #endif ! keep it simple stupid random number + do n = 1,nstream + do i = 1, 4 + kiss_seed(n,i) = seed(n)*i+n + end do + end do + call system_clock(c1, cr, cm) do m = 1,ntrials - do n = 1,nstream - do i = 1, 4 - kiss_seed(n,i) = seed(n)*i+n - end do - end do - - allocate(randStream, source=ShrKissRandGen(kiss_seed)) - - call randStream%random(array) - - call randStream%finalize() - deallocate(randStream) + kiss_gen = ShrKissRandGen(kiss_seed) + call kiss_gen%finalize() enddo - call system_clock(c2, cr, cm); dt = dble(c2-c1)/dble(cr) + call system_clock(c2, cr, cm); dt1 = dble(c2-c1)/dble(cr) - print *, 'Total time (KISSVEC): ',dt - print *, 'MegaRNumbers (KISSVEC): ', 1.0e-6*dble(nstream*length*ntrials)/dt + kiss_gen = ShrKissRandGen(kiss_seed) + call system_clock(c1, cr, cm) + do m = 1,ntrials + call kiss_gen%random(array) + enddo + call kiss_gen%finalize() + call system_clock(c2, cr, cm); dt2 = dble(c2-c1)/dble(cr) + dt = dt1+dt2 + print *, 'Init/term time (KISSVEC): ',dt1 + print *, 'Gen time (KISSVEC): ',dt2 + print *, 'Total time (KISSVEC): ',dt + print *, 'MegaRNumbers (KISSVEC): ', 1.0e-6*dble(nstream*length*ntrials)/dt print *, 'Summation of Random Numbers: ', SUM(array) print *, '--------'; print *, '' @@ -80,42 +92,55 @@ program test call system_clock(c1, cr, cm) do m = 1,ntrials - allocate(randStream, source=ShrF95MtRandGen(seed)) - - call randStream%random(array) - - call randStream%finalize() - deallocate(randStream) + f95_mt_gen = ShrF95MtRandGen(seed) + call f95_mt_gen%finalize() enddo - call system_clock(c2, cr, cm); dt = dble(c2-c1)/dble(cr) + call system_clock(c2, cr, cm); dt1 = dble(c2-c1)/dble(cr) - print *, 'Total time (MT19937): ',dt - print *, 'MegaRNumbers (MT19937): ', 1.0e-6*dble(nstream*length*ntrials)/dt + f95_mt_gen = ShrF95MtRandGen(seed) + call system_clock(c1, cr, cm) + do m = 1,ntrials + call f95_mt_gen%random(array) + enddo + call f95_mt_gen%finalize() + call system_clock(c2, cr, cm); dt2 = dble(c2-c1)/dble(cr) + dt = dt1+dt2 + print *, 'Init/term time (MT19937): ',dt1 + print *, 'Gen time (MT19937): ',dt2 + print *, 'Total time (MT19937): ',dt + print *, 'MegaRNumbers (MT19937): ', 1.0e-6*dble(nstream*length*ntrials)/dt print *, 'Summation of Random Numbers: ', SUM(array) print *, '--------'; print *, '' ! fortran-90 intrinsic pseudorandom number generator - call system_clock(c1, cr, cm) call random_seed(size=intrinsic_size) allocate(intrinsic_seed(nstream,intrinsic_size)) - do m = 1,ntrials - do n = 1, nstream - do i = 1, intrinsic_size - intrinsic_seed(n,i) = seed(n)*i+n - end do + do n = 1, nstream + do i = 1, intrinsic_size + intrinsic_seed(n,i) = seed(n)*i+n end do - allocate(randStream, source=ShrIntrinsicRandGen(intrinsic_seed)) - - call randStream%random(array) + end do - call randStream%finalize() - deallocate(randStream) + call system_clock(c1, cr, cm) + do m = 1,ntrials + intrinsic_gen = ShrIntrinsicRandGen(intrinsic_seed) + call intrinsic_gen%finalize() enddo - call system_clock(c2, cr, cm); dt = dble(c2-c1)/dble(cr) + call system_clock(c2, cr, cm); dt1 = dble(c2-c1)/dble(cr) - print *, 'Total time (F90_INTRINSIC): ',dt - print *, 'MegaRNumbers (F90_INTRINSIC): ', 1.0e-6*dble(nstream*length*ntrials)/dt + intrinsic_gen = ShrIntrinsicRandGen(intrinsic_seed) + call system_clock(c1, cr, cm) + do m = 1,ntrials + call intrinsic_gen%random(array) + enddo + call intrinsic_gen%finalize() + call system_clock(c2, cr, cm); dt2 = dble(c2-c1)/dble(cr) + dt = dt1+dt2 + print *, 'Init/term time (F90_INTRINSIC): ',dt1 + print *, 'Gen time (F90_INTRINSIC): ',dt2 + print *, 'Total time (F90_INTRINSIC): ',dt + print *, 'MegaRNumbers (F90_INTRINSIC): ', 1.0e-6*dble(nstream*length*ntrials)/dt print *, 'Summation of Random Numbers: ', SUM(array) print *, '--------'; print *, '' @@ -123,17 +148,23 @@ program test call system_clock(c1, cr, cm) do m = 1,ntrials - allocate(randStream, source=ShrDsfmtRandGen(seed, length)) - - call randStream%random(array) - - call randStream%finalize() - deallocate(randStream) + dsfmt_gen = ShrDsfmtRandGen(seed, length) + call dsfmt_gen%finalize() enddo - call system_clock(c2, cr, cm); dt = dble(c2-c1)/dble(cr) + call system_clock(c2, cr, cm); dt1 = dble(c2-c1)/dble(cr) - print *, 'Total time (DSFMT_F03): ',dt - print *, 'MegaRNumbers (DSFMT_F03): ', 1.0e-6*dble(nstream*length*ntrials)/dt + dsfmt_gen = ShrDsfmtRandGen(seed, length) + call system_clock(c1, cr, cm) + do m = 1,ntrials + call dsfmt_gen%random(array) + enddo + call dsfmt_gen%finalize() + call system_clock(c2, cr, cm); dt2 = dble(c2-c1)/dble(cr) + dt = dt1+dt2 + print *, 'Init/term time (DSFMT_F03): ',dt1 + print *, 'Gen time (DSFMT_F03): ',dt2 + print *, 'Total time (DSFMT_F03): ',dt + print *, 'MegaRNumbers (DSFMT_F03): ', 1.0e-6*dble(nstream*length*ntrials)/dt print *, 'Summation of Random Numbers: ', SUM(array) print *, '--------'; print *, '' From e1c66253958f53322a3ae273a621db5283579984 Mon Sep 17 00:00:00 2001 From: Sean Patrick Santos Date: Sun, 8 Nov 2015 21:13:31 -0700 Subject: [PATCH 4/5] Change `kissvec` to be BFB with old version. In the translation from C, the KISS random number generator was modified in a way that caused it to provide random floats that differ from the Fortran version by roundoff-level differences. This tweaks the C version to give the exact same answers as the original code, at least for current Intel compilers. Testing: Test suite: Intel and GNU benchmark. Baseline: N/A Namelist changes: None. Status: bit-for-bit (since the code is still not used in CESM) --- share/shr_RandNum/src/kissvec/kissvec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/shr_RandNum/src/kissvec/kissvec.c b/share/shr_RandNum/src/kissvec/kissvec.c index 41f314a84be8..deef6ffa1a46 100644 --- a/share/shr_RandNum/src/kissvec/kissvec.c +++ b/share/shr_RandNum/src/kissvec/kissvec.c @@ -30,7 +30,7 @@ void kiss_rng(uint32_t seed1[restrict], uint32_t seed2[restrict], shiftl_xor(seed2[i], 5); seed3[i] = 18000U * (seed3[i] & 65535U) + (seed3[i] >> 16); seed4[i] = 30903U * (seed4[i] & 65535U) + (seed4[i] >> 16); - ran_arr[i] = (seed1[i] + seed2[i] + (seed3[i] << 16) + seed4[i]) * 2.328306E-10; + ran_arr[i] = ((int32_t) (seed1[i] + seed2[i] + (seed3[i] << 16) + seed4[i])) * 2.328306E-10 + 0.5; } } From 7c7be228f081fdbf28e550020b156af6d7ea7e8a Mon Sep 17 00:00:00 2001 From: Sean Patrick Santos Date: Mon, 9 Nov 2015 21:17:12 +0000 Subject: [PATCH 5/5] Port `shr_RandNum_mod` benchmark Makefile to XLF. Test suite: benchmark only Test baseline: N/A Test namelist changes: None Test status: bit for bit --- share/shr_RandNum/test/bench/Makefile | 39 +++++++++++++++++++-------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/share/shr_RandNum/test/bench/Makefile b/share/shr_RandNum/test/bench/Makefile index 50db6aa88809..5b10b0b54960 100644 --- a/share/shr_RandNum/test/bench/Makefile +++ b/share/shr_RandNum/test/bench/Makefile @@ -55,33 +55,50 @@ ifeq ($(COMPILER),nag) CPPDEFS = -DHAVE_SSE2 endif -FFLAGS := $(FFLAGS) -I../../include -I./ -CFLAGS := $(CFLAGS) -I../../include +ifeq ($(COMPILER),ibm) + FC := xlf2003 + LD := xlf2003 + CC := xlc + + FFLAGS := -O4 + CFLAGS := -O4 + CPPDEFS = +endif CPPDEFS := $(CPPDEFS) -DDSFMT_MEXP=19937 +ifeq ($(COMPILER),ibm) + cpre = $(null)-WF,-D$(null) + FPPDEFS := $(patsubst -D%,$(cpre)%,$(CPPDEF)) +else + FPPDEFS := $(CPPDEFS) +endif + +FFLAGS := $(FFLAGS) $(FPPDEFS) -I../../include -I./ +CFLAGS := $(CFLAGS) $(CPPDEFS) -I../../include + .DEFAULT: -echo $@ does not exist. all: ./shr_RandNum.exe dSFMT.o: ../../src/dsfmt_f03/dSFMT.c - $(CC) $(CPPDEFS) $(CPPFLAGS) $(CFLAGS) $(OTHERFLAGS) -c ../../src/dsfmt_f03/dSFMT.c + $(CC) $(CPPFLAGS) $(CFLAGS) $(OTHERFLAGS) -c ../../src/dsfmt_f03/dSFMT.c dSFMT_interface.o: ../../src/dsfmt_f03/dSFMT_interface.F90 - $(FC) $(CPPDEFS) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/dsfmt_f03/dSFMT_interface.F90 + $(FC) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/dsfmt_f03/dSFMT_interface.F90 dSFMT_utils.o: ../../src/dsfmt_f03/dSFMT_utils.c ../../include/dSFMT.h - $(CC) $(CPPDEFS) $(CPPFLAGS) $(CFLAGS) $(OTHERFLAGS) -c ../../src/dsfmt_f03/dSFMT_utils.c + $(CC) $(CPPFLAGS) $(CFLAGS) $(OTHERFLAGS) -c ../../src/dsfmt_f03/dSFMT_utils.c kissvec_mod.o: ../../src/kissvec/kissvec_mod.F90 - $(FC) $(CPPDEFS) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/kissvec/kissvec_mod.F90 + $(FC) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/kissvec/kissvec_mod.F90 kissvec.o: ../../src/kissvec/kissvec.c - $(CC) $(CPPDEFS) $(CPPFLAGS) $(CFLAGS) $(OTHERFLAGS) -c ../../src/kissvec/kissvec.c + $(CC) $(CPPFLAGS) $(CFLAGS) $(OTHERFLAGS) -c ../../src/kissvec/kissvec.c test_shr_RandNum.o: ./test_shr_RandNum.F90 shr_RandNum_mod.o - $(FC) $(CPPDEFS) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ./test_shr_RandNum.F90 + $(FC) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ./test_shr_RandNum.F90 mersennetwister_mod.o: ../../src/mt19937/mersennetwister_mod.F90 - $(FC) $(CPPDEFS) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/mt19937/mersennetwister_mod.F90 + $(FC) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/mt19937/mersennetwister_mod.F90 shr_RandNum_mod.o: ../../src/shr_RandNum_mod.F90 kissvec_mod.o mersennetwister_mod.o dSFMT_interface.o - $(FC) $(CPPDEFS) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/shr_RandNum_mod.F90 + $(FC) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/shr_RandNum_mod.F90 SRC = ./test_shr_RandNum.F90 ../kissvec/kissvec_mod.F90 ../mt19937/mersennetwister_mod.F90 ../dsfmt_f03/dSFMT.c ../dsfmt_f03/dSFMT_interface.F90 ../dsfmt_f03/dSFMT_utils.c ../shr_RandNum_mod.F90 ../../include/dSFMT.h OBJ = test_shr_RandNum.o kissvec_mod.o mersennetwister_mod.o dSFMT.o dSFMT_interface.o dSFMT_utils.o shr_RandNum_mod.o kissvec.o clean: - -rm -f .shr_RandNum.exe.cppdefs $(OBJ) *.mod ./shr_RandNum.exe + -rm -f .shr_RandNum.exe.cppdefs $(OBJ) *.mod ./shr_RandNum.exe *.s shr_RandNum.exe: $(OBJ) $(LD) $(OBJ) -o shr_RandNum.exe $(LDFLAGS)