diff --git a/share/shr_RandNum/include/dSFMT-common.h b/share/shr_RandNum/include/dSFMT-common.h new file mode 100644 index 000000000000..a851f4c76064 --- /dev/null +++ b/share/shr_RandNum/include/dSFMT-common.h @@ -0,0 +1,115 @@ +#pragma once +/** + * @file dSFMT-common.h + * + * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom + * number generator with jump function. This file includes common functions + * used in random number generation and jump. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima + * University and The University of Tokyo. + * All rights reserved. + * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + */ +#ifndef DSFMT_COMMON_H +#define DSFMT_COMMON_H + +#include "dSFMT.h" + +#if defined(HAVE_SSE2) +# include +union X128I_T { + uint64_t u[2]; + __m128i i128; +}; +union X128D_T { + double d[2]; + __m128d d128; +}; +/** mask data for sse2 */ +static const union X128I_T sse2_param_mask = {{DSFMT_MSK1, DSFMT_MSK2}}; +#endif + +#if defined(HAVE_ALTIVEC) +inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, + w128_t *lung) { + const vector unsigned char sl1 = ALTI_SL1; + const vector unsigned char sl1_perm = ALTI_SL1_PERM; + const vector unsigned int sl1_msk = ALTI_SL1_MSK; + const vector unsigned char sr1 = ALTI_SR; + const vector unsigned char sr1_perm = ALTI_SR_PERM; + const vector unsigned int sr1_msk = ALTI_SR_MSK; + const vector unsigned char perm = ALTI_PERM; + const vector unsigned int msk1 = ALTI_MSK; + vector unsigned int w, x, y, z; + + z = a->s; + w = lung->s; + x = vec_perm(w, (vector unsigned int)perm, perm); + y = vec_perm(z, sl1_perm, sl1_perm); + y = vec_sll(y, sl1); + y = vec_and(y, sl1_msk); + w = vec_xor(x, b->s); + w = vec_xor(w, y); + x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm); + x = vec_srl(x, sr1); + x = vec_and(x, sr1_msk); + y = vec_and(w, msk1); + z = vec_xor(z, y); + r->s = vec_xor(z, x); + lung->s = w; +} +#elif defined(HAVE_SSE2) +/** + * This function represents the recursion formula. + * @param r output 128-bit + * @param a a 128-bit part of the internal state array + * @param b a 128-bit part of the internal state array + * @param d a 128-bit part of the internal state array (I/O) + */ +inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) { + __m128i v, w, x, y, z; + + x = a->si; + z = _mm_slli_epi64(x, DSFMT_SL1); + y = _mm_shuffle_epi32(u->si, SSE2_SHUFF); + z = _mm_xor_si128(z, b->si); + y = _mm_xor_si128(y, z); + + v = _mm_srli_epi64(y, DSFMT_SR); + w = _mm_and_si128(y, sse2_param_mask.i128); + v = _mm_xor_si128(v, x); + v = _mm_xor_si128(v, w); + r->si = v; + u->si = y; +} +#else +/** + * This function represents the recursion formula. + * @param r output 128-bit + * @param a a 128-bit part of the internal state array + * @param b a 128-bit part of the internal state array + * @param lung a 128-bit part of the internal state array (I/O) + */ +inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, + w128_t *lung) { + uint64_t t0, t1, L0, L1; + + t0 = a->u[0]; + t1 = a->u[1]; + L0 = lung->u[0]; + L1 = lung->u[1]; + lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0]; + lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1]; + r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0; + r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1; +} +#endif +#endif diff --git a/share/shr_RandNum/include/dSFMT-params.h b/share/shr_RandNum/include/dSFMT-params.h new file mode 100644 index 000000000000..825148e689ab --- /dev/null +++ b/share/shr_RandNum/include/dSFMT-params.h @@ -0,0 +1,87 @@ +#ifndef DSFMT_PARAMS_H +#define DSFMT_PARAMS_H + +#include "dSFMT.h" + +/*---------------------- + the parameters of DSFMT + following definitions are in dSFMT-paramsXXXX.h file. + ----------------------*/ +/** the pick up position of the array. +#define DSFMT_POS1 122 +*/ + +/** the parameter of shift left as four 32-bit registers. +#define DSFMT_SL1 18 + */ + +/** the parameter of shift right as four 32-bit registers. +#define DSFMT_SR1 12 +*/ + +/** A bitmask, used in the recursion. These parameters are introduced + * to break symmetry of SIMD. +#define DSFMT_MSK1 (uint64_t)0xdfffffefULL +#define DSFMT_MSK2 (uint64_t)0xddfecb7fULL +*/ + +/** These definitions are part of a 128-bit period certification vector. +#define DSFMT_PCV1 UINT64_C(0x00000001) +#define DSFMT_PCV2 UINT64_C(0x00000000) +*/ + +#define DSFMT_LOW_MASK UINT64_C(0x000FFFFFFFFFFFFF) +#define DSFMT_HIGH_CONST UINT64_C(0x3FF0000000000000) +#define DSFMT_SR 12 + +/* for sse2 */ +#if defined(HAVE_SSE2) + #define SSE2_SHUFF 0x1b +#elif defined(HAVE_ALTIVEC) + #if defined(__APPLE__) /* For OSX */ + #define ALTI_SR (vector unsigned char)(4) + #define ALTI_SR_PERM \ + (vector unsigned char)(15,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14) + #define ALTI_SR_MSK \ + (vector unsigned int)(0x000fffffU,0xffffffffU,0x000fffffU,0xffffffffU) + #define ALTI_PERM \ + (vector unsigned char)(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3) + #else + #define ALTI_SR {4} + #define ALTI_SR_PERM {15,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14} + #define ALTI_SR_MSK {0x000fffffU,0xffffffffU,0x000fffffU,0xffffffffU} + #define ALTI_PERM {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3} + #endif +#endif + +#if DSFMT_MEXP == 521 + #include "dSFMT-params521.h" +#elif DSFMT_MEXP == 1279 + #include "dSFMT-params1279.h" +#elif DSFMT_MEXP == 2203 + #include "dSFMT-params2203.h" +#elif DSFMT_MEXP == 4253 + #include "dSFMT-params4253.h" +#elif DSFMT_MEXP == 11213 + #include "dSFMT-params11213.h" +#elif DSFMT_MEXP == 19937 + #include "dSFMT-params19937.h" +#elif DSFMT_MEXP == 44497 + #include "dSFMT-params44497.h" +#elif DSFMT_MEXP == 86243 + #include "dSFMT-params86243.h" +#elif DSFMT_MEXP == 132049 + #include "dSFMT-params132049.h" +#elif DSFMT_MEXP == 216091 + #include "dSFMT-params216091.h" +#else +#ifdef __GNUC__ + #error "DSFMT_MEXP is not valid." + #undef DSFMT_MEXP +#else + #undef DSFMT_MEXP +#endif + +#endif + +#endif /* DSFMT_PARAMS_H */ diff --git a/share/shr_RandNum/include/dSFMT-params19937.h b/share/shr_RandNum/include/dSFMT-params19937.h new file mode 100644 index 000000000000..7f018466db5b --- /dev/null +++ b/share/shr_RandNum/include/dSFMT-params19937.h @@ -0,0 +1,40 @@ +#ifndef DSFMT_PARAMS19937_H +#define DSFMT_PARAMS19937_H + +/* #define DSFMT_N 191 */ +/* #define DSFMT_MAXDEGREE 19992 */ +#define DSFMT_POS1 117 +#define DSFMT_SL1 19 +#define DSFMT_MSK1 UINT64_C(0x000ffafffffffb3f) +#define DSFMT_MSK2 UINT64_C(0x000ffdfffc90fffd) +#define DSFMT_MSK32_1 0x000ffaffU +#define DSFMT_MSK32_2 0xfffffb3fU +#define DSFMT_MSK32_3 0x000ffdffU +#define DSFMT_MSK32_4 0xfc90fffdU +#define DSFMT_FIX1 UINT64_C(0x90014964b32f4329) +#define DSFMT_FIX2 UINT64_C(0x3b8d12ac548a7c7a) +#define DSFMT_PCV1 UINT64_C(0x3d84e1ac0dc82880) +#define DSFMT_PCV2 UINT64_C(0x0000000000000001) +#define DSFMT_IDSTR "dSFMT2-19937:117-19:ffafffffffb3f-ffdfffc90fffd" + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(3, 3, 3, 3) + #define ALTI_SL1_PERM \ + (vector unsigned char)(2,3,4,5,6,7,30,30,10,11,12,13,14,15,0,1) + #define ALTI_SL1_MSK \ + (vector unsigned int)(0xffffffffU,0xfff80000U,0xffffffffU,0xfff80000U) + #define ALTI_MSK (vector unsigned int)(DSFMT_MSK32_1, \ + DSFMT_MSK32_2, DSFMT_MSK32_3, DSFMT_MSK32_4) +#else /* For OTHER OSs(Linux?) */ + #define ALTI_SL1 {3, 3, 3, 3} + #define ALTI_SL1_PERM \ + {2,3,4,5,6,7,30,30,10,11,12,13,14,15,0,1} + #define ALTI_SL1_MSK \ + {0xffffffffU,0xfff80000U,0xffffffffU,0xfff80000U} + #define ALTI_MSK \ + {DSFMT_MSK32_1, DSFMT_MSK32_2, DSFMT_MSK32_3, DSFMT_MSK32_4} +#endif + +#endif /* DSFMT_PARAMS19937_H */ diff --git a/share/shr_RandNum/include/dSFMT.h b/share/shr_RandNum/include/dSFMT.h new file mode 100644 index 000000000000..c5debf971da7 --- /dev/null +++ b/share/shr_RandNum/include/dSFMT.h @@ -0,0 +1,634 @@ +#pragma once +/** + * @file dSFMT.h + * + * @brief double precision SIMD oriented Fast Mersenne Twister(dSFMT) + * pseudorandom number generator based on IEEE 754 format. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (Hiroshima University) + * + * Copyright (C) 2007, 2008 Mutsuo Saito, Makoto Matsumoto and + * Hiroshima University. All rights reserved. + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, + * Hiroshima University and The University of Tokyo. + * All rights reserved. + * + * The new BSD License is applied to this software. + * see LICENSE.txt + * + * @note We assume that your system has inttypes.h. If your system + * doesn't have inttypes.h, you have to typedef uint32_t and uint64_t, + * and you have to define PRIu64 and PRIx64 in this file as follows: + * @verbatim + typedef unsigned int uint32_t + typedef unsigned long long uint64_t + #define PRIu64 "llu" + #define PRIx64 "llx" +@endverbatim + * uint32_t must be exactly 32-bit unsigned integer type (no more, no + * less), and uint64_t must be exactly 64-bit unsigned integer type. + * PRIu64 and PRIx64 are used for printf function to print 64-bit + * unsigned int and 64-bit unsigned int in hexadecimal format. + */ + +#ifndef DSFMT_H +#define DSFMT_H +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include + +#if !defined(DSFMT_MEXP) +#ifdef __GNUC__ + #warning "DSFMT_MEXP is not defined. I assume DSFMT_MEXP is 19937." +#endif + #define DSFMT_MEXP 19937 +#endif +/*----------------- + BASIC DEFINITIONS + -----------------*/ +/* Mersenne Exponent. The period of the sequence + * is a multiple of 2^DSFMT_MEXP-1. + * #define DSFMT_MEXP 19937 */ +/** DSFMT generator has an internal state array of 128-bit integers, + * and N is its size. */ +#define DSFMT_N ((DSFMT_MEXP - 128) / 104 + 1) +/** N32 is the size of internal state array when regarded as an array + * of 32-bit integers.*/ +#define DSFMT_N32 (DSFMT_N * 4) +/** N64 is the size of internal state array when regarded as an array + * of 64-bit integers.*/ +#define DSFMT_N64 (DSFMT_N * 2) + +#if !defined(DSFMT_BIG_ENDIAN) +# if defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) +# if __BYTE_ORDER == __BIG_ENDIAN +# define DSFMT_BIG_ENDIAN 1 +# endif +# elif defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) +# if _BYTE_ORDER == _BIG_ENDIAN +# define DSFMT_BIG_ENDIAN 1 +# endif +# elif defined(__BYTE_ORDER__) && defined(__BIG_ENDIAN__) +# if __BYTE_ORDER__ == __BIG_ENDIAN__ +# define DSFMT_BIG_ENDIAN 1 +# endif +# elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) +# if BYTE_ORDER == BIG_ENDIAN +# define DSFMT_BIG_ENDIAN 1 +# endif +# elif defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN) \ + || defined(__BIG_ENDIAN__) || defined(BIG_ENDIAN) +# define DSFMT_BIG_ENDIAN 1 +# endif +#endif + +#if defined(DSFMT_BIG_ENDIAN) && defined(__amd64) +# undef DSFMT_BIG_ENDIAN +#endif + +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +# include +#elif defined(_MSC_VER) || defined(__BORLANDC__) +# if !defined(DSFMT_UINT32_DEFINED) && !defined(SFMT_UINT32_DEFINED) +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; +# define UINT64_C(v) (v ## ui64) +# define DSFMT_UINT32_DEFINED +# if !defined(inline) +# define inline __inline +# endif +# endif +#else +# include +# if !defined(inline) +# if defined(__GNUC__) +# define inline __inline__ +# else +# define inline +# endif +# endif +#endif + +#ifndef PRIu64 +# if defined(_MSC_VER) || defined(__BORLANDC__) +# define PRIu64 "I64u" +# define PRIx64 "I64x" +# else +# define PRIu64 "llu" +# define PRIx64 "llx" +# endif +#endif + +#ifndef UINT64_C +# define UINT64_C(v) (v ## ULL) +#endif + +/*------------------------------------------ + 128-bit SIMD like data type for standard C + ------------------------------------------*/ +#if defined(HAVE_ALTIVEC) +# if !defined(__APPLE__) +# include +# endif +/** 128-bit data structure */ +union W128_T { + vector unsigned int s; + uint64_t u[2]; + uint32_t u32[4]; + double d[2]; +}; + +#elif defined(HAVE_SSE2) +# include + +/** 128-bit data structure */ +union W128_T { + __m128i si; + __m128d sd; + uint64_t u[2]; + uint32_t u32[4]; + double d[2]; +}; +#else /* standard C */ +/** 128-bit data structure */ +union W128_T { + uint64_t u[2]; + uint32_t u32[4]; + double d[2]; +}; +#endif + +/** 128-bit data type */ +typedef union W128_T w128_t; + +/** the 128-bit internal state array */ +struct DSFMT_T { + w128_t status[DSFMT_N + 1]; + int idx; +}; +typedef struct DSFMT_T dsfmt_t; + +/** dsfmt internal state vector */ +extern dsfmt_t dsfmt_global_data; +/** dsfmt mexp for check */ +extern const int dsfmt_global_mexp; + +void dsfmt_gen_rand_all(dsfmt_t *dsfmt); +void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp); +void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length, int mexp); +const char *dsfmt_get_idstring(void); +int dsfmt_get_min_array_size(void); + +#if defined(__GNUC__) +# define DSFMT_PRE_INLINE inline static +# define DSFMT_PST_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) && _MSC_VER >= 1200 +# define DSFMT_PRE_INLINE __forceinline static +# define DSFMT_PST_INLINE +#else +# define DSFMT_PRE_INLINE inline static +# define DSFMT_PST_INLINE +#endif +DSFMT_PRE_INLINE uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_genrand_close1_open2(dsfmt_t *dsfmt) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_genrand_close_open(dsfmt_t *dsfmt) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_genrand_open_close(dsfmt_t *dsfmt) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_genrand_open_open(dsfmt_t *dsfmt) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE uint32_t dsfmt_gv_genrand_uint32(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_close1_open2(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_close_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_open_close(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_open_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_open_close(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_close_open(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_open_open(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_close1_open2(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_init_gen_rand(uint32_t seed) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_init_by_array(uint32_t init_key[], + int key_length) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length) DSFMT_PST_INLINE; + +/** + * This function generates and returns unsigned 32-bit integer. + * This is slower than SFMT, only for convenience usage. + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) { + uint32_t r; + uint64_t *psfmt64 = &dsfmt->status[0].u[0]; + + if (dsfmt->idx >= DSFMT_N64) { + dsfmt_gen_rand_all(dsfmt); + dsfmt->idx = 0; + } + r = psfmt64[dsfmt->idx++] & 0xffffffffU; + return r; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [1, 2). This is + * the primitive and faster than generating numbers in other ranges. + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_close1_open2(dsfmt_t *dsfmt) { + double r; + double *psfmt64 = &dsfmt->status[0].d[0]; + + if (dsfmt->idx >= DSFMT_N64) { + dsfmt_gen_rand_all(dsfmt); + dsfmt->idx = 0; + } + r = psfmt64[dsfmt->idx++]; + return r; +} + +/** + * This function generates and returns unsigned 32-bit integer. + * This is slower than SFMT, only for convenience usage. + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static uint32_t dsfmt_gv_genrand_uint32(void) { + return dsfmt_genrand_uint32(&dsfmt_global_data); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [1, 2). + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_close1_open2(void) { + return dsfmt_genrand_close1_open2(&dsfmt_global_data); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [0, 1). + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_close_open(dsfmt_t *dsfmt) { + return dsfmt_genrand_close1_open2(dsfmt) - 1.0; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [0, 1). + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_close_open(void) { + return dsfmt_gv_genrand_close1_open2() - 1.0; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1]. + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_open_close(dsfmt_t *dsfmt) { + return 2.0 - dsfmt_genrand_close1_open2(dsfmt); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1]. + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_open_close(void) { + return 2.0 - dsfmt_gv_genrand_close1_open2(); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1). + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_open_open(dsfmt_t *dsfmt) { + double *dsfmt64 = &dsfmt->status[0].d[0]; + union { + double d; + uint64_t u; + } r; + + if (dsfmt->idx >= DSFMT_N64) { + dsfmt_gen_rand_all(dsfmt); + dsfmt->idx = 0; + } + r.d = dsfmt64[dsfmt->idx++]; + r.u |= 1; + return r.d - 1.0; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1). + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_open_open(void) { + return dsfmt_genrand_open_open(&dsfmt_global_data); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [1, 2) to the + * specified array[] by one call. This function is the same as + * dsfmt_fill_array_close1_open2() except that this function uses + * \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_close1_open2(double array[], int size) { + dsfmt_fill_array_close1_open2(&dsfmt_global_data, array, size); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1] to the + * specified array[] by one call. This function is the same as + * dsfmt_gv_fill_array_close1_open2() except the distribution range. + * This function uses \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() and \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_open_close(double array[], int size) { + dsfmt_fill_array_open_close(&dsfmt_global_data, array, size); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [0, 1) to the + * specified array[] by one call. This function is the same as + * dsfmt_gv_fill_array_close1_open2() except the distribution range. + * This function uses \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_close_open(double array[], int size) { + dsfmt_fill_array_close_open(&dsfmt_global_data, array, size); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1) to the + * specified array[] by one call. This function is the same as + * dsfmt_gv_fill_array_close1_open2() except the distribution range. + * This function uses \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_open_open(double array[], int size) { + dsfmt_fill_array_open_open(&dsfmt_global_data, array, size); +} + +/** + * This function initializes the internal state array with a 32-bit + * integer seed. + * @param dsfmt dsfmt state vector. + * @param seed a 32-bit integer used as the seed. + */ +inline static void dsfmt_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed) { + dsfmt_chk_init_gen_rand(dsfmt, seed, DSFMT_MEXP); +} + +/** + * This function initializes the internal state array with a 32-bit + * integer seed. This function uses \b global variables. + * @param seed a 32-bit integer used as the seed. + * see also \sa dsfmt_init_gen_rand() + */ +inline static void dsfmt_gv_init_gen_rand(uint32_t seed) { + dsfmt_init_gen_rand(&dsfmt_global_data, seed); +} + +/** + * This function initializes the internal state array, + * with an array of 32-bit integers used as the seeds. + * @param dsfmt dsfmt state vector + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + */ +inline static void dsfmt_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length) { + dsfmt_chk_init_by_array(dsfmt, init_key, key_length, DSFMT_MEXP); +} + +/** + * This function initializes the internal state array, + * with an array of 32-bit integers used as the seeds. + * This function uses \b global variables. + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + * see also \sa dsfmt_init_by_array() + */ +inline static void dsfmt_gv_init_by_array(uint32_t init_key[], int key_length) { + dsfmt_init_by_array(&dsfmt_global_data, init_key, key_length); +} + +#if !defined(DSFMT_DO_NOT_USE_OLD_NAMES) +DSFMT_PRE_INLINE const char *get_idstring(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE int get_min_array_size(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void init_gen_rand(uint32_t seed) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void init_by_array(uint32_t init_key[], int key_length) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_close1_open2(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_close_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_open_close(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_open_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_open_close(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_close_open(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_open_open(double array[], int size) + DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_close1_open2(double array[], int size) + DSFMT_PST_INLINE; + +/** + * This function is just the same as dsfmt_get_idstring(). + * @return id string. + * see also \sa dsfmt_get_idstring() + */ +inline static const char *get_idstring(void) { + return dsfmt_get_idstring(); +} + +/** + * This function is just the same as dsfmt_get_min_array_size(). + * @return minimum size of array used for fill_array functions. + * see also \sa dsfmt_get_min_array_size() + */ +inline static int get_min_array_size(void) { + return dsfmt_get_min_array_size(); +} + +/** + * This function is just the same as dsfmt_gv_init_gen_rand(). + * @param seed a 32-bit integer used as the seed. + * see also \sa dsfmt_gv_init_gen_rand(), \sa dsfmt_init_gen_rand(). + */ +inline static void init_gen_rand(uint32_t seed) { + dsfmt_gv_init_gen_rand(seed); +} + +/** + * This function is just the same as dsfmt_gv_init_by_array(). + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + * see also \sa dsfmt_gv_init_by_array(), \sa dsfmt_init_by_array(). + */ +inline static void init_by_array(uint32_t init_key[], int key_length) { + dsfmt_gv_init_by_array(init_key, key_length); +} + +/** + * This function is just the same as dsfmt_gv_genrand_close1_open2(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_close1_open2() \sa + * dsfmt_gv_genrand_close1_open2() + */ +inline static double genrand_close1_open2(void) { + return dsfmt_gv_genrand_close1_open2(); +} + +/** + * This function is just the same as dsfmt_gv_genrand_close_open(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_close_open() \sa + * dsfmt_gv_genrand_close_open() + */ +inline static double genrand_close_open(void) { + return dsfmt_gv_genrand_close_open(); +} + +/** + * This function is just the same as dsfmt_gv_genrand_open_close(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_open_close() \sa + * dsfmt_gv_genrand_open_close() + */ +inline static double genrand_open_close(void) { + return dsfmt_gv_genrand_open_close(); +} + +/** + * This function is just the same as dsfmt_gv_genrand_open_open(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_open_open() \sa + * dsfmt_gv_genrand_open_open() + */ +inline static double genrand_open_open(void) { + return dsfmt_gv_genrand_open_open(); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_open_close(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_gv_fill_array_open_close(), \sa + * dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_open_close(double array[], int size) { + dsfmt_gv_fill_array_open_close(array, size); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_close_open(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_gv_fill_array_close_open(), \sa + * dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_close_open(double array[], int size) { + dsfmt_gv_fill_array_close_open(array, size); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_open_open(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_gv_fill_array_open_open(), \sa + * dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_open_open(double array[], int size) { + dsfmt_gv_fill_array_open_open(array, size); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_close1_open2(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_close1_open2(double array[], int size) { + dsfmt_gv_fill_array_close1_open2(array, size); +} +#endif /* DSFMT_DO_NOT_USE_OLD_NAMES */ + +#if defined(__cplusplus) +} +#endif + +#endif /* DSFMT_H */ diff --git a/share/shr_RandNum/src/dsfmt_f03/dSFMT.c b/share/shr_RandNum/src/dsfmt_f03/dSFMT.c new file mode 100644 index 000000000000..e86449ac32af --- /dev/null +++ b/share/shr_RandNum/src/dsfmt_f03/dSFMT.c @@ -0,0 +1,633 @@ +/** + * @file dSFMT.c + * @brief double precision SIMD-oriented Fast Mersenne Twister (dSFMT) + * based on IEEE 754 format. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (Hiroshima University) + * + * Copyright (C) 2007,2008 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * The new BSD License is applied to this software, see LICENSE.txt + */ +#include +#include +#include +#include "dSFMT-params.h" +#include "dSFMT-common.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** dsfmt internal state vector */ +dsfmt_t dsfmt_global_data; +/** dsfmt mexp for check */ +static const int dsfmt_mexp = DSFMT_MEXP; + +/*---------------- + STATIC FUNCTIONS + ----------------*/ +inline static uint32_t ini_func1(uint32_t x); +inline static uint32_t ini_func2(uint32_t x); +inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, + int size); +inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, + int size); +inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, + int size); +inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, + int size); +inline static int idxof(int i); +static void initial_mask(dsfmt_t *dsfmt); +static void period_certification(dsfmt_t *dsfmt); + +#if defined(HAVE_SSE2) +/** 1 in 64bit for sse2 */ +static const union X128I_T sse2_int_one = {{1, 1}}; +/** 2.0 double for sse2 */ +static const union X128D_T sse2_double_two = {{2.0, 2.0}}; +/** -1.0 double for sse2 */ +static const union X128D_T sse2_double_m_one = {{-1.0, -1.0}}; +#endif + +/** + * This function simulate a 32-bit array index overlapped to 64-bit + * array of LITTLE ENDIAN in BIG ENDIAN machine. + */ +#if defined(DSFMT_BIG_ENDIAN) +inline static int idxof(int i) { + return i ^ 1; +} +#else +inline static int idxof(int i) { + return i; +} +#endif + +#if defined(HAVE_SSE2) +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range [0, 1). + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_c0o1(w128_t *w) { + w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128); +} + +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range (0, 1]. + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_o0c1(w128_t *w) { + w->sd = _mm_sub_pd(sse2_double_two.d128, w->sd); +} + +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range (0, 1). + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_o0o1(w128_t *w) { + w->si = _mm_or_si128(w->si, sse2_int_one.i128); + w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128); +} +#else /* standard C and altivec */ +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range [0, 1). + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_c0o1(w128_t *w) { + w->d[0] -= 1.0; + w->d[1] -= 1.0; +} + +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range (0, 1]. + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_o0c1(w128_t *w) { + w->d[0] = 2.0 - w->d[0]; + w->d[1] = 2.0 - w->d[1]; +} + +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range (0, 1). + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_o0o1(w128_t *w) { + w->u[0] |= 1; + w->u[1] |= 1; + w->d[0] -= 1.0; + w->d[1] -= 1.0; +} +#endif + +/** + * This function fills the user-specified array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, + int size) { + int i, j; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], + &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1], &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + } + for (; i < size - DSFMT_N; i++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + } + for (j = 0; j < 2 * DSFMT_N - size; j++) { + dsfmt->status[j] = array[j + size - DSFMT_N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + dsfmt->status[j] = array[i]; + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function fills the user-specified array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, + int size) { + int i, j; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], + &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1], &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + } + for (; i < size - DSFMT_N; i++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + convert_c0o1(&array[i - DSFMT_N]); + } + for (j = 0; j < 2 * DSFMT_N - size; j++) { + dsfmt->status[j] = array[j + size - DSFMT_N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + dsfmt->status[j] = array[i]; + convert_c0o1(&array[i - DSFMT_N]); + } + for (i = size - DSFMT_N; i < size; i++) { + convert_c0o1(&array[i]); + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function fills the user-specified array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, + int size) { + int i, j; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], + &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1], &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + } + for (; i < size - DSFMT_N; i++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + convert_o0o1(&array[i - DSFMT_N]); + } + for (j = 0; j < 2 * DSFMT_N - size; j++) { + dsfmt->status[j] = array[j + size - DSFMT_N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + dsfmt->status[j] = array[i]; + convert_o0o1(&array[i - DSFMT_N]); + } + for (i = size - DSFMT_N; i < size; i++) { + convert_o0o1(&array[i]); + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function fills the user-specified array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, + int size) { + int i, j; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], + &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1], &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&array[i], &dsfmt->status[i], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + } + for (; i < size - DSFMT_N; i++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + convert_o0c1(&array[i - DSFMT_N]); + } + for (j = 0; j < 2 * DSFMT_N - size; j++) { + dsfmt->status[j] = array[j + size - DSFMT_N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + dsfmt->status[j] = array[i]; + convert_o0c1(&array[i - DSFMT_N]); + } + for (i = size - DSFMT_N; i < size; i++) { + convert_o0c1(&array[i]); + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function represents a function used in the initialization + * by init_by_array + * @param x 32-bit integer + * @return 32-bit integer + */ +static uint32_t ini_func1(uint32_t x) { + return (x ^ (x >> 27)) * (uint32_t)1664525UL; +} + +/** + * This function represents a function used in the initialization + * by init_by_array + * @param x 32-bit integer + * @return 32-bit integer + */ +static uint32_t ini_func2(uint32_t x) { + return (x ^ (x >> 27)) * (uint32_t)1566083941UL; +} + +/** + * This function initializes the internal state array to fit the IEEE + * 754 format. + * @param dsfmt dsfmt state vector. + */ +static void initial_mask(dsfmt_t *dsfmt) { + int i; + uint64_t *psfmt; + + psfmt = &dsfmt->status[0].u[0]; + for (i = 0; i < DSFMT_N * 2; i++) { + psfmt[i] = (psfmt[i] & DSFMT_LOW_MASK) | DSFMT_HIGH_CONST; + } +} + +/** + * This function certificate the period of 2^{SFMT_MEXP}-1. + * @param dsfmt dsfmt state vector. + */ +static void period_certification(dsfmt_t *dsfmt) { + uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2}; + uint64_t tmp[2]; + uint64_t inner; + int i; +#if (DSFMT_PCV2 & 1) != 1 + int j; + uint64_t work; +#endif + + tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1); + tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2); + + inner = tmp[0] & pcv[0]; + inner ^= tmp[1] & pcv[1]; + for (i = 32; i > 0; i >>= 1) { + inner ^= inner >> i; + } + inner &= 1; + /* check OK */ + if (inner == 1) { + return; + } + /* check NG, and modification */ +#if (DSFMT_PCV2 & 1) == 1 + dsfmt->status[DSFMT_N].u[1] ^= 1; +#else + for (i = 1; i >= 0; i--) { + work = 1; + for (j = 0; j < 64; j++) { + if ((work & pcv[i]) != 0) { + dsfmt->status[DSFMT_N].u[i] ^= work; + return; + } + work = work << 1; + } + } +#endif + return; +} + +/*---------------- + PUBLIC FUNCTIONS + ----------------*/ +/** + * This function returns the identification string. The string shows + * the Mersenne exponent, and all parameters of this generator. + * @return id string. + */ +const char *dsfmt_get_idstring(void) { + return DSFMT_IDSTR; +} + +/** + * This function returns the minimum size of array used for \b + * fill_array functions. + * @return minimum size of array used for fill_array functions. + */ +int dsfmt_get_min_array_size(void) { + return DSFMT_N64; +} + +/** + * This function fills the internal state array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + */ +void dsfmt_gen_rand_all(dsfmt_t *dsfmt) { + int i; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&dsfmt->status[0], &dsfmt->status[0], + &dsfmt->status[DSFMT_POS1], &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&dsfmt->status[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1], &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&dsfmt->status[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1 - DSFMT_N], &lung); + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [1, 2) to the + * specified array[] by one call. The number of pseudorandom numbers + * is specified by the argument \b size, which must be at least (SFMT_MEXP + * / 128) * 2 and a multiple of two. The function + * get_min_array_size() returns this minimum size. The generation by + * this function is much faster than the following fill_array_xxx functions. + * + * For initialization, init_gen_rand() or init_by_array() must be called + * before the first call of this function. This function can not be + * used after calling genrand_xxx functions, without initialization. + * + * @param dsfmt dsfmt state vector. + * @param array an array where pseudorandom numbers are filled + * by this function. The pointer to the array must be "aligned" + * (namely, must be a multiple of 16) in the SIMD version, since it + * refers to the address of a 128-bit integer. In the standard C + * version, the pointer is arbitrary. + * + * @param size the number of 64-bit pseudorandom integers to be + * generated. size must be a multiple of 2, and greater than or equal + * to (SFMT_MEXP / 128) * 2. + * + * @note \b memalign or \b posix_memalign is available to get aligned + * memory. Mac OSX doesn't have these functions, but \b malloc of OSX + * returns the pointer to the aligned memory block. + */ +void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size) { + assert(size % 2 == 0); + assert(size >= DSFMT_N64); + gen_rand_array_c1o2(dsfmt, (w128_t *)array, size / 2); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1] to the + * specified array[] by one call. This function is the same as + * fill_array_close1_open2() except the distribution range. + * + * @param dsfmt dsfmt state vector. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa fill_array_close1_open2() + */ +void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size) { + assert(size % 2 == 0); + assert(size >= DSFMT_N64); + gen_rand_array_o0c1(dsfmt, (w128_t *)array, size / 2); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [0, 1) to the + * specified array[] by one call. This function is the same as + * fill_array_close1_open2() except the distribution range. + * + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param dsfmt dsfmt state vector. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa fill_array_close1_open2() + */ +void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size) { + assert(size % 2 == 0); + assert(size >= DSFMT_N64); + gen_rand_array_c0o1(dsfmt, (w128_t *)array, size / 2); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1) to the + * specified array[] by one call. This function is the same as + * fill_array_close1_open2() except the distribution range. + * + * @param dsfmt dsfmt state vector. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa fill_array_close1_open2() + */ +void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size) { + assert(size % 2 == 0); + assert(size >= DSFMT_N64); + gen_rand_array_o0o1(dsfmt, (w128_t *)array, size / 2); +} + +#if defined(__INTEL_COMPILER) +# pragma warning(disable:981) +#endif +/** + * This function initializes the internal state array with a 32-bit + * integer seed. + * @param dsfmt dsfmt state vector. + * @param seed a 32-bit integer used as the seed. + * @param mexp caller's mersenne expornent + */ +void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp) { + int i; + uint32_t *psfmt; + + /* make sure caller program is compiled with the same MEXP */ + if (mexp != dsfmt_mexp) { + fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n"); + exit(1); + } + psfmt = &dsfmt->status[0].u32[0]; + psfmt[idxof(0)] = seed; + for (i = 1; i < (DSFMT_N + 1) * 4; i++) { + psfmt[idxof(i)] = 1812433253UL + * (psfmt[idxof(i - 1)] ^ (psfmt[idxof(i - 1)] >> 30)) + i; + } + initial_mask(dsfmt); + period_certification(dsfmt); + dsfmt->idx = DSFMT_N64; +} + +/** + * This function initializes the internal state array, + * with an array of 32-bit integers used as the seeds + * @param dsfmt dsfmt state vector. + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + * @param mexp caller's mersenne expornent + */ +void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length, int mexp) { + int i, j, count; + uint32_t r; + uint32_t *psfmt32; + int lag; + int mid; + int size = (DSFMT_N + 1) * 4; /* pulmonary */ + + /* make sure caller program is compiled with the same MEXP */ + if (mexp != dsfmt_mexp) { + fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n"); + exit(1); + } + if (size >= 623) { + lag = 11; + } else if (size >= 68) { + lag = 7; + } else if (size >= 39) { + lag = 5; + } else { + lag = 3; + } + mid = (size - lag) / 2; + + psfmt32 = &dsfmt->status[0].u32[0]; + memset(dsfmt->status, 0x8b, sizeof(dsfmt->status)); + if (key_length + 1 > size) { + count = key_length + 1; + } else { + count = size; + } + r = ini_func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid % size)] + ^ psfmt32[idxof((size - 1) % size)]); + psfmt32[idxof(mid % size)] += r; + r += key_length; + psfmt32[idxof((mid + lag) % size)] += r; + psfmt32[idxof(0)] = r; + count--; + for (i = 1, j = 0; (j < count) && (j < key_length); j++) { + r = ini_func1(psfmt32[idxof(i)] + ^ psfmt32[idxof((i + mid) % size)] + ^ psfmt32[idxof((i + size - 1) % size)]); + psfmt32[idxof((i + mid) % size)] += r; + r += init_key[j] + i; + psfmt32[idxof((i + mid + lag) % size)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % size; + } + for (; j < count; j++) { + r = ini_func1(psfmt32[idxof(i)] + ^ psfmt32[idxof((i + mid) % size)] + ^ psfmt32[idxof((i + size - 1) % size)]); + psfmt32[idxof((i + mid) % size)] += r; + r += i; + psfmt32[idxof((i + mid + lag) % size)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % size; + } + for (j = 0; j < size; j++) { + r = ini_func2(psfmt32[idxof(i)] + + psfmt32[idxof((i + mid) % size)] + + psfmt32[idxof((i + size - 1) % size)]); + psfmt32[idxof((i + mid) % size)] ^= r; + r -= i; + psfmt32[idxof((i + mid + lag) % size)] ^= r; + psfmt32[idxof(i)] = r; + i = (i + 1) % size; + } + initial_mask(dsfmt); + period_certification(dsfmt); + dsfmt->idx = DSFMT_N64; +} +#if defined(__INTEL_COMPILER) +# pragma warning(default:981) +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/share/shr_RandNum/src/dsfmt_f03/dSFMT_interface.F90 b/share/shr_RandNum/src/dsfmt_f03/dSFMT_interface.F90 new file mode 100644 index 000000000000..e312647e71fb --- /dev/null +++ b/share/shr_RandNum/src/dsfmt_f03/dSFMT_interface.F90 @@ -0,0 +1,562 @@ +module dSFMT_interface + +! copyright James Spencer 2012. +! New BSD License, see License.txt for details. + +! This contains a wrapper around a subset of of the functionality in dSFMT RNG +! (double precision SIMD-oriented Fast Mersenne Twister random number +! generator). + +! Functions which return a single (pseudo-)random number are defined as inline +! in the original dSFMT source. Accessing these functions is quite slow +! (compared to C) in Fortran as we can't inline them (or indeed call them +! without modifying the C source). A more efficient way is to fill up an array +! of random numbers and then access the array as a stream, refilling the array +! as needed. By thus amortizing the cost of calling non-inlined functions +! across many random numbers, the cost becomes very competitive with +! a C implementation. Functions are provided which return a single random +! number and refill the storage array when required, thus giving a fast +! interface which closely mirrors that of the C implementation. + +! Usage (for each psuedo-random stream): +! i) call dSFMT_init. +! ii) access desired number of random numbers either by calling +! a get_rand_xxx function the required number of times or by filling up +! user-defined arrays with a fill_array_xxx subroutine. +! iii) call dSFMT_end to free all memory. + +! Note that an array must have at least a minimum number of elements (as given +! in dsfmt_get_min_array_size). + +! IMPORTANT: see the warning below about calling multiple get_rand_xxx +! functions. + +! Match definition of DSFMT_MEXP in dSFMT.h. +#if !defined(DSFMT_MEXP) +#define DSFMT_MEXP 19937 +#endif + +use, intrinsic :: iso_c_binding +implicit none + +private +public :: dSFMT_t, dSFMT_init, dSFMT_end, dSFMT_reset, & + fill_array_close_open, & + fill_array_open_close, & + fill_array_open_open, & + fill_array_gaussian, & + get_rand_close_open, & + get_rand_open_close, & + get_rand_open_open, & + get_rand_gaussian, & + get_rand_arr_close_open, & + get_rand_arr_open_close, & + get_rand_arr_open_open, & + get_rand_arr_gaussian, & + dsfmt_get_min_array_size, & + unset, close_open, open_close, open_open + +! Expose functions from C as needed. +! See dSFMT documentation for details. +interface + pure function dsfmt_get_min_array_size() result(min_size) bind(c, name='dsfmt_get_min_array_size') + import :: c_int32_t + integer(c_int32_t) :: min_size + end function dsfmt_get_min_array_size + + pure function malloc_dsfmt_t() result(dsfmt_state) bind(c, name='malloc_dsfmt_t') + import :: c_ptr + type(c_ptr) :: dsfmt_state + end function malloc_dsfmt_t + pure subroutine free_dsfmt_t(dsfmt_state) bind(c, name='free_dsfmt_t') + import :: c_ptr + type(c_ptr), value, intent(in) :: dsfmt_state + end subroutine free_dsfmt_t + + pure subroutine dsfmt_chk_init_gen_rand(dsfmt_state, seed, mexp) bind(c, name='dsfmt_chk_init_gen_rand') + import :: c_ptr, c_int32_t + type(c_ptr), value, intent(in) :: dsfmt_state + integer(c_int32_t), value, intent(in) :: seed, mexp + end subroutine dsfmt_chk_init_gen_rand + + pure subroutine dsfmt_fill_array_close_open(dSFMT_state, array, array_size) bind(c, name='dsfmt_fill_array_close_open') + import :: c_ptr, c_double, c_int32_t + type(c_ptr), value, intent(in) :: dSFMT_state + real(c_double), intent(out) :: array(*) + integer(c_int32_t), value, intent(in) :: array_size + end subroutine dsfmt_fill_array_close_open + pure subroutine dsfmt_fill_array_open_close(dSFMT_state, array, array_size) bind(c, name='dsfmt_fill_array_open_close') + import :: c_ptr, c_double, c_int32_t + type(c_ptr), value, intent(in) :: dSFMT_state + real(c_double), intent(out) :: array(*) + integer(c_int32_t), value, intent(in) :: array_size + end subroutine dsfmt_fill_array_open_close + pure subroutine dsfmt_fill_array_open_open(dSFMT_state, array, array_size) bind(c, name='dsfmt_fill_array_open_open') + import :: c_ptr, c_double, c_int32_t + type(c_ptr), value, intent(in) :: dSFMT_state + real(c_double), intent(out) :: array(*) + integer(c_int32_t), value, intent(in) :: array_size + end subroutine dsfmt_fill_array_open_open +end interface + +! distribution modes +integer, parameter :: unset = 2**0 +integer, parameter :: close_open = 2**1 +integer, parameter :: open_close = 2**2 +integer, parameter :: open_open = 2**3 +integer, parameter :: gaussian = 2**4 + +type dSFMT_t + private + ! Type of distribution in the random store + integer, public :: distribution + ! Pointer to the dsfmt_t internal state (as defined in dSFMT.h). + type(c_ptr) :: dSFMT_state = c_null_ptr + ! Testing indicates that 50000 is a very good size for the array storing the + ! random numbers. Testing was done standalone, so undoubtedly influenced by + ! cache size and this might be different for real-world applications. + integer(c_int) :: random_store_size + ! Seed passed to dSFMT. + integer :: seed + ! Store of random numbers. + real(c_double), allocatable :: random_store(:) + ! Index of the next random number to be returned from random_store. + integer :: next_element +end type dSFMT_t + +integer, parameter, private :: dp = selected_real_kind(15,307) + +contains + +!--- Initialisation, termination and state interaction --- + + subroutine dSFMT_init(seed, rng_store_size, rng) + + ! Initialise the dSFMT RNG and fill rng%random_store with + ! a block of random numbers in interval [0,1). + ! + ! In: + ! seed: seed for the RNG. + ! rng_store_size: number of random numbers to store at once. + ! dsfmt_get_min_array_size() is used if + ! rng_store_size < dsfmt_get_min_array_size(). + ! Out: + ! rng: dSFMT_t with internal variables initialised and associated + ! with an initialised psuedo-random number stream. + ! If rng has already been initialised, then no additional memory + ! allocations are performed unless rng_store_size has changed. + + integer, intent(in) :: seed, rng_store_size + type(dSFMT_t), intent(inout) :: rng + + if (dsfmt_get_min_array_size() > rng_store_size) then + rng%random_store_size = dsfmt_get_min_array_size() + else + rng%random_store_size = rng_store_size + end if + + if (allocated(rng%random_store)) then + if (size(rng%random_store) /= rng%random_store_size) then + deallocate(rng%random_store) + allocate(rng%random_store(rng%random_store_size)) + end if + else + allocate(rng%random_store(rng%random_store_size)) + end if + + ! Set current element to be larger than the store, so it is + ! filled in the first call to the get_* functions. + rng%next_element = rng%random_store_size + 1 + ! But the store has not yet been filled. + rng%distribution = unset + + rng%seed = int(seed, c_int32_t) + + ! Create dSFMT state + ! Why is c_associated not pure? I have not been able to find any good + ! reason for this to be the case... + if (.not.c_associated(rng%dSFMT_state)) rng%dSFMT_state = malloc_dsfmt_t() + + ! Initialise dSFMT PRNG. + call dsfmt_chk_init_gen_rand(rng%dSFMT_state, rng%seed, DSFMT_MEXP) + + end subroutine dSFMT_init + + pure subroutine dSFMT_end(rng) + + ! Deallocate and reset a dSFMT_t variable safely. + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is closed on output and memory + ! associated with it is deallocated. + + type(dSFMT_t), intent(inout) :: rng + + ! Destroy dSFMT state + call free_dsfmt_t(rng%dSFMT_state) + + ! Reset store. + deallocate(rng%random_store) + rng%random_store_size = -1 + rng%next_element = -1 + + end subroutine dSFMT_end + + pure subroutine dSFMT_reset(rng) + + ! Reset the dSFMT state such that the next get_rand_xxx call requires + ! the store of random numbers to be refilled. + + ! In/Out: + ! rng: dSFMT_t. The internal store of random numbers is cleared. + + type(dSFMT_t), intent(inout) :: rng + + rng%next_element = rng%random_store_size + 1 + rng%random_store = huge(1.0_dp) + rng%distribution = unset + + end subroutine dSFMT_reset + +!--- Get arrays of random numbers --- + + pure subroutine fill_array_close_open(rng, array) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. + ! Out: + ! array: array filled with random numbers in the interval [0,1). + + ! NOTE: array must be at least as large as the size returned by dsfmt_get_min_array_size. + + type(dSFMT_t), intent(inout) :: rng + real(c_double), intent(out) :: array(:) + + call dsfmt_fill_array_close_open(rng%dSFMT_state, array, size(array)) + + end subroutine fill_array_close_open + + pure subroutine fill_array_open_close(rng, array) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. + ! Out: + ! array: array filled with random numbers in the interval (0,1]. + + ! NOTE: array must be at least as large as the size returned by dsfmt_get_min_array_size. + + type(dSFMT_t), intent(inout) :: rng + real(c_double), intent(out) :: array(:) + + call dsfmt_fill_array_open_close(rng%dSFMT_state, array, size(array)) + + end subroutine fill_array_open_close + + pure subroutine fill_array_open_open(rng, array) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. + ! Out: + ! array: array filled with random numbers in the interval (0,1). + + ! NOTE: array must be at least as large as the size returned by dsfmt_get_min_array_size. + + type(dSFMT_t), intent(inout) :: rng + real(c_double), intent(out) :: array(:) + + call dsfmt_fill_array_open_open(rng%dSFMT_state, array, size(array)) + + end subroutine fill_array_open_open + + pure subroutine fill_array_gaussian(rng, array) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. + ! Out: + ! array: array filled with random numbers with a standard normal + ! (Gaussian) distribution (i.e. mean=0, variance=1). + + ! NOTE: array must be at least as large as the size returned by dsfmt_get_min_array_size. + + type(dSFMT_t), intent(inout) :: rng + real(c_double), intent(out) :: array(:) + + real(dp), parameter :: pi = acos(-1.0_dp) + integer :: i + real(c_double) :: s, t + + call dsfmt_fill_array_open_close(rng%dSFMT_state, array, size(array)) + + ! The Box-Muller transform converts a pair of random numbers, u, v, + ! which are uniformly distributed in (0,1] into a pair of random + ! numbers, y, z, which have a standard normal distribution using + ! y = R cos(\phi) = \sqrt(-2*ln(u)) cos(2\pi v) + ! z = R sin(\phi) = \sqrt(-2*ln(u)) sin(2\pi v) + ! See http://en.wikipedia.org/wiki/Box_Muller_transform for more + ! details. + ! This might be slower than the Marsaglia polar method but it doesn't + ! involve any rejections, which is convenient for us. + do i = 1, ubound(array, dim=1), 2 + s = sqrt(-2*log(array(i))) + t = 2*pi*array(i+1) + array(i) = s*cos(t) + array(i+1) = s*sin(t) + end do + + end subroutine fill_array_gaussian + +!--- Get a random number --- + +! WARNING: these calls use a store internal to the dSFMT_t variable. Calls to +! different functions with the same state must be separated either by a call to +! dSFMT_reset or a call to dSFMT_end followed by reinitialisation of the dSFMT_t +! state using dSFMT_init. + + function get_rand_close_open(rng) result(r) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + + ! Returns: + ! random number in interval [0,1). + + real(dp) :: r + type(dSFMT_t), intent(inout) :: rng + + if (rng%next_element == rng%random_store_size+1) then + ! Run out of random numbers: get more. + call dsfmt_fill_array_close_open(rng%dSFMT_state, rng%random_store, rng%random_store_size) + rng%next_element = 1 + rng%distribution = close_open + end if + + r = rng%random_store(rng%next_element) + rng%next_element = rng%next_element + 1 + + end function get_rand_close_open + + function get_rand_open_close(rng) result(r) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + + ! Returns: + ! random number in interval (0,1]. + + real(dp) :: r + type(dSFMT_t), intent(inout) :: rng + + if (rng%next_element == rng%random_store_size+1) then + ! Run out of random numbers: get more. + call dsfmt_fill_array_open_close(rng%dSFMT_state, rng%random_store, rng%random_store_size) + rng%next_element = 1 + rng%distribution = open_close + end if + + r = rng%random_store(rng%next_element) + rng%next_element = rng%next_element + 1 + + end function get_rand_open_close + + function get_rand_open_open(rng) result(r) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + + ! Returns: + ! random number in interval (0,1). + + real(dp) :: r + type(dSFMT_t), intent(inout) :: rng + + if (rng%next_element == rng%random_store_size+1) then + ! Run out of random numbers: get more. + call dsfmt_fill_array_open_open(rng%dSFMT_state, rng%random_store, rng%random_store_size) + rng%next_element = 1 + rng%distribution = open_open + end if + + r = rng%random_store(rng%next_element) + rng%next_element = rng%next_element + 1 + + end function get_rand_open_open + + function get_rand_gaussian(rng) result(r) + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + + ! Returns: + ! random number with a standard normal (Gaussian) distribution (ie + ! mean=0 and variance=1). + + real(dp) :: r + type(dSFMT_t), intent(inout) :: rng + + if (rng%next_element == rng%random_store_size+1) then + ! Run out of random numbers: get more. + call fill_array_gaussian(rng, rng%random_store) + rng%next_element = 1 + rng%distribution = gaussian + end if + + r = rng%random_store(rng%next_element) + rng%next_element = rng%next_element + 1 + + end function get_rand_gaussian + +!--- Get array of random numbers from a store --- + +! If a small number of random numbers is repeatedly required, then filling up +! an array at a time comes with a large function call overhead. The following +! routines use an internal store, similar to the get_rand_xxx functions, but +! fill a small array in a single call. This can be faster than repeated +! get_rand_xxx calls as we need to only test how many elements are left in the +! store once rather than N times. + +! WARNING: these calls use a store internal to the dSFMT_t variable. Calls to +! different functions with the same state must be separated either by a call to +! dSFMT_reset or a call to dSFMT_end followed by reinitialisation of the dSFMT_t +! state using dSFMT_init. + + pure subroutine get_rand_arr_close_open(rng, arr, n) + + ! Fill an array with random numbers in interval [0,1). + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + ! Out: + ! arr: array of random numbers. Must contain at least n elements. + ! In: + ! n: number of random numbers to return in arr. + + type(dSFMT_t), intent(inout) :: rng + real(dp), intent(out) :: arr(:) + integer, intent(in) :: n + + integer :: navail, nleft + + if (rng%next_element + n <= rng%random_store_size) then + arr(1:n) = rng%random_store(rng%next_element:rng%next_element+n-1) + rng%next_element = rng%next_element + n + else + navail = rng%random_store_size - rng%next_element + 1 + arr(1:navail) = rng%random_store(rng%next_element:rng%random_store_size) + call dsfmt_fill_array_close_open(rng%dSFMT_state, rng%random_store, rng%random_store_size) + rng%distribution = close_open + nleft = n - navail + arr(navail+1:n) = rng%random_store(1:nleft) + rng%next_element = nleft + 1 + end if + + end subroutine get_rand_arr_close_open + + pure subroutine get_rand_arr_open_close(rng, arr, n) + + ! Fill an array with random numbers in interval (0,1]. + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + ! Out: + ! arr: array of random numbers. Must contain at least n elements. + ! In: + ! n: number of random numbers to return in arr. + + type(dSFMT_t), intent(inout) :: rng + real(dp), intent(out) :: arr(:) + integer, intent(in) :: n + + integer :: navail, nleft + + if (rng%next_element + n <= rng%random_store_size) then + arr(1:n) = rng%random_store(rng%next_element:rng%next_element+n-1) + rng%next_element = rng%next_element + n + else + navail = rng%random_store_size - rng%next_element + 1 + arr(1:navail) = rng%random_store(rng%next_element:rng%random_store_size) + call dsfmt_fill_array_open_close(rng%dSFMT_state, rng%random_store, rng%random_store_size) + rng%distribution = open_close + nleft = n - navail + arr(navail+1:n) = rng%random_store(1:nleft) + rng%next_element = nleft + 1 + end if + + end subroutine get_rand_arr_open_close + + pure subroutine get_rand_arr_open_open(rng, arr, n) + + ! Fill an array with random numbers in interval (0,1). + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + ! Out: + ! arr: array of random numbers. Must contain at least n elements. + ! In: + ! n: number of random numbers to return in arr. + + type(dSFMT_t), intent(inout) :: rng + real(dp), intent(out) :: arr(:) + integer, intent(in) :: n + + integer :: navail, nleft + + if (rng%next_element + n <= rng%random_store_size) then + arr(1:n) = rng%random_store(rng%next_element:rng%next_element+n-1) + rng%next_element = rng%next_element + n + else + navail = rng%random_store_size - rng%next_element + 1 + arr(1:navail) = rng%random_store(rng%next_element:rng%random_store_size) + call dsfmt_fill_array_open_open(rng%dSFMT_state, rng%random_store, rng%random_store_size) + rng%distribution = open_open + nleft = n - navail + arr(navail+1:n) = rng%random_store(1:nleft) + rng%next_element = nleft + 1 + end if + + end subroutine get_rand_arr_open_open + + pure subroutine get_rand_arr_gaussian(rng, arr, n) + + ! Fill an array with random numbers in interval (0,1). + + ! In/Out: + ! rng: dSFMT_t. The dSFMT state is updated by the request for random + ! numbers. The store of random numbers is refilled if necessary. + ! Out: + ! arr: array of random numbers. Must contain at least n elements. + ! In: + ! n: number of random numbers to return in arr. + + type(dSFMT_t), intent(inout) :: rng + real(dp), intent(out) :: arr(:) + integer, intent(in) :: n + + integer :: navail, nleft + + if (rng%next_element + n <= rng%random_store_size) then + arr(1:n) = rng%random_store(rng%next_element:rng%next_element+n-1) + rng%next_element = rng%next_element + n + else + navail = rng%random_store_size - rng%next_element + 1 + arr(1:navail) = rng%random_store(rng%next_element:rng%random_store_size) + call fill_array_gaussian(rng, rng%random_store) + rng%distribution = gaussian + nleft = n - navail + arr(navail+1:n) = rng%random_store(1:nleft) + rng%next_element = nleft + 1 + end if + + end subroutine get_rand_arr_gaussian + +end module dSFMT_interface diff --git a/share/shr_RandNum/src/dsfmt_f03/dSFMT_utils.c b/share/shr_RandNum/src/dsfmt_f03/dSFMT_utils.c new file mode 100644 index 000000000000..77314de2f5ac --- /dev/null +++ b/share/shr_RandNum/src/dsfmt_f03/dSFMT_utils.c @@ -0,0 +1,18 @@ +#include "dSFMT.h" +#include + +/* copyright James Spencer 2012. + * New BSD License, see License.txt for details. + */ + +/* Utility (memory-access) functions to enable use of dSFMT from Fortran. */ + +void* malloc_dsfmt_t(void) { + /* Allocate sufficient memory for a dSFMT state (ie a variable of type dsfmt_t). */ + return malloc(sizeof(dsfmt_t)); +} + +void free_dsfmt_t(dsfmt_t* ptr) { + /* Free memory associated with a dSFMT state (ie a variable of type dsfmt_t). */ + free(ptr); +} diff --git a/share/shr_RandNum/src/kissvec/kissvec.c b/share/shr_RandNum/src/kissvec/kissvec.c new file mode 100644 index 000000000000..deef6ffa1a46 --- /dev/null +++ b/share/shr_RandNum/src/kissvec/kissvec.c @@ -0,0 +1,36 @@ +// KISS random generator implemented in C +// Basic algorithm from George Marsaglia circa 1998 +// Public domain Fortran implementation from http://www.fortran.com/ +// downloaded by pjr on 03/16/04 +// converted to vector form, functions inlined by pjr,mvr on 05/10/2004 +// Translated back into C in 2015 + +#include +#include + +#define shiftl_xor(x, n) (x ^= (x << n)) +#define shiftr_xor(x, n) (x ^= (x >> n)) + +// The KISS (Keep It Simple Stupid) random number generator. Combines: +// (1) The congruential generator x(n)=69069*x(n-1)+1327217885, period 2^32. +// (2) A 3-shift shift-register generator, period 2^32-1, +// (3) Two 16-bit multiply-with-carry generators, period 597273182964842497>2^59 +// Overall period>2^123; +// +// Note use of the C99 restrict keyword to enable optimization. +void kiss_rng(uint32_t seed1[restrict], uint32_t seed2[restrict], + uint32_t seed3[restrict], uint32_t seed4[restrict], + double ran_arr[restrict], size_t length) { + size_t i; + + for (i = 0; i < length; ++i) { + seed1[i] = 69069U * seed1[i] + 1327217885U; + shiftl_xor(seed2[i], 13); + shiftr_xor(seed2[i], 17); + shiftl_xor(seed2[i], 5); + seed3[i] = 18000U * (seed3[i] & 65535U) + (seed3[i] >> 16); + seed4[i] = 30903U * (seed4[i] & 65535U) + (seed4[i] >> 16); + ran_arr[i] = ((int32_t) (seed1[i] + seed2[i] + (seed3[i] << 16) + seed4[i])) * 2.328306E-10 + 0.5; + } + +} diff --git a/share/shr_RandNum/src/kissvec/kissvec_mod.F90 b/share/shr_RandNum/src/kissvec/kissvec_mod.F90 new file mode 100644 index 000000000000..318bad97bf93 --- /dev/null +++ b/share/shr_RandNum/src/kissvec/kissvec_mod.F90 @@ -0,0 +1,41 @@ +module kissvec_mod +! Fortran binding for the KISS vectorizable random number generator. + +implicit none + +integer, parameter :: r8 = selected_real_kind(12) + +private +public :: kissvec + +contains + + subroutine kissvec( seed1, seed2, seed3, seed4, ran_arr, length) + + ! We can assume that an r8 is a double and an i4 is an int32_t, but we can't + ! make any guarantees about the relative sizes of a Fortran default integer + ! and C's size_t. + use iso_c_binding, only: c_size_t + + integer, intent(in) :: length + real(r8), dimension(length), intent(inout) :: ran_arr + integer, dimension(length), intent(inout) :: seed1, seed2, seed3, seed4 + + ! C implementation + interface + subroutine kiss_rng(seed1, seed2, seed3, seed4, ran_arr, length) bind(c) + ! Note that the definition of kiss_rng uses unsigned int, but the + ! Fortran standard largely ignores signed/unsigned because there + ! are no unsigned integers in Fortran. + use iso_c_binding, only: c_int32_t, c_double, c_size_t + integer(c_int32_t), intent(inout) :: seed1(*), seed2(*), seed3(*), seed4(*) + real(c_double), intent(inout) :: ran_arr(*) + integer(c_size_t), value :: length + end subroutine kiss_rng + end interface + + call kiss_rng(seed1, seed2, seed3, seed4, ran_arr, int(length, c_size_t)) + + end subroutine kissvec + +end module kissvec_mod diff --git a/share/shr_RandNum/src/mt19937/mersennetwister_mod.F90 b/share/shr_RandNum/src/mt19937/mersennetwister_mod.F90 new file mode 100644 index 000000000000..f3e9b2e5241c --- /dev/null +++ b/share/shr_RandNum/src/mt19937/mersennetwister_mod.F90 @@ -0,0 +1,293 @@ +! Fortran-95 implementation of the Mersenne Twister 19937, following +! the C implementation described below (code mt19937ar-cok.c, dated 2002/2/10), +! adapted cosmetically by making the names more general. +! Users must declare one or more variables of type randomNumberSequence in the calling +! procedure which are then initialized using a required seed. If the +! variable is not initialized the random numbers will all be 0. +! For example: +! program testRandoms +! use RandomNumbers +! type(randomNumberSequence) :: randomNumbers +! integer :: i +! +! randomNumbers = new_RandomNumberSequence(seed = 100) +! do i = 1, 10 +! print ('(f12.10, 2x)'), getRandomReal(randomNumbers) +! end do +! end program testRandoms +! +! Fortran-95 implementation by +! Robert Pincus +! NOAA-CIRES Climate Diagnostics Center +! Boulder, CO 80305 +! email: Robert.Pincus@colorado.edu +! +! This documentation in the original C program reads: +! ------------------------------------------------------------- +! A C-program for MT19937, with initialization improved 2002/2/10. +! Coded by Takuji Nishimura and Makoto Matsumoto. +! This is a faster version by taking Shawn Cokus's optimization, +! Matthe Bellew's simplification, Isaku Wada's real version. +! +! Before using, initialize the state by using init_genrand(seed) +! or init_by_array(init_key, key_length). +! +! Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! +! 3. The names of its contributors may not be used to endorse or promote +! products derived from this software without specific prior written +! permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +! A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +! +! +! Any feedback is very welcome. +! http://www.math.keio.ac.jp/matumoto/emt.html +! email: matumoto@math.keio.ac.jp +! ------------------------------------------------------------- + +module MersenneTwister_mod +! ------------------------------------------------------------- + implicit none + private + + ! Algorithm parameters + ! ------- + ! Period parameters + integer, parameter :: i8 = selected_int_kind(13) + integer, parameter :: blockSize = 624, & + M = 397, & + MATRIX_A = -1727483681, & ! constant vector a (0x9908b0dfUL) + UMASK = -2147483648_i8,& ! most significant w-r bits (0x80000000UL) + LMASK = 2147483647 ! least significant r bits (0x7fffffffUL) + ! Tempering parameters + integer, parameter :: TMASKB= -1658038656, & ! (0x9d2c5680UL) + TMASKC= -272236544 ! (0xefc60000UL) + ! ------- + + ! The type containing the state variable + type randomNumberSequence + integer :: currentElement ! = blockSize + integer, dimension(0:blockSize -1) :: state ! = 0 + end type randomNumberSequence + + interface new_RandomNumberSequence + module procedure initialize_scalar, initialize_vector + end interface new_RandomNumberSequence + + public :: randomNumberSequence + public :: new_RandomNumberSequence, finalize_RandomNumberSequence, & + getRandomInt, getRandomPositiveInt, getRandomReal +! ------------------------------------------------------------- +contains + ! ------------------------------------------------------------- + ! Private functions + ! --------------------------- + function mixbits(u, v) + integer, intent( in) :: u, v + integer :: mixbits + + mixbits = ior(iand(u, UMASK), iand(v, LMASK)) + end function mixbits + ! --------------------------- + function twist(u, v) + integer, intent( in) :: u, v + integer :: twist + + ! Local variable + integer, parameter, dimension(0:1) :: t_matrix = (/ 0, MATRIX_A /) + + twist = ieor(ishft(mixbits(u, v), -1), t_matrix(iand(v, 1))) + twist = ieor(ishft(mixbits(u, v), -1), t_matrix(iand(v, 1))) + end function twist + ! --------------------------- + subroutine nextState(twister) + type(randomNumberSequence), intent(inout) :: twister + + ! Local variables + integer :: k + + do k = 0, blockSize - M - 1 + twister%state(k) = ieor(twister%state(k + M), & + twist(twister%state(k), twister%state(k + 1))) + end do + do k = blockSize - M, blockSize - 2 + twister%state(k) = ieor(twister%state(k + M - blockSize), & + twist(twister%state(k), twister%state(k + 1))) + end do + twister%state(blockSize - 1) = ieor(twister%state(M - 1), & + twist(twister%state(blockSize - 1), twister%state(0))) + twister%currentElement = 0 + + end subroutine nextState + ! --------------------------- + elemental function temper(y) + integer, intent(in) :: y + integer :: temper + + integer :: x + + ! Tempering + x = ieor(y, ishft(y, -11)) + x = ieor(x, iand(ishft(x, 7), TMASKB)) + x = ieor(x, iand(ishft(x, 15), TMASKC)) + temper = ieor(x, ishft(x, -18)) + end function temper + ! ------------------------------------------------------------- + ! Public (but hidden) functions + ! -------------------- + function initialize_scalar(seed) result(twister) + integer, intent(in ) :: seed + type(randomNumberSequence) :: twister + + integer :: i + ! See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. In the previous versions, + ! MSBs of the seed affect only MSBs of the array state[]. + ! 2002/01/09 modified by Makoto Matsumoto + + twister%state(0) = iand(seed, -1) + do i = 1, blockSize - 1 ! ubound(twister%state) + twister%state(i) = 1812433253 * ieor(twister%state(i-1), & + ishft(twister%state(i-1), -30)) + i + twister%state(i) = iand(twister%state(i), -1) ! for >32 bit machines + end do + twister%currentElement = blockSize + end function initialize_scalar + ! ------------------------------------------------------------- + function initialize_vector(seed) result(twister) + integer, dimension(0:), intent(in) :: seed + type(randomNumberSequence) :: twister + + integer :: i, j, k, nFirstLoop, nWraps + + nWraps = 0 + twister = initialize_scalar(19650218) + + nFirstLoop = max(blockSize, size(seed)) + do k = 1, nFirstLoop + i = mod(k + nWraps, blockSize) + j = mod(k - 1, size(seed)) + if(i == 0) then + twister%state(i) = twister%state(blockSize - 1) + twister%state(1) = ieor(twister%state(1), & + ieor(twister%state(1-1), & + ishft(twister%state(1-1), -30)) * 1664525) + & + seed(j) + j ! Non-linear + twister%state(i) = iand(twister%state(i), -1) ! for >32 bit machines + nWraps = nWraps + 1 + else + twister%state(i) = ieor(twister%state(i), & + ieor(twister%state(i-1), & + ishft(twister%state(i-1), -30)) * 1664525) + & + seed(j) + j ! Non-linear + twister%state(i) = iand(twister%state(i), -1) ! for >32 bit machines + end if + end do + + ! + ! Walk through the state array, beginning where we left off in the block above + ! + do i = mod(nFirstLoop, blockSize) + nWraps + 1, blockSize - 1 + twister%state(i) = ieor(twister%state(i), & + ieor(twister%state(i-1), & + ishft(twister%state(i-1), -30)) * 1566083941) - i ! Non-linear + twister%state(i) = iand(twister%state(i), -1) ! for >32 bit machines + end do + + twister%state(0) = twister%state(blockSize - 1) + + do i = 1, mod(nFirstLoop, blockSize) + nWraps + twister%state(i) = ieor(twister%state(i), & + ieor(twister%state(i-1), & + ishft(twister%state(i-1), -30)) * 1566083941) - i ! Non-linear + twister%state(i) = iand(twister%state(i), -1) ! for >32 bit machines + end do + + twister%state(0) = UMASK + twister%currentElement = blockSize + + end function initialize_vector + ! ------------------------------------------------------------- + ! Public functions + ! -------------------- + function getRandomInt(twister) + type(randomNumberSequence), intent(inout) :: twister + integer :: getRandomInt + ! Generate a random integer on the interval [0,0xffffffff] + ! Equivalent to genrand_int32 in the C code. + ! Fortran doesn't have a type that's unsigned like C does, + ! so this is integers in the range -2**31 - 2**31 + ! All functions for getting random numbers call this one, + ! then manipulate the result + + if(twister%currentElement >= blockSize) call nextState(twister) + + getRandomInt = temper(twister%state(twister%currentElement)) + twister%currentElement = twister%currentElement + 1 + + end function getRandomInt + ! -------------------- + function getRandomPositiveInt(twister) + type(randomNumberSequence), intent(inout) :: twister + integer :: getRandomPositiveInt + ! Generate a random integer on the interval [0,0x7fffffff] + ! or [0,2**31] + ! Equivalent to genrand_int31 in the C code. + + ! Local integers + integer :: localInt + + localInt = getRandomInt(twister) + getRandomPositiveInt = ishft(localInt, -1) + + end function getRandomPositiveInt + ! -------------------- + function getRandomReal(twister) + + type(randomNumberSequence), intent(inout) :: twister + double precision :: getRandomReal + ! Generate a random number on [0,1] + ! Equivalent to genrand_real1 in the C code + ! The result is stored as double precision but has 32 bit resolution + + integer :: localInt + + localInt = getRandomInt(twister) + if(localInt < 0) then + getRandomReal = dble(localInt + 2.0d0**32)/(2.0d0**32 - 1.0d0) + else + getRandomReal = dble(localInt )/(2.0d0**32 - 1.0d0) + end if + end function getRandomReal + ! -------------------- + subroutine finalize_RandomNumberSequence(twister) + type(randomNumberSequence), intent(inout) :: twister + + twister%currentElement = blockSize + twister%state(:) = 0 + end subroutine finalize_RandomNumberSequence + ! -------------------- +end module MersenneTwister_mod + diff --git a/share/shr_RandNum/src/shr_RandNum_mod.F90 b/share/shr_RandNum/src/shr_RandNum_mod.F90 new file mode 100644 index 000000000000..65a71cdc85eb --- /dev/null +++ b/share/shr_RandNum/src/shr_RandNum_mod.F90 @@ -0,0 +1,317 @@ +#ifdef INTEL_MKL +include 'mkl_vsl.f90' +#endif + +module shr_RandNum_mod + +! this module contains the driver for the available versions of random number generator + +use mersennetwister_mod, only : new_RandomNumberSequence, getRandomReal, & + randomNumberSequence, finalize_RandomNumberSequence +use dSFMT_interface, only : dSFMT_init, dSFMT_end, get_rand_arr_close_open, dSFMT_t +use kissvec_mod, only : kissvec + +#ifdef INTEL_MKL +use mkl_vsl_type +use mkl_vsl +#endif +implicit none + +private + +public :: ShrRandGen + +public :: ShrIntrinsicRandGen +public :: ShrKissRandGen +public :: ShrF95MtRandGen +public :: ShrDsfmtRandGen +#ifdef INTEL_MKL +public :: ShrMklMtRandGen +#endif + +integer, parameter :: r8 = selected_real_kind(12) +integer, parameter :: i8 = selected_int_kind(12) + +! Abstract base class for random number generators +type, abstract :: ShrRandGen + contains + procedure(gen_random), deferred :: random + procedure(gen_finalize), deferred :: finalize +end type ShrRandGen + +abstract interface + subroutine gen_random(self, array) + import + class(ShrRandGen), intent(inout) :: self + real(r8), dimension(:,:), intent(out) :: array + end subroutine gen_random + + subroutine gen_finalize(self) + import + class(ShrRandGen), intent(inout) :: self + end subroutine gen_finalize +end interface + +! Fortran 90 "random_number" intrinsic +type, extends(ShrRandGen) :: ShrIntrinsicRandGen + integer, allocatable, private :: seed(:,:) + contains + procedure, non_overridable :: random => intrinsic_random + procedure, non_overridable :: finalize => intrinsic_finalize +end type ShrIntrinsicRandGen + +interface ShrIntrinsicRandGen + module procedure ShrIntrinsicRandGen_constructor +end interface ShrIntrinsicRandGen + +! KISS (Keep It Simple Stupid) pseudorandom number generator +type, extends(ShrRandGen) :: ShrKissRandGen + integer, allocatable, private :: seed(:,:) + contains + procedure, non_overridable :: random => kiss_random + procedure, non_overridable :: finalize => kiss_finalize +end type ShrKissRandGen + +interface ShrKissRandGen + module procedure ShrKissRandGen_constructor +end interface ShrKissRandGen + +! Fortran 95 implementation of Mersene Twister 19937 +type, extends(ShrRandGen) :: ShrF95MtRandGen + type(randomNumberSequence), allocatable, private :: wrapped(:) + contains + procedure, non_overridable :: random => f95_mt_random + procedure, non_overridable :: finalize => f95_mt_finalize +end type ShrF95MtRandGen + +interface ShrF95MtRandGen + module procedure ShrF95MtRandGen_constructor +end interface ShrF95MtRandGen + +! Double precision SIMD Fast Mersene Twister 19937 +type, extends(ShrRandGen) :: ShrDsfmtRandGen + type(dSFMT_t), allocatable, private :: wrapped(:) + contains + procedure, non_overridable :: random => dsfmt_random + procedure, non_overridable :: finalize => dsfmt_finalize +end type ShrDsfmtRandGen + +interface ShrDsfmtRandGen + module procedure ShrDsfmtRandGen_constructor +end interface ShrDsfmtRandGen + +! Intel Math Kernel Library Mersenne Twister +#ifdef INTEL_MKL +type, extends(ShrRandGen) :: ShrMklMtRandGen + type (VSL_STREAM_STATE), allocatable :: wrapped(:) + contains + procedure, non_overridable :: random => mkl_mt_random + procedure, non_overridable :: finalize => mkl_mt_finalize +end type ShrMklMtRandGen + +interface ShrMklMtRandGen + module procedure ShrMklMtRandGen_constructor +end interface ShrMklMtRandGen +#endif + +contains + +function ShrIntrinsicRandGen_constructor(seed) result(rand_gen) + integer, intent(in) :: seed(:,:) + type(ShrIntrinsicRandGen) :: rand_gen + + allocate(rand_gen%seed(size(seed, 1),size(seed, 2))) + rand_gen%seed = seed + +end function ShrIntrinsicRandGen_constructor + +subroutine intrinsic_random(self, array) + class(ShrIntrinsicRandGen), intent(inout) :: self + real(r8), dimension(:,:), intent(out) :: array + + integer :: i + + do i = 1, size(self%seed, 1) + call random_seed(put=self%seed(i,:)) + call random_number(array(i,:)) + call random_seed(get=self%seed(i,:)) + end do + +end subroutine intrinsic_random + +subroutine intrinsic_finalize(self) + class(ShrIntrinsicRandGen), intent(inout) :: self + + if ( allocated(self%seed) ) deallocate(self%seed) + +end subroutine intrinsic_finalize + +function ShrKissRandGen_constructor(seed) result(rand_gen) + integer, intent(in) :: seed(:,:) + type(ShrKissRandGen) :: rand_gen + + allocate(rand_gen%seed(size(seed, 1),4)) + rand_gen%seed = seed + +end function ShrKissRandGen_constructor + +subroutine kiss_random(self, array) + class(ShrKissRandGen), intent(inout) :: self + real(r8), dimension(:,:), intent(out) :: array + + integer :: nstream, i + + nstream = size(self%seed, 1) + + do i = 1, size(array, 2) + call kissvec(self%seed(:,1), self%seed(:,2), self%seed(:,3), & + self%seed(:,4), array(:,i), nstream) + end do + +end subroutine kiss_random + +subroutine kiss_finalize(self) + class(ShrKissRandGen), intent(inout) :: self + + if ( allocated(self%seed) ) deallocate(self%seed) + +end subroutine kiss_finalize + +function ShrF95MtRandGen_constructor(seed) result(rand_gen) + integer, intent(in) :: seed(:) + type(ShrF95MtRandGen) :: rand_gen + + integer :: i + + allocate(rand_gen%wrapped(size(seed))) + do i = 1, size(seed) + rand_gen%wrapped(i) = new_RandomNumberSequence(seed=seed(i)) + end do + +end function ShrF95MtRandGen_constructor + +subroutine f95_mt_random(self, array) + class(ShrF95MtRandGen), intent(inout) :: self + real(r8), dimension(:,:), intent(out) :: array + + integer :: nstream, i, j + + nstream = size(self%wrapped) + + do i = 1, size(array, 2) + do j = 1, nstream + array(j,i) = getRandomReal(self%wrapped(j)) + end do + end do + +end subroutine f95_mt_random + +subroutine f95_mt_finalize(self) + class(ShrF95MtRandGen), intent(inout) :: self + + integer :: i + + if (allocated(self%wrapped)) then + do i = 1, size(self%wrapped) + call finalize_RandomNumberSequence(self%wrapped(i)) + end do + deallocate(self%wrapped) + end if + +end subroutine f95_mt_finalize + +function ShrDsfmtRandGen_constructor(seed, cache_size) result(rand_gen) + integer, intent(in) :: seed(:) + integer, intent(in) :: cache_size + type(ShrDsfmtRandGen) :: rand_gen + + integer :: i + + allocate(rand_gen%wrapped(size(seed))) + do i = 1, size(seed) + call dSFMT_init(seed(i), cache_size, rand_gen%wrapped(i)) + end do + +end function ShrDsfmtRandGen_constructor + +subroutine dsfmt_random(self, array) + class(ShrDsfmtRandGen), intent(inout) :: self + real(r8), dimension(:,:), intent(out) :: array + + integer :: nrandom, i + + nrandom = size(array, 2) + + do i = 1, size(self%wrapped) + call get_rand_arr_close_open(self%wrapped(i), array(i,:), nrandom) + end do + +end subroutine dsfmt_random + +subroutine dsfmt_finalize(self) + class(ShrDsfmtRandGen), intent(inout) :: self + + integer :: i + + if (allocated(self%wrapped)) then + do i = 1, size(self%wrapped) + call dSFMT_end(self%wrapped(i)) + end do + deallocate(self%wrapped) + end if + +end subroutine dsfmt_finalize + +#ifdef INTEL_MKL +function ShrMklMtRandGen_constructor(seed) result(rand_gen) + integer, intent(in) :: seed(:) + type(ShrMklMtRandGen) :: rand_gen + + integer :: err + + integer :: i + + allocate(rand_gen%wrapped(size(seed))) + do i = 1, size(seed) + err = vslnewstream(rand_gen%wrapped(i), VSL_BRNG_SFMT19937, seed=seed(i)) + end do + +end function ShrMklMtRandGen_constructor + +subroutine mkl_mt_random(self, array) + class(ShrMklMtRandGen), intent(inout) :: self + real(r8), dimension(:,:), intent(out) :: array + + real(r8), parameter :: range_start = 0.0_r8, range_end = 1.0_r8 + + integer :: err + + integer :: nrandom, i + + nrandom = size(array, 2) + + do i = 1, size(self%wrapped) + err = vdrnguniform(VSL_RNG_METHOD_UNIFORM_STD, self%wrapped(i), & + nrandom, array(i,:), range_start, range_end) + end do + +end subroutine mkl_mt_random + +subroutine mkl_mt_finalize(self) + class(ShrMklMtRandGen), intent(inout) :: self + + integer :: err + + integer :: i + + if (allocated(self%wrapped)) then + do i = 1, size(self%wrapped) + err = vslDeleteStream(self%wrapped(i)) + end do + deallocate(self%wrapped) + end if + +end subroutine mkl_mt_finalize +#endif + +end module shr_RandNum_mod diff --git a/share/shr_RandNum/test/bench/Makefile b/share/shr_RandNum/test/bench/Makefile new file mode 100644 index 000000000000..5b10b0b54960 --- /dev/null +++ b/share/shr_RandNum/test/bench/Makefile @@ -0,0 +1,104 @@ +# Makefile created by mkmf $Id: mkmf,v 18.0.18.4 2012/12/04 15:24:15 Seth.Underwood Exp $ + +ifeq ($(COMPILER),intel) + FC := ifort + LD := ifort + LDFLAGS += -mkl + CC := icc + +# Sandy Bridge/Ivy Bridge + + FFLAGS := -O3 -xHost -fp-model fast -mkl -no-prec-div -no-prec-sqrt -override-limits + + CFLAGS := -O3 -xHost -fp-model fast -std=c99 + + CPPDEFS = -DINTEL_MKL -DHAVE_SSE2 + + +# Knights Corner + +# FFLAGS := -mmic -O3 -qopt-report=5 -fp-model fast -no-prec-div -no-prec-sqrt -I./ + +# Haswell + +# FFLAGS := -O3 -xCORE-AVX2 -no-prec-div -no-prec-sqrt -I./ -DCPRINTEL + +endif + +ifeq ($(COMPILER),pgi) + FC := pgfortran + LD := pgfortran + CC := pgcc + + FFLAGS := -fastsse + CFLAGS := -fastsse + CPPDEFS = +endif + +ifeq ($(COMPILER),gnu) + FC := gfortran + LD := gfortran + CC := gcc + + FFLAGS := -Ofast -march=native + CFLAGS := -Ofast -march=native -std=gnu99 + CPPDEFS = -DHAVE_SSE2 +endif + +ifeq ($(COMPILER),nag) + FC := nagfor + LD := nagfor + CC := gcc + + FFLAGS := -O4 + CFLAGS := -Ofast -march=native -std=gnu99 + CPPDEFS = -DHAVE_SSE2 +endif + +ifeq ($(COMPILER),ibm) + FC := xlf2003 + LD := xlf2003 + CC := xlc + + FFLAGS := -O4 + CFLAGS := -O4 + CPPDEFS = +endif + +CPPDEFS := $(CPPDEFS) -DDSFMT_MEXP=19937 + +ifeq ($(COMPILER),ibm) + cpre = $(null)-WF,-D$(null) + FPPDEFS := $(patsubst -D%,$(cpre)%,$(CPPDEF)) +else + FPPDEFS := $(CPPDEFS) +endif + +FFLAGS := $(FFLAGS) $(FPPDEFS) -I../../include -I./ +CFLAGS := $(CFLAGS) $(CPPDEFS) -I../../include + +.DEFAULT: + -echo $@ does not exist. +all: ./shr_RandNum.exe +dSFMT.o: ../../src/dsfmt_f03/dSFMT.c + $(CC) $(CPPFLAGS) $(CFLAGS) $(OTHERFLAGS) -c ../../src/dsfmt_f03/dSFMT.c +dSFMT_interface.o: ../../src/dsfmt_f03/dSFMT_interface.F90 + $(FC) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/dsfmt_f03/dSFMT_interface.F90 +dSFMT_utils.o: ../../src/dsfmt_f03/dSFMT_utils.c ../../include/dSFMT.h + $(CC) $(CPPFLAGS) $(CFLAGS) $(OTHERFLAGS) -c ../../src/dsfmt_f03/dSFMT_utils.c +kissvec_mod.o: ../../src/kissvec/kissvec_mod.F90 + $(FC) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/kissvec/kissvec_mod.F90 +kissvec.o: ../../src/kissvec/kissvec.c + $(CC) $(CPPFLAGS) $(CFLAGS) $(OTHERFLAGS) -c ../../src/kissvec/kissvec.c +test_shr_RandNum.o: ./test_shr_RandNum.F90 shr_RandNum_mod.o + $(FC) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ./test_shr_RandNum.F90 +mersennetwister_mod.o: ../../src/mt19937/mersennetwister_mod.F90 + $(FC) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/mt19937/mersennetwister_mod.F90 +shr_RandNum_mod.o: ../../src/shr_RandNum_mod.F90 kissvec_mod.o mersennetwister_mod.o dSFMT_interface.o + $(FC) $(CPPFLAGS) $(FPPFLAGS) $(FFLAGS) $(OTHERFLAGS) -c ../../src/shr_RandNum_mod.F90 +SRC = ./test_shr_RandNum.F90 ../kissvec/kissvec_mod.F90 ../mt19937/mersennetwister_mod.F90 ../dsfmt_f03/dSFMT.c ../dsfmt_f03/dSFMT_interface.F90 ../dsfmt_f03/dSFMT_utils.c ../shr_RandNum_mod.F90 ../../include/dSFMT.h +OBJ = test_shr_RandNum.o kissvec_mod.o mersennetwister_mod.o dSFMT.o dSFMT_interface.o dSFMT_utils.o shr_RandNum_mod.o kissvec.o +clean: + -rm -f .shr_RandNum.exe.cppdefs $(OBJ) *.mod ./shr_RandNum.exe *.s +shr_RandNum.exe: $(OBJ) + $(LD) $(OBJ) -o shr_RandNum.exe $(LDFLAGS) diff --git a/share/shr_RandNum/test/bench/test_shr_RandNum.F90 b/share/shr_RandNum/test/bench/test_shr_RandNum.F90 new file mode 100644 index 000000000000..621ba9274391 --- /dev/null +++ b/share/shr_RandNum/test/bench/test_shr_RandNum.F90 @@ -0,0 +1,171 @@ +program test + +! this program calls the available versions of the random generators + +use shr_RandNum_mod, only: ShrIntrinsicRandGen, ShrKissRandGen, & + ShrF95MtRandGen, ShrDsfmtRandGen +#ifdef INTEL_MKL +use shr_RandNum_mod, only: ShrMklMtRandGen +#endif + +INTEGER, parameter :: r8 = selected_real_kind(12) + +#ifdef INTEL_MKL +type(ShrMklMtRandGen) :: mkl_gen +#endif +type(ShrKissRandGen) :: kiss_gen +type(ShrF95MtRandGen) :: f95_mt_gen +type(ShrIntrinsicRandGen) :: intrinsic_gen +type(ShrDsfmtRandGen) :: dsfmt_gen + +integer, parameter :: nstream = 16 ! number of streams of random numbers +integer, parameter :: length = 1000 ! length of stream of random numbers +integer :: ntrials = 50000 + +integer, dimension(nstream) :: seed = 7776578 +integer, dimension(nstream,4) :: kiss_seed +integer, dimension(:,:), allocatable :: intrinsic_seed + +real(r8), dimension(nstream,length) :: array + +integer :: i, n, m, intrinsic_size +integer :: c1, c2, cr, cm +real(r8) :: dt, dt1,dt2 + +#ifdef INTEL_MKL +! intel math kernel library mersenne twister + + call system_clock(c1, cr, cm) + do m = 1,ntrials + mkl_gen = ShrMklMtRandGen(seed) + call mkl_gen%finalize() + enddo + call system_clock(c2, cr, cm); dt1 = dble(c2-c1)/dble(cr) + + mkl_gen = ShrMklMtRandGen(seed) + call system_clock(c1, cr, cm) + do m = 1,ntrials + call mkl_gen%random(array) + enddo + call mkl_gen%finalize() + call system_clock(c2, cr, cm); dt2 = dble(c2-c1)/dble(cr) + dt = dt1+dt2 + print *, 'Init/term time (SFMT_MKL): ',dt1 + print *, 'Gen time (SFMT_MKL): ',dt2 + print *, 'Total time (SFMT_MKL): ',dt + print *, 'MegaRNumbers (SFMT_MKL): ', 1.0e-6*dble(nstream*length*ntrials)/dt + print *, 'Summation of Random Numbers: ', SUM(array) + print *, '--------'; print *, '' +#endif + +! keep it simple stupid random number + + do n = 1,nstream + do i = 1, 4 + kiss_seed(n,i) = seed(n)*i+n + end do + end do + + call system_clock(c1, cr, cm) + do m = 1,ntrials + kiss_gen = ShrKissRandGen(kiss_seed) + call kiss_gen%finalize() + enddo + call system_clock(c2, cr, cm); dt1 = dble(c2-c1)/dble(cr) + + kiss_gen = ShrKissRandGen(kiss_seed) + call system_clock(c1, cr, cm) + do m = 1,ntrials + call kiss_gen%random(array) + enddo + call kiss_gen%finalize() + call system_clock(c2, cr, cm); dt2 = dble(c2-c1)/dble(cr) + dt = dt1+dt2 + print *, 'Init/term time (KISSVEC): ',dt1 + print *, 'Gen time (KISSVEC): ',dt2 + print *, 'Total time (KISSVEC): ',dt + print *, 'MegaRNumbers (KISSVEC): ', 1.0e-6*dble(nstream*length*ntrials)/dt + print *, 'Summation of Random Numbers: ', SUM(array) + print *, '--------'; print *, '' + +! fortran-95 implementation of merseene twister + + call system_clock(c1, cr, cm) + do m = 1,ntrials + f95_mt_gen = ShrF95MtRandGen(seed) + call f95_mt_gen%finalize() + enddo + call system_clock(c2, cr, cm); dt1 = dble(c2-c1)/dble(cr) + + f95_mt_gen = ShrF95MtRandGen(seed) + call system_clock(c1, cr, cm) + do m = 1,ntrials + call f95_mt_gen%random(array) + enddo + call f95_mt_gen%finalize() + call system_clock(c2, cr, cm); dt2 = dble(c2-c1)/dble(cr) + dt = dt1+dt2 + print *, 'Init/term time (MT19937): ',dt1 + print *, 'Gen time (MT19937): ',dt2 + print *, 'Total time (MT19937): ',dt + print *, 'MegaRNumbers (MT19937): ', 1.0e-6*dble(nstream*length*ntrials)/dt + print *, 'Summation of Random Numbers: ', SUM(array) + print *, '--------'; print *, '' + +! fortran-90 intrinsic pseudorandom number generator + + call random_seed(size=intrinsic_size) + allocate(intrinsic_seed(nstream,intrinsic_size)) + do n = 1, nstream + do i = 1, intrinsic_size + intrinsic_seed(n,i) = seed(n)*i+n + end do + end do + + call system_clock(c1, cr, cm) + do m = 1,ntrials + intrinsic_gen = ShrIntrinsicRandGen(intrinsic_seed) + call intrinsic_gen%finalize() + enddo + call system_clock(c2, cr, cm); dt1 = dble(c2-c1)/dble(cr) + + intrinsic_gen = ShrIntrinsicRandGen(intrinsic_seed) + call system_clock(c1, cr, cm) + do m = 1,ntrials + call intrinsic_gen%random(array) + enddo + call intrinsic_gen%finalize() + call system_clock(c2, cr, cm); dt2 = dble(c2-c1)/dble(cr) + dt = dt1+dt2 + print *, 'Init/term time (F90_INTRINSIC): ',dt1 + print *, 'Gen time (F90_INTRINSIC): ',dt2 + print *, 'Total time (F90_INTRINSIC): ',dt + print *, 'MegaRNumbers (F90_INTRINSIC): ', 1.0e-6*dble(nstream*length*ntrials)/dt + print *, 'Summation of Random Numbers: ', SUM(array) + print *, '--------'; print *, '' + +! SIMD-orientated mersenne twister + + call system_clock(c1, cr, cm) + do m = 1,ntrials + dsfmt_gen = ShrDsfmtRandGen(seed, length) + call dsfmt_gen%finalize() + enddo + call system_clock(c2, cr, cm); dt1 = dble(c2-c1)/dble(cr) + + dsfmt_gen = ShrDsfmtRandGen(seed, length) + call system_clock(c1, cr, cm) + do m = 1,ntrials + call dsfmt_gen%random(array) + enddo + call dsfmt_gen%finalize() + call system_clock(c2, cr, cm); dt2 = dble(c2-c1)/dble(cr) + dt = dt1+dt2 + print *, 'Init/term time (DSFMT_F03): ',dt1 + print *, 'Gen time (DSFMT_F03): ',dt2 + print *, 'Total time (DSFMT_F03): ',dt + print *, 'MegaRNumbers (DSFMT_F03): ', 1.0e-6*dble(nstream*length*ntrials)/dt + print *, 'Summation of Random Numbers: ', SUM(array) + print *, '--------'; print *, '' + +end program test