dSFMT/dSFMT.c

   1 /**
   2  * @file dSFMT.c
   3  * @brief double precision SIMD-oriented Fast Mersenne Twister (dSFMT)
   4  * based on IEEE 754 format.
   5  *
   6  * @author Mutsuo Saito (Hiroshima University)
   7  * @author Makoto Matsumoto (Hiroshima University)
   8  *
   9  * Copyright (C) 2007,2008 Mutsuo Saito, Makoto Matsumoto and Hiroshima
  10  * University. All rights reserved.
  11  *
  12  * The new BSD License is applied to this software, see LICENSE.txt
  13  */
  14 #include <stdio.h>
  15 #include <string.h>
  16 #include <stdlib.h>
  17 #include "dSFMT-params.h"
  18
  19 /** dsfmt internal state vector */
  20 dsfmt_t dsfmt_global_data;
  21 /** dsfmt mexp for check */
  22 static const int dsfmt_mexp = DSFMT_MEXP;
  23
  24 /*----------------
  25   STATIC FUNCTIONS
  26   ----------------*/
  27 inline static uint32_t ini_func1(uint32_t x);
  28 inline static uint32_t ini_func2(uint32_t x);
  29 inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
  30                                        int size);
  31 inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
  32                                        int size);
  33 inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
  34                                        int size);
  35 inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
  36                                        int size);
  37 inline static int idxof(int i);
  38 static void initial_mask(dsfmt_t *dsfmt);
  39 static void period_certification(dsfmt_t *dsfmt);
  40
  41 #if defined(HAVE_SSE2)
  42 #  include <emmintrin.h>
  43 /** mask data for sse2 */
  44 static __m128i sse2_param_mask;
  45 /** 1 in 64bit for sse2 */
  46 static __m128i sse2_int_one;
  47 /** 2.0 double for sse2 */
  48 static __m128d sse2_double_two;
  49 /** -1.0 double for sse2 */
  50 static __m128d sse2_double_m_one;
  51
  52 static void setup_const(void);
  53 #endif
  54
  55 /**
  56  * This function simulate a 32-bit array index overlapped to 64-bit
  57  * array of LITTLE ENDIAN in BIG ENDIAN machine.
  58  */
  59 #if defined(DSFMT_BIG_ENDIAN)
  60 inline static int idxof(int i) {
  61     return i ^ 1;
  62 }
  63 #else
  64 inline static int idxof(int i) {
  65     return i;
  66 }
  67 #endif
  68
  69 /**
  70  * This function represents the recursion formula.
  71  * @param r output
  72  * @param a a 128-bit part of the internal state array
  73  * @param b a 128-bit part of the internal state array
  74  * @param lung a 128-bit part of the internal state array
  75  */
  76 #if defined(HAVE_ALTIVEC)
  77 inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b,
  78                                 w128_t *lung) {
  79     const vector unsigned char sl1 = ALTI_SL1;
  80     const vector unsigned char sl1_perm = ALTI_SL1_PERM;
  81     const vector unsigned int sl1_msk = ALTI_SL1_MSK;
  82     const vector unsigned char sr1 = ALTI_SR;
  83     const vector unsigned char sr1_perm = ALTI_SR_PERM;
  84     const vector unsigned int sr1_msk = ALTI_SR_MSK;
  85     const vector unsigned char perm = ALTI_PERM;
  86     const vector unsigned int msk1 = ALTI_MSK;
  87     vector unsigned int w, x, y, z;
  88
  89     z = a->s;
  90     w = lung->s;
  91     x = vec_perm(w, (vector unsigned int)perm, perm);
  92     y = vec_perm(z, sl1_perm, sl1_perm);
  93     y = vec_sll(y, sl1);
  94     y = vec_and(y, sl1_msk);
  95     w = vec_xor(x, b->s);
  96     w = vec_xor(w, y);
  97     x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm);
  98     x = vec_srl(x, sr1);
  99     x = vec_and(x, sr1_msk);
 100     y = vec_and(w, msk1);
 101     z = vec_xor(z, y);
 102     r->s = vec_xor(z, x);
 103     lung->s = w;
 104 }
 105 #elif defined(HAVE_SSE2)
 106 /**
 107  * This function setup some constant variables for SSE2.
 108  */
 109 static void setup_const(void) {
 110     static int first = 1;
 111     if (!first) {
 112         return;
 113     }
 114     sse2_param_mask = _mm_set_epi32(DSFMT_MSK32_3, DSFMT_MSK32_4,
 115                                     DSFMT_MSK32_1, DSFMT_MSK32_2);
 116     sse2_int_one = _mm_set_epi32(0, 1, 0, 1);
 117     sse2_double_two = _mm_set_pd(2.0, 2.0);
 118     sse2_double_m_one = _mm_set_pd(-1.0, -1.0);
 119     first = 0;
 120 }
 121
 122 /**
 123  * This function represents the recursion formula.
 124  * @param r output 128-bit
 125  * @param a a 128-bit part of the internal state array
 126  * @param b a 128-bit part of the internal state array
 127  * @param d a 128-bit part of the internal state array (I/O)
 128  */
 129 inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) {
 130     __m128i v, w, x, y, z;
 131
 132     x = a->si;
 133     z = _mm_slli_epi64(x, DSFMT_SL1);
 134     y = _mm_shuffle_epi32(u->si, SSE2_SHUFF);
 135     z = _mm_xor_si128(z, b->si);
 136     y = _mm_xor_si128(y, z);
 137
 138     v = _mm_srli_epi64(y, DSFMT_SR);
 139     w = _mm_and_si128(y, sse2_param_mask);
 140     v = _mm_xor_si128(v, x);
 141     v = _mm_xor_si128(v, w);
 142     r->si = v;
 143     u->si = y;
 144 }
 145 #else /* standard C */
 146 /**
 147  * This function represents the recursion formula.
 148  * @param r output 128-bit
 149  * @param a a 128-bit part of the internal state array
 150  * @param b a 128-bit part of the internal state array
 151  * @param lung a 128-bit part of the internal state array (I/O)
 152  */
 153 inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b,
 154                                 w128_t *lung) {
 155     uint64_t t0, t1, L0, L1;
 156
 157     t0 = a->u[0];
 158     t1 = a->u[1];
 159     L0 = lung->u[0];
 160     L1 = lung->u[1];
 161     lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0];
 162     lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1];
 163     r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0;
 164     r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1;
 165 }
 166 #endif
 167
 168 #if defined(HAVE_SSE2)
 169 /**
 170  * This function converts the double precision floating point numbers which
 171  * distribute uniformly in the range [1, 2) to those which distribute uniformly
 172  * in the range [0, 1).
 173  * @param w 128bit stracture of double precision floating point numbers (I/O)
 174  */
 175 inline static void convert_c0o1(w128_t *w) {
 176     w->sd = _mm_add_pd(w->sd, sse2_double_m_one);
 177 }
 178
 179 /**
 180  * This function converts the double precision floating point numbers which
 181  * distribute uniformly in the range [1, 2) to those which distribute uniformly
 182  * in the range (0, 1].
 183  * @param w 128bit stracture of double precision floating point numbers (I/O)
 184  */
 185 inline static void convert_o0c1(w128_t *w) {
 186     w->sd = _mm_sub_pd(sse2_double_two, w->sd);
 187 }
 188
 189 /**
 190  * This function converts the double precision floating point numbers which
 191  * distribute uniformly in the range [1, 2) to those which distribute uniformly
 192  * in the range (0, 1).
 193  * @param w 128bit stracture of double precision floating point numbers (I/O)
 194  */
 195 inline static void convert_o0o1(w128_t *w) {
 196     w->si = _mm_or_si128(w->si, sse2_int_one);
 197     w->sd = _mm_add_pd(w->sd, sse2_double_m_one);
 198 }
 199 #else /* standard C and altivec */
 200 /**
 201  * This function converts the double precision floating point numbers which
 202  * distribute uniformly in the range [1, 2) to those which distribute uniformly
 203  * in the range [0, 1).
 204  * @param w 128bit stracture of double precision floating point numbers (I/O)
 205  */
 206 inline static void convert_c0o1(w128_t *w) {
 207     w->d[0] -= 1.0;
 208     w->d[1] -= 1.0;
 209 }
 210
 211 /**
 212  * This function converts the double precision floating point numbers which
 213  * distribute uniformly in the range [1, 2) to those which distribute uniformly
 214  * in the range (0, 1].
 215  * @param w 128bit stracture of double precision floating point numbers (I/O)
 216  */
 217 inline static void convert_o0c1(w128_t *w) {
 218     w->d[0] = 2.0 - w->d[0];
 219     w->d[1] = 2.0 - w->d[1];
 220 }
 221
 222 /**
 223  * This function converts the double precision floating point numbers which
 224  * distribute uniformly in the range [1, 2) to those which distribute uniformly
 225  * in the range (0, 1).
 226  * @param w 128bit stracture of double precision floating point numbers (I/O)
 227  */
 228 inline static void convert_o0o1(w128_t *w) {
 229     w->u[0] |= 1;
 230     w->u[1] |= 1;
 231     w->d[0] -= 1.0;
 232     w->d[1] -= 1.0;
 233 }
 234 #endif
 235
 236 /**
 237  * This function fills the user-specified array with double precision
 238  * floating point pseudorandom numbers of the IEEE 754 format.
 239  * @param dsfmt dsfmt state vector.
 240  * @param array an 128-bit array to be filled by pseudorandom numbers.
 241  * @param size number of 128-bit pseudorandom numbers to be generated.
 242  */
 243 inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
 244                                        int size) {
 245     int i, j;
 246     w128_t lung;
 247
 248     lung = dsfmt->status[DSFMT_N];
 249     do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
 250                  &lung);
 251     for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
 252         do_recursion(&array[i], &dsfmt->status[i],
 253                      &dsfmt->status[i + DSFMT_POS1], &lung);
 254     }
 255     for (; i < DSFMT_N; i++) {
 256         do_recursion(&array[i], &dsfmt->status[i],
 257                      &array[i + DSFMT_POS1 - DSFMT_N], &lung);
 258     }
 259     for (; i < size - DSFMT_N; i++) {
 260         do_recursion(&array[i], &array[i - DSFMT_N],
 261                      &array[i + DSFMT_POS1 - DSFMT_N], &lung);
 262     }
 263     for (j = 0; j < 2 * DSFMT_N - size; j++) {
 264         dsfmt->status[j] = array[j + size - DSFMT_N];
 265     }
 266     for (; i < size; i++, j++) {
 267         do_recursion(&array[i], &array[i - DSFMT_N],
 268                      &array[i + DSFMT_POS1 - DSFMT_N], &lung);
 269         dsfmt->status[j] = array[i];
 270     }
 271     dsfmt->status[DSFMT_N] = lung;
 272 }
 273
 274 /**
 275  * This function fills the user-specified array with double precision
 276  * floating point pseudorandom numbers of the IEEE 754 format.
 277  * @param dsfmt dsfmt state vector.
 278  * @param array an 128-bit array to be filled by pseudorandom numbers.
 279  * @param size number of 128-bit pseudorandom numbers to be generated.
 280  */
 281 inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
 282                                        int size) {
 283     int i, j;
 284     w128_t lung;
 285
 286     lung = dsfmt->status[DSFMT_N];
 287     do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
 288                  &lung);
 289     for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
 290         do_recursion(&array[i], &dsfmt->status[i],
 291                      &dsfmt->status[i + DSFMT_POS1], &lung);
 292     }
 293     for (; i < DSFMT_N; i++) {
 294         do_recursion(&array[i], &dsfmt->status[i],
 295                      &array[i + DSFMT_POS1 - DSFMT_N], &lung);
 296     }
 297     for (; i < size - DSFMT_N; i++) {
 298         do_recursion(&array[i], &array[i - DSFMT_N],
 299                      &array[i + DSFMT_POS1 - DSFMT_N], &lung);
 300         convert_c0o1(&array[i - DSFMT_N]);
 301     }
 302     for (j = 0; j < 2 * DSFMT_N - size; j++) {
 303         dsfmt->status[j] = array[j + size - DSFMT_N];
 304     }
 305     for (; i < size; i++, j++) {
 306         do_recursion(&array[i], &array[i - DSFMT_N],
 307                      &array[i + DSFMT_POS1 - DSFMT_N], &lung);
 308         dsfmt->status[j] = array[i];
 309         convert_c0o1(&array[i - DSFMT_N]);
 310     }
 311     for (i = size - DSFMT_N; i < size; i++) {
 312         convert_c0o1(&array[i]);
 313     }
 314     dsfmt->status[DSFMT_N] = lung;
 315 }
 316
 317 /**
 318  * This function fills the user-specified array with double precision
 319  * floating point pseudorandom numbers of the IEEE 754 format.
 320  * @param dsfmt dsfmt state vector.
 321  * @param array an 128-bit array to be filled by pseudorandom numbers.
 322  * @param size number of 128-bit pseudorandom numbers to be generated.
 323  */
 324 inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
 325                                        int size) {
 326     int i, j;
 327     w128_t lung;
 328
 329     lung = dsfmt->status[DSFMT_N];
 330     do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
 331                  &lung);
 332     for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
 333         do_recursion(&array[i], &dsfmt->status[i],
 334                      &dsfmt->status[i + DSFMT_POS1], &lung);
 335     }
 336     for (; i < DSFMT_N; i++) {
 337         do_recursion(&array[i], &dsfmt->status[i],
 338                      &array[i + DSFMT_POS1 - DSFMT_N], &lung);
 339     }
 340     for (; i < size - DSFMT_N; i++) {
 341         do_recursion(&array[i], &array[i - DSFMT_N],
 342                      &array[i + DSFMT_POS1 - DSFMT_N], &lung);
 343         convert_o0o1(&array[i - DSFMT_N]);
 344     }
 345     for (j = 0; j < 2 * DSFMT_N - size; j++) {
 346         dsfmt->status[j] = array[j + size - DSFMT_N];
 347     }
 348     for (; i < size; i++, j++) {
 349         do_recursion(&array[i], &array[i - DSFMT_N],
 350                      &array[i + DSFMT_POS1 - DSFMT_N], &lung);
 351         dsfmt->status[j] = array[i];
 352         convert_o0o1(&array[i - DSFMT_N]);
 353     }
 354     for (i = size - DSFMT_N; i < size; i++) {
 355         convert_o0o1(&array[i]);
 356     }
 357     dsfmt->status[DSFMT_N] = lung;
 358 }
 359
 360 /**
 361  * This function fills the user-specified array with double precision
 362  * floating point pseudorandom numbers of the IEEE 754 format.
 363  * @param dsfmt dsfmt state vector.
 364  * @param array an 128-bit array to be filled by pseudorandom numbers.
 365  * @param size number of 128-bit pseudorandom numbers to be generated.
 366  */
 367 inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
 368                                        int size) {
 369     int i, j;
 370     w128_t lung;
 371
 372     lung = dsfmt->status[DSFMT_N];
 373     do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
 374                  &lung);
 375     for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
 376         do_recursion(&array[i], &dsfmt->status[i],
 377                      &dsfmt->status[i + DSFMT_POS1], &lung);
 378     }
 379     for (; i < DSFMT_N; i++) {
 380         do_recursion(&array[i], &dsfmt->status[i],
 381                      &array[i + DSFMT_POS1 - DSFMT_N], &lung);
 382     }
 383     for (; i < size - DSFMT_N; i++) {
 384         do_recursion(&array[i], &array[i - DSFMT_N],
 385                      &array[i + DSFMT_POS1 - DSFMT_N], &lung);
 386         convert_o0c1(&array[i - DSFMT_N]);
 387     }
 388     for (j = 0; j < 2 * DSFMT_N - size; j++) {
 389         dsfmt->status[j] = array[j + size - DSFMT_N];
 390     }
 391     for (; i < size; i++, j++) {
 392         do_recursion(&array[i], &array[i - DSFMT_N],
 393                      &array[i + DSFMT_POS1 - DSFMT_N], &lung);
 394         dsfmt->status[j] = array[i];
 395         convert_o0c1(&array[i - DSFMT_N]);
 396     }
 397     for (i = size - DSFMT_N; i < size; i++) {
 398         convert_o0c1(&array[i]);
 399     }
 400     dsfmt->status[DSFMT_N] = lung;
 401 }
 402
 403 /**
 404  * This function represents a function used in the initialization
 405  * by init_by_array
 406  * @param x 32-bit integer
 407  * @return 32-bit integer
 408  */
 409 static uint32_t ini_func1(uint32_t x) {
 410     return (x ^ (x >> 27)) * (uint32_t)1664525UL;
 411 }
 412
 413 /**
 414  * This function represents a function used in the initialization
 415  * by init_by_array
 416  * @param x 32-bit integer
 417  * @return 32-bit integer
 418  */
 419 static uint32_t ini_func2(uint32_t x) {
 420     return (x ^ (x >> 27)) * (uint32_t)1566083941UL;
 421 }
 422
 423 /**
 424  * This function initializes the internal state array to fit the IEEE
 425  * 754 format.
 426  * @param dsfmt dsfmt state vector.
 427  */
 428 static void initial_mask(dsfmt_t *dsfmt) {
 429     int i;
 430     uint64_t *psfmt;
 431
 432     psfmt = &dsfmt->status[0].u[0];
 433     for (i = 0; i < DSFMT_N * 2; i++) {
 434         psfmt[i] = (psfmt[i] & DSFMT_LOW_MASK) | DSFMT_HIGH_CONST;
 435     }
 436 }
 437
 438 /**
 439  * This function certificate the period of 2^{SFMT_MEXP}-1.
 440  * @param dsfmt dsfmt state vector.
 441  */
 442 static void period_certification(dsfmt_t *dsfmt) {
 443     uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2};
 444     uint64_t tmp[2];
 445     uint64_t inner;
 446     int i;
 447 #if (DSFMT_PCV2 & 1) != 1
 448     int j;
 449     uint64_t work;
 450 #endif
 451
 452     tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1);
 453     tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2);
 454
 455     inner = tmp[0] & pcv[0];
 456     inner ^= tmp[1] & pcv[1];
 457     for (i = 32; i > 0; i >>= 1) {
 458         inner ^= inner >> i;
 459     }
 460     inner &= 1;
 461     /* check OK */
 462     if (inner == 1) {
 463         return;
 464     }
 465     /* check NG, and modification */
 466 #if (DSFMT_PCV2 & 1) == 1
 467     dsfmt->status[DSFMT_N].u[1] ^= 1;
 468 #else
 469     for (i = 1; i >= 0; i--) {
 470         work = 1;
 471         for (j = 0; j < 64; j++) {
 472             if ((work & pcv[i]) != 0) {
 473                 dsfmt->status[DSFMT_N].u[i] ^= work;
 474                 return;
 475             }
 476             work = work << 1;
 477         }
 478     }
 479 #endif
 480     return;
 481 }
 482
 483 /*----------------
 484   PUBLIC FUNCTIONS
 485   ----------------*/
 486 /**
 487  * This function returns the identification string.  The string shows
 488  * the Mersenne exponent, and all parameters of this generator.
 489  * @return id string.
 490  */
 491 const char *dsfmt_get_idstring(void) {
 492     return DSFMT_IDSTR;
 493 }
 494
 495 /**
 496  * This function returns the minimum size of array used for \b
 497  * fill_array functions.
 498  * @return minimum size of array used for fill_array functions.
 499  */
 500 int dsfmt_get_min_array_size(void) {
 501     return DSFMT_N64;
 502 }
 503
 504 /**
 505  * This function fills the internal state array with double precision
 506  * floating point pseudorandom numbers of the IEEE 754 format.
 507  * @param dsfmt dsfmt state vector.
 508  */
 509 void dsfmt_gen_rand_all(dsfmt_t *dsfmt) {
 510     int i;
 511     w128_t lung;
 512
 513     lung = dsfmt->status[DSFMT_N];
 514     do_recursion(&dsfmt->status[0], &dsfmt->status[0],
 515                  &dsfmt->status[DSFMT_POS1], &lung);
 516     for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
 517         do_recursion(&dsfmt->status[i], &dsfmt->status[i],
 518                      &dsfmt->status[i + DSFMT_POS1], &lung);
 519     }
 520     for (; i < DSFMT_N; i++) {
 521         do_recursion(&dsfmt->status[i], &dsfmt->status[i],
 522                      &dsfmt->status[i + DSFMT_POS1 - DSFMT_N], &lung);
 523     }
 524     dsfmt->status[DSFMT_N] = lung;
 525 }
 526
 527 /**
 528  * This function generates double precision floating point
 529  * pseudorandom numbers which distribute in the range [1, 2) to the
 530  * specified array[] by one call. The number of pseudorandom numbers
 531  * is specified by the argument \b size, which must be at least (SFMT_MEXP
 532  * / 128) * 2 and a multiple of two.  The function
 533  * get_min_array_size() returns this minimum size.  The generation by
 534  * this function is much faster than the following fill_array_xxx functions.
 535  *
 536  * For initialization, init_gen_rand() or init_by_array() must be called
 537  * before the first call of this function. This function can not be
 538  * used after calling genrand_xxx functions, without initialization.
 539  *
 540  * @param dsfmt dsfmt state vector.
 541  * @param array an array where pseudorandom numbers are filled
 542  * by this function.  The pointer to the array must be "aligned"
 543  * (namely, must be a multiple of 16) in the SIMD version, since it
 544  * refers to the address of a 128-bit integer.  In the standard C
 545  * version, the pointer is arbitrary.
 546  *
 547  * @param size the number of 64-bit pseudorandom integers to be
 548  * generated.  size must be a multiple of 2, and greater than or equal
 549  * to (SFMT_MEXP / 128) * 2.
 550  *
 551  * @note \b memalign or \b posix_memalign is available to get aligned
 552  * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
 553  * returns the pointer to the aligned memory block.
 554  */
 555 void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size) {
 556     assert(size % 2 == 0);
 557     assert(size >= DSFMT_N64);
 558     gen_rand_array_c1o2(dsfmt, (w128_t *)array, size / 2);
 559 }
 560
 561 /**
 562  * This function generates double precision floating point
 563  * pseudorandom numbers which distribute in the range (0, 1] to the
 564  * specified array[] by one call. This function is the same as
 565  * fill_array_close1_open2() except the distribution range.
 566  *
 567  * @param dsfmt dsfmt state vector.
 568  * @param array an array where pseudorandom numbers are filled
 569  * by this function.
 570  * @param size the number of pseudorandom numbers to be generated.
 571  * see also \sa fill_array_close1_open2()
 572  */
 573 void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size) {
 574     assert(size % 2 == 0);
 575     assert(size >= DSFMT_N64);
 576     gen_rand_array_o0c1(dsfmt, (w128_t *)array, size / 2);
 577 }
 578
 579 /**
 580  * This function generates double precision floating point
 581  * pseudorandom numbers which distribute in the range [0, 1) to the
 582  * specified array[] by one call. This function is the same as
 583  * fill_array_close1_open2() except the distribution range.
 584  *
 585  * @param array an array where pseudorandom numbers are filled
 586  * by this function.
 587  * @param dsfmt dsfmt state vector.
 588  * @param size the number of pseudorandom numbers to be generated.
 589  * see also \sa fill_array_close1_open2()
 590  */
 591 void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size) {
 592     assert(size % 2 == 0);
 593     assert(size >= DSFMT_N64);
 594     gen_rand_array_c0o1(dsfmt, (w128_t *)array, size / 2);
 595 }
 596
 597 /**
 598  * This function generates double precision floating point
 599  * pseudorandom numbers which distribute in the range (0, 1) to the
 600  * specified array[] by one call. This function is the same as
 601  * fill_array_close1_open2() except the distribution range.
 602  *
 603  * @param dsfmt dsfmt state vector.
 604  * @param array an array where pseudorandom numbers are filled
 605  * by this function.
 606  * @param size the number of pseudorandom numbers to be generated.
 607  * see also \sa fill_array_close1_open2()
 608  */
 609 void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size) {
 610     assert(size % 2 == 0);
 611     assert(size >= DSFMT_N64);
 612     gen_rand_array_o0o1(dsfmt, (w128_t *)array, size / 2);
 613 }
 614
 615 #if defined(__INTEL_COMPILER)
 616 #  pragma warning(disable:981)
 617 #endif
 618 /**
 619  * This function initializes the internal state array with a 32-bit
 620  * integer seed.
 621  * @param dsfmt dsfmt state vector.
 622  * @param seed a 32-bit integer used as the seed.
 623  * @param mexp caller's mersenne expornent
 624  */
 625 void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp) {
 626     int i;
 627     uint32_t *psfmt;
 628
 629     /* make sure caller program is compiled with the same MEXP */
 630     if (mexp != dsfmt_mexp) {
 631         fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n");
 632         exit(1);
 633     }
 634     psfmt = &dsfmt->status[0].u32[0];
 635     psfmt[idxof(0)] = seed;
 636     for (i = 1; i < (DSFMT_N + 1) * 4; i++) {
 637         psfmt[idxof(i)] = 1812433253UL
 638             * (psfmt[idxof(i - 1)] ^ (psfmt[idxof(i - 1)] >> 30)) + i;
 639     }
 640     initial_mask(dsfmt);
 641     period_certification(dsfmt);
 642     dsfmt->idx = DSFMT_N64;
 643 #if defined(HAVE_SSE2)
 644     setup_const();
 645 #endif
 646 }
 647
 648 /**
 649  * This function initializes the internal state array,
 650  * with an array of 32-bit integers used as the seeds
 651  * @param dsfmt dsfmt state vector.
 652  * @param init_key the array of 32-bit integers, used as a seed.
 653  * @param key_length the length of init_key.
 654  * @param mexp caller's mersenne expornent
 655  */
 656 void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[],
 657                              int key_length, int mexp) {
 658     int i, j, count;
 659     uint32_t r;
 660     uint32_t *psfmt32;
 661     int lag;
 662     int mid;
 663     int size = (DSFMT_N + 1) * 4;       /* pulmonary */
 664
 665     /* make sure caller program is compiled with the same MEXP */
 666     if (mexp != dsfmt_mexp) {
 667         fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n");
 668         exit(1);
 669     }
 670     if (size >= 623) {
 671         lag = 11;
 672     } else if (size >= 68) {
 673         lag = 7;
 674     } else if (size >= 39) {
 675         lag = 5;
 676     } else {
 677         lag = 3;
 678     }
 679     mid = (size - lag) / 2;
 680
 681     psfmt32 = &dsfmt->status[0].u32[0];
 682     memset(dsfmt->status, 0x8b, sizeof(dsfmt->status));
 683     if (key_length + 1 > size) {
 684         count = key_length + 1;
 685     } else {
 686         count = size;
 687     }
 688     r = ini_func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid % size)]
 689                   ^ psfmt32[idxof((size - 1) % size)]);
 690     psfmt32[idxof(mid % size)] += r;
 691     r += key_length;
 692     psfmt32[idxof((mid + lag) % size)] += r;
 693     psfmt32[idxof(0)] = r;
 694     count--;
 695     for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
 696         r = ini_func1(psfmt32[idxof(i)]
 697                       ^ psfmt32[idxof((i + mid) % size)]
 698                       ^ psfmt32[idxof((i + size - 1) % size)]);
 699         psfmt32[idxof((i + mid) % size)] += r;
 700         r += init_key[j] + i;
 701         psfmt32[idxof((i + mid + lag) % size)] += r;
 702         psfmt32[idxof(i)] = r;
 703         i = (i + 1) % size;
 704     }
 705     for (; j < count; j++) {
 706         r = ini_func1(psfmt32[idxof(i)]
 707                       ^ psfmt32[idxof((i + mid) % size)]
 708                       ^ psfmt32[idxof((i + size - 1) % size)]);
 709         psfmt32[idxof((i + mid) % size)] += r;
 710         r += i;
 711         psfmt32[idxof((i + mid + lag) % size)] += r;
 712         psfmt32[idxof(i)] = r;
 713         i = (i + 1) % size;
 714     }
 715     for (j = 0; j < size; j++) {
 716         r = ini_func2(psfmt32[idxof(i)]
 717                       + psfmt32[idxof((i + mid) % size)]
 718                       + psfmt32[idxof((i + size - 1) % size)]);
 719         psfmt32[idxof((i + mid) % size)] ^= r;
 720         r -= i;
 721         psfmt32[idxof((i + mid + lag) % size)] ^= r;
 722         psfmt32[idxof(i)] = r;
 723         i = (i + 1) % size;
 724     }
 725     initial_mask(dsfmt);
 726     period_certification(dsfmt);
 727     dsfmt->idx = DSFMT_N64;
 728 #if defined(HAVE_SSE2)
 729     setup_const();
 730 #endif
 731 }
 732 #if defined(__INTEL_COMPILER)
 733 #  pragma warning(default:981)
 734 #endif