Mercurial > audlegacy-plugins
diff src/madplug/SFMT-sse2.h @ 1386:b8dd67ad7b86
update SFMT files to version 1.3. please let me know if it break on altivec box.
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Fri, 27 Jul 2007 18:56:43 +0900 |
parents | 7e14701aef54 |
children |
line wrap: on
line diff
--- a/src/madplug/SFMT-sse2.h Fri Jul 27 04:53:35 2007 +0200 +++ b/src/madplug/SFMT-sse2.h Fri Jul 27 18:56:43 2007 +0900 @@ -1,36 +1,121 @@ /** - * @file SFMT-sse2.h - * - * @brief SIMD oriented Fast Mersenne Twister(SFMT) - * pseudorandom number generator + * @file SFMT-sse2.h + * @brief SIMD oriented Fast Mersenne Twister(SFMT) for Intel SSE2 * * @author Mutsuo Saito (Hiroshima University) * @author Makoto Matsumoto (Hiroshima University) * - * Copyright (C) 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * @note We assume LITTLE ENDIAN in this file + * + * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima * University. All rights reserved. * - * The new BSD License is applied to this software. - * see LICENSE.txt + * The new BSD License is applied to this software, see LICENSE.txt */ #ifndef SFMT_SSE2_H #define SFMT_SSE2_H -#include <emmintrin.h> + +inline static __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, + __m128i d, __m128i mask) ALWAYSINLINE; -union W128_T { - __m128i si; - uint32_t u[4]; -}; +/** + * This function represents the recursion formula. + * @param a a 128-bit part of the interal state array + * @param b a 128-bit part of the interal state array + * @param c a 128-bit part of the interal state array + * @param d a 128-bit part of the interal state array + * @param mask 128-bit mask + * @return output + */ +inline static __m128i mm_recursion(__m128i *a, __m128i *b, + __m128i c, __m128i d, __m128i mask) { + __m128i v, x, y, z; + + x = _mm_load_si128(a); + y = _mm_srli_epi32(*b, SR1); + z = _mm_srli_si128(c, SR2); + v = _mm_slli_epi32(d, SL1); + z = _mm_xor_si128(z, x); + z = _mm_xor_si128(z, v); + x = _mm_slli_si128(x, SL2); + y = _mm_and_si128(y, mask); + z = _mm_xor_si128(z, x); + z = _mm_xor_si128(z, y); + return z; +} -typedef union W128_T w128_t; +/** + * This function fills the internal state array with pseudorandom + * integers. + */ +inline static void gen_rand_all(void) { + int i; + __m128i r, r1, r2, mask; + mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); -#ifdef __GNUC__ -inline static __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, - __m128i d, __m128i mask) - __attribute__((always_inline)); -#else -inline static __m128i mm_recursion(__m128i *a, __m128i *b, - __m128i c, __m128i d, __m128i mask); + r1 = _mm_load_si128(&sfmt[N - 2].si); + r2 = _mm_load_si128(&sfmt[N - 1].si); + for (i = 0; i < N - POS1; i++) { + r = mm_recursion(&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask); + _mm_store_si128(&sfmt[i].si, r); + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = mm_recursion(&sfmt[i].si, &sfmt[i + POS1 - N].si, r1, r2, mask); + _mm_store_si128(&sfmt[i].si, r); + r1 = r2; + r2 = r; + } +} + +/** + * This function fills the user-specified array with pseudorandom + * integers. + * + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pesudorandom numbers to be generated. + */ +inline static void gen_rand_array(w128_t *array, int size) { + int i, j; + __m128i r, r1, r2, mask; + mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); + + r1 = _mm_load_si128(&sfmt[N - 2].si); + r2 = _mm_load_si128(&sfmt[N - 1].si); + for (i = 0; i < N - POS1; i++) { + r = mm_recursion(&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = mm_recursion(&sfmt[i].si, &array[i + POS1 - N].si, r1, r2, mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + /* main loop */ + for (; i < size - N; i++) { + r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2, + mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + for (j = 0; j < 2 * N - size; j++) { + r = _mm_load_si128(&array[j + size - N].si); + _mm_store_si128(&sfmt[j].si, r); + } + for (; i < size; i++) { + r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2, + mask); + _mm_store_si128(&array[i].si, r); + _mm_store_si128(&sfmt[j++].si, r); + r1 = r2; + r2 = r; + } +} + #endif -#endif