Mercurial > audlegacy-plugins
changeset 1386:b8dd67ad7b86
Update SFMT files to version 1.3. Please let me know if it breaks on an AltiVec box.
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Fri, 27 Jul 2007 18:56:43 +0900 |
parents | f3c57bbb54d0 |
children | 20c6caad7e03 e6f5685f71a1 |
files | src/madplug/SFMT-alti.h src/madplug/SFMT-params.h src/madplug/SFMT-params19937.h src/madplug/SFMT-sse2.h src/madplug/SFMT.c src/madplug/SFMT.h |
diffstat | 6 files changed, 347 insertions(+), 84 deletions(-) [+] |
line wrap: on
line diff
--- a/src/madplug/SFMT-alti.h Fri Jul 27 04:53:35 2007 +0200 +++ b/src/madplug/SFMT-alti.h Fri Jul 27 18:56:43 2007 +0900 @@ -14,28 +14,143 @@ * see LICENSE.txt */ -#include <altivec.h> #ifndef SFMT_ALTI_H #define SFMT_ALTI_H -union W128_T { - vector unsigned int s; - uint32_t u[4]; -}; - -typedef union W128_T w128_t; - -#ifdef __GNUC__ inline static vector unsigned int vec_recursion(vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d) - __attribute__((always_inline)); -#else + ALWAYSINLINE; + +/** + * This function represents the recursion formula in AltiVec and BIG ENDIAN. + * @param a a 128-bit part of the interal state array + * @param b a 128-bit part of the interal state array + * @param c a 128-bit part of the interal state array + * @param d a 128-bit part of the interal state array + * @return output + */ inline static vector unsigned int vec_recursion(vector unsigned int a, vector unsigned int b, vector unsigned int c, - vector unsigned int d); + vector unsigned int d) { + + const vector unsigned int sl1 = ALTI_SL1; + const vector unsigned int sr1 = ALTI_SR1; +#ifdef ONLY64 + const vector unsigned int mask = ALTI_MSK64; + const vector unsigned char perm_sl = ALTI_SL2_PERM64; + const vector unsigned char perm_sr = ALTI_SR2_PERM64; +#else + const vector unsigned int mask = ALTI_MSK; + const vector unsigned char perm_sl = ALTI_SL2_PERM; + const vector unsigned char perm_sr = ALTI_SR2_PERM; +#endif + vector unsigned int v, w, x, y, z; + x = vec_perm(a, (vector unsigned int)perm_sl, perm_sl); + v = a; + y = vec_sr(b, sr1); + z = vec_perm(c, (vector unsigned int)perm_sr, perm_sr); + w = vec_sl(d, sl1); + z = vec_xor(z, w); + y = vec_and(y, mask); + v = vec_xor(v, x); + z = vec_xor(z, y); + z = vec_xor(z, v); + return z; +} + +/** + * This function fills the internal state array with pseudorandom + * integers. 
+ */ +inline static void gen_rand_all(void) { + int i; + vector unsigned int r, r1, r2; + + r1 = sfmt[N - 2].s; + r2 = sfmt[N - 1].s; + for (i = 0; i < N - POS1; i++) { + r = vec_recursion(sfmt[i].s, sfmt[i + POS1].s, r1, r2); + sfmt[i].s = r; + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = vec_recursion(sfmt[i].s, sfmt[i + POS1 - N].s, r1, r2); + sfmt[i].s = r; + r1 = r2; + r2 = r; + } +} + +/** + * This function fills the user-specified array with pseudorandom + * integers. + * + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pesudorandom numbers to be generated. + */ +inline static void gen_rand_array(w128_t *array, int size) { + int i, j; + vector unsigned int r, r1, r2; + + r1 = sfmt[N - 2].s; + r2 = sfmt[N - 1].s; + for (i = 0; i < N - POS1; i++) { + r = vec_recursion(sfmt[i].s, sfmt[i + POS1].s, r1, r2); + array[i].s = r; + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = vec_recursion(sfmt[i].s, array[i + POS1 - N].s, r1, r2); + array[i].s = r; + r1 = r2; + r2 = r; + } + /* main loop */ + for (; i < size - N; i++) { + r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2); + array[i].s = r; + r1 = r2; + r2 = r; + } + for (j = 0; j < 2 * N - size; j++) { + sfmt[j].s = array[j + size - N].s; + } + for (; i < size; i++) { + r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2); + array[i].s = r; + sfmt[j++].s = r; + r1 = r2; + r2 = r; + } +} + +#ifndef ONLY64 +#if defined(__APPLE__) +#define ALTI_SWAP (vector unsigned char) \ + (4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11) +#else +#define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11} +#endif +/** + * This function swaps high and low 32-bit of 64-bit integers in user + * specified array. + * + * @param array an 128-bit array to be swaped. + * @param size size of 128-bit array. 
+ */ +inline static void swap(w128_t *array, int size) { + int i; + const vector unsigned char perm = ALTI_SWAP; + + for (i = 0; i < size; i++) { + array[i].s = vec_perm(array[i].s, (vector unsigned int)perm, perm); + } +} #endif #endif
--- a/src/madplug/SFMT-params.h Fri Jul 27 04:53:35 2007 +0200 +++ b/src/madplug/SFMT-params.h Fri Jul 27 18:56:43 2007 +0900 @@ -1,3 +1,6 @@ +#ifndef SFMT_PARAMS_H +#define SFMT_PARAMS_H + #if !defined(MEXP) #ifdef __GNUC__ #warning "MEXP is not defined. I assume MEXP is 19937." @@ -79,6 +82,8 @@ #include "SFMT-params86243.h" #elif MEXP == 132049 #include "SFMT-params132049.h" +#elif MEXP == 216091 + #include "SFMT-params216091.h" #else #ifdef __GNUC__ #error "MEXP is not valid." @@ -88,3 +93,5 @@ #endif #endif + +#endif /* SFMT_PARAMS_H */
--- a/src/madplug/SFMT-params19937.h Fri Jul 27 04:53:35 2007 +0200 +++ b/src/madplug/SFMT-params19937.h Fri Jul 27 18:56:43 2007 +0900 @@ -1,3 +1,6 @@ +#ifndef SFMT_PARAMS19937_H +#define SFMT_PARAMS19937_H + #define POS1 122 #define SL1 18 #define SL2 1 @@ -11,12 +14,33 @@ #define PARITY2 0x00000000U #define PARITY3 0x00000000U #define PARITY4 0x13c9e684U -#define ALTI_SL2_PERM \ -(vector unsigned char){1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} -#define ALTI_SL2_PERM64 \ -(vector unsigned char){1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} -#define ALTI_SR2_PERM \ -(vector unsigned char){7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} -#define ALTI_SR2_PERM64 \ -(vector unsigned char){15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) + #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) + #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) + #define ALTI_MSK64 \ + (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) + #define ALTI_SL2_PERM \ + (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) + #define ALTI_SL2_PERM64 \ + (vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) + #define ALTI_SR2_PERM \ + (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) + #define ALTI_SR2_PERM64 \ + (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) +#else /* For OTHER OSs(Linux?) 
*/ + #define ALTI_SL1 {SL1, SL1, SL1, SL1} + #define ALTI_SR1 {SR1, SR1, SR1, SR1} + #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} + #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} + #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} + #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} + #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} + #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} +#endif /* For OSX */ #define IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6" + +#endif /* SFMT_PARAMS19937_H */
--- a/src/madplug/SFMT-sse2.h Fri Jul 27 04:53:35 2007 +0200 +++ b/src/madplug/SFMT-sse2.h Fri Jul 27 18:56:43 2007 +0900 @@ -1,36 +1,121 @@ /** - * @file SFMT-sse2.h - * - * @brief SIMD oriented Fast Mersenne Twister(SFMT) - * pseudorandom number generator + * @file SFMT-sse2.h + * @brief SIMD oriented Fast Mersenne Twister(SFMT) for Intel SSE2 * * @author Mutsuo Saito (Hiroshima University) * @author Makoto Matsumoto (Hiroshima University) * - * Copyright (C) 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * @note We assume LITTLE ENDIAN in this file + * + * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima * University. All rights reserved. * - * The new BSD License is applied to this software. - * see LICENSE.txt + * The new BSD License is applied to this software, see LICENSE.txt */ #ifndef SFMT_SSE2_H #define SFMT_SSE2_H -#include <emmintrin.h> + +inline static __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, + __m128i d, __m128i mask) ALWAYSINLINE; -union W128_T { - __m128i si; - uint32_t u[4]; -}; +/** + * This function represents the recursion formula. + * @param a a 128-bit part of the interal state array + * @param b a 128-bit part of the interal state array + * @param c a 128-bit part of the interal state array + * @param d a 128-bit part of the interal state array + * @param mask 128-bit mask + * @return output + */ +inline static __m128i mm_recursion(__m128i *a, __m128i *b, + __m128i c, __m128i d, __m128i mask) { + __m128i v, x, y, z; + + x = _mm_load_si128(a); + y = _mm_srli_epi32(*b, SR1); + z = _mm_srli_si128(c, SR2); + v = _mm_slli_epi32(d, SL1); + z = _mm_xor_si128(z, x); + z = _mm_xor_si128(z, v); + x = _mm_slli_si128(x, SL2); + y = _mm_and_si128(y, mask); + z = _mm_xor_si128(z, x); + z = _mm_xor_si128(z, y); + return z; +} -typedef union W128_T w128_t; +/** + * This function fills the internal state array with pseudorandom + * integers. 
+ */ +inline static void gen_rand_all(void) { + int i; + __m128i r, r1, r2, mask; + mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); -#ifdef __GNUC__ -inline static __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, - __m128i d, __m128i mask) - __attribute__((always_inline)); -#else -inline static __m128i mm_recursion(__m128i *a, __m128i *b, - __m128i c, __m128i d, __m128i mask); + r1 = _mm_load_si128(&sfmt[N - 2].si); + r2 = _mm_load_si128(&sfmt[N - 1].si); + for (i = 0; i < N - POS1; i++) { + r = mm_recursion(&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask); + _mm_store_si128(&sfmt[i].si, r); + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = mm_recursion(&sfmt[i].si, &sfmt[i + POS1 - N].si, r1, r2, mask); + _mm_store_si128(&sfmt[i].si, r); + r1 = r2; + r2 = r; + } +} + +/** + * This function fills the user-specified array with pseudorandom + * integers. + * + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pesudorandom numbers to be generated. 
+ */ +inline static void gen_rand_array(w128_t *array, int size) { + int i, j; + __m128i r, r1, r2, mask; + mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); + + r1 = _mm_load_si128(&sfmt[N - 2].si); + r2 = _mm_load_si128(&sfmt[N - 1].si); + for (i = 0; i < N - POS1; i++) { + r = mm_recursion(&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = mm_recursion(&sfmt[i].si, &array[i + POS1 - N].si, r1, r2, mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + /* main loop */ + for (; i < size - N; i++) { + r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2, + mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + for (j = 0; j < 2 * N - size; j++) { + r = _mm_load_si128(&array[j + size - N].si); + _mm_store_si128(&sfmt[j].si, r); + } + for (; i < size; i++) { + r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2, + mask); + _mm_store_si128(&array[i].si, r); + _mm_store_si128(&sfmt[j++].si, r); + r1 = r2; + r2 = r; + } +} + #endif -#endif
--- a/src/madplug/SFMT.c Fri Jul 27 04:53:35 2007 +0200 +++ b/src/madplug/SFMT.c Fri Jul 27 18:56:43 2007 +0900 @@ -14,21 +14,51 @@ #include <assert.h> #include "SFMT.h" #include "SFMT-params.h" -#include "SFMT-params19937.h" -#if defined(ALTIVEC) - #include "SFMT-alti.h" -#elif defined(SSE2) - #include "SFMT-sse2.h" +#if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64) +#define BIG_ENDIAN64 1 +#endif +#if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64) +#define BIG_ENDIAN64 1 +#endif +#if defined(ONLY64) && !defined(BIG_ENDIAN64) + #if defined(__GNUC__) + #error "-DONLY64 must be specified with -DBIG_ENDIAN64" + #endif +#undef ONLY64 +#endif +/*------------------------------------------------------ + 128-bit SIMD data type for Altivec, SSE2 or standard C + ------------------------------------------------------*/ +#if defined(HAVE_ALTIVEC) + #if !defined(__APPLE__) + #include <altivec.h> + #endif +/** 128-bit data structure */ +union W128_T { + vector unsigned int s; + uint32_t u[4]; +}; +/** 128-bit data type */ +typedef union W128_T w128_t; + +#elif defined(HAVE_SSE2) + #include <emmintrin.h> + +/** 128-bit data structure */ +union W128_T { + __m128i si; + uint32_t u[4]; +}; +/** 128-bit data type */ +typedef union W128_T w128_t; + #else -/*------------------------------------------ - 128-bit SIMD like data type for standard C - ------------------------------------------*/ + /** 128-bit data structure */ struct W128_T { uint32_t u[4]; }; - /** 128-bit data type */ typedef struct W128_T w128_t; @@ -61,18 +91,18 @@ inline static void rshift128(w128_t *out, w128_t const *in, int shift); inline static void lshift128(w128_t *out, w128_t const *in, int shift); inline static void gen_rand_all(void); -inline static void gen_rand_array(w128_t array[], int size); +inline static void gen_rand_array(w128_t *array, int size); inline static uint32_t func1(uint32_t x); inline static uint32_t func2(uint32_t x); static void period_certification(void); #if 
defined(BIG_ENDIAN64) && !defined(ONLY64) -inline static void swap(w128_t array[], int size); +inline static void swap(w128_t *array, int size); #endif -#if defined(ALTIVEC) - #include "SFMT-alti.c" -#elif defined(SSE2) - #include "SFMT-sse2.c" +#if defined(HAVE_ALTIVEC) + #include "SFMT-alti.h" +#elif defined(HAVE_SSE2) + #include "SFMT-sse2.h" #endif /** @@ -211,7 +241,7 @@ } #endif -#if (!defined(ALTIVEC)) && (!defined(SSE2)) +#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) /** * This function fills the internal state array with pseudorandom * integers. @@ -241,7 +271,7 @@ * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pseudorandom numbers to be generated. */ -inline static void gen_rand_array(w128_t array[], int size) { +inline static void gen_rand_array(w128_t *array, int size) { int i, j; w128_t *r1, *r2; @@ -274,8 +304,8 @@ } #endif -#if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(ALTIVEC) -inline static void swap(w128_t array[], int size) { +#if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC) +inline static void swap(w128_t *array, int size) { int i; uint32_t x, y; @@ -317,13 +347,11 @@ int i, j; uint32_t work; - for (i = 0; i < 4; i++) { - work = psfmt32[idxof(i)] & parity[i]; - for (j = 0; j < 32; j++) { - inner ^= work & 1; - work = work >> 1; - } - } + for (i = 0; i < 4; i++) + inner ^= psfmt32[idxof(i)] & parity[i]; + for (i = 16; i > 0; i >>= 1) + inner ^= inner >> i; + inner &= 1; /* check OK */ if (inner == 1) { return; @@ -349,7 +377,7 @@ * The string shows the word size, the Mersenne exponent, * and all parameters of this generator. */ -char *get_idstring(void) { +const char *get_idstring(void) { return IDSTR; } @@ -377,7 +405,7 @@ * init_gen_rand or init_by_array must be called before this function. 
* @return 32-bit pseudorandom number */ -inline uint32_t gen_rand32(void) { +uint32_t gen_rand32(void) { uint32_t r; assert(initialized); @@ -396,7 +424,7 @@ * unless an initialization is again executed. * @return 64-bit pseudorandom number */ -inline uint64_t gen_rand64(void) { +uint64_t gen_rand64(void) { #if defined(BIG_ENDIAN64) && !defined(ONLY64) uint32_t r1, r2; #else @@ -448,7 +476,7 @@ * memory. Mac OSX doesn't have these functions, but \b malloc of OSX * returns the pointer to the aligned memory block. */ -inline void fill_array32(uint32_t array[], int size) { +void fill_array32(uint32_t *array, int size) { assert(initialized); assert(idx == N32); assert(size % 4 == 0); @@ -484,7 +512,7 @@ * memory. Mac OSX doesn't have these functions, but \b malloc of OSX * returns the pointer to the aligned memory block. */ -inline void fill_array64(uint64_t array[], int size) { +void fill_array64(uint64_t *array, int size) { assert(initialized); assert(idx == N32); assert(size % 2 == 0); @@ -524,7 +552,7 @@ * @param init_key the array of 32-bit integers, used as a seed. * @param key_length the length of init_key. */ -void init_by_array(uint32_t init_key[], int key_length) { +void init_by_array(uint32_t *init_key, int key_length) { int i, j, count; uint32_t r; int lag; @@ -554,7 +582,7 @@ r += key_length; psfmt32[idxof(mid + lag)] += r; psfmt32[idxof(0)] = r; - i = 1; + count--; for (i = 1, j = 0; (j < count) && (j < key_length); j++) { r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)]
--- a/src/madplug/SFMT.h Fri Jul 27 04:53:35 2007 +0200 +++ b/src/madplug/SFMT.h Fri Jul 27 18:56:43 2007 +0900 @@ -35,10 +35,10 @@ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) #include <inttypes.h> -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) || defined(__BORLANDC__) typedef unsigned int uint32_t; - typedef unsigned long long uint64_t; - #define inline + typedef unsigned __int64 uint64_t; + #define inline __inline #else #include <inttypes.h> #if defined(__GNUC__) @@ -47,7 +47,7 @@ #endif #ifndef PRIu64 - #if defined(_MSC_VER) + #if defined(_MSC_VER) || defined(__BORLANDC__) #define PRIu64 "I64u" #define PRIx64 "I64x" #else @@ -56,13 +56,17 @@ #endif #endif -inline uint32_t gen_rand32(void); -inline uint64_t gen_rand64(void); -inline void fill_array32(uint32_t array[], int size); -inline void fill_array64(uint64_t array[], int size); +#if defined(__GNUC__) +#define ALWAYSINLINE __attribute__((always_inline)) +#endif + +uint32_t gen_rand32(void); +uint64_t gen_rand64(void); +void fill_array32(uint32_t *array, int size); +void fill_array64(uint64_t *array, int size); void init_gen_rand(uint32_t seed); -void init_by_array(uint32_t init_key[], int key_length); -char *get_idstring(void); +void init_by_array(uint32_t *init_key, int key_length); +const char *get_idstring(void); int get_min_array_size32(void); int get_min_array_size64(void);