comparison src/madplug_x/SFMT-sse2.c @ 2356:c5fa65cb26ca

make an experimental copy to update to new sound engine
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Tue, 05 Feb 2008 01:11:50 +0900
parents src/madplug/SFMT-sse2.c@7e14701aef54
children
comparison
equal deleted inserted replaced
2355:0962a6325b9b 2356:c5fa65cb26ca
1 /**
2 * @file SFMT-sse2.c
3 * @brief SIMD oriented Fast Mersenne Twister(SFMT) for intel SSE2
4 *
5 * @author Mutsuo Saito (Hiroshima University)
6 * @author Makoto Matsumoto (Hiroshima University)
7 *
8 * @note We assume LITTLE ENDIAN in this file
9 *
10 * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
11 * University. All rights reserved.
12 *
13 * The new BSD License is applied to this software, see LICENSE.txt
14 */
15
16 /**
17 * This function represents the recursion formula.
18 * @param a a 128-bit part of the interal state array
19 * @param b a 128-bit part of the interal state array
20 * @param c a 128-bit part of the interal state array
21 * @param d a 128-bit part of the interal state array
22 * @param mask 128-bit mask
23 * @return output
24 */
25 inline static __m128i mm_recursion(__m128i *a, __m128i *b,
26 __m128i c, __m128i d, __m128i mask) {
27 __m128i v, x, y, z;
28
29 x = _mm_load_si128(a);
30 y = _mm_srli_epi32(*b, SR1);
31 z = _mm_srli_si128(c, SR2);
32 v = _mm_slli_epi32(d, SL1);
33 z = _mm_xor_si128(z, x);
34 z = _mm_xor_si128(z, v);
35 x = _mm_slli_si128(x, SL2);
36 y = _mm_and_si128(y, mask);
37 z = _mm_xor_si128(z, x);
38 z = _mm_xor_si128(z, y);
39 return z;
40 }
41
42 /**
43 * This function fills the internal state array with psedorandom
44 * integers.
45 */
46 inline void gen_rand_all(void) {
47 int i;
48 __m128i r, r1, r2, mask;
49 mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
50
51 r1 = _mm_load_si128(&sfmt[N - 2].si);
52 r2 = _mm_load_si128(&sfmt[N - 1].si);
53 for (i = 0; i < N - POS1; i++) {
54 r = mm_recursion(&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask);
55 _mm_store_si128(&sfmt[i].si, r);
56 r1 = r2;
57 r2 = r;
58 }
59 for (; i < N; i++) {
60 r = mm_recursion(&sfmt[i].si, &sfmt[i + POS1 - N].si, r1, r2, mask);
61 _mm_store_si128(&sfmt[i].si, r);
62 r1 = r2;
63 r2 = r;
64 }
65 }
66
67 /**
68 * This function fills the user-specified array with psedorandom
69 * integers.
70 *
71 * @param array an 128-bit array to be filled by pseudorandom numbers.
72 * @param size number of 128-bit pesudorandom numbers to be generated.
73 */
74 inline static void gen_rand_array(w128_t array[], int size) {
75 int i, j;
76 __m128i r, r1, r2, mask;
77 mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
78
79 r1 = _mm_load_si128(&sfmt[N - 2].si);
80 r2 = _mm_load_si128(&sfmt[N - 1].si);
81 for (i = 0; i < N - POS1; i++) {
82 r = mm_recursion(&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask);
83 _mm_store_si128(&array[i].si, r);
84 r1 = r2;
85 r2 = r;
86 }
87 for (; i < N; i++) {
88 r = mm_recursion(&sfmt[i].si, &array[i + POS1 - N].si, r1, r2, mask);
89 _mm_store_si128(&array[i].si, r);
90 r1 = r2;
91 r2 = r;
92 }
93 /* main loop */
94 for (; i < size - N; i++) {
95 r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2,
96 mask);
97 _mm_store_si128(&array[i].si, r);
98 r1 = r2;
99 r2 = r;
100 }
101 for (j = 0; j < 2 * N - size; j++) {
102 r = _mm_load_si128(&array[j + size - N].si);
103 _mm_store_si128(&sfmt[j].si, r);
104 }
105 for (; i < size; i++) {
106 r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2,
107 mask);
108 _mm_store_si128(&array[i].si, r);
109 _mm_store_si128(&sfmt[j++].si, r);
110 r1 = r2;
111 r2 = r;
112 }
113 }