comparison src/madplug/SFMT-alti.c @ 922:7e14701aef54 trunk

[svn] - replace random number generator in dithering code with SIMD-oriented Fast Mersenne Twister (SFMT). it reduces CPU load on SSE2 or AltiVec capable platform.
author yaz
date Sun, 08 Apr 2007 21:30:22 -0700
parents
children fa7f7cd029af
comparison
equal deleted inserted replaced
921:8b0850943335 922:7e14701aef54
1 /**
2 * This function represents the recursion formula in AltiVec and BIG ENDIAN.
3 * @param a a 128-bit part of the interal state array
4 * @param b a 128-bit part of the interal state array
5 * @param c a 128-bit part of the interal state array
6 * @param d a 128-bit part of the interal state array
7 * @return output
8 */
9 inline static vector unsigned int vec_recursion(vector unsigned int a,
10 vector unsigned int b,
11 vector unsigned int c,
12 vector unsigned int d) {
13
14 const vector unsigned int sl1 = (vector unsigned int)(SL1, SL1, SL1, SL1);
15 const vector unsigned int sr1 = (vector unsigned int)(SR1, SR1, SR1, SR1);
16 #ifdef ONLY64
17 const vector unsigned int mask = (vector unsigned int)
18 (MSK2, MSK1, MSK4, MSK3);
19 const vector unsigned char perm_sl = ALTI_SL2_PERM64;
20 const vector unsigned char perm_sr = ALTI_SR2_PERM64;
21 #else
22 const vector unsigned int mask = (vector unsigned int)
23 (MSK1, MSK2, MSK3, MSK4);
24 const vector unsigned char perm_sl = ALTI_SL2_PERM;
25 const vector unsigned char perm_sr = ALTI_SR2_PERM;
26 #endif
27 vector unsigned int v, w, x, y, z;
28 x = vec_perm(a, (vector unsigned int)perm_sl, perm_sl);
29 v = a;
30 y = vec_sr(b, sr1);
31 z = vec_perm(c, (vector unsigned int)perm_sr, perm_sr);
32 w = vec_sl(d, sl1);
33 z = vec_xor(z, w);
34 y = vec_and(y, mask);
35 v = vec_xor(v, x);
36 z = vec_xor(z, y);
37 z = vec_xor(z, v);
38 return z;
39 }
40
41 /**
42 * This function fills the internal state array with psedorandom
43 * integers.
44 */
45 inline static void gen_rand_all(void) {
46 int i;
47 vector unsigned int r, r1, r2;
48
49 r1 = sfmt[N - 2].s;
50 r2 = sfmt[N - 1].s;
51 for (i = 0; i < N - POS1; i++) {
52 r = vec_recursion(sfmt[i].s, sfmt[i + POS1].s, r1, r2);
53 sfmt[i].s = r;
54 r1 = r2;
55 r2 = r;
56 }
57 for (; i < N; i++) {
58 r = vec_recursion(sfmt[i].s, sfmt[i + POS1 - N].s, r1, r2);
59 sfmt[i].s = r;
60 r1 = r2;
61 r2 = r;
62 }
63 }
64
65 /**
66 * This function fills the user-specified array with psedorandom
67 * integers.
68 *
69 * @param array an 128-bit array to be filled by pseudorandom numbers.
70 * @param size number of 128-bit pesudorandom numbers to be generated.
71 */
72 inline static void gen_rand_array(w128_t array[], int size) {
73 int i, j;
74 vector unsigned int r, r1, r2;
75
76 r1 = sfmt[N - 2].s;
77 r2 = sfmt[N - 1].s;
78 for (i = 0; i < N - POS1; i++) {
79 r = vec_recursion(sfmt[i].s, sfmt[i + POS1].s, r1, r2);
80 array[i].s = r;
81 r1 = r2;
82 r2 = r;
83 }
84 for (; i < N; i++) {
85 r = vec_recursion(sfmt[i].s, array[i + POS1 - N].s, r1, r2);
86 array[i].s = r;
87 r1 = r2;
88 r2 = r;
89 }
90 /* main loop */
91 for (; i < size - N; i++) {
92 r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2);
93 array[i].s = r;
94 r1 = r2;
95 r2 = r;
96 }
97 for (j = 0; j < 2 * N - size; j++) {
98 sfmt[j].s = array[j + size - N].s;
99 }
100 for (; i < size; i++) {
101 r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2);
102 array[i].s = r;
103 sfmt[j++].s = r;
104 r1 = r2;
105 r2 = r;
106 }
107 }
108
109 #ifndef ONLY64
110 /**
111 * This function swaps high and low 32-bit of 64-bit integers in user
112 * specified array.
113 *
114 * @param array an 128-bit array to be swaped.
115 * @param size size of 128-bit array.
116 */
117 inline static void swap(w128_t array[], int size) {
118 int i;
119 const vector unsigned char perm = (vector unsigned char)
120 (4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);
121
122 for (i = 0; i < size; i++) {
123 array[i].s = vec_perm(array[i].s, (vector unsigned int)perm, perm);
124 }
125 }
126 #endif