Mercurial > mplayer.hg
annotate liba52/liba52_changes.diff @ 24992:5701e23ebcb4
Better handling of win32 GUI thread:
1. Use _beginthreadex to create the GUI thread to avoid possible memory leak
when linked to MS CRT.
2. Terminate the GUI thread in a cleaner way using PostThreadMessage()
rather than the unrecommended TerminateThread().
author | zuxy |
---|---|
date | Sun, 11 Nov 2007 08:14:57 +0000 |
parents | 49a433e2e78f |
children | a7b716b53e9f |
rev | line source |
---|---|
19249 | 1 --- include/a52.h 2006-06-12 15:04:57.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2 +++ liba52/a52.h 2006-06-05 02:23:02.000000000 +0200 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
3 @@ -59,4 +63,9 @@ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
4 int a52_block (a52_state_t * state); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
5 void a52_free (a52_state_t * state); |
14990 | 6 |
7 +void* a52_resample_init(uint32_t mm_accel,int flags,int chans); | |
8 +extern int (* a52_resample) (float * _f, int16_t * s16); | |
9 + | |
10 +uint16_t crc16_block(uint8_t *data,uint32_t num_bytes); | |
11 + | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
12 #endif /* A52_H */ |
19249 | 13 --- liba52/a52_internal.h 2006-06-12 15:05:07.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
14 +++ liba52/a52_internal.h 2006-06-05 02:23:02.000000000 +0200 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
15 @@ -103,18 +107,34 @@ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
16 #define DELTA_BIT_NONE (2) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
17 #define DELTA_BIT_RESERVED (3) |
14990 | 18 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
19 +#ifdef ARCH_X86_64 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
20 +# define REG_a "rax" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
21 +# define REG_d "rdx" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
22 +# define REG_S "rsi" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
23 +# define REG_D "rdi" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
24 +# define REG_BP "rbp" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
25 +#else |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
26 +# define REG_a "eax" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
27 +# define REG_d "edx" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
28 +# define REG_S "esi" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
29 +# define REG_D "edi" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
30 +# define REG_BP "ebp" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
31 +#endif |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
32 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
33 void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
34 int start, int end, int fastleak, int slowleak, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
35 expbap_t * expbap); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
36 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
37 int a52_downmix_init (int input, int flags, sample_t * level, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
38 sample_t clev, sample_t slev); |
14990 | 39 +void downmix_accel_init(uint32_t mm_accel); |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
40 int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
41 sample_t clev, sample_t slev); |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
42 -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
43 +extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
44 sample_t clev, sample_t slev); |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
45 -void a52_upmix (sample_t * samples, int acmod, int output); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
46 +extern void (*a52_upmix) (sample_t * samples, int acmod, int output); |
14990 | 47 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
48 void a52_imdct_init (uint32_t mm_accel); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
49 void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
50 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
51 +extern void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
52 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias); |
19249 | 53 --- liba52/bitstream.c 2006-06-12 15:05:07.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
54 +++ liba52/bitstream.c 2006-06-05 02:23:02.000000000 +0200 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
55 @@ -31,6 +35,10 @@ |
14990 | 56 |
57 #define BUFFER_SIZE 4096 | |
58 | |
59 +#ifdef ALT_BITSTREAM_READER | |
60 +int indx=0; | |
61 +#endif | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
62 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
63 void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
64 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
65 int align; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
66 @@ -38,6 +46,9 @@ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
67 align = (long)buf & 3; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
68 state->buffer_start = (uint32_t *) (buf - align); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
69 state->bits_left = 0; |
14990 | 70 +#ifdef ALT_BITSTREAM_READER |
71 + indx=0; | |
72 +#endif | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
73 bitstream_get (state, align * 8); |
14990 | 74 } |
75 | |
19249 | 76 --- liba52/bitstream.h 2006-06-12 15:05:07.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
77 +++ liba52/bitstream.h 2006-06-05 02:23:02.000000000 +0200 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
78 @@ -21,6 +25,48 @@ |
14990 | 79 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
80 */ | |
81 | |
82 +/* code from ffmpeg/libavcodec */ | |
83 +#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC_ == 3 && __GNUC_MINOR__ > 0) | |
84 +# define always_inline __attribute__((always_inline)) inline | |
85 +#else | |
86 +# define always_inline inline | |
87 +#endif | |
88 + | |
89 +#if defined(__sparc__) || defined(hpux) | |
90 +/* | |
91 + * the alt bitstream reader performs unaligned memory accesses; that doesn't work | |
92 + * on sparc/hpux. For now, disable ALT_BITSTREAM_READER. | |
93 + */ | |
94 +#undef ALT_BITSTREAM_READER | |
95 +#else | |
96 +// alternative (faster) bitstram reader (reades upto 3 bytes over the end of the input) | |
97 +#define ALT_BITSTREAM_READER | |
98 + | |
23873 | 99 +/* used to avoid misaligned exceptions on some archs (alpha, ...) */ |
14990 | 100 +#if defined (ARCH_X86) || defined(ARCH_ARMV4L) |
101 +# define unaligned32(a) (*(uint32_t*)(a)) | |
102 +#else | |
103 +# ifdef __GNUC__ | |
104 +static always_inline uint32_t unaligned32(const void *v) { | |
105 + struct Unaligned { | |
106 + uint32_t i; | |
107 + } __attribute__((packed)); | |
108 + | |
109 + return ((const struct Unaligned *) v)->i; | |
110 +} | |
111 +# elif defined(__DECC) | |
112 +static inline uint32_t unaligned32(const void *v) { | |
113 + return *(const __unaligned uint32_t *) v; | |
114 +} | |
115 +# else | |
116 +static inline uint32_t unaligned32(const void *v) { | |
117 + return *(const uint32_t *) v; | |
118 +} | |
119 +# endif | |
120 +#endif //!ARCH_X86 | |
121 + | |
122 +#endif | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
123 + |
14990 | 124 /* (stolen from the kernel) */ |
125 #ifdef WORDS_BIGENDIAN | |
126 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
127 @@ -28,7 +74,7 @@ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
128 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
129 #else |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
130 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
131 -# if 0 && defined (__i386__) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
132 +# if defined (__i386__) |
14990 | 133 |
134 # define swab32(x) __i386_swab32(x) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
135 static inline const uint32_t __i386_swab32(uint32_t x) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
136 @@ -39,19 +85,34 @@ |
14990 | 137 |
138 # else | |
139 | |
140 -# define swab32(x)\ | |
141 -((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | \ | |
142 - (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3])) | |
143 - | |
144 +# define swab32(x) __generic_swab32(x) | |
145 + static always_inline const uint32_t __generic_swab32(uint32_t x) | |
146 + { | |
147 + return ((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | | |
148 + (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3])); | |
149 + } | |
150 # endif | |
151 #endif | |
152 | |
153 +#ifdef ALT_BITSTREAM_READER | |
154 +extern int indx; | |
155 +#endif | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
156 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
157 void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
158 uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
159 int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits); |
14990 | 160 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
161 static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) |
14990 | 162 { |
163 +#ifdef ALT_BITSTREAM_READER | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
164 + uint32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) ); |
14990 | 165 + |
166 + result<<= (indx&0x07); | |
167 + result>>= 32 - num_bits; | |
168 + indx+= num_bits; | |
169 + | |
170 + return result; | |
171 +#else | |
172 uint32_t result; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
173 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
174 if (num_bits < state->bits_left) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
175 @@ -61,10 +122,29 @@ |
14990 | 176 } |
177 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
178 return a52_bitstream_get_bh (state, num_bits); |
14990 | 179 +#endif |
180 +} | |
181 + | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
182 +static inline void bitstream_skip(a52_state_t * state, int num_bits) |
14990 | 183 +{ |
184 +#ifdef ALT_BITSTREAM_READER | |
185 + indx+= num_bits; | |
186 +#else | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
187 + bitstream_get(state, num_bits); |
14990 | 188 +#endif |
189 } | |
190 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
191 static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits) |
14990 | 192 { |
193 +#ifdef ALT_BITSTREAM_READER | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
194 + int32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) ); |
14990 | 195 + |
196 + result<<= (indx&0x07); | |
197 + result>>= 32 - num_bits; | |
198 + indx+= num_bits; | |
199 + | |
200 + return result; | |
201 +#else | |
202 int32_t result; | |
203 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
204 if (num_bits < state->bits_left) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
205 @@ -74,4 +154,5 @@ |
14990 | 206 } |
207 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
208 return a52_bitstream_get_bh_2 (state, num_bits); |
14990 | 209 +#endif |
210 } | |
19249 | 211 --- liba52/downmix.c 2006-06-12 15:17:53.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
212 +++ liba52/downmix.c 2006-06-05 02:23:02.000000000 +0200 |
19373 | 213 @@ -19,18 +23,46 @@ |
14990 | 214 * You should have received a copy of the GNU General Public License |
215 * along with this program; if not, write to the Free Software | |
216 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
217 + * | |
218 + * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) | |
219 */ | |
220 | |
221 #include "config.h" | |
222 | |
223 #include <string.h> | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
224 #include <inttypes.h> |
14990 | 225 |
226 #include "a52.h" | |
227 #include "a52_internal.h" | |
228 +#include "mm_accel.h" | |
229 | |
230 #define CONVERT(acmod,output) (((output) << 3) + (acmod)) | |
231 | |
232 + | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
233 +void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias, |
14990 | 234 + sample_t clev, sample_t slev)= NULL; |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
235 +void (*a52_upmix)(sample_t * samples, int acmod, int output)= NULL; |
14990 | 236 + |
237 +static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, | |
238 + sample_t clev, sample_t slev); | |
239 +static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, | |
240 + sample_t clev, sample_t slev); | |
241 +static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, | |
242 + sample_t clev, sample_t slev); | |
243 +static void upmix_MMX (sample_t * samples, int acmod, int output); | |
244 +static void upmix_C (sample_t * samples, int acmod, int output); | |
245 + | |
246 +void downmix_accel_init(uint32_t mm_accel) | |
247 +{ | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
248 + a52_upmix= upmix_C; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
249 + a52_downmix= downmix_C; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
250 +#if defined(ARCH_X86) || defined(ARCH_X86_64) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
251 + if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
252 + if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
253 + if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; |
14990 | 254 +#endif |
255 +} | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
256 + |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
257 int a52_downmix_init (int input, int flags, sample_t * level, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
258 sample_t clev, sample_t slev) |
14990 | 259 { |
19373 | 260 @@ -447,7 +479,7 @@ |
14990 | 261 samples[i] = 0; |
262 } | |
263 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
264 -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
265 +void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
266 sample_t clev, sample_t slev) |
14990 | 267 { |
268 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
19373 | 269 @@ -559,7 +591,7 @@ |
14990 | 270 break; |
271 | |
272 case CONVERT (A52_3F2R, A52_2F1R): | |
273 - mix3to2 (samples, bias); | |
274 + mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
275 move2to1 (samples + 768, samples + 512, bias); | |
276 break; | |
277 | |
19373 | 278 @@ -583,12 +615,12 @@ |
14990 | 279 break; |
280 | |
281 case CONVERT (A52_3F1R, A52_3F2R): | |
282 - memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t)); | |
283 + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
284 break; | |
285 } | |
286 } | |
287 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
288 -void a52_upmix (sample_t * samples, int acmod, int output) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
289 +void upmix_C (sample_t * samples, int acmod, int output) |
14990 | 290 { |
291 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
292 | |
19373 | 293 @@ -653,3 +685,1137 @@ |
14990 | 294 goto mix_31to21; |
295 } | |
296 } | |
297 + | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
298 +#if defined(ARCH_X86) || defined(ARCH_X86_64) |
14990 | 299 +static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) |
300 +{ | |
301 + asm volatile( | |
302 + "movlps %2, %%xmm7 \n\t" | |
303 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
304 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 305 + ASMALIGN(4) |
14990 | 306 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
307 + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
308 + "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
309 + "addps (%1, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
310 + "addps 16(%1, %%"REG_S"), %%xmm1\n\t" |
14990 | 311 + "addps %%xmm7, %%xmm0 \n\t" |
312 + "addps %%xmm7, %%xmm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
313 + "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
314 + "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
315 + "add $32, %%"REG_S" \n\t" |
14990 | 316 + " jnz 1b \n\t" |
317 + :: "r" (src+256), "r" (dest+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
318 + : "%"REG_S |
14990 | 319 + ); |
320 +} | |
321 + | |
322 +static void mix3to1_SSE (sample_t * samples, sample_t bias) | |
323 +{ | |
324 + asm volatile( | |
325 + "movlps %1, %%xmm7 \n\t" | |
326 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
327 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 328 + ASMALIGN(4) |
14990 | 329 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
330 + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
331 + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
332 + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" |
14990 | 333 + "addps %%xmm7, %%xmm1 \n\t" |
334 + "addps %%xmm1, %%xmm0 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
335 + "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
336 + "add $16, %%"REG_S" \n\t" |
14990 | 337 + " jnz 1b \n\t" |
338 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
339 + : "%"REG_S |
14990 | 340 + ); |
341 +} | |
342 + | |
343 +static void mix4to1_SSE (sample_t * samples, sample_t bias) | |
344 +{ | |
345 + asm volatile( | |
346 + "movlps %1, %%xmm7 \n\t" | |
347 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
348 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 349 + ASMALIGN(4) |
14990 | 350 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
351 + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
352 + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
353 + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
354 + "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" |
14990 | 355 + "addps %%xmm7, %%xmm0 \n\t" |
356 + "addps %%xmm1, %%xmm0 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
357 + "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
358 + "add $16, %%"REG_S" \n\t" |
14990 | 359 + " jnz 1b \n\t" |
360 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
361 + : "%"REG_S |
14990 | 362 + ); |
363 +} | |
364 + | |
365 +static void mix5to1_SSE (sample_t * samples, sample_t bias) | |
366 +{ | |
367 + asm volatile( | |
368 + "movlps %1, %%xmm7 \n\t" | |
369 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
370 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 371 + ASMALIGN(4) |
14990 | 372 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
373 + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
374 + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
375 + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
376 + "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" |
14990 | 377 + "addps %%xmm7, %%xmm0 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
378 + "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" |
14990 | 379 + "addps %%xmm1, %%xmm0 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
380 + "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
381 + "add $16, %%"REG_S" \n\t" |
14990 | 382 + " jnz 1b \n\t" |
383 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
384 + : "%"REG_S |
14990 | 385 + ); |
386 +} | |
387 + | |
388 +static void mix3to2_SSE (sample_t * samples, sample_t bias) | |
389 +{ | |
390 + asm volatile( | |
391 + "movlps %1, %%xmm7 \n\t" | |
392 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
393 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 394 + ASMALIGN(4) |
14990 | 395 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
396 + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
14990 | 397 + "addps %%xmm7, %%xmm0 \n\t" //common |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
398 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
399 + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" |
14990 | 400 + "addps %%xmm0, %%xmm1 \n\t" |
401 + "addps %%xmm0, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
402 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
403 + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
404 + "add $16, %%"REG_S" \n\t" |
14990 | 405 + " jnz 1b \n\t" |
406 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
407 + : "%"REG_S |
14990 | 408 + ); |
409 +} | |
410 + | |
411 +static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) | |
412 +{ | |
413 + asm volatile( | |
414 + "movlps %2, %%xmm7 \n\t" | |
415 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
416 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 417 + ASMALIGN(4) |
14990 | 418 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
419 + "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" |
14990 | 420 + "addps %%xmm7, %%xmm0 \n\t" //common |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
421 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
422 + "movaps (%1, %%"REG_S"), %%xmm2 \n\t" |
14990 | 423 + "addps %%xmm0, %%xmm1 \n\t" |
424 + "addps %%xmm0, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
425 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
426 + "movaps %%xmm2, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
427 + "add $16, %%"REG_S" \n\t" |
14990 | 428 + " jnz 1b \n\t" |
429 + :: "r" (left+256), "r" (right+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
430 + : "%"REG_S |
14990 | 431 + ); |
432 +} | |
433 + | |
434 +static void mix21toS_SSE (sample_t * samples, sample_t bias) | |
435 +{ | |
436 + asm volatile( | |
437 + "movlps %1, %%xmm7 \n\t" | |
438 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
439 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 440 + ASMALIGN(4) |
14990 | 441 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
442 + "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
443 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
444 + "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" |
14990 | 445 + "addps %%xmm7, %%xmm1 \n\t" |
446 + "addps %%xmm7, %%xmm2 \n\t" | |
447 + "subps %%xmm0, %%xmm1 \n\t" | |
448 + "addps %%xmm0, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
449 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
450 + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
451 + "add $16, %%"REG_S" \n\t" |
14990 | 452 + " jnz 1b \n\t" |
453 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
454 + : "%"REG_S |
14990 | 455 + ); |
456 +} | |
457 + | |
458 +static void mix31to2_SSE (sample_t * samples, sample_t bias) | |
459 +{ | |
460 + asm volatile( | |
461 + "movlps %1, %%xmm7 \n\t" | |
462 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
463 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 464 + ASMALIGN(4) |
14990 | 465 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
466 + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
467 + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" |
14990 | 468 + "addps %%xmm7, %%xmm0 \n\t" // common |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
469 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
470 + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" |
14990 | 471 + "addps %%xmm0, %%xmm1 \n\t" |
472 + "addps %%xmm0, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
473 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
474 + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
475 + "add $16, %%"REG_S" \n\t" |
14990 | 476 + " jnz 1b \n\t" |
477 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
478 + : "%"REG_S |
14990 | 479 + ); |
480 +} | |
481 + | |
482 +static void mix31toS_SSE (sample_t * samples, sample_t bias) | |
483 +{ | |
484 + asm volatile( | |
485 + "movlps %1, %%xmm7 \n\t" | |
486 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
487 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 488 + ASMALIGN(4) |
14990 | 489 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
490 + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
491 + "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround |
14990 | 492 + "addps %%xmm7, %%xmm0 \n\t" // common |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
493 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
494 + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" |
14990 | 495 + "addps %%xmm0, %%xmm1 \n\t" |
496 + "addps %%xmm0, %%xmm2 \n\t" | |
497 + "subps %%xmm3, %%xmm1 \n\t" | |
498 + "addps %%xmm3, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
499 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
500 + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
501 + "add $16, %%"REG_S" \n\t" |
14990 | 502 + " jnz 1b \n\t" |
503 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
504 + : "%"REG_S |
14990 | 505 + ); |
506 +} | |
507 + | |
508 +static void mix22toS_SSE (sample_t * samples, sample_t bias) | |
509 +{ | |
510 + asm volatile( | |
511 + "movlps %1, %%xmm7 \n\t" | |
512 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
513 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 514 + ASMALIGN(4) |
14990 | 515 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
516 + "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
517 + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
518 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
519 + "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" |
14990 | 520 + "addps %%xmm7, %%xmm1 \n\t" |
521 + "addps %%xmm7, %%xmm2 \n\t" | |
522 + "subps %%xmm0, %%xmm1 \n\t" | |
523 + "addps %%xmm0, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
524 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
525 + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
526 + "add $16, %%"REG_S" \n\t" |
14990 | 527 + " jnz 1b \n\t" |
528 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
529 + : "%"REG_S |
14990 | 530 + ); |
531 +} | |
532 + | |
533 +static void mix32to2_SSE (sample_t * samples, sample_t bias) | |
534 +{ | |
535 + asm volatile( | |
536 + "movlps %1, %%xmm7 \n\t" | |
537 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
538 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 539 + ASMALIGN(4) |
14990 | 540 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
541 + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
14990 | 542 + "addps %%xmm7, %%xmm0 \n\t" // common |
543 + "movaps %%xmm0, %%xmm1 \n\t" // common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
544 + "addps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
545 + "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
546 + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
547 + "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
548 + "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
549 + "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
550 + "add $16, %%"REG_S" \n\t" |
14990 | 551 + " jnz 1b \n\t" |
552 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
553 + : "%"REG_S |
14990 | 554 + ); |
555 +} | |
556 + | |
557 +static void mix32toS_SSE (sample_t * samples, sample_t bias) | |
558 +{ | |
559 + asm volatile( | |
560 + "movlps %1, %%xmm7 \n\t" | |
561 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
562 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 563 + ASMALIGN(4) |
14990 | 564 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
565 + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
566 + "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" |
14990 | 567 + "addps %%xmm7, %%xmm0 \n\t" // common |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
568 + "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
569 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
570 + "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" |
14990 | 571 + "subps %%xmm2, %%xmm1 \n\t" |
572 + "addps %%xmm2, %%xmm3 \n\t" | |
573 + "addps %%xmm0, %%xmm1 \n\t" | |
574 + "addps %%xmm0, %%xmm3 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
575 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
576 + "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
577 + "add $16, %%"REG_S" \n\t" |
14990 | 578 + " jnz 1b \n\t" |
579 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
580 + : "%"REG_S |
14990 | 581 + ); |
582 +} | |
583 + | |
584 +static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) | |
585 +{ | |
586 + asm volatile( | |
587 + "movlps %2, %%xmm7 \n\t" | |
588 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
589 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 590 + ASMALIGN(4) |
14990 | 591 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
592 + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
593 + "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
594 + "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
595 + "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" |
14990 | 596 + "addps %%xmm7, %%xmm0 \n\t" |
597 + "addps %%xmm7, %%xmm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
598 + "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
599 + "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
600 + "add $32, %%"REG_S" \n\t" |
14990 | 601 + " jnz 1b \n\t" |
602 + :: "r" (src+256), "r" (dest+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
603 + : "%"REG_S |
14990 | 604 + ); |
605 +} | |
606 + | |
607 +static void zero_MMX(sample_t * samples) | |
608 +{ | |
609 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
610 + "mov $-1024, %%"REG_S" \n\t" |
14990 | 611 + "pxor %%mm0, %%mm0 \n\t" |
19373 | 612 + ASMALIGN(4) |
14990 | 613 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
614 + "movq %%mm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
615 + "movq %%mm0, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
616 + "movq %%mm0, 16(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
617 + "movq %%mm0, 24(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
618 + "add $32, %%"REG_S" \n\t" |
14990 | 619 + " jnz 1b \n\t" |
620 + "emms" | |
621 + :: "r" (samples+256) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
622 + : "%"REG_S |
14990 | 623 + ); |
624 +} | |
625 + | |
626 +/* | |
627 + I hope dest and src will be at least 8-byte aligned and that size | |
628 + is divisible by 8 with no remainder. | |
629 + Note: untested and unused. | |
630 +*/ | |
631 +static void copy_MMX(void *dest,const void *src,unsigned size) | |
632 +{ | |
633 + unsigned i; | |
634 + size /= 64; | |
635 + for(i=0;i<size;i++) | |
636 + { | |
637 + __asm __volatile( | |
638 + "movq %0, %%mm0\n\t" | |
639 + "movq 8%0, %%mm1\n\t" | |
640 + "movq 16%0, %%mm2\n\t" | |
641 + "movq 24%0, %%mm3\n\t" | |
642 + "movq 32%0, %%mm4\n\t" | |
643 + "movq 40%0, %%mm5\n\t" | |
644 + "movq 48%0, %%mm6\n\t" | |
645 + "movq 56%0, %%mm7\n\t" | |
646 + "movq %%mm0, %1\n\t" | |
647 + "movq %%mm1, 8%1\n\t" | |
648 + "movq %%mm2, 16%1\n\t" | |
649 + "movq %%mm3, 24%1\n\t" | |
650 + "movq %%mm4, 32%1\n\t" | |
651 + "movq %%mm5, 40%1\n\t" | |
652 + "movq %%mm6, 48%1\n\t" | |
653 + "movq %%mm7, 56%1\n\t" | |
654 + : | |
655 + :"m"(src),"m"(dest)); | |
656 + } | |
657 +} | |
658 + | |
659 +static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, | |
660 + sample_t clev, sample_t slev) | |
661 +{ | |
662 + switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
663 + | |
664 + case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
665 + memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
666 + break; | |
667 + | |
668 + case CONVERT (A52_CHANNEL, A52_MONO): | |
669 + case CONVERT (A52_STEREO, A52_MONO): | |
670 + mix_2to1_SSE: | |
671 + mix2to1_SSE (samples, samples + 256, bias); | |
672 + break; | |
673 + | |
674 + case CONVERT (A52_2F1R, A52_MONO): | |
675 + if (slev == 0) | |
676 + goto mix_2to1_SSE; | |
677 + case CONVERT (A52_3F, A52_MONO): | |
678 + mix_3to1_SSE: | |
679 + mix3to1_SSE (samples, bias); | |
680 + break; | |
681 + | |
682 + case CONVERT (A52_3F1R, A52_MONO): | |
683 + if (slev == 0) | |
684 + goto mix_3to1_SSE; | |
685 + case CONVERT (A52_2F2R, A52_MONO): | |
686 + if (slev == 0) | |
687 + goto mix_2to1_SSE; | |
688 + mix4to1_SSE (samples, bias); | |
689 + break; | |
690 + | |
691 + case CONVERT (A52_3F2R, A52_MONO): | |
692 + if (slev == 0) | |
693 + goto mix_3to1_SSE; | |
694 + mix5to1_SSE (samples, bias); | |
695 + break; | |
696 + | |
697 + case CONVERT (A52_MONO, A52_DOLBY): | |
698 + memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
699 + break; | |
700 + | |
701 + case CONVERT (A52_3F, A52_STEREO): | |
702 + case CONVERT (A52_3F, A52_DOLBY): | |
703 + mix_3to2_SSE: | |
704 + mix3to2_SSE (samples, bias); | |
705 + break; | |
706 + | |
707 + case CONVERT (A52_2F1R, A52_STEREO): | |
708 + if (slev == 0) | |
709 + break; | |
710 + mix21to2_SSE (samples, samples + 256, bias); | |
711 + break; | |
712 + | |
713 + case CONVERT (A52_2F1R, A52_DOLBY): | |
714 + mix21toS_SSE (samples, bias); | |
715 + break; | |
716 + | |
717 + case CONVERT (A52_3F1R, A52_STEREO): | |
718 + if (slev == 0) | |
719 + goto mix_3to2_SSE; | |
720 + mix31to2_SSE (samples, bias); | |
721 + break; | |
722 + | |
723 + case CONVERT (A52_3F1R, A52_DOLBY): | |
724 + mix31toS_SSE (samples, bias); | |
725 + break; | |
726 + | |
727 + case CONVERT (A52_2F2R, A52_STEREO): | |
728 + if (slev == 0) | |
729 + break; | |
730 + mix2to1_SSE (samples, samples + 512, bias); | |
731 + mix2to1_SSE (samples + 256, samples + 768, bias); | |
732 + break; | |
733 + | |
734 + case CONVERT (A52_2F2R, A52_DOLBY): | |
735 + mix22toS_SSE (samples, bias); | |
736 + break; | |
737 + | |
738 + case CONVERT (A52_3F2R, A52_STEREO): | |
739 + if (slev == 0) | |
740 + goto mix_3to2_SSE; | |
741 + mix32to2_SSE (samples, bias); | |
742 + break; | |
743 + | |
744 + case CONVERT (A52_3F2R, A52_DOLBY): | |
745 + mix32toS_SSE (samples, bias); | |
746 + break; | |
747 + | |
748 + case CONVERT (A52_3F1R, A52_3F): | |
749 + if (slev == 0) | |
750 + break; | |
751 + mix21to2_SSE (samples, samples + 512, bias); | |
752 + break; | |
753 + | |
754 + case CONVERT (A52_3F2R, A52_3F): | |
755 + if (slev == 0) | |
756 + break; | |
757 + mix2to1_SSE (samples, samples + 768, bias); | |
758 + mix2to1_SSE (samples + 512, samples + 1024, bias); | |
759 + break; | |
760 + | |
761 + case CONVERT (A52_3F1R, A52_2F1R): | |
762 + mix3to2_SSE (samples, bias); | |
763 + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
764 + break; | |
765 + | |
766 + case CONVERT (A52_2F2R, A52_2F1R): | |
767 + mix2to1_SSE (samples + 512, samples + 768, bias); | |
768 + break; | |
769 + | |
770 + case CONVERT (A52_3F2R, A52_2F1R): | |
771 + mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesn't seem to be used) | |
772 + move2to1_SSE (samples + 768, samples + 512, bias); | |
773 + break; | |
774 + | |
775 + case CONVERT (A52_3F2R, A52_3F1R): | |
776 + mix2to1_SSE (samples + 768, samples + 1024, bias); | |
777 + break; | |
778 + | |
779 + case CONVERT (A52_2F1R, A52_2F2R): | |
780 + memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
781 + break; | |
782 + | |
783 + case CONVERT (A52_3F1R, A52_2F2R): | |
784 + mix3to2_SSE (samples, bias); | |
785 + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
786 + break; | |
787 + | |
788 + case CONVERT (A52_3F2R, A52_2F2R): | |
789 + mix3to2_SSE (samples, bias); | |
790 + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
791 + memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
792 + break; | |
793 + | |
794 + case CONVERT (A52_3F1R, A52_3F2R): | |
795 + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
796 + break; | |
797 + } | |
798 +} | |
799 + | |
800 +static void upmix_MMX (sample_t * samples, int acmod, int output) | |
801 +{ | |
802 + switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
803 + | |
804 + case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
805 + memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
806 + break; | |
807 + | |
808 + case CONVERT (A52_3F2R, A52_MONO): | |
809 + zero_MMX (samples + 1024); | |
810 + case CONVERT (A52_3F1R, A52_MONO): | |
811 + case CONVERT (A52_2F2R, A52_MONO): | |
812 + zero_MMX (samples + 768); | |
813 + case CONVERT (A52_3F, A52_MONO): | |
814 + case CONVERT (A52_2F1R, A52_MONO): | |
815 + zero_MMX (samples + 512); | |
816 + case CONVERT (A52_CHANNEL, A52_MONO): | |
817 + case CONVERT (A52_STEREO, A52_MONO): | |
818 + zero_MMX (samples + 256); | |
819 + break; | |
820 + | |
821 + case CONVERT (A52_3F2R, A52_STEREO): | |
822 + case CONVERT (A52_3F2R, A52_DOLBY): | |
823 + zero_MMX (samples + 1024); | |
824 + case CONVERT (A52_3F1R, A52_STEREO): | |
825 + case CONVERT (A52_3F1R, A52_DOLBY): | |
826 + zero_MMX (samples + 768); | |
827 + case CONVERT (A52_3F, A52_STEREO): | |
828 + case CONVERT (A52_3F, A52_DOLBY): | |
829 + mix_3to2_MMX: | |
830 + memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); | |
831 + zero_MMX (samples + 256); | |
832 + break; | |
833 + | |
834 + case CONVERT (A52_2F2R, A52_STEREO): | |
835 + case CONVERT (A52_2F2R, A52_DOLBY): | |
836 + zero_MMX (samples + 768); | |
837 + case CONVERT (A52_2F1R, A52_STEREO): | |
838 + case CONVERT (A52_2F1R, A52_DOLBY): | |
839 + zero_MMX (samples + 512); | |
840 + break; | |
841 + | |
842 + case CONVERT (A52_3F2R, A52_3F): | |
843 + zero_MMX (samples + 1024); | |
844 + case CONVERT (A52_3F1R, A52_3F): | |
845 + case CONVERT (A52_2F2R, A52_2F1R): | |
846 + zero_MMX (samples + 768); | |
847 + break; | |
848 + | |
849 + case CONVERT (A52_3F2R, A52_3F1R): | |
850 + zero_MMX (samples + 1024); | |
851 + break; | |
852 + | |
853 + case CONVERT (A52_3F2R, A52_2F1R): | |
854 + zero_MMX (samples + 1024); | |
855 + case CONVERT (A52_3F1R, A52_2F1R): | |
856 + mix_31to21_MMX: | |
857 + memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
858 + goto mix_3to2_MMX; | |
859 + | |
860 + case CONVERT (A52_3F2R, A52_2F2R): | |
861 + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
862 + goto mix_31to21_MMX; | |
863 + } | |
864 +} | |
865 + | |
866 +static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) | |
867 +{ | |
868 + asm volatile( | |
869 + "movd %2, %%mm7 \n\t" | |
870 + "punpckldq %2, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
871 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 872 + ASMALIGN(4) |
14990 | 873 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
874 + "movq (%0, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
875 + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
876 + "movq 16(%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
877 + "movq 24(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
878 + "pfadd (%1, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
879 + "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
880 + "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
881 + "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" |
14990 | 882 + "pfadd %%mm7, %%mm0 \n\t" |
883 + "pfadd %%mm7, %%mm1 \n\t" | |
884 + "pfadd %%mm7, %%mm2 \n\t" | |
885 + "pfadd %%mm7, %%mm3 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
886 + "movq %%mm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
887 + "movq %%mm1, 8(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
888 + "movq %%mm2, 16(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
889 + "movq %%mm3, 24(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
890 + "add $32, %%"REG_S" \n\t" |
14990 | 891 + " jnz 1b \n\t" |
892 + :: "r" (src+256), "r" (dest+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
893 + : "%"REG_S |
14990 | 894 + ); |
895 +} | |
896 + | |
897 +static void mix3to1_3dnow (sample_t * samples, sample_t bias) | |
898 +{ | |
899 + asm volatile( | |
900 + "movd %1, %%mm7 \n\t" | |
901 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
902 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 903 + ASMALIGN(4) |
14990 | 904 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
905 + "movq (%0, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
906 + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
907 + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
908 + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
909 + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
910 + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" |
14990 | 911 + "pfadd %%mm7, %%mm0 \n\t" |
912 + "pfadd %%mm7, %%mm1 \n\t" | |
913 + "pfadd %%mm2, %%mm0 \n\t" | |
914 + "pfadd %%mm3, %%mm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
915 + "movq %%mm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
916 + "movq %%mm1, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
917 + "add $16, %%"REG_S" \n\t" |
14990 | 918 + " jnz 1b \n\t" |
919 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
920 + : "%"REG_S |
14990 | 921 + ); |
922 +} | |
923 + | |
924 +static void mix4to1_3dnow (sample_t * samples, sample_t bias) | |
925 +{ | |
926 + asm volatile( | |
927 + "movd %1, %%mm7 \n\t" | |
928 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
929 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 930 + ASMALIGN(4) |
14990 | 931 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
932 + "movq (%0, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
933 + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
934 + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
935 + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
936 + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
937 + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
938 + "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
939 + "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" |
14990 | 940 + "pfadd %%mm7, %%mm0 \n\t" |
941 + "pfadd %%mm7, %%mm1 \n\t" | |
942 + "pfadd %%mm2, %%mm0 \n\t" | |
943 + "pfadd %%mm3, %%mm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
944 + "movq %%mm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
945 + "movq %%mm1, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
946 + "add $16, %%"REG_S" \n\t" |
14990 | 947 + " jnz 1b \n\t" |
948 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
949 + : "%"REG_S |
14990 | 950 + ); |
951 +} | |
952 + | |
953 +static void mix5to1_3dnow (sample_t * samples, sample_t bias) | |
954 +{ | |
955 + asm volatile( | |
956 + "movd %1, %%mm7 \n\t" | |
957 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
958 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 959 + ASMALIGN(4) |
14990 | 960 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
961 + "movq (%0, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
962 + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
963 + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
964 + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
965 + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
966 + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
967 + "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
968 + "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" |
14990 | 969 + "pfadd %%mm7, %%mm0 \n\t" |
970 + "pfadd %%mm7, %%mm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
971 + "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
972 + "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" |
14990 | 973 + "pfadd %%mm2, %%mm0 \n\t" |
974 + "pfadd %%mm3, %%mm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
975 + "movq %%mm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
976 + "movq %%mm1, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
977 + "add $16, %%"REG_S" \n\t" |
14990 | 978 + " jnz 1b \n\t" |
979 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
980 + : "%"REG_S |
14990 | 981 + ); |
982 +} | |
983 + | |
984 +static void mix3to2_3dnow (sample_t * samples, sample_t bias) | |
985 +{ | |
986 + asm volatile( | |
987 + "movd %1, %%mm7 \n\t" | |
988 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
989 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 990 + ASMALIGN(4) |
14990 | 991 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
992 + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
993 + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" |
14990 | 994 + "pfadd %%mm7, %%mm0 \n\t" //common |
995 + "pfadd %%mm7, %%mm1 \n\t" //common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
996 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
997 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
998 + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
999 + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" |
14990 | 1000 + "pfadd %%mm0, %%mm2 \n\t" |
1001 + "pfadd %%mm1, %%mm3 \n\t" | |
1002 + "pfadd %%mm0, %%mm4 \n\t" | |
1003 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1004 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1005 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1006 + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1007 + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1008 + "add $16, %%"REG_S" \n\t" |
14990 | 1009 + " jnz 1b \n\t" |
1010 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1011 + : "%"REG_S |
14990 | 1012 + ); |
1013 +} | |
1014 + | |
1015 +static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) | |
1016 +{ | |
1017 + asm volatile( | |
1018 + "movd %2, %%mm7 \n\t" | |
1019 + "punpckldq %2, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1020 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1021 + ASMALIGN(4) |
14990 | 1022 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1023 + "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1024 + "movq 1032(%1, %%"REG_S"), %%mm1\n\t" |
14990 | 1025 + "pfadd %%mm7, %%mm0 \n\t" //common |
1026 + "pfadd %%mm7, %%mm1 \n\t" //common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1027 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1028 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1029 + "movq (%1, %%"REG_S"), %%mm4 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1030 + "movq 8(%1, %%"REG_S"), %%mm5 \n\t" |
14990 | 1031 + "pfadd %%mm0, %%mm2 \n\t" |
1032 + "pfadd %%mm1, %%mm3 \n\t" | |
1033 + "pfadd %%mm0, %%mm4 \n\t" | |
1034 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1035 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1036 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1037 + "movq %%mm4, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1038 + "movq %%mm5, 8(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1039 + "add $16, %%"REG_S" \n\t" |
14990 | 1040 + " jnz 1b \n\t" |
1041 + :: "r" (left+256), "r" (right+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1042 + : "%"REG_S |
14990 | 1043 + ); |
1044 +} | |
1045 + | |
1046 +static void mix21toS_3dnow (sample_t * samples, sample_t bias) | |
1047 +{ | |
1048 + asm volatile( | |
1049 + "movd %1, %%mm7 \n\t" | |
1050 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1051 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1052 + ASMALIGN(4) |
14990 | 1053 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1054 + "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1055 + "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1056 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1057 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1058 + "movq 1024(%0, %%"REG_S"), %%mm4\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1059 + "movq 1032(%0, %%"REG_S"), %%mm5\n\t" |
14990 | 1060 + "pfadd %%mm7, %%mm2 \n\t" |
1061 + "pfadd %%mm7, %%mm3 \n\t" | |
1062 + "pfadd %%mm7, %%mm4 \n\t" | |
1063 + "pfadd %%mm7, %%mm5 \n\t" | |
1064 + "pfsub %%mm0, %%mm2 \n\t" | |
1065 + "pfsub %%mm1, %%mm3 \n\t" | |
1066 + "pfadd %%mm0, %%mm4 \n\t" | |
1067 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1068 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1069 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1070 + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1071 + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1072 + "add $16, %%"REG_S" \n\t" |
14990 | 1073 + " jnz 1b \n\t" |
1074 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1075 + : "%"REG_S |
14990 | 1076 + ); |
1077 +} | |
1078 + | |
1079 +static void mix31to2_3dnow (sample_t * samples, sample_t bias) | |
1080 +{ | |
1081 + asm volatile( | |
1082 + "movd %1, %%mm7 \n\t" | |
1083 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1084 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1085 + ASMALIGN(4) |
14990 | 1086 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1087 + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1088 + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1089 + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1090 + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" |
14990 | 1091 + "pfadd %%mm7, %%mm0 \n\t" // common |
1092 + "pfadd %%mm7, %%mm1 \n\t" // common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1093 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1094 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1095 + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1096 + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" |
14990 | 1097 + "pfadd %%mm0, %%mm2 \n\t" |
1098 + "pfadd %%mm1, %%mm3 \n\t" | |
1099 + "pfadd %%mm0, %%mm4 \n\t" | |
1100 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1101 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1102 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1103 + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1104 + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1105 + "add $16, %%"REG_S" \n\t" |
14990 | 1106 + " jnz 1b \n\t" |
1107 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1108 + : "%"REG_S |
14990 | 1109 + ); |
1110 +} | |
1111 + | |
1112 +static void mix31toS_3dnow (sample_t * samples, sample_t bias) | |
1113 +{ | |
1114 + asm volatile( | |
1115 + "movd %1, %%mm7 \n\t" | |
1116 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1117 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1118 + ASMALIGN(4) |
14990 | 1119 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1120 + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1121 + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" |
14990 | 1122 + "pfadd %%mm7, %%mm0 \n\t" // common |
1123 + "pfadd %%mm7, %%mm1 \n\t" // common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1124 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1125 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1126 + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1127 + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" |
14990 | 1128 + "pfadd %%mm0, %%mm2 \n\t" |
1129 + "pfadd %%mm1, %%mm3 \n\t" | |
1130 + "pfadd %%mm0, %%mm4 \n\t" | |
1131 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1132 + "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1133 + "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround |
14990 | 1134 + "pfsub %%mm0, %%mm2 \n\t" |
1135 + "pfsub %%mm1, %%mm3 \n\t" | |
1136 + "pfadd %%mm0, %%mm4 \n\t" | |
1137 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1138 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1139 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1140 + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1141 + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1142 + "add $16, %%"REG_S" \n\t" |
14990 | 1143 + " jnz 1b \n\t" |
1144 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1145 + : "%"REG_S |
14990 | 1146 + ); |
1147 +} | |
1148 + | |
1149 +static void mix22toS_3dnow (sample_t * samples, sample_t bias) | |
1150 +{ | |
1151 + asm volatile( | |
1152 + "movd %1, %%mm7 \n\t" | |
1153 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1154 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1155 + ASMALIGN(4) |
14990 | 1156 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1157 + "movq 2048(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1158 + "movq 2056(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1159 + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1160 + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1161 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1162 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1163 + "movq 1024(%0, %%"REG_S"), %%mm4\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1164 + "movq 1032(%0, %%"REG_S"), %%mm5\n\t" |
14990 | 1165 + "pfadd %%mm7, %%mm2 \n\t" |
1166 + "pfadd %%mm7, %%mm3 \n\t" | |
1167 + "pfadd %%mm7, %%mm4 \n\t" | |
1168 + "pfadd %%mm7, %%mm5 \n\t" | |
1169 + "pfsub %%mm0, %%mm2 \n\t" | |
1170 + "pfsub %%mm1, %%mm3 \n\t" | |
1171 + "pfadd %%mm0, %%mm4 \n\t" | |
1172 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1173 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1174 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1175 + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1176 + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1177 + "add $16, %%"REG_S" \n\t" |
14990 | 1178 + " jnz 1b \n\t" |
1179 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1180 + : "%"REG_S |
14990 | 1181 + ); |
1182 +} | |
1183 + | |
1184 +static void mix32to2_3dnow (sample_t * samples, sample_t bias) | |
1185 +{ | |
1186 + asm volatile( | |
1187 + "movd %1, %%mm7 \n\t" | |
1188 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1189 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1190 + ASMALIGN(4) |
14990 | 1191 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1192 + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1193 + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" |
14990 | 1194 + "pfadd %%mm7, %%mm0 \n\t" // common |
1195 + "pfadd %%mm7, %%mm1 \n\t" // common | |
1196 + "movq %%mm0, %%mm2 \n\t" // common | |
1197 + "movq %%mm1, %%mm3 \n\t" // common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1198 + "pfadd (%0, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1199 + "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1200 + "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1201 + "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1202 + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1203 + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1204 + "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1205 + "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1206 + "movq %%mm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1207 + "movq %%mm1, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1208 + "movq %%mm2, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1209 + "movq %%mm3, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1210 + "add $16, %%"REG_S" \n\t" |
14990 | 1211 + " jnz 1b \n\t" |
1212 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1213 + : "%"REG_S |
14990 | 1214 + ); |
1215 +} | |
1216 + | |
1217 +/* todo: should be optimized better */ | |
1218 +static void mix32toS_3dnow (sample_t * samples, sample_t bias) | |
1219 +{ | |
1220 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1221 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1222 + ASMALIGN(4) |
14990 | 1223 + "1: \n\t" |
1224 + "movd %1, %%mm7 \n\t" | |
1225 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1226 + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1227 + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1228 + "movq 3072(%0, %%"REG_S"), %%mm4\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1229 + "movq 3080(%0, %%"REG_S"), %%mm5\n\t" |
14990 | 1230 + "pfadd %%mm7, %%mm0 \n\t" // common |
1231 + "pfadd %%mm7, %%mm1 \n\t" // common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1232 + "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1233 + "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1234 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1235 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1236 + "movq 2048(%0, %%"REG_S"), %%mm6\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1237 + "movq 2056(%0, %%"REG_S"), %%mm7\n\t" |
14990 | 1238 + "pfsub %%mm4, %%mm2 \n\t" |
1239 + "pfsub %%mm5, %%mm3 \n\t" | |
1240 + "pfadd %%mm4, %%mm6 \n\t" | |
1241 + "pfadd %%mm5, %%mm7 \n\t" | |
1242 + "pfadd %%mm0, %%mm2 \n\t" | |
1243 + "pfadd %%mm1, %%mm3 \n\t" | |
1244 + "pfadd %%mm0, %%mm6 \n\t" | |
1245 + "pfadd %%mm1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1246 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1247 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1248 + "movq %%mm6, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1249 + "movq %%mm7, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1250 + "add $16, %%"REG_S" \n\t" |
14990 | 1251 + " jnz 1b \n\t" |
1252 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1253 + : "%"REG_S |
14990 | 1254 + ); |
1255 +} | |
1256 + | |
1257 +static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) | |
1258 +{ | |
1259 + asm volatile( | |
1260 + "movd %2, %%mm7 \n\t" | |
1261 + "punpckldq %2, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1262 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1263 + ASMALIGN(4) |
14990 | 1264 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1265 + "movq (%0, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1266 + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1267 + "movq 16(%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1268 + "movq 24(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1269 + "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1270 + "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1271 + "pfadd 1040(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1272 + "pfadd 1048(%0, %%"REG_S"), %%mm3\n\t" |
14990 | 1273 + "pfadd %%mm7, %%mm0 \n\t" |
1274 + "pfadd %%mm7, %%mm1 \n\t" | |
1275 + "pfadd %%mm7, %%mm2 \n\t" | |
1276 + "pfadd %%mm7, %%mm3 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1277 + "movq %%mm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1278 + "movq %%mm1, 8(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1279 + "movq %%mm2, 16(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1280 + "movq %%mm3, 24(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1281 + "add $32, %%"REG_S" \n\t" |
14990 | 1282 + " jnz 1b \n\t" |
1283 + :: "r" (src+256), "r" (dest+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1284 + : "%"REG_S |
14990 | 1285 + ); |
1286 +} | |
1287 + | |
1288 +static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, | |
1289 + sample_t clev, sample_t slev) | |
1290 +{ | |
1291 + switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1292 + | |
1293 + case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1294 + memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
1295 + break; | |
1296 + | |
1297 + case CONVERT (A52_CHANNEL, A52_MONO): | |
1298 + case CONVERT (A52_STEREO, A52_MONO): | |
1299 + mix_2to1_3dnow: | |
1300 + mix2to1_3dnow (samples, samples + 256, bias); | |
1301 + break; | |
1302 + | |
1303 + case CONVERT (A52_2F1R, A52_MONO): | |
1304 + if (slev == 0) | |
1305 + goto mix_2to1_3dnow; | |
1306 + case CONVERT (A52_3F, A52_MONO): | |
1307 + mix_3to1_3dnow: | |
1308 + mix3to1_3dnow (samples, bias); | |
1309 + break; | |
1310 + | |
1311 + case CONVERT (A52_3F1R, A52_MONO): | |
1312 + if (slev == 0) | |
1313 + goto mix_3to1_3dnow; | |
1314 + case CONVERT (A52_2F2R, A52_MONO): | |
1315 + if (slev == 0) | |
1316 + goto mix_2to1_3dnow; | |
1317 + mix4to1_3dnow (samples, bias); | |
1318 + break; | |
1319 + | |
1320 + case CONVERT (A52_3F2R, A52_MONO): | |
1321 + if (slev == 0) | |
1322 + goto mix_3to1_3dnow; | |
1323 + mix5to1_3dnow (samples, bias); | |
1324 + break; | |
1325 + | |
1326 + case CONVERT (A52_MONO, A52_DOLBY): | |
1327 + memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1328 + break; | |
1329 + | |
1330 + case CONVERT (A52_3F, A52_STEREO): | |
1331 + case CONVERT (A52_3F, A52_DOLBY): | |
1332 + mix_3to2_3dnow: | |
1333 + mix3to2_3dnow (samples, bias); | |
1334 + break; | |
1335 + | |
1336 + case CONVERT (A52_2F1R, A52_STEREO): | |
1337 + if (slev == 0) | |
1338 + break; | |
1339 + mix21to2_3dnow (samples, samples + 256, bias); | |
1340 + break; | |
1341 + | |
1342 + case CONVERT (A52_2F1R, A52_DOLBY): | |
1343 + mix21toS_3dnow (samples, bias); | |
1344 + break; | |
1345 + | |
1346 + case CONVERT (A52_3F1R, A52_STEREO): | |
1347 + if (slev == 0) | |
1348 + goto mix_3to2_3dnow; | |
1349 + mix31to2_3dnow (samples, bias); | |
1350 + break; | |
1351 + | |
1352 + case CONVERT (A52_3F1R, A52_DOLBY): | |
1353 + mix31toS_3dnow (samples, bias); | |
1354 + break; | |
1355 + | |
1356 + case CONVERT (A52_2F2R, A52_STEREO): | |
1357 + if (slev == 0) | |
1358 + break; | |
1359 + mix2to1_3dnow (samples, samples + 512, bias); | |
1360 + mix2to1_3dnow (samples + 256, samples + 768, bias); | |
1361 + break; | |
1362 + | |
1363 + case CONVERT (A52_2F2R, A52_DOLBY): | |
1364 + mix22toS_3dnow (samples, bias); | |
1365 + break; | |
1366 + | |
1367 + case CONVERT (A52_3F2R, A52_STEREO): | |
1368 + if (slev == 0) | |
1369 + goto mix_3to2_3dnow; | |
1370 + mix32to2_3dnow (samples, bias); | |
1371 + break; | |
1372 + | |
1373 + case CONVERT (A52_3F2R, A52_DOLBY): | |
1374 + mix32toS_3dnow (samples, bias); | |
1375 + break; | |
1376 + | |
1377 + case CONVERT (A52_3F1R, A52_3F): | |
1378 + if (slev == 0) | |
1379 + break; | |
1380 + mix21to2_3dnow (samples, samples + 512, bias); | |
1381 + break; | |
1382 + | |
1383 + case CONVERT (A52_3F2R, A52_3F): | |
1384 + if (slev == 0) | |
1385 + break; | |
1386 + mix2to1_3dnow (samples, samples + 768, bias); | |
1387 + mix2to1_3dnow (samples + 512, samples + 1024, bias); | |
1388 + break; | |
1389 + | |
1390 + case CONVERT (A52_3F1R, A52_2F1R): | |
1391 + mix3to2_3dnow (samples, bias); | |
1392 + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1393 + break; | |
1394 + | |
1395 + case CONVERT (A52_2F2R, A52_2F1R): | |
1396 + mix2to1_3dnow (samples + 512, samples + 768, bias); | |
1397 + break; | |
1398 + | |
1399 + case CONVERT (A52_3F2R, A52_2F1R): | |
1400 + mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
1401 + move2to1_3dnow (samples + 768, samples + 512, bias); | |
1402 + break; | |
1403 + | |
1404 + case CONVERT (A52_3F2R, A52_3F1R): | |
1405 + mix2to1_3dnow (samples + 768, samples + 1024, bias); | |
1406 + break; | |
1407 + | |
1408 + case CONVERT (A52_2F1R, A52_2F2R): | |
1409 + memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1410 + break; | |
1411 + | |
1412 + case CONVERT (A52_3F1R, A52_2F2R): | |
1413 + mix3to2_3dnow (samples, bias); | |
1414 + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1415 + break; | |
1416 + | |
1417 + case CONVERT (A52_3F2R, A52_2F2R): | |
1418 + mix3to2_3dnow (samples, bias); | |
1419 + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1420 + memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
1421 + break; | |
1422 + | |
1423 + case CONVERT (A52_3F1R, A52_3F2R): | |
1424 + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
1425 + break; | |
1426 + } | |
1427 + __asm __volatile("femms":::"memory"); | |
1428 +} | |
1429 + | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1430 +#endif // ARCH_X86 || ARCH_X86_64 |
19249 | 1431 --- liba52/imdct.c 2006-06-12 15:18:27.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1432 +++ liba52/imdct.c 2006-06-12 19:18:39.000000000 +0200 |
19373 | 1433 @@ -26,6 +26,11 @@ |
14990 | 1434 * You should have received a copy of the GNU General Public License |
1435 * along with this program; if not, write to the Free Software | |
1436 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
1437 + * | |
1438 + * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) | |
1439 + * 3DNOW optimizations from Nick Kurshev <nickols_k@mail.ru> | |
1440 + * michael did port them from libac3 (untested, perhaps totally broken) | |
1441 + * AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org) | |
1442 */ | |
1443 | |
1444 #include "config.h" | |
19373 | 1445 @@ -39,12 +48,49 @@ |
14990 | 1446 #include "a52.h" |
1447 #include "a52_internal.h" | |
1448 #include "mm_accel.h" | |
1449 +#include "mangle.h" | |
1450 + | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1451 +void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1452 + |
14990 | 1453 +#ifdef RUNTIME_CPUDETECT |
1454 +#undef HAVE_3DNOWEX | |
1455 +#endif | |
1456 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1457 typedef struct complex_s { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1458 sample_t real; |
14990 | 1459 sample_t imag; |
1460 } complex_t; | |
1461 | |
1462 +static const int pm128[128] attribute_used __attribute__((aligned(16))) = | |
1463 +{ | |
1464 + 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120, | |
1465 + 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124, | |
1466 + 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122, | |
1467 + 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126, | |
1468 + 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121, | |
1469 + 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, | |
1470 + 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, | |
1471 + 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 | |
1472 +}; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1473 + |
14990 | 1474 +static uint8_t attribute_used bit_reverse_512[] = { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1475 + 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1476 + 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1477 + 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1478 + 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1479 + 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1480 + 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1481 + 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1482 + 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1483 + 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1484 + 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1485 + 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1486 + 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1487 + 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1488 + 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1489 + 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1490 + 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f}; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1491 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1492 static uint8_t fftorder[] = { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1493 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1494 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88, |
19373 | 1495 @@ -56,6 +102,40 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1496 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1497 }; |
14990 | 1498 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1499 +static complex_t __attribute__((aligned(16))) buf[128]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1500 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1501 +/* Twiddle factor LUT */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1502 +static complex_t __attribute__((aligned(16))) w_1[1]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1503 +static complex_t __attribute__((aligned(16))) w_2[2]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1504 +static complex_t __attribute__((aligned(16))) w_4[4]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1505 +static complex_t __attribute__((aligned(16))) w_8[8]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1506 +static complex_t __attribute__((aligned(16))) w_16[16]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1507 +static complex_t __attribute__((aligned(16))) w_32[32]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1508 +static complex_t __attribute__((aligned(16))) w_64[64]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1509 +static complex_t __attribute__((aligned(16))) * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64}; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1510 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1511 +/* Twiddle factors for IMDCT */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1512 +static sample_t __attribute__((aligned(16))) xcos1[128]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1513 +static sample_t __attribute__((aligned(16))) xsin1[128]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1514 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1515 +#if defined(ARCH_X86) || defined(ARCH_X86_64) |
14990 | 1516 +// NOTE: SSE needs 16byte alignment or it will segfault |
1517 +// | |
1518 +static float __attribute__((aligned(16))) sseSinCos1c[256]; | |
1519 +static float __attribute__((aligned(16))) sseSinCos1d[256]; | |
1520 +static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1}; | |
1521 +//static float __attribute__((aligned(16))) sseW0[4]; | |
1522 +static float __attribute__((aligned(16))) sseW1[8]; | |
1523 +static float __attribute__((aligned(16))) sseW2[16]; | |
1524 +static float __attribute__((aligned(16))) sseW3[32]; | |
1525 +static float __attribute__((aligned(16))) sseW4[64]; | |
1526 +static float __attribute__((aligned(16))) sseW5[128]; | |
1527 +static float __attribute__((aligned(16))) sseW6[256]; | |
1528 +static float __attribute__((aligned(16))) *sseW[7]= | |
1529 + {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6}; | |
1530 +static float __attribute__((aligned(16))) sseWindow[512]; | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1531 +#endif |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1532 + |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1533 /* Root values for IFFT */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1534 static sample_t roots16[3]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1535 static sample_t roots32[7]; |
19373 | 1536 @@ -241,7 +321,7 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1537 ifft_pass (buf, roots128 - 32, 32); |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1538 } |
14990 | 1539 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1540 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1541 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1542 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1543 int i, k; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1544 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; |
19373 | 1545 @@ -285,6 +365,714 @@ |
14990 | 1546 } |
1547 } | |
1548 | |
1549 +#ifdef HAVE_ALTIVEC | |
1550 + | |
1551 +#ifndef SYS_DARWIN | |
1552 +#include <altivec.h> | |
1553 +#endif | |
1554 + | |
1555 +// used to build registers permutation vectors (vcprm) | |
1556 +// the 's' are for words in the _s_econd vector | |
1557 +#define WORD_0 0x00,0x01,0x02,0x03 | |
1558 +#define WORD_1 0x04,0x05,0x06,0x07 | |
1559 +#define WORD_2 0x08,0x09,0x0a,0x0b | |
1560 +#define WORD_3 0x0c,0x0d,0x0e,0x0f | |
1561 +#define WORD_s0 0x10,0x11,0x12,0x13 | |
1562 +#define WORD_s1 0x14,0x15,0x16,0x17 | |
1563 +#define WORD_s2 0x18,0x19,0x1a,0x1b | |
1564 +#define WORD_s3 0x1c,0x1d,0x1e,0x1f | |
1565 + | |
1566 +#ifdef SYS_DARWIN | |
1567 +#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d) | |
1568 +#else | |
1569 +#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} | |
1570 +#endif | |
1571 + | |
1572 +// vcprmle is used to keep the same index as in the SSE version. | |
1573 +// it's the same as vcprm, with the index inversed | |
1574 +// ('le' is Little Endian) | |
1575 +#define vcprmle(a,b,c,d) vcprm(d,c,b,a) | |
1576 + | |
1577 +// used to build inverse/identity vectors (vcii) | |
1578 +// n is _n_egative, p is _p_ositive | |
1579 +#define FLOAT_n -1. | |
1580 +#define FLOAT_p 1. | |
1581 + | |
1582 +#ifdef SYS_DARWIN | |
1583 +#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d) | |
1584 +#else | |
1585 +#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} | |
1586 +#endif | |
1587 + | |
1588 +#ifdef SYS_DARWIN | |
1589 +#define FOUROF(a) (a) | |
1590 +#else | |
1591 +#define FOUROF(a) {a,a,a,a} | |
1592 +#endif | |
1593 + | |
1594 + | |
1595 +void | |
1596 +imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) | |
1597 +{ | |
1598 + int i; | |
1599 + int k; | |
1600 + int p,q; | |
1601 + int m; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1602 + long two_m; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1603 + long two_m_plus_one; |
14990 | 1604 + |
1605 + sample_t tmp_b_i; | |
1606 + sample_t tmp_b_r; | |
1607 + sample_t tmp_a_i; | |
1608 + sample_t tmp_a_r; | |
1609 + | |
1610 + sample_t *data_ptr; | |
1611 + sample_t *delay_ptr; | |
1612 + sample_t *window_ptr; | |
1613 + | |
1614 + /* 512 IMDCT with source and dest data in 'data' */ | |
1615 + | |
1616 + /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ | |
1617 + for( i=0; i < 128; i++) { | |
1618 + /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ | |
1619 + int j= bit_reverse_512[i]; | |
1620 + buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); | |
1621 + buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); | |
1622 + } | |
1623 + | |
1624 + /* 1. iteration */ | |
1625 + for(i = 0; i < 128; i += 2) { | |
1626 +#if 0 | |
1627 + tmp_a_r = buf[i].real; | |
1628 + tmp_a_i = buf[i].imag; | |
1629 + tmp_b_r = buf[i+1].real; | |
1630 + tmp_b_i = buf[i+1].imag; | |
1631 + buf[i].real = tmp_a_r + tmp_b_r; | |
1632 + buf[i].imag = tmp_a_i + tmp_b_i; | |
1633 + buf[i+1].real = tmp_a_r - tmp_b_r; | |
1634 + buf[i+1].imag = tmp_a_i - tmp_b_i; | |
1635 +#else | |
1636 + vector float temp, bufv; | |
1637 + | |
1638 + bufv = vec_ld(i << 3, (float*)buf); | |
1639 + temp = vec_perm(bufv, bufv, vcprm(2,3,0,1)); | |
1640 + bufv = vec_madd(bufv, vcii(p,p,n,n), temp); | |
1641 + vec_st(bufv, i << 3, (float*)buf); | |
1642 +#endif | |
1643 + } | |
1644 + | |
1645 + /* 2. iteration */ | |
1646 + // Note w[1]={{1,0}, {0,-1}} | |
1647 + for(i = 0; i < 128; i += 4) { | |
1648 +#if 0 | |
1649 + tmp_a_r = buf[i].real; | |
1650 + tmp_a_i = buf[i].imag; | |
1651 + tmp_b_r = buf[i+2].real; | |
1652 + tmp_b_i = buf[i+2].imag; | |
1653 + buf[i].real = tmp_a_r + tmp_b_r; | |
1654 + buf[i].imag = tmp_a_i + tmp_b_i; | |
1655 + buf[i+2].real = tmp_a_r - tmp_b_r; | |
1656 + buf[i+2].imag = tmp_a_i - tmp_b_i; | |
1657 + tmp_a_r = buf[i+1].real; | |
1658 + tmp_a_i = buf[i+1].imag; | |
1659 + /* WARNING: im <-> re here ! */ | |
1660 + tmp_b_r = buf[i+3].imag; | |
1661 + tmp_b_i = buf[i+3].real; | |
1662 + buf[i+1].real = tmp_a_r + tmp_b_r; | |
1663 + buf[i+1].imag = tmp_a_i - tmp_b_i; | |
1664 + buf[i+3].real = tmp_a_r - tmp_b_r; | |
1665 + buf[i+3].imag = tmp_a_i + tmp_b_i; | |
1666 +#else | |
1667 + vector float buf01, buf23, temp1, temp2; | |
1668 + | |
1669 + buf01 = vec_ld((i + 0) << 3, (float*)buf); | |
1670 + buf23 = vec_ld((i + 2) << 3, (float*)buf); | |
1671 + buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2)); | |
1672 + | |
1673 + temp1 = vec_madd(buf23, vcii(p,p,p,n), buf01); | |
1674 + temp2 = vec_madd(buf23, vcii(n,n,n,p), buf01); | |
1675 + | |
1676 + vec_st(temp1, (i + 0) << 3, (float*)buf); | |
1677 + vec_st(temp2, (i + 2) << 3, (float*)buf); | |
1678 +#endif | |
1679 + } | |
1680 + | |
1681 + /* 3. iteration */ | |
1682 + for(i = 0; i < 128; i += 8) { | |
1683 +#if 0 | |
1684 + tmp_a_r = buf[i].real; | |
1685 + tmp_a_i = buf[i].imag; | |
1686 + tmp_b_r = buf[i+4].real; | |
1687 + tmp_b_i = buf[i+4].imag; | |
1688 + buf[i].real = tmp_a_r + tmp_b_r; | |
1689 + buf[i].imag = tmp_a_i + tmp_b_i; | |
1690 + buf[i+4].real = tmp_a_r - tmp_b_r; | |
1691 + buf[i+4].imag = tmp_a_i - tmp_b_i; | |
1692 + tmp_a_r = buf[1+i].real; | |
1693 + tmp_a_i = buf[1+i].imag; | |
1694 + tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real; | |
1695 + tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real; | |
1696 + buf[1+i].real = tmp_a_r + tmp_b_r; | |
1697 + buf[1+i].imag = tmp_a_i + tmp_b_i; | |
1698 + buf[i+5].real = tmp_a_r - tmp_b_r; | |
1699 + buf[i+5].imag = tmp_a_i - tmp_b_i; | |
1700 + tmp_a_r = buf[i+2].real; | |
1701 + tmp_a_i = buf[i+2].imag; | |
1702 + /* WARNING re <-> im & sign */ | |
1703 + tmp_b_r = buf[i+6].imag; | |
1704 + tmp_b_i = - buf[i+6].real; | |
1705 + buf[i+2].real = tmp_a_r + tmp_b_r; | |
1706 + buf[i+2].imag = tmp_a_i + tmp_b_i; | |
1707 + buf[i+6].real = tmp_a_r - tmp_b_r; | |
1708 + buf[i+6].imag = tmp_a_i - tmp_b_i; | |
1709 + tmp_a_r = buf[i+3].real; | |
1710 + tmp_a_i = buf[i+3].imag; | |
1711 + tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag; | |
1712 + tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag; | |
1713 + buf[i+3].real = tmp_a_r + tmp_b_r; | |
1714 + buf[i+3].imag = tmp_a_i + tmp_b_i; | |
1715 + buf[i+7].real = tmp_a_r - tmp_b_r; | |
1716 + buf[i+7].imag = tmp_a_i - tmp_b_i; | |
1717 +#else | |
1718 + vector float buf01, buf23, buf45, buf67; | |
1719 + | |
1720 + buf01 = vec_ld((i + 0) << 3, (float*)buf); | |
1721 + buf23 = vec_ld((i + 2) << 3, (float*)buf); | |
1722 + | |
1723 + tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real; | |
1724 + tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real; | |
1725 + buf[i+5].real = tmp_b_r; | |
1726 + buf[i+5].imag = tmp_b_i; | |
1727 + tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag; | |
1728 + tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag; | |
1729 + buf[i+7].real = tmp_b_r; | |
1730 + buf[i+7].imag = tmp_b_i; | |
1731 + | |
1732 + buf23 = vec_ld((i + 2) << 3, (float*)buf); | |
1733 + buf45 = vec_ld((i + 4) << 3, (float*)buf); | |
1734 + buf67 = vec_ld((i + 6) << 3, (float*)buf); | |
1735 + buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3)); | |
1736 + | |
1737 + vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf); | |
1738 + vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf); | |
1739 + vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf); | |
1740 + vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf); | |
1741 +#endif | |
1742 + } | |
1743 + | |
1744 + /* 4-7. iterations */ | |
1745 + for (m=3; m < 7; m++) { | |
1746 + two_m = (1 << m); | |
1747 + | |
1748 + two_m_plus_one = two_m<<1; | |
1749 + | |
1750 + for(i = 0; i < 128; i += two_m_plus_one) { | |
1751 + for(k = 0; k < two_m; k+=2) { | |
1752 +#if 0 | |
1753 + int p = k + i; | |
1754 + int q = p + two_m; | |
1755 + tmp_a_r = buf[p].real; | |
1756 + tmp_a_i = buf[p].imag; | |
1757 + tmp_b_r = | |
1758 + buf[q].real * w[m][k].real - | |
1759 + buf[q].imag * w[m][k].imag; | |
1760 + tmp_b_i = | |
1761 + buf[q].imag * w[m][k].real + | |
1762 + buf[q].real * w[m][k].imag; | |
1763 + buf[p].real = tmp_a_r + tmp_b_r; | |
1764 + buf[p].imag = tmp_a_i + tmp_b_i; | |
1765 + buf[q].real = tmp_a_r - tmp_b_r; | |
1766 + buf[q].imag = tmp_a_i - tmp_b_i; | |
1767 + | |
1768 + tmp_a_r = buf[(p + 1)].real; | |
1769 + tmp_a_i = buf[(p + 1)].imag; | |
1770 + tmp_b_r = | |
1771 + buf[(q + 1)].real * w[m][(k + 1)].real - | |
1772 + buf[(q + 1)].imag * w[m][(k + 1)].imag; | |
1773 + tmp_b_i = | |
1774 + buf[(q + 1)].imag * w[m][(k + 1)].real + | |
1775 + buf[(q + 1)].real * w[m][(k + 1)].imag; | |
1776 + buf[(p + 1)].real = tmp_a_r + tmp_b_r; | |
1777 + buf[(p + 1)].imag = tmp_a_i + tmp_b_i; | |
1778 + buf[(q + 1)].real = tmp_a_r - tmp_b_r; | |
1779 + buf[(q + 1)].imag = tmp_a_i - tmp_b_i; | |
1780 +#else | |
1781 + int p = k + i; | |
1782 + int q = p + two_m; | |
1783 + vector float vecp, vecq, vecw, temp1, temp2, temp3, temp4; | |
1784 + const vector float vczero = (const vector float)FOUROF(0.); | |
1785 + // first compute buf[q] and buf[q+1] | |
1786 + vecq = vec_ld(q << 3, (float*)buf); | |
1787 + vecw = vec_ld(0, (float*)&(w[m][k])); | |
1788 + temp1 = vec_madd(vecq, vecw, vczero); | |
1789 + temp2 = vec_perm(vecq, vecq, vcprm(1,0,3,2)); | |
1790 + temp2 = vec_madd(temp2, vecw, vczero); | |
1791 + temp3 = vec_perm(temp1, temp2, vcprm(0,s0,2,s2)); | |
1792 + temp4 = vec_perm(temp1, temp2, vcprm(1,s1,3,s3)); | |
1793 + vecq = vec_madd(temp4, vcii(n,p,n,p), temp3); | |
1794 + // then butterfly with buf[p] and buf[p+1] | |
1795 + vecp = vec_ld(p << 3, (float*)buf); | |
1796 + | |
1797 + temp1 = vec_add(vecp, vecq); | |
1798 + temp2 = vec_sub(vecp, vecq); | |
1799 + | |
1800 + vec_st(temp1, p << 3, (float*)buf); | |
1801 + vec_st(temp2, q << 3, (float*)buf); | |
1802 +#endif | |
1803 + } | |
1804 + } | |
1805 + } | |
1806 + | |
1807 + /* Post IFFT complex multiply plus IFFT complex conjugate*/ | |
1808 + for( i=0; i < 128; i+=4) { | |
1809 + /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ | |
1810 +#if 0 | |
1811 + tmp_a_r = buf[(i + 0)].real; | |
1812 + tmp_a_i = -1.0 * buf[(i + 0)].imag; | |
1813 + buf[(i + 0)].real = | |
1814 + (tmp_a_r * xcos1[(i + 0)]) - (tmp_a_i * xsin1[(i + 0)]); | |
1815 + buf[(i + 0)].imag = | |
1816 + (tmp_a_r * xsin1[(i + 0)]) + (tmp_a_i * xcos1[(i + 0)]); | |
1817 + | |
1818 + tmp_a_r = buf[(i + 1)].real; | |
1819 + tmp_a_i = -1.0 * buf[(i + 1)].imag; | |
1820 + buf[(i + 1)].real = | |
1821 + (tmp_a_r * xcos1[(i + 1)]) - (tmp_a_i * xsin1[(i + 1)]); | |
1822 + buf[(i + 1)].imag = | |
1823 + (tmp_a_r * xsin1[(i + 1)]) + (tmp_a_i * xcos1[(i + 1)]); | |
1824 + | |
1825 + tmp_a_r = buf[(i + 2)].real; | |
1826 + tmp_a_i = -1.0 * buf[(i + 2)].imag; | |
1827 + buf[(i + 2)].real = | |
1828 + (tmp_a_r * xcos1[(i + 2)]) - (tmp_a_i * xsin1[(i + 2)]); | |
1829 + buf[(i + 2)].imag = | |
1830 + (tmp_a_r * xsin1[(i + 2)]) + (tmp_a_i * xcos1[(i + 2)]); | |
1831 + | |
1832 + tmp_a_r = buf[(i + 3)].real; | |
1833 + tmp_a_i = -1.0 * buf[(i + 3)].imag; | |
1834 + buf[(i + 3)].real = | |
1835 + (tmp_a_r * xcos1[(i + 3)]) - (tmp_a_i * xsin1[(i + 3)]); | |
1836 + buf[(i + 3)].imag = | |
1837 + (tmp_a_r * xsin1[(i + 3)]) + (tmp_a_i * xcos1[(i + 3)]); | |
1838 +#else | |
1839 + vector float bufv_0, bufv_2, cosv, sinv, temp1, temp2; | |
1840 + vector float temp0022, temp1133, tempCS01; | |
1841 + const vector float vczero = (const vector float)FOUROF(0.); | |
1842 + | |
1843 + bufv_0 = vec_ld((i + 0) << 3, (float*)buf); | |
1844 + bufv_2 = vec_ld((i + 2) << 3, (float*)buf); | |
1845 + | |
1846 + cosv = vec_ld(i << 2, xcos1); | |
1847 + sinv = vec_ld(i << 2, xsin1); | |
1848 + | |
1849 + temp0022 = vec_perm(bufv_0, bufv_0, vcprm(0,0,2,2)); | |
1850 + temp1133 = vec_perm(bufv_0, bufv_0, vcprm(1,1,3,3)); | |
1851 + tempCS01 = vec_perm(cosv, sinv, vcprm(0,s0,1,s1)); | |
1852 + temp1 = vec_madd(temp0022, tempCS01, vczero); | |
1853 + tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1)); | |
1854 + temp2 = vec_madd(temp1133, tempCS01, vczero); | |
1855 + bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1); | |
1856 + | |
1857 + vec_st(bufv_0, (i + 0) << 3, (float*)buf); | |
1858 + | |
1859 + /* idem with bufv_2 and high-order cosv/sinv */ | |
1860 + | |
1861 + temp0022 = vec_perm(bufv_2, bufv_2, vcprm(0,0,2,2)); | |
1862 + temp1133 = vec_perm(bufv_2, bufv_2, vcprm(1,1,3,3)); | |
1863 + tempCS01 = vec_perm(cosv, sinv, vcprm(2,s2,3,s3)); | |
1864 + temp1 = vec_madd(temp0022, tempCS01, vczero); | |
1865 + tempCS01 = vec_perm(cosv, sinv, vcprm(s2,2,s3,3)); | |
1866 + temp2 = vec_madd(temp1133, tempCS01, vczero); | |
1867 + bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1); | |
1868 + | |
1869 + vec_st(bufv_2, (i + 2) << 3, (float*)buf); | |
1870 + | |
1871 +#endif | |
1872 + } | |
1873 + | |
1874 + data_ptr = data; | |
1875 + delay_ptr = delay; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1876 + window_ptr = a52_imdct_window; |
14990 | 1877 + |
1878 + /* Window and convert to real valued signal */ | |
1879 + for(i=0; i< 64; i++) { | |
1880 + *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; | |
1881 + *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; | |
1882 + } | |
1883 + | |
1884 + for(i=0; i< 64; i++) { | |
1885 + *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; | |
1886 + *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; | |
1887 + } | |
1888 + | |
1889 + /* The trailing edge of the window goes into the delay line */ | |
1890 + delay_ptr = delay; | |
1891 + | |
1892 + for(i=0; i< 64; i++) { | |
1893 + *delay_ptr++ = -buf[64+i].real * *--window_ptr; | |
1894 + *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; | |
1895 + } | |
1896 + | |
1897 + for(i=0; i<64; i++) { | |
1898 + *delay_ptr++ = buf[i].imag * *--window_ptr; | |
1899 + *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; | |
1900 + } | |
1901 +} | |
1902 +#endif | |
1903 + | |
1904 + | |
1905 +// Stuff below this line is borrowed from libac3 | |
1906 +#include "srfftp.h" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1907 +#if defined(ARCH_X86) || defined(ARCH_X86_64) /* x86-only section starts here */ |
14990 | 1908 +#ifndef HAVE_3DNOW |
1909 +#define HAVE_3DNOW 1 /* default HAVE_3DNOW to 1 when the build did not set it, before srfftp_3dnow.h is included */ | |
1910 +#endif | |
1911 +#include "srfftp_3dnow.h" | |
1912 + | |
1913 +const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; /* 8-byte aligned; only bit 31 of the second word is set. NOTE(review): presumably a sign-bit mask for the second float of a pair - confirm against imdct_3dnow.h */ | |
1914 +const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; /* same pattern with bit 31 set in the first word instead */ | |
1915 +const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 }; /* both members are ~sqrt(2)/2 */ | |
1916 + | |
1917 +#undef HAVE_3DNOWEX /* first inclusion of imdct_3dnow.h happens with HAVE_3DNOWEX undefined */ | |
1918 +#include "imdct_3dnow.h" | |
1919 +#define HAVE_3DNOWEX /* second inclusion happens with HAVE_3DNOWEX defined. NOTE(review): presumably the header emits one variant per setting - confirm in imdct_3dnow.h */ | |
1920 +#include "imdct_3dnow.h" | |
1921 + | |
1922 +void | |
1923 +imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) | |
1924 +{ | |
1925 +/* int i,k; | |
1926 + int p,q;*/ | |
1927 + int m; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1928 + long two_m; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1929 + long two_m_plus_one; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1930 + long two_m_plus_one_shl3; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1931 + complex_t *buf_offset; |
14990 | 1932 + |
1933 +/* sample_t tmp_a_i; | |
1934 + sample_t tmp_a_r; | |
1935 + sample_t tmp_b_i; | |
1936 + sample_t tmp_b_r;*/ | |
1937 + | |
1938 + sample_t *data_ptr; | |
1939 + sample_t *delay_ptr; | |
1940 + sample_t *window_ptr; | |
1941 + | |
1942 + /* 512 IMDCT with source and dest data in 'data' */ | |
1943 + /* see the c version (dct_do_512()), it's almost identical, just in C */ | |
1944 + | |
1945 + /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ | |
1946 + /* Bit reversed shuffling */ | |
1947 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1948 + "xor %%"REG_S", %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1949 + "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1950 + "mov $1008, %%"REG_D" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1951 + "push %%"REG_BP" \n\t" //use ebp without telling gcc |
19373 | 1952 + ASMALIGN(4) |
14990 | 1953 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1954 + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1955 + "movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1956 + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // XXXi |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1957 + "movhps (%0, %%"REG_D"), %%xmm1 \n\t" // rXXi |
14990 | 1958 + "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1959 + "movaps "MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm2\n\t" |
14990 | 1960 + "mulps %%xmm0, %%xmm2 \n\t" |
1961 + "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1962 + "mulps "MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t" |
14990 | 1963 + "subps %%xmm0, %%xmm2 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1964 + "movzb (%%"REG_a"), %%"REG_d" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1965 + "movzb 1(%%"REG_a"), %%"REG_BP" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1966 + "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1967 + "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1968 + "add $16, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1969 + "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1970 + "sub $16, %%"REG_D" \n\t" |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1971 + "jnc 1b \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1972 + "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g* |
14990 | 1973 + :: "b" (data), "c" (buf) |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1974 + : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d |
14990 | 1975 + ); |
1976 + | |
1977 + | |
1978 + /* FFT Merge */ | |
1979 +/* unoptimized variant | |
1980 + for (m=1; m < 7; m++) { | |
1981 + if(m) | |
1982 + two_m = (1 << m); | |
1983 + else | |
1984 + two_m = 1; | |
1985 + | |
1986 + two_m_plus_one = (1 << (m+1)); | |
1987 + | |
1988 + for(i = 0; i < 128; i += two_m_plus_one) { | |
1989 + for(k = 0; k < two_m; k++) { | |
1990 + p = k + i; | |
1991 + q = p + two_m; | |
1992 + tmp_a_r = buf[p].real; | |
1993 + tmp_a_i = buf[p].imag; | |
1994 + tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag; | |
1995 + tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag; | |
1996 + buf[p].real = tmp_a_r + tmp_b_r; | |
1997 + buf[p].imag = tmp_a_i + tmp_b_i; | |
1998 + buf[q].real = tmp_a_r - tmp_b_r; | |
1999 + buf[q].imag = tmp_a_i - tmp_b_i; | |
2000 + } | |
2001 + } | |
2002 + } | |
2003 +*/ | |
2004 + | |
2005 + /* 1. iteration */ | |
2006 + // Note w[0][0]={1,0} | |
2007 + asm volatile( | |
2008 + "xorps %%xmm1, %%xmm1 \n\t" | |
2009 + "xorps %%xmm2, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2010 + "mov %0, %%"REG_S" \n\t" |
19373 | 2011 + ASMALIGN(4) |
14990 | 2012 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2013 + "movlps (%%"REG_S"), %%xmm0\n\t" //buf[p] |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2014 + "movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q] |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2015 + "movhps (%%"REG_S"), %%xmm0\n\t" //buf[p] |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2016 + "movhps 8(%%"REG_S"), %%xmm2\n\t" //buf[q] |
14990 | 2017 + "addps %%xmm1, %%xmm0 \n\t" |
2018 + "subps %%xmm2, %%xmm0 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2019 + "movaps %%xmm0, (%%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2020 + "add $16, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2021 + "cmp %1, %%"REG_S" \n\t" |
14990 | 2022 + " jb 1b \n\t" |
2023 + :: "g" (buf), "r" (buf + 128) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2024 + : "%"REG_S |
14990 | 2025 + ); |
2026 + | |
2027 + /* 2. iteration */ | |
2028 + // Note w[1]={{1,0}, {0,-1}} | |
2029 + asm volatile( | |
2030 + "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2031 + "mov %0, %%"REG_S" \n\t" |
19373 | 2032 + ASMALIGN(4) |
14990 | 2033 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2034 + "movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3 |
14990 | 2035 + "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3 |
2036 + "mulps %%xmm7, %%xmm2 \n\t" //r2,i2,i3,-r3 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2037 + "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2038 + "movaps (%%"REG_S"), %%xmm1 \n\t" //r0,i0,r1,i1 |
14990 | 2039 + "addps %%xmm2, %%xmm0 \n\t" |
2040 + "subps %%xmm2, %%xmm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2041 + "movaps %%xmm0, (%%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2042 + "movaps %%xmm1, 16(%%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2043 + "add $32, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2044 + "cmp %1, %%"REG_S" \n\t" |
14990 | 2045 + " jb 1b \n\t" |
2046 + :: "g" (buf), "r" (buf + 128) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2047 + : "%"REG_S |
14990 | 2048 + ); |
2049 + | |
2050 + /* 3. iteration */ | |
2051 +/* | |
2052 + Note sseW2+0={1,1,sqrt(2),sqrt(2)} | |
2053 + Note sseW2+16={0,0,sqrt(2),-sqrt(2)} | |
2054 + Note sseW2+32={0,0,-sqrt(2),-sqrt(2)} | |
2055 + Note sseW2+48={1,-1,sqrt(2),-sqrt(2)} | |
2056 +*/ | |
2057 + asm volatile( | |
2058 + "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" | |
2059 + "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" | |
2060 + "xorps %%xmm5, %%xmm5 \n\t" | |
2061 + "xorps %%xmm2, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2062 + "mov %0, %%"REG_S" \n\t" |
19373 | 2063 + ASMALIGN(4) |
14990 | 2064 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2065 + "movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2066 + "movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7 |
14990 | 2067 + "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5 |
2068 + "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7 | |
2069 + "mulps %%xmm2, %%xmm4 \n\t" | |
2070 + "mulps %%xmm3, %%xmm5 \n\t" | |
2071 + "shufps $0xB1, %%xmm2, %%xmm2 \n\t" //i4,r4,i5,r5 | |
2072 + "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7 | |
2073 + "mulps %%xmm6, %%xmm3 \n\t" | |
2074 + "mulps %%xmm7, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2075 + "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2076 + "movaps 16(%%"REG_S"), %%xmm1 \n\t" //r2,i2,r3,i3 |
14990 | 2077 + "addps %%xmm4, %%xmm2 \n\t" |
2078 + "addps %%xmm5, %%xmm3 \n\t" | |
2079 + "movaps %%xmm2, %%xmm4 \n\t" | |
2080 + "movaps %%xmm3, %%xmm5 \n\t" | |
2081 + "addps %%xmm0, %%xmm2 \n\t" | |
2082 + "addps %%xmm1, %%xmm3 \n\t" | |
2083 + "subps %%xmm4, %%xmm0 \n\t" | |
2084 + "subps %%xmm5, %%xmm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2085 + "movaps %%xmm2, (%%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2086 + "movaps %%xmm3, 16(%%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2087 + "movaps %%xmm0, 32(%%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2088 + "movaps %%xmm1, 48(%%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2089 + "add $64, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2090 + "cmp %1, %%"REG_S" \n\t" |
14990 | 2091 + " jb 1b \n\t" |
2092 + :: "g" (buf), "r" (buf + 128) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2093 + : "%"REG_S |
14990 | 2094 + ); |
2095 + | |
2096 + /* 4-7. iterations */ | |
2097 + for (m=3; m < 7; m++) { | |
2098 + two_m = (1 << m); | |
2099 + two_m_plus_one = two_m<<1; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2100 + two_m_plus_one_shl3 = (two_m_plus_one<<3); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2101 + buf_offset = buf+128; |
14990 | 2102 + asm volatile( |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2103 + "mov %0, %%"REG_S" \n\t" |
19373 | 2104 + ASMALIGN(4) |
14990 | 2105 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2106 + "xor %%"REG_D", %%"REG_D" \n\t" // k |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2107 + "lea (%%"REG_S", %3), %%"REG_d" \n\t" |
14990 | 2108 + "2: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2109 + "movaps (%%"REG_d", %%"REG_D"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2110 + "movaps (%4, %%"REG_D", 2), %%xmm2 \n\t" |
14990 | 2111 + "mulps %%xmm1, %%xmm2 \n\t" |
2112 + "shufps $0xB1, %%xmm1, %%xmm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2113 + "mulps 16(%4, %%"REG_D", 2), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2114 + "movaps (%%"REG_S", %%"REG_D"), %%xmm0 \n\t" |
14990 | 2115 + "addps %%xmm2, %%xmm1 \n\t" |
2116 + "movaps %%xmm1, %%xmm2 \n\t" | |
2117 + "addps %%xmm0, %%xmm1 \n\t" | |
2118 + "subps %%xmm2, %%xmm0 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2119 + "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2120 + "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2121 + "add $16, %%"REG_D" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2122 + "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
2123 + "jb 2b \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2124 + "add %2, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2125 + "cmp %1, %%"REG_S" \n\t" |
14990 | 2126 + " jb 1b \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2127 + :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3), |
14990 | 2128 + "r" (sseW[m]) |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2129 + : "%"REG_S, "%"REG_D, "%"REG_d |
14990 | 2130 + ); |
2131 + } | |
2132 + | |
2133 + /* Post IFFT complex multiply plus IFFT complex conjugate*/ | |
2134 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2135 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 2136 + ASMALIGN(4) |
14990 | 2137 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2138 + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2139 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
14990 | 2140 + "shufps $0xB1, %%xmm0, %%xmm0 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2141 + "mulps 1024+"MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2142 + "mulps 1024+"MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t" |
14990 | 2143 + "addps %%xmm1, %%xmm0 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2144 + "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2145 + "add $16, %%"REG_S" \n\t" |
14990 | 2146 + " jnz 1b \n\t" |
2147 + :: "r" (buf+128) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2148 + : "%"REG_S |
14990 | 2149 + ); |
2150 + | |
2151 + | |
2152 + data_ptr = data; | |
2153 + delay_ptr = delay; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2154 + window_ptr = a52_imdct_window; |
14990 | 2155 + |
2156 + /* Window and convert to real valued signal */ | |
2157 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2158 + "xor %%"REG_D", %%"REG_D" \n\t" // 0 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2159 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 |
14990 | 2160 + "movss %3, %%xmm2 \n\t" // bias |
2161 + "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... | |
19373 | 2162 + ASMALIGN(4) |
14990 | 2163 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2164 + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2165 + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2166 + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2167 + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? |
14990 | 2168 + "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2169 + "mulps "MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2170 + "addps (%2, %%"REG_S"), %%xmm0 \n\t" |
14990 | 2171 + "addps %%xmm2, %%xmm0 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2172 + "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2173 + "add $16, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2174 + "sub $16, %%"REG_D" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2175 + "cmp $512, %%"REG_S" \n\t" |
14990 | 2176 + " jb 1b \n\t" |
2177 + :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2178 + : "%"REG_S, "%"REG_D |
14990 | 2179 + ); |
2180 + data_ptr+=128; | |
2181 + delay_ptr+=128; | |
2182 +// window_ptr+=128; | |
2183 + | |
2184 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2185 + "mov $1024, %%"REG_D" \n\t" // 512 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2186 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 |
14990 | 2187 + "movss %3, %%xmm2 \n\t" // bias |
2188 + "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... | |
19373 | 2189 + ASMALIGN(4) |
14990 | 2190 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2191 + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2192 + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2193 + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2194 + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A |
14990 | 2195 + "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2196 + "mulps 512+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2197 + "addps (%2, %%"REG_S"), %%xmm0 \n\t" |
14990 | 2198 + "addps %%xmm2, %%xmm0 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2199 + "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2200 + "add $16, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2201 + "sub $16, %%"REG_D" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2202 + "cmp $512, %%"REG_S" \n\t" |
14990 | 2203 + " jb 1b \n\t" |
2204 + :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2205 + : "%"REG_S, "%"REG_D |
14990 | 2206 + ); |
2207 + data_ptr+=128; | |
2208 +// window_ptr+=128; | |
2209 + | |
2210 + /* The trailing edge of the window goes into the delay line */ | |
2211 + delay_ptr = delay; | |
2212 + | |
2213 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2214 + "xor %%"REG_D", %%"REG_D" \n\t" // 0 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2215 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 |
19373 | 2216 + ASMALIGN(4) |
14990 | 2217 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2218 + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2219 + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2220 + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2221 + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A |
14990 | 2222 + "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2223 + "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2224 + "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2225 + "add $16, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2226 + "sub $16, %%"REG_D" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2227 + "cmp $512, %%"REG_S" \n\t" |
14990 | 2228 + " jb 1b \n\t" |
2229 + :: "r" (buf+64), "r" (delay_ptr) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2230 + : "%"REG_S, "%"REG_D |
14990 | 2231 + ); |
2232 + delay_ptr+=128; | |
2233 +// window_ptr-=128; | |
2234 + | |
2235 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2236 + "mov $1024, %%"REG_D" \n\t" // 1024 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2237 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 |
19373 | 2238 + ASMALIGN(4) |
14990 | 2239 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2240 + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2241 + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2242 + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2243 + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? |
14990 | 2244 + "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2245 + "mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2246 + "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2247 + "add $16, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2248 + "sub $16, %%"REG_D" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2249 + "cmp $512, %%"REG_S" \n\t" |
14990 | 2250 + " jb 1b \n\t" |
2251 + :: "r" (buf), "r" (delay_ptr) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2252 + : "%"REG_S, "%"REG_D |
14990 | 2253 + ); |
2254 +} | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2255 +#endif // ARCH_X86 || ARCH_X86_64 |
14990 | 2256 + |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2257 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) |
14990 | 2258 { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2259 int i, k; |
19373 | 2260 @@ -364,7 +1152,7 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2261 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2262 void a52_imdct_init (uint32_t mm_accel) |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
2263 { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2264 - int i, k; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2265 + int i, j, k; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2266 double sum; |
14990 | 2267 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2268 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ |
19373 | 2269 @@ -416,6 +1204,99 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2270 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2271 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
2272 } |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2273 + for (i = 0; i < 128; i++) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2274 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2275 + xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2276 + } |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2277 + for (i = 0; i < 7; i++) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2278 + j = 1 << i; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2279 + for (k = 0; k < j; k++) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2280 + w[i][k].real = cos (-M_PI * k / j); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2281 + w[i][k].imag = sin (-M_PI * k / j); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2282 + } |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2283 + } |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2284 +#if defined(ARCH_X86) || defined(ARCH_X86_64) |
14990 | 2285 + for (i = 0; i < 128; i++) { |
2286 + sseSinCos1c[2*i+0]= xcos1[i]; | |
2287 + sseSinCos1c[2*i+1]= -xcos1[i]; | |
2288 + sseSinCos1d[2*i+0]= xsin1[i]; | |
2289 + sseSinCos1d[2*i+1]= xsin1[i]; | |
2290 + } | |
2291 + for (i = 1; i < 7; i++) { | |
2292 + j = 1 << i; | |
2293 + for (k = 0; k < j; k+=2) { | |
2294 + | |
2295 + sseW[i][4*k + 0] = w[i][k+0].real; | |
2296 + sseW[i][4*k + 1] = w[i][k+0].real; | |
2297 + sseW[i][4*k + 2] = w[i][k+1].real; | |
2298 + sseW[i][4*k + 3] = w[i][k+1].real; | |
2299 + | |
2300 + sseW[i][4*k + 4] = -w[i][k+0].imag; | |
2301 + sseW[i][4*k + 5] = w[i][k+0].imag; | |
2302 + sseW[i][4*k + 6] = -w[i][k+1].imag; | |
2303 + sseW[i][4*k + 7] = w[i][k+1].imag; | |
2304 + | |
2305 + //we multiply more or less uninitalized numbers so we need to use exactly 0.0 | |
2306 + if(k==0) | |
2307 + { | |
2308 +// sseW[i][4*k + 0]= sseW[i][4*k + 1]= 1.0; | |
2309 + sseW[i][4*k + 4]= sseW[i][4*k + 5]= 0.0; | |
2310 + } | |
2311 + | |
2312 + if(2*k == j) | |
2313 + { | |
2314 + sseW[i][4*k + 0]= sseW[i][4*k + 1]= 0.0; | |
2315 +// sseW[i][4*k + 4]= -(sseW[i][4*k + 5]= -1.0); | |
2316 + } | |
2317 + } | |
2318 + } | |
2319 + | |
2320 + for(i=0; i<128; i++) | |
2321 + { | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2322 + sseWindow[2*i+0]= -a52_imdct_window[2*i+0]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2323 + sseWindow[2*i+1]= a52_imdct_window[2*i+1]; |
14990 | 2324 + } |
2325 + | |
2326 + for(i=0; i<64; i++) | |
2327 + { | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2328 + sseWindow[256 + 2*i+0]= -a52_imdct_window[254 - 2*i+1]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2329 + sseWindow[256 + 2*i+1]= a52_imdct_window[254 - 2*i+0]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2330 + sseWindow[384 + 2*i+0]= a52_imdct_window[126 - 2*i+1]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2331 + sseWindow[384 + 2*i+1]= -a52_imdct_window[126 - 2*i+0]; |
14990 | 2332 + } |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2333 +#endif |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2334 + a52_imdct_512 = imdct_do_512; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2335 + ifft128 = ifft128_c; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2336 + ifft64 = ifft64_c; |
14990 | 2337 + |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2338 +#if defined(ARCH_X86) || defined(ARCH_X86_64) |
14990 | 2339 + if(mm_accel & MM_ACCEL_X86_SSE) |
2340 + { | |
2341 + fprintf (stderr, "Using SSE optimized IMDCT transform\n"); | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2342 + a52_imdct_512 = imdct_do_512_sse; |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
2343 + } |
14990 | 2344 + else |
2345 + if(mm_accel & MM_ACCEL_X86_3DNOWEXT) | |
2346 + { | |
2347 + fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n"); | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2348 + a52_imdct_512 = imdct_do_512_3dnowex; |
14990 | 2349 + } |
2350 + else | |
2351 + if(mm_accel & MM_ACCEL_X86_3DNOW) | |
2352 + { | |
2353 + fprintf (stderr, "Using 3DNow optimized IMDCT transform\n"); | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2354 + a52_imdct_512 = imdct_do_512_3dnow; |
14990 | 2355 + } |
2356 + else | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2357 +#endif // ARCH_X86 || ARCH_X86_64 |
14990 | 2358 +#ifdef HAVE_ALTIVEC |
2359 + if (mm_accel & MM_ACCEL_PPC_ALTIVEC) | |
2360 + { | |
2361 + fprintf(stderr, "Using AltiVec optimized IMDCT transform\n"); | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2362 + a52_imdct_512 = imdct_do_512_altivec; |
14990 | 2363 + } |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
2364 + else |
14990 | 2365 +#endif |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2366 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2367 #ifdef LIBA52_DJBFFT |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2368 if (mm_accel & MM_ACCEL_DJBFFT) { |
19373 | 2369 @@ -426,7 +1307,5 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2370 #endif |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2371 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2372 fprintf (stderr, "No accelerated IMDCT transform found\n"); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2373 - ifft128 = ifft128_c; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2374 - ifft64 = ifft64_c; |
14990 | 2375 } |
2376 } | |
19249 | 2377 --- include/mm_accel.h 2006-06-12 15:05:00.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2378 +++ liba52/mm_accel.h 2006-06-05 02:23:04.000000000 +0200 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2379 @@ -30,7 +34,12 @@ |
14990 | 2380 /* x86 accelerations */ |
2381 #define MM_ACCEL_X86_MMX 0x80000000 | |
2382 #define MM_ACCEL_X86_3DNOW 0x40000000 | |
2383 +#define MM_ACCEL_X86_3DNOWEXT 0x08000000 | |
2384 #define MM_ACCEL_X86_MMXEXT 0x20000000 | |
2385 +#define MM_ACCEL_X86_SSE 0x10000000 | |
2386 + | |
2387 +/* PPC accelerations */ | |
2388 +#define MM_ACCEL_PPC_ALTIVEC 0x00010000 | |
2389 | |
2390 uint32_t mm_accel (void); | |
2391 | |
21491 | 2392 --- liba52/parse.c 2006-12-05 08:08:01.000000000 +0100 |
2393 +++ liba52/parse.c 2006-12-05 08:08:44.000000000 +0100 | |
2394 @@ -28,6 +28,7 @@ | |
14990 | 2395 #include "config.h" |
2396 | |
2397 #include <stdlib.h> | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2398 +#include <stdio.h> |
14990 | 2399 #include <string.h> |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2400 #include <inttypes.h> |
14990 | 2401 |
21491 | 2402 @@ -35,13 +36,12 @@ |
14990 | 2403 #include "a52_internal.h" |
2404 #include "bitstream.h" | |
2405 #include "tables.h" | |
2406 +#include "mm_accel.h" | |
21491 | 2407 +#include "libavutil/avutil.h" |
14990 | 2408 |
2409 #ifdef HAVE_MEMALIGN | |
2410 /* some systems have memalign() but no declaration for it */ | |
2411 void * memalign (size_t align, size_t size); | |
2412 -#else | |
2413 -/* assume malloc alignment is sufficient */ | |
2414 -#define memalign(align,size) malloc (size) | |
2415 #endif | |
2416 | |
2417 typedef struct { | |
21491 | 2418 @@ -64,7 +64,16 @@ |
2419 if (state == NULL) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2420 return NULL; |
14990 | 2421 |
21491 | 2422 +#if defined(__MINGW32__) && defined(HAVE_SSE) |
2423 + state->samples = av_malloc(256 * 12 * sizeof (sample_t)); | |
2424 +#else | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2425 state->samples = memalign (16, 256 * 12 * sizeof (sample_t)); |
14990 | 2426 +#endif |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2427 + if(((int)state->samples%16) && (mm_accel&MM_ACCEL_X86_SSE)){ |
14990 | 2428 + mm_accel &=~MM_ACCEL_X86_SSE; |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2429 + fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n"); |
14990 | 2430 + } |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
2431 + |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2432 if (state->samples == NULL) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2433 free (state); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2434 return NULL; |
21491 | 2435 @@ -78,6 +87,7 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2436 state->lfsr_state = 1; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2437 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2438 a52_imdct_init (mm_accel); |
14990 | 2439 + downmix_accel_init(mm_accel); |
2440 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2441 return state; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2442 } |
21491 | 2443 @@ -145,7 +155,7 @@ |
14990 | 2444 state->acmod = acmod = buf[6] >> 5; |
2445 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2446 a52_bitstream_set_ptr (state, buf + 6); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2447 - bitstream_get (state, 3); /* skip acmod we already parsed */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2448 + bitstream_skip (state, 3); /* skip acmod we already parsed */ |
14990 | 2449 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2450 if ((acmod == 2) && (bitstream_get (state, 2) == 2)) /* dsurmod */ |
14990 | 2451 acmod = A52_DOLBY; |
21491 | 2452 @@ -176,28 +186,28 @@ |
14990 | 2453 |
2454 chaninfo = !acmod; | |
2455 do { | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2456 - bitstream_get (state, 5); /* dialnorm */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2457 + bitstream_skip (state, 5); /* dialnorm */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2458 if (bitstream_get (state, 1)) /* compre */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2459 - bitstream_get (state, 8); /* compr */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2460 + bitstream_skip (state, 8); /* compr */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2461 if (bitstream_get (state, 1)) /* langcode */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2462 - bitstream_get (state, 8); /* langcod */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2463 + bitstream_skip (state, 8); /* langcod */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2464 if (bitstream_get (state, 1)) /* audprodie */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2465 - bitstream_get (state, 7); /* mixlevel + roomtyp */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2466 + bitstream_skip (state, 7); /* mixlevel + roomtyp */ |
14990 | 2467 } while (chaninfo--); |
2468 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2469 - bitstream_get (state, 2); /* copyrightb + origbs */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2470 + bitstream_skip (state, 2); /* copyrightb + origbs */ |
14990 | 2471 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2472 if (bitstream_get (state, 1)) /* timecod1e */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2473 - bitstream_get (state, 14); /* timecod1 */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2474 + bitstream_skip (state, 14); /* timecod1 */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2475 if (bitstream_get (state, 1)) /* timecod2e */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2476 - bitstream_get (state, 14); /* timecod2 */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2477 + bitstream_skip (state, 14); /* timecod2 */ |
14990 | 2478 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2479 if (bitstream_get (state, 1)) { /* addbsie */ |
14990 | 2480 int addbsil; |
2481 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2482 addbsil = bitstream_get (state, 6); |
14990 | 2483 do { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2484 - bitstream_get (state, 8); /* addbsi */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2485 + bitstream_skip (state, 8); /* addbsi */ |
14990 | 2486 } while (addbsil--); |
2487 } | |
2488 | |
21491 | 2489 @@ -684,7 +694,7 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2490 state->fbw_expbap[i].exp[0], |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2491 state->fbw_expbap[i].exp + 1)) |
14990 | 2492 return 1; |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2493 - bitstream_get (state, 2); /* gainrng */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2494 + bitstream_skip (state, 2); /* gainrng */ |
14990 | 2495 } |
2496 if (lfeexpstr != EXP_REUSE) { | |
2497 do_bit_alloc |= 32; | |
21491 | 2498 @@ -759,7 +769,7 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2499 if (bitstream_get (state, 1)) { /* skiple */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2500 i = bitstream_get (state, 9); /* skipl */ |
14990 | 2501 while (i--) |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2502 - bitstream_get (state, 8); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2503 + bitstream_skip (state, 8); |
14990 | 2504 } |
2505 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2506 samples = state->samples; |
21491 | 2507 @@ -900,6 +910,10 @@ |
2508 | |
2509 void a52_free (a52_state_t * state) | |
2510 { | |
2511 - free (state->samples); | |
2512 +#if defined(__MINGW32__) && defined(HAVE_SSE) | |
2513 + av_free (state->samples); | |
2514 +#else | |
2515 + free (state->samples); | |
2516 +#endif | |
2517 free (state); | |
2518 } |